HN2new | past | comments | ask | show | jobs | submitlogin

> horrible run times on craftily-constructed regexes

Here is a simple patch to make the runtime O(len(pattern) * len(text)) instead of exponential. It adds 5 lines and a new parameter. The idea is to memoize (cache) the results, specifically the failures (there is no need to cache the successes, because all the functions return immediately on success).

     // Match reports whether regexp matches anywhere in text.
     func Match(regexp, text string) bool {
    +    seen := make(map[[2]int]bool)
         if regexp != "" && regexp[0] == '^' {
    -        return matchHere(regexp[1:], text)
    +        return matchHere(regexp[1:], text, seen)
         }
         for {
    -        if matchHere(regexp, text) {
    +        if matchHere(regexp, text, seen) {
                 return true
             }
             if text == "" {
                 return false
             }
             text = text[1:]
         }
     }
     
     // matchHere reports whether regexp matches at beginning of text.
    -func matchHere(regexp, text string) bool {
    +func matchHere(regexp, text string, seen map[[2]int]bool) bool {
         switch {
         case regexp == "":
             return true
         case regexp == "$":
             return text == ""
         case len(regexp) >= 2 && regexp[1] == '*':
    -        return matchStar(regexp[0], regexp[2:], text)
    +        return matchStar(regexp[0], regexp[2:], text, seen)
         case text != "" && (regexp[0] == '.' || regexp[0] == text[0]):
    -        return matchHere(regexp[1:], text[1:])
    +        return matchHere(regexp[1:], text[1:], seen)
         }
         return false
     }
     
     // matchStar reports whether c*regexp matches at beginning of text.
    -func matchStar(c byte, regexp, text string) bool {
    +func matchStar(c byte, regexp, text string, seen map[[2]int]bool) bool {
         for {
    -        if matchHere(regexp, text) {
    +        if seen[[2]int{len(regexp), len(text)}] {
    +            return false
    +        }
    +        if matchHere(regexp, text, seen) {
                 return true
             }
             if text == "" || (text[0] != c && c != '.') {
                 return false
             }
    +        seen[[2]int{len(regexp), len(text)}] = true
             text = text[1:]
         }
     }

Demo: https://go.dev/play/p/aD9vzXwTHGE


Guidelines | FAQ | Lists | API | Security | Legal | Apply to YC | Contact

Search: