From cb97a254a56e79a5ab14b011f6406bd32331665f Mon Sep 17 00:00:00 2001 From: Eugene Bujak Date: Thu, 4 Oct 2018 13:19:43 +0300 Subject: [PATCH 1/2] dnsfilter -- avoid using regexps when simple suffix match is enough. This covers 96.98% of all adguard dns rules. --- dnsfilter/dnsfilter.go | 26 +++++++++++++++++++++++-- dnsfilter/dnsfilter_test.go | 34 +++++++++++++++++++++++++++++++- dnsfilter/rule_to_regexp.go | 39 +++++++++++++++++++++++++++++++++++-- 3 files changed, 94 insertions(+), 5 deletions(-) diff --git a/dnsfilter/dnsfilter.go b/dnsfilter/dnsfilter.go index 5b4c66fc..b40d6a38 100644 --- a/dnsfilter/dnsfilter.go +++ b/dnsfilter/dnsfilter.go @@ -67,6 +67,10 @@ type rule struct { // user-supplied data listID uint32 + // suffix matching + isSuffix bool + suffix string + // compiled regexp compiled *regexp.Regexp @@ -387,12 +391,21 @@ func (rule *rule) extractShortcut() { func (rule *rule) compile() error { rule.RLock() - isCompiled := rule.compiled != nil + isCompiled := rule.isSuffix || rule.compiled != nil rule.RUnlock() if isCompiled { return nil } + isSuffix, suffix := getSuffix(rule.text) + if isSuffix { + rule.Lock() + rule.isSuffix = isSuffix + rule.suffix = suffix + rule.Unlock() + return nil + } + expr, err := ruleToRegexp(rule.text) if err != nil { return err @@ -417,7 +430,16 @@ func (rule *rule) match(host string) (Result, error) { return res, err } rule.RLock() - matched := rule.compiled.MatchString(host) + matched := false + if rule.isSuffix { + if host == rule.suffix { + matched = true + } else if strings.HasSuffix(host, "."+rule.suffix) { + matched = true + } + } else { + matched = rule.compiled.MatchString(host) + } rule.RUnlock() if matched { res.Reason = FilteredBlackList diff --git a/dnsfilter/dnsfilter_test.go b/dnsfilter/dnsfilter_test.go index 61d0a4ae..19ae1c77 100644 --- a/dnsfilter/dnsfilter_test.go +++ b/dnsfilter/dnsfilter_test.go @@ -195,7 +195,7 @@ func TestRuleToRegexp(t *testing.T) { {"/doubleclick/", "doubleclick", nil}, {"/", "", ErrInvalidSyntax}, {`|double*?.+[]|(){}#$\|`, `^double.*\?\.\+\[\]\|\(\)\{\}\#\$\\$`, nil}, - {`||doubleclick.net^`, `^([a-z0-9-_.]+\.)?doubleclick\.net([^ a-zA-Z0-9.%]|$)`, nil}, + {`||doubleclick.net^`, `(?:^|\.)doubleclick\.net$`, nil}, } for _, testcase := range tests { converted, err := ruleToRegexp(testcase.rule) @@ -208,6 +208,38 @@ func TestRuleToRegexp(t *testing.T) { } } +func TestSuffixRule(t *testing.T) { + for _, testcase := range []struct { + rule string + isSuffix bool + suffix string + }{ + {`||doubleclick.net^`, true, `doubleclick.net`}, // entire string or subdomain match + {`||doubleclick.net|`, true, `doubleclick.net`}, // entire string or subdomain match + {`|doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net + {`*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net + {`doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net + {`|*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net + {`||*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net + {`||*doubleclick.net|`, false, ``}, // TODO: ends with doubleclick.net + {`||*doublec*lick.net^`, false, ``}, // has a wildcard inside, has to be regexp + {`||*doublec|lick.net^`, false, ``}, // has a special symbol inside, has to be regexp + {`/abracadabra/`, false, ``}, // regexp, not anchored + {`/abracadabra$/`, false, ``}, // TODO: simplify simple suffix regexes + } { + isSuffix, suffix := getSuffix(testcase.rule) + if testcase.isSuffix != isSuffix { + t.Errorf("Results do not match for \"%s\": got %v expected %v", testcase.rule, isSuffix, testcase.isSuffix) + continue + } + if testcase.isSuffix && testcase.suffix != suffix { + t.Errorf("Result suffix does not match for \"%s\": got \"%s\" expected \"%s\"", testcase.rule, suffix, testcase.suffix) + continue + } + // trace("\"%s\": %v: %s", testcase.rule, isSuffix, suffix) + } +} + // // helper functions // diff --git a/dnsfilter/rule_to_regexp.go b/dnsfilter/rule_to_regexp.go index 87fe3792..79c0320d 100644 --- a/dnsfilter/rule_to_regexp.go +++ b/dnsfilter/rule_to_regexp.go @@ -5,8 +5,8 @@ import ( ) func ruleToRegexp(rule string) (string, error) { - const hostStart = "^([a-z0-9-_.]+\\.)?" - const hostEnd = "([^ a-zA-Z0-9.%]|$)" + const hostStart = `(?:^|\.)` + const hostEnd = `$` // empty or short rule -- do nothing if !isValidRule(rule) { @@ -49,3 +49,38 @@ func ruleToRegexp(rule string) (string, error) { return sb.String(), nil } + +// handle suffix rule ||example.com^ -- either entire string is example.com or *.example.com +func getSuffix(rule string) (bool, string) { + // if starts with / and ends with /, it's already a regexp + // TODO: if a regexp is simple `/abracadabra$/`, then simplify it maybe? + if rule[0] == '/' && rule[len(rule)-1] == '/' { + return false, "" + } + + // must start with || + if rule[0] != '|' || rule[1] != '|' { + return false, "" + } + rule = rule[2:] + + // suffix rule must end with ^ or | + lastChar := rule[len(rule)-1] + if lastChar != '^' && lastChar != '|' { + return false, "" + } + // last char was checked, eat it + rule = rule[:len(rule)-1] + + // check that it doesn't have any special characters inside + for _, r := range rule { + switch r { + case '|': + return false, "" + case '*': + return false, "" + } + } + + return true, rule +} From 1cc1e3749df6ccefb741232d7949fd5893d84f66 Mon Sep 17 00:00:00 2001 From: Eugene Bujak Date: Thu, 4 Oct 2018 13:38:52 +0300 Subject: [PATCH 2/2] dnsfilter -- lazily initialize safebrowsing and parental lookup cache --- dnsfilter/dnsfilter.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/dnsfilter/dnsfilter.go b/dnsfilter/dnsfilter.go index b40d6a38..41ca687e 100644 --- a/dnsfilter/dnsfilter.go +++ b/dnsfilter/dnsfilter.go @@ -130,8 +130,8 @@ const ( // these variables need to survive coredns reload var ( stats Stats - safebrowsingCache = gcache.New(defaultCacheSize).LRU().Expiration(defaultCacheTime).Build() - parentalCache = gcache.New(defaultCacheSize).LRU().Expiration(defaultCacheTime).Build() + safebrowsingCache gcache.Cache + parentalCache gcache.Cache ) // Result holds state of hostname check @@ -551,6 +551,9 @@ func (d *Dnsfilter) checkSafeBrowsing(host string) (Result, error) { } return result, nil } + if safebrowsingCache == nil { + safebrowsingCache = gcache.New(defaultCacheSize).LRU().Expiration(defaultCacheTime).Build() + } result, err := d.lookupCommon(host, &stats.Safebrowsing, safebrowsingCache, true, format, handleBody) return result, err } @@ -594,6 +597,9 @@ func (d *Dnsfilter) checkParental(host string) (Result, error) { } return result, nil } + if parentalCache == nil { + parentalCache = gcache.New(defaultCacheSize).LRU().Expiration(defaultCacheTime).Build() + } result, err := d.lookupCommon(host, &stats.Parental, parentalCache, false, format, handleBody) return result, err } @@ -781,7 +787,9 @@ func New() *Dnsfilter { // Destroy is optional if you want to tidy up goroutines without waiting for them to die off // right now it closes idle HTTP connections if there are any func (d *Dnsfilter) Destroy() { - d.transport.CloseIdleConnections() + if d != nil && d.transport != nil { + d.transport.CloseIdleConnections() + } } //