dnsfilter -- avoid using regexps when simple suffix match is enough.
This covers 96.98% of all AdGuard DNS rules.
This commit is contained in:
parent
9e939e5754
commit
cb97a254a5
@ -67,6 +67,10 @@ type rule struct {
|
||||
// user-supplied data
|
||||
listID uint32
|
||||
|
||||
// suffix matching
|
||||
isSuffix bool
|
||||
suffix string
|
||||
|
||||
// compiled regexp
|
||||
compiled *regexp.Regexp
|
||||
|
||||
@ -387,12 +391,21 @@ func (rule *rule) extractShortcut() {
|
||||
|
||||
func (rule *rule) compile() error {
|
||||
rule.RLock()
|
||||
isCompiled := rule.compiled != nil
|
||||
isCompiled := rule.isSuffix || rule.compiled != nil
|
||||
rule.RUnlock()
|
||||
if isCompiled {
|
||||
return nil
|
||||
}
|
||||
|
||||
isSuffix, suffix := getSuffix(rule.text)
|
||||
if isSuffix {
|
||||
rule.Lock()
|
||||
rule.isSuffix = isSuffix
|
||||
rule.suffix = suffix
|
||||
rule.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
expr, err := ruleToRegexp(rule.text)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -417,7 +430,16 @@ func (rule *rule) match(host string) (Result, error) {
|
||||
return res, err
|
||||
}
|
||||
rule.RLock()
|
||||
matched := rule.compiled.MatchString(host)
|
||||
matched := false
|
||||
if rule.isSuffix {
|
||||
if host == rule.suffix {
|
||||
matched = true
|
||||
} else if strings.HasSuffix(host, "."+rule.suffix) {
|
||||
matched = true
|
||||
}
|
||||
} else {
|
||||
matched = rule.compiled.MatchString(host)
|
||||
}
|
||||
rule.RUnlock()
|
||||
if matched {
|
||||
res.Reason = FilteredBlackList
|
||||
|
@ -195,7 +195,7 @@ func TestRuleToRegexp(t *testing.T) {
|
||||
{"/doubleclick/", "doubleclick", nil},
|
||||
{"/", "", ErrInvalidSyntax},
|
||||
{`|double*?.+[]|(){}#$\|`, `^double.*\?\.\+\[\]\|\(\)\{\}\#\$\\$`, nil},
|
||||
{`||doubleclick.net^`, `^([a-z0-9-_.]+\.)?doubleclick\.net([^ a-zA-Z0-9.%]|$)`, nil},
|
||||
{`||doubleclick.net^`, `(?:^|\.)doubleclick\.net$`, nil},
|
||||
}
|
||||
for _, testcase := range tests {
|
||||
converted, err := ruleToRegexp(testcase.rule)
|
||||
@ -208,6 +208,38 @@ func TestRuleToRegexp(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSuffixRule(t *testing.T) {
|
||||
for _, testcase := range []struct {
|
||||
rule string
|
||||
isSuffix bool
|
||||
suffix string
|
||||
}{
|
||||
{`||doubleclick.net^`, true, `doubleclick.net`}, // entire string or subdomain match
|
||||
{`||doubleclick.net|`, true, `doubleclick.net`}, // entire string or subdomain match
|
||||
{`|doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net
|
||||
{`*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net
|
||||
{`doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net
|
||||
{`|*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net
|
||||
{`||*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net
|
||||
{`||*doubleclick.net|`, false, ``}, // TODO: ends with doubleclick.net
|
||||
{`||*doublec*lick.net^`, false, ``}, // has a wildcard inside, has to be regexp
|
||||
{`||*doublec|lick.net^`, false, ``}, // has a special symbol inside, has to be regexp
|
||||
{`/abracadabra/`, false, ``}, // regexp, not anchored
|
||||
{`/abracadabra$/`, false, ``}, // TODO: simplify simple suffix regexes
|
||||
} {
|
||||
isSuffix, suffix := getSuffix(testcase.rule)
|
||||
if testcase.isSuffix != isSuffix {
|
||||
t.Errorf("Results do not match for \"%s\": got %v expected %v", testcase.rule, isSuffix, testcase.isSuffix)
|
||||
continue
|
||||
}
|
||||
if testcase.isSuffix && testcase.suffix != suffix {
|
||||
t.Errorf("Result suffix does not match for \"%s\": got \"%s\" expected \"%s\"", testcase.rule, suffix, testcase.suffix)
|
||||
continue
|
||||
}
|
||||
// trace("\"%s\": %v: %s", testcase.rule, isSuffix, suffix)
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// helper functions
|
||||
//
|
||||
|
@ -5,8 +5,8 @@ import (
|
||||
)
|
||||
|
||||
func ruleToRegexp(rule string) (string, error) {
|
||||
const hostStart = "^([a-z0-9-_.]+\\.)?"
|
||||
const hostEnd = "([^ a-zA-Z0-9.%]|$)"
|
||||
const hostStart = `(?:^|\.)`
|
||||
const hostEnd = `$`
|
||||
|
||||
// empty or short rule -- do nothing
|
||||
if !isValidRule(rule) {
|
||||
@ -49,3 +49,38 @@ func ruleToRegexp(rule string) (string, error) {
|
||||
|
||||
return sb.String(), nil
|
||||
}
|
||||
|
||||
// getSuffix reports whether rule is a plain suffix rule of the form
// ||example.com^ or ||example.com| -- i.e. one that matches either the entire
// string example.com or any *.example.com -- and, if so, returns the bare
// suffix ("example.com") with the leading "||" and the trailing terminator
// removed.
//
// It returns (false, "") for everything else: regexp rules (/.../), rules not
// starting with "||", rules not ending in '^' or '|', rules with a '|' or '*'
// inside the suffix, and rules too short to carry a non-empty suffix. It never
// panics, whatever the input.
func getSuffix(rule string) (bool, string) {
	// The shortest possible suffix rule is "||x^": two prefix bytes, one
	// suffix byte and one terminator. Rejecting anything shorter keeps the
	// index expressions below in range for "", "|" and "||", and also refuses
	// "||^" / "|||", whose empty suffix would otherwise match every host that
	// ends with a dot.
	const minLen = len("||") + 1 + 1
	if len(rule) < minLen {
		return false, ""
	}

	// if starts with / and ends with /, it's already a regexp
	// TODO: if a regexp is simple `/abracadabra$/`, then simplify it maybe?
	if rule[0] == '/' && rule[len(rule)-1] == '/' {
		return false, ""
	}

	// must start with ||
	if rule[:2] != "||" {
		return false, ""
	}

	// suffix rule must end with ^ or |
	if last := rule[len(rule)-1]; last != '^' && last != '|' {
		return false, ""
	}

	// both ends were checked, eat them
	suffix := rule[2 : len(rule)-1]

	// check that it doesn't have any special characters inside; both are
	// ASCII, so scanning bytes is equivalent to scanning runes here
	for i := 0; i < len(suffix); i++ {
		if c := suffix[i]; c == '|' || c == '*' {
			return false, ""
		}
	}

	return true, suffix
}
|
||||
|
Loading…
Reference in New Issue
Block a user