From 3396d6801990b45c0c48a8508d9b95587e30352d Mon Sep 17 00:00:00 2001 From: Simon Zolin Date: Wed, 15 May 2019 15:48:05 +0300 Subject: [PATCH] * dnsfilter: remove code for filtering rules --- dnsfilter/dnsfilter.go | 471 ------------------------------------ dnsfilter/helpers.go | 40 --- dnsfilter/rule_to_regexp.go | 91 ------- 3 files changed, 602 deletions(-) delete mode 100644 dnsfilter/rule_to_regexp.go diff --git a/dnsfilter/dnsfilter.go b/dnsfilter/dnsfilter.go index 0e7338ef..80b59fc5 100644 --- a/dnsfilter/dnsfilter.go +++ b/dnsfilter/dnsfilter.go @@ -11,9 +11,7 @@ import ( "io/ioutil" "net" "net/http" - "regexp" "strings" - "sync" "sync/atomic" "time" @@ -60,33 +58,6 @@ type privateConfig struct { safeBrowsingServer string // access via methods } -type rule struct { - text string // text without @@ decorators or $ options - shortcut string // for speeding up lookup - originalText string // original text for reporting back to applications - ip net.IP // IP address (for the case when we're matching a hosts file) - - // options - options []string // optional options after $ - - // parsed options - apps []string - isWhitelist bool - isImportant bool - - // user-supplied data - listID int64 - - // suffix matching - isSuffix bool - suffix string - - // compiled regexp - compiled *regexp.Regexp - - sync.RWMutex -} - // LookupStats store stats collected during safebrowsing or parental checks type LookupStats struct { Requests uint64 // number of HTTP requests that were sent @@ -104,14 +75,6 @@ type Stats struct { // Dnsfilter holds added rules and performs hostname matches against the rules type Dnsfilter struct { - storage map[string]bool // rule storage, not used for matching, just for filtering out duplicates - storageMutex sync.RWMutex - - // rules are checked against these lists in the order defined here - important *rulesTable // more important than whitelist and is checked first - whiteList *rulesTable // more important than blacklist - blackList *rulesTable - // HTTP lookups for safebrowsing and parental client http.Client // handle for http client -- single instance as recommended by docs transport *http.Transport // handle for http transport used by http client @@ -242,308 +205,6 @@ func (d *Dnsfilter) CheckHost(host string) (Result, error) { return Result{}, nil } -// -// rules table -// - -type rulesTable struct { - rulesByHost map[string]*rule - rulesByShortcut map[string][]*rule - rulesLeftovers []*rule - sync.RWMutex -} - -func newRulesTable() *rulesTable { - return &rulesTable{ - rulesByHost: make(map[string]*rule), - rulesByShortcut: make(map[string][]*rule), - rulesLeftovers: make([]*rule, 0), - } -} - -func (r *rulesTable) Add(rule *rule) { - r.Lock() - if rule.ip != nil { - // Hosts syntax - r.rulesByHost[rule.text] = rule - } else if len(rule.shortcut) == shortcutLength && enableFastLookup { - // Adblock syntax with a shortcut - r.rulesByShortcut[rule.shortcut] = append(r.rulesByShortcut[rule.shortcut], rule) - } else { - // Adblock syntax -- too short to have a shortcut - r.rulesLeftovers = append(r.rulesLeftovers, rule) - } - r.Unlock() -} - -func (r *rulesTable) matchByHost(host string) (Result, error) { - // First: examine the hosts-syntax rules - res, err := r.searchByHost(host) - if err != nil { - return res, err - } - if res.Reason.Matched() { - return res, nil - } - - // Second: examine the adblock-syntax rules with shortcuts - res, err = r.searchShortcuts(host) - if err != nil { - return res, err - } - if res.Reason.Matched() { - return res, nil - } - - // Third: examine the others - res, err = r.searchLeftovers(host) - if err != nil { - return res, err - } - if res.Reason.Matched() { - return res, nil - } - - return Result{}, nil -} - -func (r *rulesTable) searchByHost(host string) (Result, error) { - rule, ok := r.rulesByHost[host] - - if ok { - return rule.match(host) - } - - return Result{}, nil -} - -func (r *rulesTable) searchShortcuts(host string) (Result, error) { - // check in shortcuts first - for i := 0; i < len(host); i++ { - shortcut := host[i:] - if len(shortcut) > shortcutLength { - shortcut = shortcut[:shortcutLength] - } - if len(shortcut) != shortcutLength { - continue - } - rules, ok := r.rulesByShortcut[shortcut] - if !ok { - continue - } - for _, rule := range rules { - res, err := rule.match(host) - // error? stop search - if err != nil { - return res, err - } - // matched? stop search - if res.Reason.Matched() { - return res, err - } - // continue otherwise - } - } - return Result{}, nil -} - -func (r *rulesTable) searchLeftovers(host string) (Result, error) { - for _, rule := range r.rulesLeftovers { - res, err := rule.match(host) - // error? stop search - if err != nil { - return res, err - } - // matched? stop search - if res.Reason.Matched() { - return res, err - } - // continue otherwise - } - return Result{}, nil -} - -func findOptionIndex(text string) int { - for i, r := range text { - // ignore non-$ - if r != '$' { - continue - } - // ignore `\$` - if i > 0 && text[i-1] == '\\' { - continue - } - // ignore `$/` - if i > len(text) && text[i+1] == '/' { - continue - } - return i + 1 - } - return -1 -} - -func (rule *rule) extractOptions() error { - optIndex := findOptionIndex(rule.text) - if optIndex == 0 { // starts with $ - return ErrInvalidSyntax - } - if optIndex == len(rule.text) { // ends with $ - return ErrInvalidSyntax - } - if optIndex < 0 { - return nil - } - - optionsStr := rule.text[optIndex:] - rule.text = rule.text[:optIndex-1] // remove options from text - - begin := 0 - i := 0 - for i = 0; i < len(optionsStr); i++ { - switch optionsStr[i] { - case ',': - if i > 0 { - // it might be escaped, if so, ignore - if optionsStr[i-1] == '\\' { - break // from switch, not for loop - } - } - rule.options = append(rule.options, optionsStr[begin:i]) - begin = i + 1 - } - } - if begin != i { - // there's still an option remaining - rule.options = append(rule.options, optionsStr[begin:]) - } - - return nil -} - -func (rule *rule) parseOptions() error { - err := rule.extractOptions() - if err != nil { - return err - } - - for _, option := range rule.options { - switch { - case option == "important": - rule.isImportant = true - case strings.HasPrefix(option, "app="): - option = strings.TrimPrefix(option, "app=") - rule.apps = strings.Split(option, "|") - default: - return ErrInvalidSyntax - } - } - - return nil -} - -func (rule *rule) extractShortcut() { - // regex rules have no shortcuts - if rule.text[0] == '/' && rule.text[len(rule.text)-1] == '/' { - return - } - - fields := strings.FieldsFunc(rule.text, func(r rune) bool { - switch r { - case '*', '^', '|': - return true - } - return false - }) - longestField := "" - for _, field := range fields { - if len(field) > len(longestField) { - longestField = field - } - } - if len(longestField) > shortcutLength { - longestField = longestField[:shortcutLength] - } - rule.shortcut = strings.ToLower(longestField) -} - -func (rule *rule) compile() error { - rule.RLock() - isCompiled := rule.isSuffix || rule.compiled != nil - rule.RUnlock() - if isCompiled { - return nil - } - - isSuffix, suffix := getSuffix(rule.text) - if isSuffix { - rule.Lock() - rule.isSuffix = isSuffix - rule.suffix = suffix - rule.Unlock() - return nil - } - - expr, err := ruleToRegexp(rule.text) - if err != nil { - return err - } - - compiled, err := regexp.Compile(expr) - if err != nil { - return err - } - - rule.Lock() - rule.compiled = compiled - rule.Unlock() - - return nil -} - -// Checks if the rule matches the specified host and returns a corresponding Result object -func (rule *rule) match(host string) (Result, error) { - res := Result{} - - if rule.ip != nil && rule.text == host { - // This is a hosts-syntax rule -- just check that the hostname matches and return the result - return Result{ - IsFiltered: true, - Reason: FilteredBlackList, - Rule: rule.originalText, - IP: rule.ip, - FilterID: rule.listID, - }, nil - } - - err := rule.compile() - if err != nil { - return res, err - } - rule.RLock() - matched := false - if rule.isSuffix { - if host == rule.suffix { - matched = true - } else if strings.HasSuffix(host, "."+rule.suffix) { - matched = true - } - } else { - matched = rule.compiled.MatchString(host) - } - rule.RUnlock() - if matched { - res.Reason = FilteredBlackList - res.IsFiltered = true - res.FilterID = rule.listID - res.Rule = rule.originalText - if rule.isWhitelist { - res.Reason = NotFilteredWhiteList - res.IsFiltered = false - } - } - return res, nil -} - func getCachedReason(cache gcache.Cache, host string) (result Result, isFound bool, err error) { isFound = false // not found yet @@ -840,133 +501,11 @@ func (d *Dnsfilter) lookupCommon(host string, lookupstats *LookupStats, cache gc // AddRules is a convinience function to add an array of filters in one call func (d *Dnsfilter) AddRules(filters []Filter) error { - for _, f := range filters { - for _, rule := range f.Rules { - err := d.AddRule(rule, f.ID) - if err == ErrAlreadyExists || err == ErrInvalidSyntax { - continue - } - if err != nil { - log.Printf("Cannot add rule %s: %s", rule, err) - // Just ignore invalid rules - continue - } - } - } return nil } -// AddRule adds a rule, checking if it is a valid rule first and if it wasn't added already -func (d *Dnsfilter) AddRule(input string, filterListID int64) error { - input = strings.TrimSpace(input) - d.storageMutex.RLock() - _, exists := d.storage[input] - d.storageMutex.RUnlock() - if exists { - // already added - return ErrAlreadyExists - } - - if !isValidRule(input) { - return ErrInvalidSyntax - } - - // First, check if this is a hosts-syntax rule - if d.parseEtcHosts(input, filterListID) { - // This is a valid hosts-syntax rule, no need for further parsing - return nil - } - - // Start parsing the rule - r := rule{ - text: input, // will be modified - originalText: input, - listID: filterListID, - } - - // Mark rule as whitelist if it starts with @@ - if strings.HasPrefix(r.text, "@@") { - r.isWhitelist = true - r.text = r.text[2:] - } - - err := r.parseOptions() - if err != nil { - return err - } - - r.extractShortcut() - - if !enableDelayedCompilation { - err := r.compile() - if err != nil { - return err - } - } - - destination := d.blackList - if r.isImportant { - destination = d.important - } else if r.isWhitelist { - destination = d.whiteList - } - - d.storageMutex.Lock() - d.storage[input] = true - d.storageMutex.Unlock() - destination.Add(&r) - return nil -} - -// Parses the hosts-syntax rules. Returns false if the input string is not of hosts-syntax. -func (d *Dnsfilter) parseEtcHosts(input string, filterListID int64) bool { - // Strip the trailing comment - ruleText := input - if pos := strings.IndexByte(ruleText, '#'); pos != -1 { - ruleText = ruleText[0:pos] - } - fields := strings.Fields(ruleText) - if len(fields) < 2 { - return false - } - addr := net.ParseIP(fields[0]) - if addr == nil { - return false - } - - d.storageMutex.Lock() - d.storage[input] = true - d.storageMutex.Unlock() - - for _, host := range fields[1:] { - r := rule{ - text: host, - originalText: input, - listID: filterListID, - ip: addr, - } - d.blackList.Add(&r) - } - return true -} - // matchHost is a low-level way to check only if hostname is filtered by rules, skipping expensive safebrowsing and parental lookups func (d *Dnsfilter) matchHost(host string) (Result, error) { - lists := []*rulesTable{ - d.important, - d.whiteList, - d.blackList, - } - - for _, table := range lists { - res, err := table.matchByHost(host) - if err != nil { - return res, err - } - if res.Reason.Matched() { - return res, nil - } - } return Result{}, nil } @@ -1061,11 +600,6 @@ func (d *Dnsfilter) createCustomDialContext(resolverAddr string) dialFunctionTyp func New(c *Config) *Dnsfilter { d := new(Dnsfilter) - d.storage = make(map[string]bool) - d.important = newRulesTable() - d.whiteList = newRulesTable() - d.blackList = newRulesTable() - // Customize the Transport to have larger connection pool, // We are not (re)using http.DefaultTransport because of race conditions found by tests d.transport = &http.Transport{ @@ -1141,8 +675,3 @@ func (d *Dnsfilter) SafeSearchDomain(host string) (string, bool) { func (d *Dnsfilter) GetStats() Stats { return stats } - -// Count returns number of rules added to filter -func (d *Dnsfilter) Count() int { - return len(d.storage) -} diff --git a/dnsfilter/helpers.go b/dnsfilter/helpers.go index 68d4ba26..2d60c47c 100644 --- a/dnsfilter/helpers.go +++ b/dnsfilter/helpers.go @@ -1,49 +1,9 @@ package dnsfilter import ( - "strings" "sync/atomic" ) -func isValidRule(rule string) bool { - if len(rule) < 4 { - return false - } - if rule[0] == '!' { - return false - } - if rule[0] == '#' { - return false - } - if strings.HasPrefix(rule, "[Adblock") { - return false - } - - // Filter out all sorts of cosmetic rules: - // https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#cosmetic-rules - masks := []string{ - "##", - "#@#", - "#?#", - "#@?#", - "#$#", - "#@$#", - "#?$#", - "#@?$#", - "$$", - "$@$", - "#%#", - "#@%#", - } - for _, mask := range masks { - if strings.Contains(rule, mask) { - return false - } - } - - return true -} - func updateMax(valuePtr *int64, maxPtr *int64) { for { current := atomic.LoadInt64(valuePtr) diff --git a/dnsfilter/rule_to_regexp.go b/dnsfilter/rule_to_regexp.go deleted file mode 100644 index 41d55e30..00000000 --- a/dnsfilter/rule_to_regexp.go +++ /dev/null @@ -1,91 +0,0 @@ -package dnsfilter - -import ( - "strings" -) - -func ruleToRegexp(rule string) (string, error) { - const hostStart = `(?:^|\.)` - const hostEnd = `$` - - // empty or short rule -- do nothing - if !isValidRule(rule) { - return "", ErrInvalidSyntax - } - - // if starts with / and ends with /, it's already a regexp, just strip the slashes - if rule[0] == '/' && rule[len(rule)-1] == '/' { - return rule[1 : len(rule)-1], nil - } - - var sb strings.Builder - - if rule[0] == '|' && rule[1] == '|' { - sb.WriteString(hostStart) - rule = rule[2:] - } - - for i, r := range rule { - switch { - case r == '?' || r == '.' || r == '+' || r == '[' || r == ']' || r == '(' || r == ')' || r == '{' || r == '}' || r == '#' || r == '\\' || r == '$': - sb.WriteRune('\\') - sb.WriteRune(r) - case r == '|' && i == 0: - // | at start and it's not || at start - sb.WriteRune('^') - case r == '|' && i == len(rule)-1: - // | at end - sb.WriteRune('$') - case r == '|' && i != 0 && i != len(rule)-1: - sb.WriteString(`\|`) - case r == '*': - sb.WriteString(`.*`) - case r == '^': - sb.WriteString(hostEnd) - default: - sb.WriteRune(r) - } - } - - return sb.String(), nil -} - -// handle suffix rule ||example.com^ -- either entire string is example.com or *.example.com -func getSuffix(rule string) (bool, string) { - // if starts with / and ends with /, it's already a regexp - // TODO: if a regexp is simple `/abracadabra$/`, then simplify it maybe? - if rule[0] == '/' && rule[len(rule)-1] == '/' { - return false, "" - } - - // must start with || - if rule[0] != '|' || rule[1] != '|' { - return false, "" - } - rule = rule[2:] - - // suffix rule must end with ^ or | - lastChar := rule[len(rule)-1] - if lastChar != '^' && lastChar != '|' { - return false, "" - } - // last char was checked, eat it - rule = rule[:len(rule)-1] - - // it might also end with ^| - if rule[len(rule)-1] == '^' { - rule = rule[:len(rule)-1] - } - - // check that it doesn't have any special characters inside - for _, r := range rule { - switch r { - case '|': - return false, "" - case '*': - return false, "" - } - } - - return true, rule -}