* dnsfilter: remove code for filtering rules

This commit is contained in:
Simon Zolin 2019-05-15 15:48:05 +03:00
parent bd68bf2e25
commit 3396d68019
3 changed files with 0 additions and 602 deletions

View File

@ -11,9 +11,7 @@ import (
"io/ioutil" "io/ioutil"
"net" "net"
"net/http" "net/http"
"regexp"
"strings" "strings"
"sync"
"sync/atomic" "sync/atomic"
"time" "time"
@ -60,33 +58,6 @@ type privateConfig struct {
safeBrowsingServer string // access via methods safeBrowsingServer string // access via methods
} }
type rule struct {
text string // text without @@ decorators or $ options
shortcut string // for speeding up lookup
originalText string // original text for reporting back to applications
ip net.IP // IP address (for the case when we're matching a hosts file)
// options
options []string // optional options after $
// parsed options
apps []string
isWhitelist bool
isImportant bool
// user-supplied data
listID int64
// suffix matching
isSuffix bool
suffix string
// compiled regexp
compiled *regexp.Regexp
sync.RWMutex
}
// LookupStats store stats collected during safebrowsing or parental checks // LookupStats store stats collected during safebrowsing or parental checks
type LookupStats struct { type LookupStats struct {
Requests uint64 // number of HTTP requests that were sent Requests uint64 // number of HTTP requests that were sent
@ -104,14 +75,6 @@ type Stats struct {
// Dnsfilter holds added rules and performs hostname matches against the rules // Dnsfilter holds added rules and performs hostname matches against the rules
type Dnsfilter struct { type Dnsfilter struct {
storage map[string]bool // rule storage, not used for matching, just for filtering out duplicates
storageMutex sync.RWMutex
// rules are checked against these lists in the order defined here
important *rulesTable // more important than whitelist and is checked first
whiteList *rulesTable // more important than blacklist
blackList *rulesTable
// HTTP lookups for safebrowsing and parental // HTTP lookups for safebrowsing and parental
client http.Client // handle for http client -- single instance as recommended by docs client http.Client // handle for http client -- single instance as recommended by docs
transport *http.Transport // handle for http transport used by http client transport *http.Transport // handle for http transport used by http client
@ -242,308 +205,6 @@ func (d *Dnsfilter) CheckHost(host string) (Result, error) {
return Result{}, nil return Result{}, nil
} }
//
// rules table
//
type rulesTable struct {
rulesByHost map[string]*rule
rulesByShortcut map[string][]*rule
rulesLeftovers []*rule
sync.RWMutex
}
func newRulesTable() *rulesTable {
return &rulesTable{
rulesByHost: make(map[string]*rule),
rulesByShortcut: make(map[string][]*rule),
rulesLeftovers: make([]*rule, 0),
}
}
func (r *rulesTable) Add(rule *rule) {
r.Lock()
if rule.ip != nil {
// Hosts syntax
r.rulesByHost[rule.text] = rule
} else if len(rule.shortcut) == shortcutLength && enableFastLookup {
// Adblock syntax with a shortcut
r.rulesByShortcut[rule.shortcut] = append(r.rulesByShortcut[rule.shortcut], rule)
} else {
// Adblock syntax -- too short to have a shortcut
r.rulesLeftovers = append(r.rulesLeftovers, rule)
}
r.Unlock()
}
func (r *rulesTable) matchByHost(host string) (Result, error) {
// First: examine the hosts-syntax rules
res, err := r.searchByHost(host)
if err != nil {
return res, err
}
if res.Reason.Matched() {
return res, nil
}
// Second: examine the adblock-syntax rules with shortcuts
res, err = r.searchShortcuts(host)
if err != nil {
return res, err
}
if res.Reason.Matched() {
return res, nil
}
// Third: examine the others
res, err = r.searchLeftovers(host)
if err != nil {
return res, err
}
if res.Reason.Matched() {
return res, nil
}
return Result{}, nil
}
func (r *rulesTable) searchByHost(host string) (Result, error) {
rule, ok := r.rulesByHost[host]
if ok {
return rule.match(host)
}
return Result{}, nil
}
func (r *rulesTable) searchShortcuts(host string) (Result, error) {
// check in shortcuts first
for i := 0; i < len(host); i++ {
shortcut := host[i:]
if len(shortcut) > shortcutLength {
shortcut = shortcut[:shortcutLength]
}
if len(shortcut) != shortcutLength {
continue
}
rules, ok := r.rulesByShortcut[shortcut]
if !ok {
continue
}
for _, rule := range rules {
res, err := rule.match(host)
// error? stop search
if err != nil {
return res, err
}
// matched? stop search
if res.Reason.Matched() {
return res, err
}
// continue otherwise
}
}
return Result{}, nil
}
func (r *rulesTable) searchLeftovers(host string) (Result, error) {
for _, rule := range r.rulesLeftovers {
res, err := rule.match(host)
// error? stop search
if err != nil {
return res, err
}
// matched? stop search
if res.Reason.Matched() {
return res, err
}
// continue otherwise
}
return Result{}, nil
}
func findOptionIndex(text string) int {
for i, r := range text {
// ignore non-$
if r != '$' {
continue
}
// ignore `\$`
if i > 0 && text[i-1] == '\\' {
continue
}
// ignore `$/`
if i > len(text) && text[i+1] == '/' {
continue
}
return i + 1
}
return -1
}
func (rule *rule) extractOptions() error {
optIndex := findOptionIndex(rule.text)
if optIndex == 0 { // starts with $
return ErrInvalidSyntax
}
if optIndex == len(rule.text) { // ends with $
return ErrInvalidSyntax
}
if optIndex < 0 {
return nil
}
optionsStr := rule.text[optIndex:]
rule.text = rule.text[:optIndex-1] // remove options from text
begin := 0
i := 0
for i = 0; i < len(optionsStr); i++ {
switch optionsStr[i] {
case ',':
if i > 0 {
// it might be escaped, if so, ignore
if optionsStr[i-1] == '\\' {
break // from switch, not for loop
}
}
rule.options = append(rule.options, optionsStr[begin:i])
begin = i + 1
}
}
if begin != i {
// there's still an option remaining
rule.options = append(rule.options, optionsStr[begin:])
}
return nil
}
func (rule *rule) parseOptions() error {
err := rule.extractOptions()
if err != nil {
return err
}
for _, option := range rule.options {
switch {
case option == "important":
rule.isImportant = true
case strings.HasPrefix(option, "app="):
option = strings.TrimPrefix(option, "app=")
rule.apps = strings.Split(option, "|")
default:
return ErrInvalidSyntax
}
}
return nil
}
func (rule *rule) extractShortcut() {
// regex rules have no shortcuts
if rule.text[0] == '/' && rule.text[len(rule.text)-1] == '/' {
return
}
fields := strings.FieldsFunc(rule.text, func(r rune) bool {
switch r {
case '*', '^', '|':
return true
}
return false
})
longestField := ""
for _, field := range fields {
if len(field) > len(longestField) {
longestField = field
}
}
if len(longestField) > shortcutLength {
longestField = longestField[:shortcutLength]
}
rule.shortcut = strings.ToLower(longestField)
}
func (rule *rule) compile() error {
rule.RLock()
isCompiled := rule.isSuffix || rule.compiled != nil
rule.RUnlock()
if isCompiled {
return nil
}
isSuffix, suffix := getSuffix(rule.text)
if isSuffix {
rule.Lock()
rule.isSuffix = isSuffix
rule.suffix = suffix
rule.Unlock()
return nil
}
expr, err := ruleToRegexp(rule.text)
if err != nil {
return err
}
compiled, err := regexp.Compile(expr)
if err != nil {
return err
}
rule.Lock()
rule.compiled = compiled
rule.Unlock()
return nil
}
// Checks if the rule matches the specified host and returns a corresponding Result object
func (rule *rule) match(host string) (Result, error) {
res := Result{}
if rule.ip != nil && rule.text == host {
// This is a hosts-syntax rule -- just check that the hostname matches and return the result
return Result{
IsFiltered: true,
Reason: FilteredBlackList,
Rule: rule.originalText,
IP: rule.ip,
FilterID: rule.listID,
}, nil
}
err := rule.compile()
if err != nil {
return res, err
}
rule.RLock()
matched := false
if rule.isSuffix {
if host == rule.suffix {
matched = true
} else if strings.HasSuffix(host, "."+rule.suffix) {
matched = true
}
} else {
matched = rule.compiled.MatchString(host)
}
rule.RUnlock()
if matched {
res.Reason = FilteredBlackList
res.IsFiltered = true
res.FilterID = rule.listID
res.Rule = rule.originalText
if rule.isWhitelist {
res.Reason = NotFilteredWhiteList
res.IsFiltered = false
}
}
return res, nil
}
func getCachedReason(cache gcache.Cache, host string) (result Result, isFound bool, err error) { func getCachedReason(cache gcache.Cache, host string) (result Result, isFound bool, err error) {
isFound = false // not found yet isFound = false // not found yet
@ -840,133 +501,11 @@ func (d *Dnsfilter) lookupCommon(host string, lookupstats *LookupStats, cache gc
// AddRules is a convinience function to add an array of filters in one call // AddRules is a convinience function to add an array of filters in one call
func (d *Dnsfilter) AddRules(filters []Filter) error { func (d *Dnsfilter) AddRules(filters []Filter) error {
for _, f := range filters {
for _, rule := range f.Rules {
err := d.AddRule(rule, f.ID)
if err == ErrAlreadyExists || err == ErrInvalidSyntax {
continue
}
if err != nil {
log.Printf("Cannot add rule %s: %s", rule, err)
// Just ignore invalid rules
continue
}
}
}
return nil return nil
} }
// AddRule adds a rule, checking if it is a valid rule first and if it wasn't added already
func (d *Dnsfilter) AddRule(input string, filterListID int64) error {
input = strings.TrimSpace(input)
d.storageMutex.RLock()
_, exists := d.storage[input]
d.storageMutex.RUnlock()
if exists {
// already added
return ErrAlreadyExists
}
if !isValidRule(input) {
return ErrInvalidSyntax
}
// First, check if this is a hosts-syntax rule
if d.parseEtcHosts(input, filterListID) {
// This is a valid hosts-syntax rule, no need for further parsing
return nil
}
// Start parsing the rule
r := rule{
text: input, // will be modified
originalText: input,
listID: filterListID,
}
// Mark rule as whitelist if it starts with @@
if strings.HasPrefix(r.text, "@@") {
r.isWhitelist = true
r.text = r.text[2:]
}
err := r.parseOptions()
if err != nil {
return err
}
r.extractShortcut()
if !enableDelayedCompilation {
err := r.compile()
if err != nil {
return err
}
}
destination := d.blackList
if r.isImportant {
destination = d.important
} else if r.isWhitelist {
destination = d.whiteList
}
d.storageMutex.Lock()
d.storage[input] = true
d.storageMutex.Unlock()
destination.Add(&r)
return nil
}
// Parses the hosts-syntax rules. Returns false if the input string is not of hosts-syntax.
func (d *Dnsfilter) parseEtcHosts(input string, filterListID int64) bool {
// Strip the trailing comment
ruleText := input
if pos := strings.IndexByte(ruleText, '#'); pos != -1 {
ruleText = ruleText[0:pos]
}
fields := strings.Fields(ruleText)
if len(fields) < 2 {
return false
}
addr := net.ParseIP(fields[0])
if addr == nil {
return false
}
d.storageMutex.Lock()
d.storage[input] = true
d.storageMutex.Unlock()
for _, host := range fields[1:] {
r := rule{
text: host,
originalText: input,
listID: filterListID,
ip: addr,
}
d.blackList.Add(&r)
}
return true
}
// matchHost is a low-level way to check only if hostname is filtered by rules, skipping expensive safebrowsing and parental lookups // matchHost is a low-level way to check only if hostname is filtered by rules, skipping expensive safebrowsing and parental lookups
func (d *Dnsfilter) matchHost(host string) (Result, error) { func (d *Dnsfilter) matchHost(host string) (Result, error) {
lists := []*rulesTable{
d.important,
d.whiteList,
d.blackList,
}
for _, table := range lists {
res, err := table.matchByHost(host)
if err != nil {
return res, err
}
if res.Reason.Matched() {
return res, nil
}
}
return Result{}, nil return Result{}, nil
} }
@ -1061,11 +600,6 @@ func (d *Dnsfilter) createCustomDialContext(resolverAddr string) dialFunctionTyp
func New(c *Config) *Dnsfilter { func New(c *Config) *Dnsfilter {
d := new(Dnsfilter) d := new(Dnsfilter)
d.storage = make(map[string]bool)
d.important = newRulesTable()
d.whiteList = newRulesTable()
d.blackList = newRulesTable()
// Customize the Transport to have larger connection pool, // Customize the Transport to have larger connection pool,
// We are not (re)using http.DefaultTransport because of race conditions found by tests // We are not (re)using http.DefaultTransport because of race conditions found by tests
d.transport = &http.Transport{ d.transport = &http.Transport{
@ -1141,8 +675,3 @@ func (d *Dnsfilter) SafeSearchDomain(host string) (string, bool) {
func (d *Dnsfilter) GetStats() Stats { func (d *Dnsfilter) GetStats() Stats {
return stats return stats
} }
// Count returns number of rules added to filter
func (d *Dnsfilter) Count() int {
return len(d.storage)
}

View File

@ -1,49 +1,9 @@
package dnsfilter package dnsfilter
import ( import (
"strings"
"sync/atomic" "sync/atomic"
) )
func isValidRule(rule string) bool {
if len(rule) < 4 {
return false
}
if rule[0] == '!' {
return false
}
if rule[0] == '#' {
return false
}
if strings.HasPrefix(rule, "[Adblock") {
return false
}
// Filter out all sorts of cosmetic rules:
// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#cosmetic-rules
masks := []string{
"##",
"#@#",
"#?#",
"#@?#",
"#$#",
"#@$#",
"#?$#",
"#@?$#",
"$$",
"$@$",
"#%#",
"#@%#",
}
for _, mask := range masks {
if strings.Contains(rule, mask) {
return false
}
}
return true
}
func updateMax(valuePtr *int64, maxPtr *int64) { func updateMax(valuePtr *int64, maxPtr *int64) {
for { for {
current := atomic.LoadInt64(valuePtr) current := atomic.LoadInt64(valuePtr)

View File

@ -1,91 +0,0 @@
package dnsfilter
import (
"strings"
)
func ruleToRegexp(rule string) (string, error) {
const hostStart = `(?:^|\.)`
const hostEnd = `$`
// empty or short rule -- do nothing
if !isValidRule(rule) {
return "", ErrInvalidSyntax
}
// if starts with / and ends with /, it's already a regexp, just strip the slashes
if rule[0] == '/' && rule[len(rule)-1] == '/' {
return rule[1 : len(rule)-1], nil
}
var sb strings.Builder
if rule[0] == '|' && rule[1] == '|' {
sb.WriteString(hostStart)
rule = rule[2:]
}
for i, r := range rule {
switch {
case r == '?' || r == '.' || r == '+' || r == '[' || r == ']' || r == '(' || r == ')' || r == '{' || r == '}' || r == '#' || r == '\\' || r == '$':
sb.WriteRune('\\')
sb.WriteRune(r)
case r == '|' && i == 0:
// | at start and it's not || at start
sb.WriteRune('^')
case r == '|' && i == len(rule)-1:
// | at end
sb.WriteRune('$')
case r == '|' && i != 0 && i != len(rule)-1:
sb.WriteString(`\|`)
case r == '*':
sb.WriteString(`.*`)
case r == '^':
sb.WriteString(hostEnd)
default:
sb.WriteRune(r)
}
}
return sb.String(), nil
}
// handle suffix rule ||example.com^ -- either entire string is example.com or *.example.com
func getSuffix(rule string) (bool, string) {
// if starts with / and ends with /, it's already a regexp
// TODO: if a regexp is simple `/abracadabra$/`, then simplify it maybe?
if rule[0] == '/' && rule[len(rule)-1] == '/' {
return false, ""
}
// must start with ||
if rule[0] != '|' || rule[1] != '|' {
return false, ""
}
rule = rule[2:]
// suffix rule must end with ^ or |
lastChar := rule[len(rule)-1]
if lastChar != '^' && lastChar != '|' {
return false, ""
}
// last char was checked, eat it
rule = rule[:len(rule)-1]
// it might also end with ^|
if rule[len(rule)-1] == '^' {
rule = rule[:len(rule)-1]
}
// check that it doesn't have any special characters inside
for _, r := range rule {
switch r {
case '|':
return false, ""
case '*':
return false, ""
}
}
return true, rule
}