diff --git a/.gitignore b/.gitignore index 4f695bdf..e22df4e9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .DS_Store .vscode +.idea debug /AdGuardHome /AdGuardHome.yaml @@ -9,4 +10,8 @@ debug /Corefile /dnsfilter.txt /querylog.json -/querylog.json.1 \ No newline at end of file +/querylog.json.1 + +# Test output +dnsfilter/dnsfilter.TestLotsOfRules*.pprof +tests/top-1m.csv diff --git a/client/src/helpers/trackers/adguard.json b/client/src/helpers/trackers/adguard.json index 1272827e..1ae36e5b 100644 --- a/client/src/helpers/trackers/adguard.json +++ b/client/src/helpers/trackers/adguard.json @@ -61,6 +61,11 @@ "name": "Branch.io", "categoryId": 101, "url": "https://branch.io/" + }, + "adocean": { + "name": "Gemius Adocean", + "categoryId": 4, + "url": "https://adocean-global.com/" } }, "trackerDomains": { @@ -72,6 +77,7 @@ "appsflyer.com": "appsflyer", "appmetrica.yandex.com": "yandex_appmetrica", "adjust.com": "adjust", - "mobileapptracking.com": "branch" + "mobileapptracking.com": "branch", + "adocean.cz": "adocean" } } \ No newline at end of file diff --git a/control.go b/control.go index 91eb8c68..1afd4e24 100644 --- a/control.go +++ b/control.go @@ -731,12 +731,13 @@ func (filter *filter) update(now time.Time) (bool, error) { } } else if len(line) != 0 { err = d.AddRule(line, 0) - if err == dnsfilter.ErrInvalidSyntax { + if err == dnsfilter.ErrAlreadyExists || err == dnsfilter.ErrInvalidSyntax { continue } if err != nil { - log.Printf("Couldn't add rule %s: %s", filter.URL, err) - return false, err + log.Printf("Cannot add rule %s from %s: %s", line, filter.URL, err) + // Just ignore invalid rules + continue } rulesCount++ } diff --git a/coredns_plugin/coredns_plugin.go b/coredns_plugin/coredns_plugin.go index 8bd8b74c..31b147cf 100644 --- a/coredns_plugin/coredns_plugin.go +++ b/coredns_plugin/coredns_plugin.go @@ -62,7 +62,6 @@ type plug struct { d *dnsfilter.Dnsfilter Next plugin.Handler upstream upstream.Upstream - hosts map[string]net.IP settings plugSettings sync.RWMutex @@ -82,7 +81,6 @@ func setupPlugin(c *caddy.Controller) (*plug, error) { p := &plug{ settings: defaultPluginSettings, d: dnsfilter.New(), - hosts: make(map[string]net.IP), } filterFileNames := []string{} @@ -149,15 +147,15 @@ func setupPlugin(c *caddy.Controller) (*plug, error) { scanner := bufio.NewScanner(file) for scanner.Scan() { text := scanner.Text() - if p.parseEtcHosts(text) { - continue - } + err = p.d.AddRule(text, uint32(i)) - if err == dnsfilter.ErrInvalidSyntax { + if err == dnsfilter.ErrAlreadyExists || err == dnsfilter.ErrInvalidSyntax { continue } if err != nil { - return nil, err + log.Printf("Cannot add rule %s: %s", text, err) + // Just ignore invalid rules + continue } count++ } @@ -231,28 +229,6 @@ func setup(c *caddy.Controller) error { return nil } -func (p *plug) parseEtcHosts(text string) bool { - if pos := strings.IndexByte(text, '#'); pos != -1 { - text = text[0:pos] - } - fields := strings.Fields(text) - if len(fields) < 2 { - return false - } - addr := net.ParseIP(fields[0]) - if addr == nil { - return false - } - for _, host := range fields[1:] { - // debug logging for duplicate values, pretty common if you subscribe to many hosts files - // if val, ok := p.hosts[host]; ok { - // log.Printf("warning: host %s already has value %s, will overwrite it with %s", host, val, addr) - // } - p.hosts[host] = addr - } - return true -} - func (p *plug) onShutdown() error { p.Lock() p.d.Destroy() @@ -432,27 +408,6 @@ func (p *plug) serveDNSInternal(ctx context.Context, w dns.ResponseWriter, r *dn } p.RUnlock() - // is it in hosts? - if val, ok := p.hosts[host]; ok { - // it is, if it's a loopback host, reply with NXDOMAIN - // TODO: research if it's better than 127.0.0.1 - if false && val.IsLoopback() { - rcode, err := p.writeNXdomain(ctx, w, r) - if err != nil { - return rcode, dnsfilter.Result{}, err - } - return rcode, dnsfilter.Result{Reason: dnsfilter.FilteredInvalid}, err - } - // it's not a loopback host, replace it with value specified - rcode, err := p.replaceHostWithValAndReply(ctx, w, r, host, val.String(), question) - if err != nil { - return rcode, dnsfilter.Result{}, err - } - // TODO: This must be handled in the dnsfilter and not here! - rule := val.String() + " " + host - return rcode, dnsfilter.Result{Reason: dnsfilter.FilteredBlackList, Rule: rule}, err - } - // needs to be filtered instead p.RLock() result, err := p.d.CheckHost(host) @@ -482,12 +437,22 @@ func (p *plug) serveDNSInternal(ctx context.Context, w dns.ResponseWriter, r *dn } return rcode, result, err case dnsfilter.FilteredBlackList: - // return NXdomain - rcode, err := p.writeNXdomain(ctx, w, r) - if err != nil { - return rcode, dnsfilter.Result{}, err + + if result.Ip == nil { + // return NXDomain + rcode, err := p.writeNXdomain(ctx, w, r) + if err != nil { + return rcode, dnsfilter.Result{}, err + } + return rcode, result, err + } else { + // This is a hosts-syntax rule + rcode, err := p.replaceHostWithValAndReply(ctx, w, r, host, result.Ip.String(), question) + if err != nil { + return rcode, dnsfilter.Result{}, err + } + return rcode, result, err } - return rcode, result, err case dnsfilter.FilteredInvalid: // return NXdomain rcode, err := p.writeNXdomain(ctx, w, r) diff --git a/coredns_plugin/coredns_plugin_test.go b/coredns_plugin/coredns_plugin_test.go index 17210b63..2f65cf9a 100644 --- a/coredns_plugin/coredns_plugin_test.go +++ b/coredns_plugin/coredns_plugin_test.go @@ -40,41 +40,6 @@ func TestSetup(t *testing.T) { } } -func TestEtcHostsParse(t *testing.T) { - addr := "216.239.38.120" - text := []byte(fmt.Sprintf(" %s google.com www.google.com # enforce google's safesearch ", addr)) - tmpfile, err := ioutil.TempFile("", "") - if err != nil { - t.Fatal(err) - } - if _, err = tmpfile.Write(text); err != nil { - t.Fatal(err) - } - if err = tmpfile.Close(); err != nil { - t.Fatal(err) - } - - defer os.Remove(tmpfile.Name()) - - c := caddy.NewTestController("dns", fmt.Sprintf("dnsfilter %s", tmpfile.Name())) - p, err := setupPlugin(c) - if err != nil { - t.Fatal(err) - } - - if len(p.hosts) != 2 { - t.Fatal("Expected p.hosts to have two keys") - } - - val, ok := p.hosts["google.com"] - if !ok { - t.Fatal("Expected google.com to be set in p.hosts") - } - if !val.Equal(net.ParseIP(addr)) { - t.Fatalf("Expected google.com's value %s to match %s", val, addr) - } -} - func TestEtcHostsFilter(t *testing.T) { text := []byte("127.0.0.1 doubleclick.net\n" + "127.0.0.1 example.org example.net www.example.org www.example.net") tmpfile, err := ioutil.TempFile("", "") diff --git a/dnsfilter/dnsfilter.go b/dnsfilter/dnsfilter.go index 3b3627f8..96ba2512 100644 --- a/dnsfilter/dnsfilter.go +++ b/dnsfilter/dnsfilter.go @@ -9,6 +9,7 @@ import ( "fmt" "io/ioutil" "log" + "net" "net/http" "regexp" "strings" @@ -32,9 +33,12 @@ const defaultSafebrowsingURL = "http://%s/safebrowsing-lookup-hash.html?prefixes const defaultParentalServer = "pctrl.adguard.com" const defaultParentalURL = "http://%s/check-parental-control-hash?prefixes=%s&sensitivity=%d" -// ErrInvalidSyntax is returned by AddRule when rule is invalid +// ErrInvalidSyntax is returned by AddRule when the rule is invalid var ErrInvalidSyntax = errors.New("dnsfilter: invalid rule syntax") +// ErrInvalidSyntax is returned by AddRule when the rule was already added to the filter +var ErrAlreadyExists = errors.New("dnsfilter: rule was already added") + // ErrInvalidParental is returned by EnableParental when sensitivity is not a valid value var ErrInvalidParental = errors.New("dnsfilter: invalid parental sensitivity, must be either 3, 10, 13 or 17") @@ -56,6 +60,7 @@ type rule struct { text string // text without @@ decorators or $ options shortcut string // for speeding up lookup originalText string // original text for reporting back to applications + ip net.IP // IP address (for the case when we're matching a hosts file) // options options []string // optional options after $ @@ -94,7 +99,7 @@ type Stats struct { // Dnsfilter holds added rules and performs hostname matches against the rules type Dnsfilter struct { - storage map[string]*rule // rule storage, not used for matching, needs to be key->value + storage map[string]bool // rule storage, not used for matching, just for filtering out duplicates storageMutex sync.RWMutex // rules are checked against these lists in the order defined here @@ -137,9 +142,11 @@ var ( // Result holds state of hostname check type Result struct { - IsFiltered bool `json:",omitempty"` - Reason Reason `json:",omitempty"` - Rule string `json:",omitempty"` + IsFiltered bool `json:",omitempty"` // True if the host name is filtered + Reason Reason `json:",omitempty"` // Reason for blocking / unblocking + Rule string `json:",omitempty"` // Original rule text + Ip net.IP `json:",omitempty"` // Not nil only in the case of a hosts file syntax + FilterID uint32 `json:",omitempty"` // Filter ID the rule belongs to } // Matched can be used to see if any match at all was found, no matter filtered or not @@ -199,6 +206,7 @@ func (d *Dnsfilter) CheckHost(host string) (Result, error) { // type rulesTable struct { + rulesByHost map[string]*rule rulesByShortcut map[string][]*rule rulesLeftovers []*rule sync.RWMutex @@ -206,6 +214,7 @@ type rulesTable struct { func newRulesTable() *rulesTable { return &rulesTable{ + rulesByHost: make(map[string]*rule), rulesByShortcut: make(map[string][]*rule), rulesLeftovers: make([]*rule, 0), } @@ -213,16 +222,27 @@ func newRulesTable() *rulesTable { func (r *rulesTable) Add(rule *rule) { r.Lock() - if len(rule.shortcut) == shortcutLength && enableFastLookup { + + if rule.ip != nil { + // Hosts syntax + r.rulesByHost[rule.text] = rule + } else if //noinspection GoBoolExpressions + len(rule.shortcut) == shortcutLength && enableFastLookup { + + // Adblock syntax with a shortcut r.rulesByShortcut[rule.shortcut] = append(r.rulesByShortcut[rule.shortcut], rule) } else { + + // Adblock syntax -- too short to have a shortcut r.rulesLeftovers = append(r.rulesLeftovers, rule) } r.Unlock() } func (r *rulesTable) matchByHost(host string) (Result, error) { - res, err := r.searchShortcuts(host) + + // First: examine the hosts-syntax rules + res, err := r.searchByHost(host) if err != nil { return res, err } @@ -230,6 +250,16 @@ func (r *rulesTable) matchByHost(host string) (Result, error) { return res, nil } + // Second: examine the adblock-syntax rules with shortcuts + res, err = r.searchShortcuts(host) + if err != nil { + return res, err + } + if res.Reason.Matched() { + return res, nil + } + + // Third: examine the others res, err = r.searchLeftovers(host) if err != nil { return res, err @@ -241,6 +271,17 @@ func (r *rulesTable) matchByHost(host string) (Result, error) { return Result{}, nil } +func (r *rulesTable) searchByHost(host string) (Result, error) { + + rule, ok := r.rulesByHost[host] + + if ok { + return rule.match(host) + } + + return Result{}, nil +} + func (r *rulesTable) searchShortcuts(host string) (Result, error) { // check in shortcuts first for i := 0; i < len(host); i++ { @@ -424,8 +465,21 @@ func (rule *rule) compile() error { return nil } +// Checks if the rule matches the specified host and returns a corresponding Result object func (rule *rule) match(host string) (Result, error) { res := Result{} + + if rule.ip != nil && rule.text == host { + // This is a hosts-syntax rule -- just check that the hostname matches and return the result + return Result{ + IsFiltered: true, + Reason: FilteredBlackList, + Rule: rule.originalText, + Ip: rule.ip, + FilterID: rule.listID, + }, nil + } + err := rule.compile() if err != nil { return res, err @@ -686,20 +740,27 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error { d.storageMutex.RUnlock() if exists { // already added - return ErrInvalidSyntax + return ErrAlreadyExists } if !isValidRule(input) { return ErrInvalidSyntax } + // First, check if this is a hosts-syntax rule + if d.parseEtcHosts(input, filterListID) { + // This is a valid hosts-syntax rule, no need for further parsing + return nil + } + + // Start parsing the rule rule := rule{ text: input, // will be modified originalText: input, listID: filterListID, } - // mark rule as whitelist if it starts with @@ + // Mark rule as whitelist if it starts with @@ if strings.HasPrefix(rule.text, "@@") { rule.isWhitelist = true rule.text = rule.text[2:] @@ -712,6 +773,7 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error { rule.extractShortcut() + //noinspection GoBoolExpressions if !enableDelayedCompilation { err := rule.compile() if err != nil { @@ -727,12 +789,44 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error { } d.storageMutex.Lock() - d.storage[input] = &rule + d.storage[input] = true d.storageMutex.Unlock() destination.Add(&rule) return nil } +// Parses the hosts-syntax rules. Returns false if the input string is not of hosts-syntax. +func (d *Dnsfilter) parseEtcHosts(input string, filterListID uint32) bool { + // Strip the trailing comment + ruleText := input + if pos := strings.IndexByte(ruleText, '#'); pos != -1 { + ruleText = ruleText[0:pos] + } + fields := strings.Fields(ruleText) + if len(fields) < 2 { + return false + } + addr := net.ParseIP(fields[0]) + if addr == nil { + return false + } + + d.storageMutex.Lock() + d.storage[input] = true + d.storageMutex.Unlock() + + for _, host := range fields[1:] { + rule := rule{ + text: host, + originalText: input, + listID: filterListID, + ip: addr, + } + d.blackList.Add(&rule) + } + return true +} + // matchHost is a low-level way to check only if hostname is filtered by rules, skipping expensive safebrowsing and parental lookups func (d *Dnsfilter) matchHost(host string) (Result, error) { lists := []*rulesTable{ @@ -761,7 +855,7 @@ func (d *Dnsfilter) matchHost(host string) (Result, error) { func New() *Dnsfilter { d := new(Dnsfilter) - d.storage = make(map[string]*rule) + d.storage = make(map[string]bool) d.important = newRulesTable() d.whiteList = newRulesTable() d.blackList = newRulesTable() diff --git a/dnsfilter/dnsfilter_test.go b/dnsfilter/dnsfilter_test.go index 19ae1c77..f186ecce 100644 --- a/dnsfilter/dnsfilter_test.go +++ b/dnsfilter/dnsfilter_test.go @@ -261,7 +261,7 @@ func (d *Dnsfilter) checkAddRule(t *testing.T, rule string) { func (d *Dnsfilter) checkAddRuleFail(t *testing.T, rule string) { t.Helper() err := d.AddRule(rule, 0) - if err == ErrInvalidSyntax { + if err == ErrInvalidSyntax || err == ErrAlreadyExists { return } if err != nil { @@ -281,6 +281,20 @@ func (d *Dnsfilter) checkMatch(t *testing.T, hostname string) { } } +func (d *Dnsfilter) checkMatchIp(t *testing.T, hostname string, ip string) { + t.Helper() + ret, err := d.CheckHost(hostname) + if err != nil { + t.Errorf("Error while matching host %s: %s", hostname, err) + } + if !ret.IsFiltered { + t.Errorf("Expected hostname %s to match", hostname) + } + if ret.Ip == nil || ret.Ip.String() != ip { + t.Errorf("Expected ip %s to match, actual: %v", ip, ret.Ip) + } +} + func (d *Dnsfilter) checkMatchEmpty(t *testing.T, hostname string) { t.Helper() ret, err := d.CheckHost(hostname) @@ -304,7 +318,7 @@ func loadTestRules(d *Dnsfilter) error { for scanner.Scan() { rule := scanner.Text() err = d.AddRule(rule, 0) - if err == ErrInvalidSyntax { + if err == ErrInvalidSyntax || err == ErrAlreadyExists { continue } if err != nil { @@ -345,6 +359,20 @@ func TestSanityCheck(t *testing.T) { d.checkAddRuleFail(t, "lkfaojewhoawehfwacoefawr$@#$@3413841384") } +func TestEtcHostsMatching(t *testing.T) { + d := NewForTest() + defer d.Destroy() + + addr := "216.239.38.120" + text := fmt.Sprintf(" %s google.com www.google.com # enforce google's safesearch ", addr) + + d.checkAddRule(t, text) + d.checkMatchIp(t, "google.com", addr) + d.checkMatchIp(t, "www.google.com", addr) + d.checkMatchEmpty(t, "subdomain.google.com") + d.checkMatchEmpty(t, "example.org") +} + func TestSuffixMatching1(t *testing.T) { d := NewForTest() defer d.Destroy() @@ -696,6 +724,7 @@ func BenchmarkAddRule(b *testing.B) { err := d.AddRule(rule, 0) switch err { case nil: + case ErrAlreadyExists: // ignore rules which were already added case ErrInvalidSyntax: // ignore invalid syntax default: b.Fatalf("Error while adding rule %s: %s", rule, err) @@ -715,6 +744,7 @@ func BenchmarkAddRuleParallel(b *testing.B) { } switch err { case nil: + case ErrAlreadyExists: // ignore rules which were already added case ErrInvalidSyntax: // ignore invalid syntax default: b.Fatalf("Error while adding rule %s: %s", rule, err) diff --git a/dnsfilter/helpers.go b/dnsfilter/helpers.go index fb34cff9..68d4ba26 100644 --- a/dnsfilter/helpers.go +++ b/dnsfilter/helpers.go @@ -19,11 +19,17 @@ func isValidRule(rule string) bool { return false } + // Filter out all sorts of cosmetic rules: + // https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#cosmetic-rules masks := []string{ "##", "#@#", + "#?#", + "#@?#", "#$#", "#@$#", + "#?$#", + "#@?$#", "$$", "$@$", "#%#",