Moved hosts-syntax matching to DnsFilter

This commit is contained in:
Andrey Meshkov 2018-10-29 15:46:58 +03:00
parent 234ab23557
commit 2b2a797cf7
6 changed files with 166 additions and 103 deletions

9
.gitignore vendored
View File

@ -11,3 +11,12 @@ debug
/dnsfilter.txt /dnsfilter.txt
/querylog.json /querylog.json
/querylog.json.1 /querylog.json.1
# Test output
dnsfilter/dnsfilter.TestLotsOfRulesLotsOfHostsMemoryUsage1.pprof
dnsfilter/dnsfilter.TestLotsOfRulesLotsOfHostsMemoryUsage2.pprof
dnsfilter/dnsfilter.TestLotsOfRulesLotsOfHostsMemoryUsage3.pprof
dnsfilter/dnsfilter.TestLotsOfRulesMemoryUsage1.pprof
dnsfilter/dnsfilter.TestLotsOfRulesMemoryUsage2.pprof
dnsfilter/dnsfilter.TestLotsOfRulesMemoryUsage3.pprof
tests/top-1m.csv

View File

@ -62,7 +62,6 @@ type plug struct {
d *dnsfilter.Dnsfilter d *dnsfilter.Dnsfilter
Next plugin.Handler Next plugin.Handler
upstream upstream.Upstream upstream upstream.Upstream
hosts map[string]net.IP
settings plugSettings settings plugSettings
sync.RWMutex sync.RWMutex
@ -82,7 +81,6 @@ func setupPlugin(c *caddy.Controller) (*plug, error) {
p := &plug{ p := &plug{
settings: defaultPluginSettings, settings: defaultPluginSettings,
d: dnsfilter.New(), d: dnsfilter.New(),
hosts: make(map[string]net.IP),
} }
filterFileNames := []string{} filterFileNames := []string{}
@ -149,9 +147,7 @@ func setupPlugin(c *caddy.Controller) (*plug, error) {
scanner := bufio.NewScanner(file) scanner := bufio.NewScanner(file)
for scanner.Scan() { for scanner.Scan() {
text := scanner.Text() text := scanner.Text()
if p.parseEtcHosts(text) {
continue
}
err = p.d.AddRule(text, uint32(i)) err = p.d.AddRule(text, uint32(i))
if err == dnsfilter.ErrInvalidSyntax { if err == dnsfilter.ErrInvalidSyntax {
continue continue
@ -231,28 +227,6 @@ func setup(c *caddy.Controller) error {
return nil return nil
} }
func (p *plug) parseEtcHosts(text string) bool {
if pos := strings.IndexByte(text, '#'); pos != -1 {
text = text[0:pos]
}
fields := strings.Fields(text)
if len(fields) < 2 {
return false
}
addr := net.ParseIP(fields[0])
if addr == nil {
return false
}
for _, host := range fields[1:] {
// debug logging for duplicate values, pretty common if you subscribe to many hosts files
// if val, ok := p.hosts[host]; ok {
// log.Printf("warning: host %s already has value %s, will overwrite it with %s", host, val, addr)
// }
p.hosts[host] = addr
}
return true
}
func (p *plug) onShutdown() error { func (p *plug) onShutdown() error {
p.Lock() p.Lock()
p.d.Destroy() p.d.Destroy()
@ -432,27 +406,6 @@ func (p *plug) serveDNSInternal(ctx context.Context, w dns.ResponseWriter, r *dn
} }
p.RUnlock() p.RUnlock()
// is it in hosts?
if val, ok := p.hosts[host]; ok {
// it is, if it's a loopback host, reply with NXDOMAIN
// TODO: research if it's better than 127.0.0.1
if false && val.IsLoopback() {
rcode, err := p.writeNXdomain(ctx, w, r)
if err != nil {
return rcode, dnsfilter.Result{}, err
}
return rcode, dnsfilter.Result{Reason: dnsfilter.FilteredInvalid}, err
}
// it's not a loopback host, replace it with value specified
rcode, err := p.replaceHostWithValAndReply(ctx, w, r, host, val.String(), question)
if err != nil {
return rcode, dnsfilter.Result{}, err
}
// TODO: This must be handled in the dnsfilter and not here!
rule := val.String() + " " + host
return rcode, dnsfilter.Result{Reason: dnsfilter.FilteredBlackList, Rule: rule}, err
}
// needs to be filtered instead // needs to be filtered instead
p.RLock() p.RLock()
result, err := p.d.CheckHost(host) result, err := p.d.CheckHost(host)
@ -482,12 +435,22 @@ func (p *plug) serveDNSInternal(ctx context.Context, w dns.ResponseWriter, r *dn
} }
return rcode, result, err return rcode, result, err
case dnsfilter.FilteredBlackList: case dnsfilter.FilteredBlackList:
// return NXdomain
if result.Ip == nil {
// return NXDomain
rcode, err := p.writeNXdomain(ctx, w, r) rcode, err := p.writeNXdomain(ctx, w, r)
if err != nil { if err != nil {
return rcode, dnsfilter.Result{}, err return rcode, dnsfilter.Result{}, err
} }
return rcode, result, err return rcode, result, err
} else {
// This is a hosts-syntax rule
rcode, err := p.replaceHostWithValAndReply(ctx, w, r, host, result.Ip.String(), question)
if err != nil {
return rcode, dnsfilter.Result{}, err
}
return rcode, result, err
}
case dnsfilter.FilteredInvalid: case dnsfilter.FilteredInvalid:
// return NXdomain // return NXdomain
rcode, err := p.writeNXdomain(ctx, w, r) rcode, err := p.writeNXdomain(ctx, w, r)

View File

@ -40,41 +40,6 @@ func TestSetup(t *testing.T) {
} }
} }
func TestEtcHostsParse(t *testing.T) {
addr := "216.239.38.120"
text := []byte(fmt.Sprintf(" %s google.com www.google.com # enforce google's safesearch ", addr))
tmpfile, err := ioutil.TempFile("", "")
if err != nil {
t.Fatal(err)
}
if _, err = tmpfile.Write(text); err != nil {
t.Fatal(err)
}
if err = tmpfile.Close(); err != nil {
t.Fatal(err)
}
defer os.Remove(tmpfile.Name())
c := caddy.NewTestController("dns", fmt.Sprintf("dnsfilter %s", tmpfile.Name()))
p, err := setupPlugin(c)
if err != nil {
t.Fatal(err)
}
if len(p.hosts) != 2 {
t.Fatal("Expected p.hosts to have two keys")
}
val, ok := p.hosts["google.com"]
if !ok {
t.Fatal("Expected google.com to be set in p.hosts")
}
if !val.Equal(net.ParseIP(addr)) {
t.Fatalf("Expected google.com's value %s to match %s", val, addr)
}
}
func TestEtcHostsFilter(t *testing.T) { func TestEtcHostsFilter(t *testing.T) {
text := []byte("127.0.0.1 doubleclick.net\n" + "127.0.0.1 example.org example.net www.example.org www.example.net") text := []byte("127.0.0.1 doubleclick.net\n" + "127.0.0.1 example.org example.net www.example.org www.example.net")
tmpfile, err := ioutil.TempFile("", "") tmpfile, err := ioutil.TempFile("", "")

View File

@ -9,6 +9,7 @@ import (
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log" "log"
"net"
"net/http" "net/http"
"regexp" "regexp"
"strings" "strings"
@ -56,6 +57,7 @@ type rule struct {
text string // text without @@ decorators or $ options text string // text without @@ decorators or $ options
shortcut string // for speeding up lookup shortcut string // for speeding up lookup
originalText string // original text for reporting back to applications originalText string // original text for reporting back to applications
ip net.IP // IP address (for the case when we're matching a hosts file)
// options // options
options []string // optional options after $ options []string // optional options after $
@ -94,7 +96,7 @@ type Stats struct {
// Dnsfilter holds added rules and performs hostname matches against the rules // Dnsfilter holds added rules and performs hostname matches against the rules
type Dnsfilter struct { type Dnsfilter struct {
storage map[string]*rule // rule storage, not used for matching, needs to be key->value storage map[string]bool // rule storage, not used for matching, just for filtering out duplicates
storageMutex sync.RWMutex storageMutex sync.RWMutex
// rules are checked against these lists in the order defined here // rules are checked against these lists in the order defined here
@ -137,9 +139,11 @@ var (
// Result holds state of hostname check // Result holds state of hostname check
type Result struct { type Result struct {
IsFiltered bool `json:",omitempty"` IsFiltered bool `json:",omitempty"` // True if the host name is filtered
Reason Reason `json:",omitempty"` Reason Reason `json:",omitempty"` // Reason for blocking / unblocking
Rule string `json:",omitempty"` Rule string `json:",omitempty"` // Original rule text
Ip net.IP `json:",omitempty"` // Not nil only in the case of a hosts file syntax
FilterID uint32 `json:",omitempty"` // Filter ID the rule belongs to
} }
// Matched can be used to see if any match at all was found, no matter filtered or not // Matched can be used to see if any match at all was found, no matter filtered or not
@ -199,6 +203,7 @@ func (d *Dnsfilter) CheckHost(host string) (Result, error) {
// //
type rulesTable struct { type rulesTable struct {
rulesByHost map[string]*rule
rulesByShortcut map[string][]*rule rulesByShortcut map[string][]*rule
rulesLeftovers []*rule rulesLeftovers []*rule
sync.RWMutex sync.RWMutex
@ -206,6 +211,7 @@ type rulesTable struct {
func newRulesTable() *rulesTable { func newRulesTable() *rulesTable {
return &rulesTable{ return &rulesTable{
rulesByHost: make(map[string]*rule),
rulesByShortcut: make(map[string][]*rule), rulesByShortcut: make(map[string][]*rule),
rulesLeftovers: make([]*rule, 0), rulesLeftovers: make([]*rule, 0),
} }
@ -213,16 +219,27 @@ func newRulesTable() *rulesTable {
func (r *rulesTable) Add(rule *rule) { func (r *rulesTable) Add(rule *rule) {
r.Lock() r.Lock()
if len(rule.shortcut) == shortcutLength && enableFastLookup {
if rule.ip != nil {
// Hosts syntax
r.rulesByHost[rule.text] = rule
} else if //noinspection GoBoolExpressions
len(rule.shortcut) == shortcutLength && enableFastLookup {
// Adblock syntax with a shortcut
r.rulesByShortcut[rule.shortcut] = append(r.rulesByShortcut[rule.shortcut], rule) r.rulesByShortcut[rule.shortcut] = append(r.rulesByShortcut[rule.shortcut], rule)
} else { } else {
// Adblock syntax -- too short to have a shortcut
r.rulesLeftovers = append(r.rulesLeftovers, rule) r.rulesLeftovers = append(r.rulesLeftovers, rule)
} }
r.Unlock() r.Unlock()
} }
func (r *rulesTable) matchByHost(host string) (Result, error) { func (r *rulesTable) matchByHost(host string) (Result, error) {
res, err := r.searchShortcuts(host)
// First: examine the hosts-syntax rules
res, err := r.searchByHost(host)
if err != nil { if err != nil {
return res, err return res, err
} }
@ -230,6 +247,16 @@ func (r *rulesTable) matchByHost(host string) (Result, error) {
return res, nil return res, nil
} }
// Second: examine the adblock-syntax rules with shortcuts
res, err = r.searchShortcuts(host)
if err != nil {
return res, err
}
if res.Reason.Matched() {
return res, nil
}
// Third: examine the others
res, err = r.searchLeftovers(host) res, err = r.searchLeftovers(host)
if err != nil { if err != nil {
return res, err return res, err
@ -241,6 +268,17 @@ func (r *rulesTable) matchByHost(host string) (Result, error) {
return Result{}, nil return Result{}, nil
} }
func (r *rulesTable) searchByHost(host string) (Result, error) {
rule, ok := r.rulesByHost[host]
if ok {
return rule.match(host)
}
return Result{}, nil
}
func (r *rulesTable) searchShortcuts(host string) (Result, error) { func (r *rulesTable) searchShortcuts(host string) (Result, error) {
// check in shortcuts first // check in shortcuts first
for i := 0; i < len(host); i++ { for i := 0; i < len(host); i++ {
@ -424,8 +462,21 @@ func (rule *rule) compile() error {
return nil return nil
} }
// Checks if the rule matches the specified host and returns a corresponding Result object
func (rule *rule) match(host string) (Result, error) { func (rule *rule) match(host string) (Result, error) {
res := Result{} res := Result{}
if rule.ip != nil && rule.text == host {
// This is a hosts-syntax rule -- just check that the hostname matches and return the result
return Result{
IsFiltered: true,
Reason: FilteredBlackList,
Rule: rule.originalText,
Ip: rule.ip,
FilterID: rule.listID,
}, nil
}
err := rule.compile() err := rule.compile()
if err != nil { if err != nil {
return res, err return res, err
@ -686,6 +737,7 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error {
d.storageMutex.RUnlock() d.storageMutex.RUnlock()
if exists { if exists {
// already added // already added
// TODO: Change the error type
return ErrInvalidSyntax return ErrInvalidSyntax
} }
@ -693,13 +745,20 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error {
return ErrInvalidSyntax return ErrInvalidSyntax
} }
// First, check if this is a hosts-syntax rule
if d.parseEtcHosts(input, filterListID) {
// This is a valid hosts-syntax rule, no need for further parsing
return nil
}
// Start parsing the rule
rule := rule{ rule := rule{
text: input, // will be modified text: input, // will be modified
originalText: input, originalText: input,
listID: filterListID, listID: filterListID,
} }
// mark rule as whitelist if it starts with @@ // Mark rule as whitelist if it starts with @@
if strings.HasPrefix(rule.text, "@@") { if strings.HasPrefix(rule.text, "@@") {
rule.isWhitelist = true rule.isWhitelist = true
rule.text = rule.text[2:] rule.text = rule.text[2:]
@ -712,6 +771,7 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error {
rule.extractShortcut() rule.extractShortcut()
//noinspection GoBoolExpressions
if !enableDelayedCompilation { if !enableDelayedCompilation {
err := rule.compile() err := rule.compile()
if err != nil { if err != nil {
@ -727,12 +787,44 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error {
} }
d.storageMutex.Lock() d.storageMutex.Lock()
d.storage[input] = &rule d.storage[input] = true
d.storageMutex.Unlock() d.storageMutex.Unlock()
destination.Add(&rule) destination.Add(&rule)
return nil return nil
} }
// Parses the hosts-syntax rules. Returns false if the input string is not of hosts-syntax.
func (d *Dnsfilter) parseEtcHosts(input string, filterListID uint32) bool {
// Strip the trailing comment
ruleText := input
if pos := strings.IndexByte(ruleText, '#'); pos != -1 {
ruleText = ruleText[0:pos]
}
fields := strings.Fields(ruleText)
if len(fields) < 2 {
return false
}
addr := net.ParseIP(fields[0])
if addr == nil {
return false
}
d.storageMutex.Lock()
d.storage[input] = true
d.storageMutex.Unlock()
for _, host := range fields[1:] {
rule := rule{
text: host,
originalText: input,
listID: filterListID,
ip: addr,
}
d.blackList.Add(&rule)
}
return true
}
// matchHost is a low-level way to check only if hostname is filtered by rules, skipping expensive safebrowsing and parental lookups // matchHost is a low-level way to check only if hostname is filtered by rules, skipping expensive safebrowsing and parental lookups
func (d *Dnsfilter) matchHost(host string) (Result, error) { func (d *Dnsfilter) matchHost(host string) (Result, error) {
lists := []*rulesTable{ lists := []*rulesTable{
@ -761,7 +853,7 @@ func (d *Dnsfilter) matchHost(host string) (Result, error) {
func New() *Dnsfilter { func New() *Dnsfilter {
d := new(Dnsfilter) d := new(Dnsfilter)
d.storage = make(map[string]*rule) d.storage = make(map[string]bool)
d.important = newRulesTable() d.important = newRulesTable()
d.whiteList = newRulesTable() d.whiteList = newRulesTable()
d.blackList = newRulesTable() d.blackList = newRulesTable()

View File

@ -281,6 +281,20 @@ func (d *Dnsfilter) checkMatch(t *testing.T, hostname string) {
} }
} }
func (d *Dnsfilter) checkMatchIp(t *testing.T, hostname string, ip string) {
t.Helper()
ret, err := d.CheckHost(hostname)
if err != nil {
t.Errorf("Error while matching host %s: %s", hostname, err)
}
if !ret.IsFiltered {
t.Errorf("Expected hostname %s to match", hostname)
}
if ret.Ip == nil || ret.Ip.String() != ip {
t.Errorf("Expected ip %s to match, actual: %v", ip, ret.Ip)
}
}
func (d *Dnsfilter) checkMatchEmpty(t *testing.T, hostname string) { func (d *Dnsfilter) checkMatchEmpty(t *testing.T, hostname string) {
t.Helper() t.Helper()
ret, err := d.CheckHost(hostname) ret, err := d.CheckHost(hostname)
@ -345,6 +359,20 @@ func TestSanityCheck(t *testing.T) {
d.checkAddRuleFail(t, "lkfaojewhoawehfwacoefawr$@#$@3413841384") d.checkAddRuleFail(t, "lkfaojewhoawehfwacoefawr$@#$@3413841384")
} }
func TestEtcHostsMatching(t *testing.T) {
d := NewForTest()
defer d.Destroy()
addr := "216.239.38.120"
text := fmt.Sprintf(" %s google.com www.google.com # enforce google's safesearch ", addr)
d.checkAddRule(t, text)
d.checkMatchIp(t, "google.com", addr)
d.checkMatchIp(t, "www.google.com", addr)
d.checkMatchEmpty(t, "subdomain.google.com")
d.checkMatchEmpty(t, "example.org")
}
func TestSuffixMatching1(t *testing.T) { func TestSuffixMatching1(t *testing.T) {
d := NewForTest() d := NewForTest()
defer d.Destroy() defer d.Destroy()

View File

@ -19,11 +19,17 @@ func isValidRule(rule string) bool {
return false return false
} }
// Filter out all sorts of cosmetic rules:
// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#cosmetic-rules
masks := []string{ masks := []string{
"##", "##",
"#@#", "#@#",
"#?#",
"#@?#",
"#$#", "#$#",
"#@$#", "#@$#",
"#?$#",
"#@?$#",
"$$", "$$",
"$@$", "$@$",
"#%#", "#%#",