* dnsfilter: remove code for filtering rules
This commit is contained in:
parent
bd68bf2e25
commit
3396d68019
|
@ -11,9 +11,7 @@ import (
|
|||
"io/ioutil"
|
||||
"net"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
|
@ -60,33 +58,6 @@ type privateConfig struct {
|
|||
safeBrowsingServer string // access via methods
|
||||
}
|
||||
|
||||
// rule is one parsed filtering rule, written either in adblock syntax or in
// hosts-file syntax (in which case ip is set).
type rule struct {
	// text is the rule text without @@ decorators or $ options.
	text string
	// shortcut is a fragment of text used for speeding up lookup.
	shortcut string
	// originalText is the unmodified rule, kept for reporting back to applications.
	originalText string
	// ip is the IP address, set when the rule comes from a hosts-file line.
	ip net.IP

	// options holds the raw options that followed the $ sign, if any.
	options []string

	// Parsed options.
	apps        []string
	isWhitelist bool
	isImportant bool

	// listID is user-supplied data: the identifier of the list the rule came from.
	listID int64

	// Suffix matching: when isSuffix is set, hosts are compared against suffix
	// instead of running the compiled regexp.
	isSuffix bool
	suffix   string

	// compiled is the compiled regexp for rules that are not suffix rules.
	compiled *regexp.Regexp

	// The embedded lock is taken by compile and match around isSuffix, suffix
	// and compiled.
	sync.RWMutex
}
|
||||
|
||||
// LookupStats store stats collected during safebrowsing or parental checks
|
||||
type LookupStats struct {
|
||||
Requests uint64 // number of HTTP requests that were sent
|
||||
|
@ -104,14 +75,6 @@ type Stats struct {
|
|||
|
||||
// Dnsfilter holds added rules and performs hostname matches against the rules
|
||||
type Dnsfilter struct {
|
||||
storage map[string]bool // rule storage, not used for matching, just for filtering out duplicates
|
||||
storageMutex sync.RWMutex
|
||||
|
||||
// rules are checked against these lists in the order defined here
|
||||
important *rulesTable // more important than whitelist and is checked first
|
||||
whiteList *rulesTable // more important than blacklist
|
||||
blackList *rulesTable
|
||||
|
||||
// HTTP lookups for safebrowsing and parental
|
||||
client http.Client // handle for http client -- single instance as recommended by docs
|
||||
transport *http.Transport // handle for http transport used by http client
|
||||
|
@ -242,308 +205,6 @@ func (d *Dnsfilter) CheckHost(host string) (Result, error) {
|
|||
return Result{}, nil
|
||||
}
|
||||
|
||||
//
|
||||
// rules table
|
||||
//
|
||||
|
||||
type rulesTable struct {
|
||||
rulesByHost map[string]*rule
|
||||
rulesByShortcut map[string][]*rule
|
||||
rulesLeftovers []*rule
|
||||
sync.RWMutex
|
||||
}
|
||||
|
||||
func newRulesTable() *rulesTable {
|
||||
return &rulesTable{
|
||||
rulesByHost: make(map[string]*rule),
|
||||
rulesByShortcut: make(map[string][]*rule),
|
||||
rulesLeftovers: make([]*rule, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func (r *rulesTable) Add(rule *rule) {
|
||||
r.Lock()
|
||||
if rule.ip != nil {
|
||||
// Hosts syntax
|
||||
r.rulesByHost[rule.text] = rule
|
||||
} else if len(rule.shortcut) == shortcutLength && enableFastLookup {
|
||||
// Adblock syntax with a shortcut
|
||||
r.rulesByShortcut[rule.shortcut] = append(r.rulesByShortcut[rule.shortcut], rule)
|
||||
} else {
|
||||
// Adblock syntax -- too short to have a shortcut
|
||||
r.rulesLeftovers = append(r.rulesLeftovers, rule)
|
||||
}
|
||||
r.Unlock()
|
||||
}
|
||||
|
||||
func (r *rulesTable) matchByHost(host string) (Result, error) {
|
||||
// First: examine the hosts-syntax rules
|
||||
res, err := r.searchByHost(host)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
if res.Reason.Matched() {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Second: examine the adblock-syntax rules with shortcuts
|
||||
res, err = r.searchShortcuts(host)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
if res.Reason.Matched() {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Third: examine the others
|
||||
res, err = r.searchLeftovers(host)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
if res.Reason.Matched() {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
return Result{}, nil
|
||||
}
|
||||
|
||||
func (r *rulesTable) searchByHost(host string) (Result, error) {
|
||||
rule, ok := r.rulesByHost[host]
|
||||
|
||||
if ok {
|
||||
return rule.match(host)
|
||||
}
|
||||
|
||||
return Result{}, nil
|
||||
}
|
||||
|
||||
func (r *rulesTable) searchShortcuts(host string) (Result, error) {
|
||||
// check in shortcuts first
|
||||
for i := 0; i < len(host); i++ {
|
||||
shortcut := host[i:]
|
||||
if len(shortcut) > shortcutLength {
|
||||
shortcut = shortcut[:shortcutLength]
|
||||
}
|
||||
if len(shortcut) != shortcutLength {
|
||||
continue
|
||||
}
|
||||
rules, ok := r.rulesByShortcut[shortcut]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, rule := range rules {
|
||||
res, err := rule.match(host)
|
||||
// error? stop search
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
// matched? stop search
|
||||
if res.Reason.Matched() {
|
||||
return res, err
|
||||
}
|
||||
// continue otherwise
|
||||
}
|
||||
}
|
||||
return Result{}, nil
|
||||
}
|
||||
|
||||
func (r *rulesTable) searchLeftovers(host string) (Result, error) {
|
||||
for _, rule := range r.rulesLeftovers {
|
||||
res, err := rule.match(host)
|
||||
// error? stop search
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
// matched? stop search
|
||||
if res.Reason.Matched() {
|
||||
return res, err
|
||||
}
|
||||
// continue otherwise
|
||||
}
|
||||
return Result{}, nil
|
||||
}
|
||||
|
||||
// findOptionIndex locates the `$` that separates a rule from its options and
// returns the index of the first byte after it, or -1 when the text has no
// option separator.
//
// A `$` is not treated as a separator when it is escaped (`\$`) or when it is
// immediately followed by `/`, which is how a regexp rule such as `/foo$/`
// ends.
func findOptionIndex(text string) int {
	for i, r := range text {
		if r != '$' {
			continue
		}
		// ignore `\$`
		if i > 0 && text[i-1] == '\\' {
			continue
		}
		// ignore `$/`; the bounds check keeps a trailing `$` from reading
		// past the end of the text
		if i+1 < len(text) && text[i+1] == '/' {
			continue
		}
		return i + 1
	}
	return -1
}
|
||||
|
||||
func (rule *rule) extractOptions() error {
|
||||
optIndex := findOptionIndex(rule.text)
|
||||
if optIndex == 0 { // starts with $
|
||||
return ErrInvalidSyntax
|
||||
}
|
||||
if optIndex == len(rule.text) { // ends with $
|
||||
return ErrInvalidSyntax
|
||||
}
|
||||
if optIndex < 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
optionsStr := rule.text[optIndex:]
|
||||
rule.text = rule.text[:optIndex-1] // remove options from text
|
||||
|
||||
begin := 0
|
||||
i := 0
|
||||
for i = 0; i < len(optionsStr); i++ {
|
||||
switch optionsStr[i] {
|
||||
case ',':
|
||||
if i > 0 {
|
||||
// it might be escaped, if so, ignore
|
||||
if optionsStr[i-1] == '\\' {
|
||||
break // from switch, not for loop
|
||||
}
|
||||
}
|
||||
rule.options = append(rule.options, optionsStr[begin:i])
|
||||
begin = i + 1
|
||||
}
|
||||
}
|
||||
if begin != i {
|
||||
// there's still an option remaining
|
||||
rule.options = append(rule.options, optionsStr[begin:])
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rule *rule) parseOptions() error {
|
||||
err := rule.extractOptions()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, option := range rule.options {
|
||||
switch {
|
||||
case option == "important":
|
||||
rule.isImportant = true
|
||||
case strings.HasPrefix(option, "app="):
|
||||
option = strings.TrimPrefix(option, "app=")
|
||||
rule.apps = strings.Split(option, "|")
|
||||
default:
|
||||
return ErrInvalidSyntax
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rule *rule) extractShortcut() {
|
||||
// regex rules have no shortcuts
|
||||
if rule.text[0] == '/' && rule.text[len(rule.text)-1] == '/' {
|
||||
return
|
||||
}
|
||||
|
||||
fields := strings.FieldsFunc(rule.text, func(r rune) bool {
|
||||
switch r {
|
||||
case '*', '^', '|':
|
||||
return true
|
||||
}
|
||||
return false
|
||||
})
|
||||
longestField := ""
|
||||
for _, field := range fields {
|
||||
if len(field) > len(longestField) {
|
||||
longestField = field
|
||||
}
|
||||
}
|
||||
if len(longestField) > shortcutLength {
|
||||
longestField = longestField[:shortcutLength]
|
||||
}
|
||||
rule.shortcut = strings.ToLower(longestField)
|
||||
}
|
||||
|
||||
func (rule *rule) compile() error {
|
||||
rule.RLock()
|
||||
isCompiled := rule.isSuffix || rule.compiled != nil
|
||||
rule.RUnlock()
|
||||
if isCompiled {
|
||||
return nil
|
||||
}
|
||||
|
||||
isSuffix, suffix := getSuffix(rule.text)
|
||||
if isSuffix {
|
||||
rule.Lock()
|
||||
rule.isSuffix = isSuffix
|
||||
rule.suffix = suffix
|
||||
rule.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
expr, err := ruleToRegexp(rule.text)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
compiled, err := regexp.Compile(expr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rule.Lock()
|
||||
rule.compiled = compiled
|
||||
rule.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Checks if the rule matches the specified host and returns a corresponding Result object
|
||||
func (rule *rule) match(host string) (Result, error) {
|
||||
res := Result{}
|
||||
|
||||
if rule.ip != nil && rule.text == host {
|
||||
// This is a hosts-syntax rule -- just check that the hostname matches and return the result
|
||||
return Result{
|
||||
IsFiltered: true,
|
||||
Reason: FilteredBlackList,
|
||||
Rule: rule.originalText,
|
||||
IP: rule.ip,
|
||||
FilterID: rule.listID,
|
||||
}, nil
|
||||
}
|
||||
|
||||
err := rule.compile()
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
rule.RLock()
|
||||
matched := false
|
||||
if rule.isSuffix {
|
||||
if host == rule.suffix {
|
||||
matched = true
|
||||
} else if strings.HasSuffix(host, "."+rule.suffix) {
|
||||
matched = true
|
||||
}
|
||||
} else {
|
||||
matched = rule.compiled.MatchString(host)
|
||||
}
|
||||
rule.RUnlock()
|
||||
if matched {
|
||||
res.Reason = FilteredBlackList
|
||||
res.IsFiltered = true
|
||||
res.FilterID = rule.listID
|
||||
res.Rule = rule.originalText
|
||||
if rule.isWhitelist {
|
||||
res.Reason = NotFilteredWhiteList
|
||||
res.IsFiltered = false
|
||||
}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func getCachedReason(cache gcache.Cache, host string) (result Result, isFound bool, err error) {
|
||||
isFound = false // not found yet
|
||||
|
||||
|
@ -840,133 +501,11 @@ func (d *Dnsfilter) lookupCommon(host string, lookupstats *LookupStats, cache gc
|
|||
|
||||
// AddRules is a convinience function to add an array of filters in one call
|
||||
func (d *Dnsfilter) AddRules(filters []Filter) error {
|
||||
for _, f := range filters {
|
||||
for _, rule := range f.Rules {
|
||||
err := d.AddRule(rule, f.ID)
|
||||
if err == ErrAlreadyExists || err == ErrInvalidSyntax {
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("Cannot add rule %s: %s", rule, err)
|
||||
// Just ignore invalid rules
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddRule adds a rule, checking if it is a valid rule first and if it wasn't added already
|
||||
func (d *Dnsfilter) AddRule(input string, filterListID int64) error {
|
||||
input = strings.TrimSpace(input)
|
||||
d.storageMutex.RLock()
|
||||
_, exists := d.storage[input]
|
||||
d.storageMutex.RUnlock()
|
||||
if exists {
|
||||
// already added
|
||||
return ErrAlreadyExists
|
||||
}
|
||||
|
||||
if !isValidRule(input) {
|
||||
return ErrInvalidSyntax
|
||||
}
|
||||
|
||||
// First, check if this is a hosts-syntax rule
|
||||
if d.parseEtcHosts(input, filterListID) {
|
||||
// This is a valid hosts-syntax rule, no need for further parsing
|
||||
return nil
|
||||
}
|
||||
|
||||
// Start parsing the rule
|
||||
r := rule{
|
||||
text: input, // will be modified
|
||||
originalText: input,
|
||||
listID: filterListID,
|
||||
}
|
||||
|
||||
// Mark rule as whitelist if it starts with @@
|
||||
if strings.HasPrefix(r.text, "@@") {
|
||||
r.isWhitelist = true
|
||||
r.text = r.text[2:]
|
||||
}
|
||||
|
||||
err := r.parseOptions()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
r.extractShortcut()
|
||||
|
||||
if !enableDelayedCompilation {
|
||||
err := r.compile()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
destination := d.blackList
|
||||
if r.isImportant {
|
||||
destination = d.important
|
||||
} else if r.isWhitelist {
|
||||
destination = d.whiteList
|
||||
}
|
||||
|
||||
d.storageMutex.Lock()
|
||||
d.storage[input] = true
|
||||
d.storageMutex.Unlock()
|
||||
destination.Add(&r)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parses the hosts-syntax rules. Returns false if the input string is not of hosts-syntax.
|
||||
func (d *Dnsfilter) parseEtcHosts(input string, filterListID int64) bool {
|
||||
// Strip the trailing comment
|
||||
ruleText := input
|
||||
if pos := strings.IndexByte(ruleText, '#'); pos != -1 {
|
||||
ruleText = ruleText[0:pos]
|
||||
}
|
||||
fields := strings.Fields(ruleText)
|
||||
if len(fields) < 2 {
|
||||
return false
|
||||
}
|
||||
addr := net.ParseIP(fields[0])
|
||||
if addr == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
d.storageMutex.Lock()
|
||||
d.storage[input] = true
|
||||
d.storageMutex.Unlock()
|
||||
|
||||
for _, host := range fields[1:] {
|
||||
r := rule{
|
||||
text: host,
|
||||
originalText: input,
|
||||
listID: filterListID,
|
||||
ip: addr,
|
||||
}
|
||||
d.blackList.Add(&r)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// matchHost is a low-level way to check only if hostname is filtered by rules, skipping expensive safebrowsing and parental lookups
|
||||
func (d *Dnsfilter) matchHost(host string) (Result, error) {
|
||||
lists := []*rulesTable{
|
||||
d.important,
|
||||
d.whiteList,
|
||||
d.blackList,
|
||||
}
|
||||
|
||||
for _, table := range lists {
|
||||
res, err := table.matchByHost(host)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
if res.Reason.Matched() {
|
||||
return res, nil
|
||||
}
|
||||
}
|
||||
return Result{}, nil
|
||||
}
|
||||
|
||||
|
@ -1061,11 +600,6 @@ func (d *Dnsfilter) createCustomDialContext(resolverAddr string) dialFunctionTyp
|
|||
func New(c *Config) *Dnsfilter {
|
||||
d := new(Dnsfilter)
|
||||
|
||||
d.storage = make(map[string]bool)
|
||||
d.important = newRulesTable()
|
||||
d.whiteList = newRulesTable()
|
||||
d.blackList = newRulesTable()
|
||||
|
||||
// Customize the Transport to have larger connection pool,
|
||||
// We are not (re)using http.DefaultTransport because of race conditions found by tests
|
||||
d.transport = &http.Transport{
|
||||
|
@ -1141,8 +675,3 @@ func (d *Dnsfilter) SafeSearchDomain(host string) (string, bool) {
|
|||
func (d *Dnsfilter) GetStats() Stats {
|
||||
return stats
|
||||
}
|
||||
|
||||
// Count returns number of rules added to filter
|
||||
func (d *Dnsfilter) Count() int {
|
||||
return len(d.storage)
|
||||
}
|
||||
|
|
|
@ -1,49 +1,9 @@
|
|||
package dnsfilter
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// isValidRule reports whether the text can be used as a filtering rule. It
// rejects texts shorter than four bytes, comment lines starting with `!` or
// `#`, the `[Adblock` header line, and anything containing a cosmetic-rule
// marker.
func isValidRule(rule string) bool {
	if len(rule) < 4 {
		return false
	}

	switch rule[0] {
	case '!', '#':
		return false
	}

	if strings.HasPrefix(rule, "[Adblock") {
		return false
	}

	// Filter out all sorts of cosmetic rules:
	// https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#cosmetic-rules
	cosmeticMarkers := [...]string{
		"##",
		"#@#",
		"#?#",
		"#@?#",
		"#$#",
		"#@$#",
		"#?$#",
		"#@?$#",
		"$$",
		"$@$",
		"#%#",
		"#@%#",
	}
	for _, marker := range cosmeticMarkers {
		if strings.Contains(rule, marker) {
			return false
		}
	}

	return true
}
|
||||
|
||||
func updateMax(valuePtr *int64, maxPtr *int64) {
|
||||
for {
|
||||
current := atomic.LoadInt64(valuePtr)
|
||||
|
|
|
@ -1,91 +0,0 @@
|
|||
package dnsfilter
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func ruleToRegexp(rule string) (string, error) {
|
||||
const hostStart = `(?:^|\.)`
|
||||
const hostEnd = `$`
|
||||
|
||||
// empty or short rule -- do nothing
|
||||
if !isValidRule(rule) {
|
||||
return "", ErrInvalidSyntax
|
||||
}
|
||||
|
||||
// if starts with / and ends with /, it's already a regexp, just strip the slashes
|
||||
if rule[0] == '/' && rule[len(rule)-1] == '/' {
|
||||
return rule[1 : len(rule)-1], nil
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
|
||||
if rule[0] == '|' && rule[1] == '|' {
|
||||
sb.WriteString(hostStart)
|
||||
rule = rule[2:]
|
||||
}
|
||||
|
||||
for i, r := range rule {
|
||||
switch {
|
||||
case r == '?' || r == '.' || r == '+' || r == '[' || r == ']' || r == '(' || r == ')' || r == '{' || r == '}' || r == '#' || r == '\\' || r == '$':
|
||||
sb.WriteRune('\\')
|
||||
sb.WriteRune(r)
|
||||
case r == '|' && i == 0:
|
||||
// | at start and it's not || at start
|
||||
sb.WriteRune('^')
|
||||
case r == '|' && i == len(rule)-1:
|
||||
// | at end
|
||||
sb.WriteRune('$')
|
||||
case r == '|' && i != 0 && i != len(rule)-1:
|
||||
sb.WriteString(`\|`)
|
||||
case r == '*':
|
||||
sb.WriteString(`.*`)
|
||||
case r == '^':
|
||||
sb.WriteString(hostEnd)
|
||||
default:
|
||||
sb.WriteRune(r)
|
||||
}
|
||||
}
|
||||
|
||||
return sb.String(), nil
|
||||
}
|
||||
|
||||
// getSuffix recognises plain suffix rules such as ||example.com^, which match
// either exactly example.com or any *.example.com, and returns the bare
// suffix for them.
//
// A suffix rule starts with `||`, ends with `^`, `|`, `^^` or `^|`, and has no
// `|` or `*` in between. For anything else, including regexp rules wrapped in
// slashes, rules too short to carry a suffix, and rules whose suffix would be
// empty, it returns false and an empty string.
func getSuffix(rule string) (bool, string) {
	// Too short to hold `||` plus anything else; this also keeps the index
	// expressions below in range.
	if len(rule) < 2 {
		return false, ""
	}

	// if starts with / and ends with /, it's already a regexp
	// TODO: if a regexp is simple `/abracadabra$/`, then simplify it maybe?
	if rule[0] == '/' && rule[len(rule)-1] == '/' {
		return false, ""
	}

	// must start with ||
	if !strings.HasPrefix(rule, "||") {
		return false, ""
	}
	suffix := rule[2:]
	if suffix == "" {
		return false, ""
	}

	// suffix rule must end with ^ or |
	lastChar := suffix[len(suffix)-1]
	if lastChar != '^' && lastChar != '|' {
		return false, ""
	}
	// last char was checked, eat it
	suffix = suffix[:len(suffix)-1]

	// it might also end with ^| -- drop that one extra ^ if it is there
	suffix = strings.TrimSuffix(suffix, "^")

	// An empty suffix would match every host ending in a dot, and a suffix
	// with special characters inside needs the regexp path.
	if suffix == "" || strings.ContainsAny(suffix, "|*") {
		return false, ""
	}

	return true, suffix
}
|
Loading…
Reference in New Issue