Pull request: querylog: imp perf
Merge in DNS/adguard-home from contains-fold to master Squashed commit of the following: commit 45c79b4b7618c8f3108766cc776b5bd3f0571761 Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Wed May 19 21:26:09 2021 +0300 querylog: imp perf
This commit is contained in:
parent
6f7fd33afd
commit
21972e49cb
|
@ -2,6 +2,8 @@ package querylog
|
|||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/AdguardTeam/AdGuardHome/internal/dnsfilter"
|
||||
)
|
||||
|
@ -63,6 +65,37 @@ func (c *searchCriterion) ctDomainOrClientCaseStrict(
|
|||
strings.EqualFold(name, term)
|
||||
}
|
||||
|
||||
// containsFold reports whether s contains substr, ignoring letter case.  Two
// runes are considered equal under Unicode simple case folding, which is the
// same notion of equality that strings.EqualFold uses.  An empty substr is
// contained in every s, including the empty one.
//
// The function doesn't allocate.
//
// TODO(a.garipov): Move to aghstrings if needed elsewhere.
func containsFold(s, substr string) (ok bool) {
	if substr == "" {
		return true
	}

	// Ranging over a string yields the byte offset of every rune start, so
	// every candidate match position is tried exactly once and s is never
	// sliced in the middle of a valid rune.
	for i := range s {
		if hasPrefixFold(s[i:], substr) {
			return true
		}
	}

	return false
}

// hasPrefixFold reports whether s begins with prefix, ignoring letter case.
// The strings are compared rune by rune as opposed to byte by byte, since
// case variants of the same letter may have different encoded lengths, for
// example "k" (one byte) and the Kelvin sign U+212A (three bytes).
func hasPrefixFold(s, prefix string) (ok bool) {
	for prefix != "" {
		if s == "" {
			return false
		}

		sr, sn := utf8.DecodeRuneInString(s)
		pr, pn := utf8.DecodeRuneInString(prefix)
		if !equalFoldRune(sr, pr) {
			return false
		}

		s, prefix = s[sn:], prefix[pn:]
	}

	return true
}

// equalFoldRune reports whether a and b are the same rune under Unicode simple
// case folding.
func equalFoldRune(a, b rune) (ok bool) {
	if a == b {
		return true
	}

	// unicode.SimpleFold only returns the next rune of the folding orbit,
	// which is not necessarily the other ASCII case: SimpleFold('s') is
	// U+017F and SimpleFold('k') is U+212A.  So walk the whole orbit, which
	// is cyclic and eventually returns to a.
	for f := unicode.SimpleFold(a); f != a; f = unicode.SimpleFold(f) {
		if f == b {
			return true
		}
	}

	return false
}
|
||||
|
||||
func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
|
||||
term string,
|
||||
clientID string,
|
||||
|
@ -70,19 +103,10 @@ func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
|
|||
host string,
|
||||
ip string,
|
||||
) (ok bool) {
|
||||
// TODO(a.garipov): Write a performant, case-insensitive version of
|
||||
// strings.Contains instead of generating garbage. Or, perhaps in the
|
||||
// future, use a locale-appropriate matcher from golang.org/x/text.
|
||||
clientID = strings.ToLower(clientID)
|
||||
host = strings.ToLower(host)
|
||||
ip = strings.ToLower(ip)
|
||||
name = strings.ToLower(name)
|
||||
term = strings.ToLower(term)
|
||||
|
||||
return strings.Contains(clientID, term) ||
|
||||
strings.Contains(host, term) ||
|
||||
strings.Contains(ip, term) ||
|
||||
strings.Contains(name, term)
|
||||
return containsFold(clientID, term) ||
|
||||
containsFold(host, term) ||
|
||||
containsFold(ip, term) ||
|
||||
containsFold(name, term)
|
||||
}
|
||||
|
||||
// quickMatch quickly checks if the line matches the given search criterion.
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
package querylog
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestContainsFold(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
inS string
|
||||
inSubstr string
|
||||
want bool
|
||||
}{{
|
||||
name: "empty",
|
||||
inS: "",
|
||||
inSubstr: "",
|
||||
want: true,
|
||||
}, {
|
||||
name: "shorter",
|
||||
inS: "a",
|
||||
inSubstr: "abc",
|
||||
want: false,
|
||||
}, {
|
||||
name: "same_len_true",
|
||||
inS: "abc",
|
||||
inSubstr: "abc",
|
||||
want: true,
|
||||
}, {
|
||||
name: "same_len_true_fold",
|
||||
inS: "abc",
|
||||
inSubstr: "aBc",
|
||||
want: true,
|
||||
}, {
|
||||
name: "same_len_false",
|
||||
inS: "abc",
|
||||
inSubstr: "def",
|
||||
want: false,
|
||||
}, {
|
||||
name: "longer_true",
|
||||
inS: "abcdedef",
|
||||
inSubstr: "def",
|
||||
want: true,
|
||||
}, {
|
||||
name: "longer_false",
|
||||
inS: "abcded",
|
||||
inSubstr: "ghi",
|
||||
want: false,
|
||||
}, {
|
||||
name: "longer_true_fold",
|
||||
inS: "abcdedef",
|
||||
inSubstr: "dEf",
|
||||
want: true,
|
||||
}, {
|
||||
name: "longer_false_fold",
|
||||
inS: "abcded",
|
||||
inSubstr: "gHi",
|
||||
want: false,
|
||||
}, {
|
||||
name: "longer_true_cyr_fold",
|
||||
inS: "абвгдедеё",
|
||||
inSubstr: "дЕЁ",
|
||||
want: true,
|
||||
}, {
|
||||
name: "longer_false_cyr_fold",
|
||||
inS: "абвгдедеё",
|
||||
inSubstr: "жЗИ",
|
||||
want: false,
|
||||
}, {
|
||||
name: "no_letters_true",
|
||||
inS: "1.2.3.4",
|
||||
inSubstr: "2.3.4",
|
||||
want: true,
|
||||
}, {
|
||||
name: "no_letters_false",
|
||||
inS: "1.2.3.4",
|
||||
inSubstr: "2.3.5",
|
||||
want: false,
|
||||
}}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if tc.want {
|
||||
assert.True(t, containsFold(tc.inS, tc.inSubstr))
|
||||
} else {
|
||||
assert.False(t, containsFold(tc.inS, tc.inSubstr))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
var sink bool
|
||||
|
||||
func BenchmarkContainsFold(b *testing.B) {
|
||||
const s = "aaahBbBhccchDDDeEehFfFhGGGhHhh"
|
||||
const substr = "HHH"
|
||||
|
||||
// Compare our implementation of containsFold against a stupid solution
|
||||
// of calling strings.ToLower and strings.Contains.
|
||||
b.Run("containsfold", func(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
sink = containsFold(s, substr)
|
||||
}
|
||||
|
||||
assert.True(b, sink)
|
||||
})
|
||||
|
||||
b.Run("tolower_contains", func(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
sink = strings.Contains(strings.ToLower(s), strings.ToLower(substr))
|
||||
}
|
||||
|
||||
assert.True(b, sink)
|
||||
})
|
||||
}
|
Loading…
Reference in New Issue