Pull request: querylog: imp perf
Merge in DNS/adguard-home from contains-fold to master Squashed commit of the following: commit 45c79b4b7618c8f3108766cc776b5bd3f0571761 Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Wed May 19 21:26:09 2021 +0300 querylog: imp perf
This commit is contained in:
parent
6f7fd33afd
commit
21972e49cb
|
@ -2,6 +2,8 @@ package querylog
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"strings"
|
"strings"
|
||||||
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/AdguardTeam/AdGuardHome/internal/dnsfilter"
|
"github.com/AdguardTeam/AdGuardHome/internal/dnsfilter"
|
||||||
)
|
)
|
||||||
|
@ -63,6 +65,37 @@ func (c *searchCriterion) ctDomainOrClientCaseStrict(
|
||||||
strings.EqualFold(name, term)
|
strings.EqualFold(name, term)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// containsFold reports whether s contains substr, ignoring letter case.  The
// comparison uses Unicode simple case folding, the same notion of equality as
// strings.EqualFold, and does not allocate.
//
// Matching is done rune by rune rather than on byte windows, because runes
// that are equal under folding may have different encoded lengths, e.g. "k"
// and the Kelvin sign U+212A.  An empty substr is contained in any s.
//
// TODO(a.garipov): Move to aghstrings if needed elsewhere.
func containsFold(s, substr string) (ok bool) {
	if substr == "" {
		return true
	}

	first, _ := utf8.DecodeRuneInString(substr)

	// Ranging over a string visits only the offsets where a rune starts, so
	// a candidate match never begins in the middle of a multi-byte sequence.
	for i, r := range s {
		// Cheap filter: only attempt a full prefix comparison where the
		// first rune already matches.
		if equalRuneFold(r, first) && hasPrefixFold(s[i:], substr) {
			return true
		}
	}

	return false
}

// hasPrefixFold reports whether s begins with prefix, ignoring letter case.
func hasPrefixFold(s, prefix string) (ok bool) {
	for prefix != "" {
		if s == "" {
			return false
		}

		pr, pSize := utf8.DecodeRuneInString(prefix)
		sr, sSize := utf8.DecodeRuneInString(s)
		if !equalRuneFold(sr, pr) {
			return false
		}

		s, prefix = s[sSize:], prefix[pSize:]
	}

	return true
}

// equalRuneFold reports whether a and b are equal under Unicode simple case
// folding.
func equalRuneFold(a, b rune) (ok bool) {
	if a == b {
		return true
	}

	// unicode.SimpleFold returns only the next rune of the folding orbit,
	// and an orbit may contain more than two runes, e.g. 'K', 'k', and
	// U+212A, or 'S', 's', and U+017F.  Walk the whole orbit until it wraps
	// around to a again.
	for r := unicode.SimpleFold(a); r != a; r = unicode.SimpleFold(r) {
		if r == b {
			return true
		}
	}

	return false
}
|
||||||
|
|
||||||
func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
|
func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
|
||||||
term string,
|
term string,
|
||||||
clientID string,
|
clientID string,
|
||||||
|
@ -70,19 +103,10 @@ func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
|
||||||
host string,
|
host string,
|
||||||
ip string,
|
ip string,
|
||||||
) (ok bool) {
|
) (ok bool) {
|
||||||
// TODO(a.garipov): Write a performant, case-insensitive version of
|
return containsFold(clientID, term) ||
|
||||||
// strings.Contains instead of generating garbage. Or, perhaps in the
|
containsFold(host, term) ||
|
||||||
// future, use a locale-appropriate matcher from golang.org/x/text.
|
containsFold(ip, term) ||
|
||||||
clientID = strings.ToLower(clientID)
|
containsFold(name, term)
|
||||||
host = strings.ToLower(host)
|
|
||||||
ip = strings.ToLower(ip)
|
|
||||||
name = strings.ToLower(name)
|
|
||||||
term = strings.ToLower(term)
|
|
||||||
|
|
||||||
return strings.Contains(clientID, term) ||
|
|
||||||
strings.Contains(host, term) ||
|
|
||||||
strings.Contains(ip, term) ||
|
|
||||||
strings.Contains(name, term)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// quickMatch quickly checks if the line matches the given search criterion.
|
// quickMatch quickly checks if the line matches the given search criterion.
|
||||||
|
|
|
@ -0,0 +1,121 @@
|
||||||
|
package querylog
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestContainsFold(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
inS string
|
||||||
|
inSubstr string
|
||||||
|
want bool
|
||||||
|
}{{
|
||||||
|
name: "empty",
|
||||||
|
inS: "",
|
||||||
|
inSubstr: "",
|
||||||
|
want: true,
|
||||||
|
}, {
|
||||||
|
name: "shorter",
|
||||||
|
inS: "a",
|
||||||
|
inSubstr: "abc",
|
||||||
|
want: false,
|
||||||
|
}, {
|
||||||
|
name: "same_len_true",
|
||||||
|
inS: "abc",
|
||||||
|
inSubstr: "abc",
|
||||||
|
want: true,
|
||||||
|
}, {
|
||||||
|
name: "same_len_true_fold",
|
||||||
|
inS: "abc",
|
||||||
|
inSubstr: "aBc",
|
||||||
|
want: true,
|
||||||
|
}, {
|
||||||
|
name: "same_len_false",
|
||||||
|
inS: "abc",
|
||||||
|
inSubstr: "def",
|
||||||
|
want: false,
|
||||||
|
}, {
|
||||||
|
name: "longer_true",
|
||||||
|
inS: "abcdedef",
|
||||||
|
inSubstr: "def",
|
||||||
|
want: true,
|
||||||
|
}, {
|
||||||
|
name: "longer_false",
|
||||||
|
inS: "abcded",
|
||||||
|
inSubstr: "ghi",
|
||||||
|
want: false,
|
||||||
|
}, {
|
||||||
|
name: "longer_true_fold",
|
||||||
|
inS: "abcdedef",
|
||||||
|
inSubstr: "dEf",
|
||||||
|
want: true,
|
||||||
|
}, {
|
||||||
|
name: "longer_false_fold",
|
||||||
|
inS: "abcded",
|
||||||
|
inSubstr: "gHi",
|
||||||
|
want: false,
|
||||||
|
}, {
|
||||||
|
name: "longer_true_cyr_fold",
|
||||||
|
inS: "абвгдедеё",
|
||||||
|
inSubstr: "дЕЁ",
|
||||||
|
want: true,
|
||||||
|
}, {
|
||||||
|
name: "longer_false_cyr_fold",
|
||||||
|
inS: "абвгдедеё",
|
||||||
|
inSubstr: "жЗИ",
|
||||||
|
want: false,
|
||||||
|
}, {
|
||||||
|
name: "no_letters_true",
|
||||||
|
inS: "1.2.3.4",
|
||||||
|
inSubstr: "2.3.4",
|
||||||
|
want: true,
|
||||||
|
}, {
|
||||||
|
name: "no_letters_false",
|
||||||
|
inS: "1.2.3.4",
|
||||||
|
inSubstr: "2.3.5",
|
||||||
|
want: false,
|
||||||
|
}}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
if tc.want {
|
||||||
|
assert.True(t, containsFold(tc.inS, tc.inSubstr))
|
||||||
|
} else {
|
||||||
|
assert.False(t, containsFold(tc.inS, tc.inSubstr))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var sink bool
|
||||||
|
|
||||||
|
func BenchmarkContainsFold(b *testing.B) {
|
||||||
|
const s = "aaahBbBhccchDDDeEehFfFhGGGhHhh"
|
||||||
|
const substr = "HHH"
|
||||||
|
|
||||||
|
// Compare our implementation of containsFold against a stupid solution
|
||||||
|
// of calling strings.ToLower and strings.Contains.
|
||||||
|
b.Run("containsfold", func(b *testing.B) {
|
||||||
|
b.ReportAllocs()
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
sink = containsFold(s, substr)
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.True(b, sink)
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("tolower_contains", func(b *testing.B) {
|
||||||
|
b.ReportAllocs()
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
sink = strings.Contains(strings.ToLower(s), strings.ToLower(substr))
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.True(b, sink)
|
||||||
|
})
|
||||||
|
}
|
Loading…
Reference in New Issue