Pull request: querylog: more opt

Updates 1273.

Squashed commit of the following:

commit 167c0b5acaab8a2676de2cea556c861dc0efbc72
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Mon Apr 12 18:12:43 2021 +0300

    querylog: imp naming

commit 5010ad113e46335011a721cbcc9fc9b1fc623722
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Mon Apr 12 17:53:41 2021 +0300

    querylog: more opt
This commit is contained in:
Ainar Garipov 2021-04-12 18:22:11 +03:00
parent 279350e4a3
commit 7c6557b05e
9 changed files with 209 additions and 80 deletions

View File

@ -243,6 +243,33 @@ on GitHub and most other Markdown renderers. -->
* Use named returns to improve readability of function signatures. * Use named returns to improve readability of function signatures.
* When naming a file which defines an enitity, use singular nouns, unless the
entity is some form of a container for other entities:
```go
// File: client.go
package foo
type Client struct {
// …
}
```
```go
// File: clients.go
package foo
type Clients []*Client
// …
type ClientsWithCache struct {
// …
}
```
### <a id="testing" href="#testing">Testing</a> ### <a id="testing" href="#testing">Testing</a>
* Use `assert.NoError` and `require.NoError` instead of `assert.Nil` and * Use `assert.NoError` and `require.NoError` instead of `assert.Nil` and

View File

@ -575,18 +575,3 @@ func decodeLogEntry(ent *logEntry, str string) {
} }
} }
} }
// Get value from "key":"value"
func readJSONValue(s, name string) string {
i := strings.Index(s, "\""+name+"\":\"")
if i == -1 {
return ""
}
start := i + 1 + len(name) + 3
i = strings.IndexByte(s[start:], '"')
if i == -1 {
return ""
}
end := start + i
return s[start:end]
}

View File

@ -126,15 +126,15 @@ func getDoubleQuotesEnclosedValue(s *string) bool {
return false return false
} }
// parseSearchCriteria - parses "searchCriteria" from the specified query parameter // parseSearchCriterion parses a search criterion from the query parameter.
func (l *queryLog) parseSearchCriteria(q url.Values, name string, ct criteriaType) (bool, searchCriteria, error) { func (l *queryLog) parseSearchCriterion(q url.Values, name string, ct criterionType) (bool, searchCriterion, error) {
val := q.Get(name) val := q.Get(name)
if len(val) == 0 { if len(val) == 0 {
return false, searchCriteria{}, nil return false, searchCriterion{}, nil
} }
c := searchCriteria{ c := searchCriterion{
criteriaType: ct, criterionType: ct,
value: val, value: val,
} }
if getDoubleQuotesEnclosedValue(&c.value) { if getDoubleQuotesEnclosedValue(&c.value) {
@ -175,15 +175,15 @@ func (l *queryLog) parseSearchParams(r *http.Request) (p *searchParams, err erro
p.maxFileScanEntries = 0 p.maxFileScanEntries = 0
} }
paramNames := map[string]criteriaType{ paramNames := map[string]criterionType{
"search": ctDomainOrClient, "search": ctDomainOrClient,
"response_status": ctFilteringStatus, "response_status": ctFilteringStatus,
} }
for k, v := range paramNames { for k, v := range paramNames {
var ok bool var ok bool
var c searchCriteria var c searchCriterion
ok, c, err = l.parseSearchCriteria(q, k, v) ok, c, err = l.parseSearchCriterion(q, k, v)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -53,11 +53,11 @@ func TestQueryLog(t *testing.T) {
testCases := []struct { testCases := []struct {
name string name string
sCr []searchCriteria sCr []searchCriterion
want []tcAssertion want []tcAssertion
}{{ }{{
name: "all", name: "all",
sCr: []searchCriteria{}, sCr: []searchCriterion{},
want: []tcAssertion{ want: []tcAssertion{
{num: 0, host: "example.com", answer: net.IPv4(1, 1, 1, 4), client: net.IPv4(2, 2, 2, 4)}, {num: 0, host: "example.com", answer: net.IPv4(1, 1, 1, 4), client: net.IPv4(2, 2, 2, 4)},
{num: 1, host: "test.example.org", answer: net.IPv4(1, 1, 1, 3), client: net.IPv4(2, 2, 2, 3)}, {num: 1, host: "test.example.org", answer: net.IPv4(1, 1, 1, 3), client: net.IPv4(2, 2, 2, 3)},
@ -66,8 +66,8 @@ func TestQueryLog(t *testing.T) {
}, },
}, { }, {
name: "by_domain_strict", name: "by_domain_strict",
sCr: []searchCriteria{{ sCr: []searchCriterion{{
criteriaType: ctDomainOrClient, criterionType: ctDomainOrClient,
strict: true, strict: true,
value: "TEST.example.org", value: "TEST.example.org",
}}, }},
@ -76,8 +76,8 @@ func TestQueryLog(t *testing.T) {
}}, }},
}, { }, {
name: "by_domain_non-strict", name: "by_domain_non-strict",
sCr: []searchCriteria{{ sCr: []searchCriterion{{
criteriaType: ctDomainOrClient, criterionType: ctDomainOrClient,
strict: false, strict: false,
value: "example.ORG", value: "example.ORG",
}}, }},
@ -88,8 +88,8 @@ func TestQueryLog(t *testing.T) {
}, },
}, { }, {
name: "by_client_ip_strict", name: "by_client_ip_strict",
sCr: []searchCriteria{{ sCr: []searchCriterion{{
criteriaType: ctDomainOrClient, criterionType: ctDomainOrClient,
strict: true, strict: true,
value: "2.2.2.2", value: "2.2.2.2",
}}, }},
@ -98,8 +98,8 @@ func TestQueryLog(t *testing.T) {
}}, }},
}, { }, {
name: "by_client_ip_non-strict", name: "by_client_ip_non-strict",
sCr: []searchCriteria{{ sCr: []searchCriterion{{
criteriaType: ctDomainOrClient, criterionType: ctDomainOrClient,
strict: false, strict: false,
value: "2.2.2", value: "2.2.2",
}}, }},

View File

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"io" "io"
"os" "os"
"strings"
"sync" "sync"
"time" "time"
@ -321,11 +322,29 @@ func (q *QLogFile) readProbeLine(position int64) (string, int64, int64, error) {
return string(buffer[startLine:endLine]), lineIdx, lineEndIdx, nil return string(buffer[startLine:endLine]), lineIdx, lineEndIdx, nil
} }
// readJSONvalue reads a JSON string in form of '"key":"value"'. prefix must be
// of the form '"key":"' to generate less garbage.
func readJSONValue(s, prefix string) string {
i := strings.Index(s, prefix)
if i == -1 {
return ""
}
start := i + len(prefix)
i = strings.IndexByte(s[start:], '"')
if i == -1 {
return ""
}
end := start + i
return s[start:end]
}
// readQLogTimestamp reads the timestamp field from the query log line // readQLogTimestamp reads the timestamp field from the query log line
func readQLogTimestamp(str string) int64 { func readQLogTimestamp(str string) int64 {
val := readJSONValue(str, "T") val := readJSONValue(str, `"T":"`)
if len(val) == 0 { if len(val) == 0 {
val = readJSONValue(str, "Time") val = readJSONValue(str, `"Time":"`)
} }
if len(val) == 0 { if len(val) == 0 {

View File

@ -118,7 +118,7 @@ func (l *queryLog) readFileFirstTimeValue() int64 {
} }
buf = buf[:r] buf = buf[:r]
val := readJSONValue(string(buf), "T") val := readJSONValue(string(buf), `"T":"`)
t, err := time.Parse(time.RFC3339Nano, val) t, err := time.Parse(time.RFC3339Nano, val)
if err != nil { if err != nil {
return -1 return -1

View File

@ -171,6 +171,7 @@ func (l *queryLog) searchFiles(
for total < params.maxFileScanEntries || params.maxFileScanEntries <= 0 { for total < params.maxFileScanEntries || params.maxFileScanEntries <= 0 {
var e *logEntry var e *logEntry
var ts int64 var ts int64
e, ts, err = l.readNextEntry(r, params, cache) e, ts, err = l.readNextEntry(r, params, cache)
if err != nil { if err != nil {
if err == io.EOF { if err == io.EOF {
@ -198,6 +199,29 @@ func (l *queryLog) searchFiles(
return entries, oldest, total return entries, oldest, total
} }
// quickMatchClientFinder is a wrapper around the usual client finding function
// to make it easier to use with quick matches.
type quickMatchClientFinder struct {
client func(clientID, ip string, cache clientCache) (c *Client, err error)
cache clientCache
}
// findClient is a method that can be used as a quickMatchClientFinder.
func (f quickMatchClientFinder) findClient(clientID, ip string) (c *Client) {
var err error
c, err = f.client(clientID, ip, f.cache)
if err != nil {
log.Error("querylog: enriching file record for quick search:"+
" for client %q (client id %q): %s",
ip,
clientID,
err,
)
}
return c
}
// readNextEntry reads the next log entry and checks if it matches the search // readNextEntry reads the next log entry and checks if it matches the search
// criteria. It optionally uses the client cache, if provided. e is nil if the // criteria. It optionally uses the client cache, if provided. e is nil if the
// entry doesn't match the search criteria. ts is the timestamp of the // entry doesn't match the search criteria. ts is the timestamp of the
@ -213,6 +237,17 @@ func (l *queryLog) readNextEntry(
return nil, 0, err return nil, 0, err
} }
clientFinder := quickMatchClientFinder{
client: l.client,
cache: cache,
}
if !params.quickMatch(line, clientFinder.findClient) {
ts = readQLogTimestamp(line)
return nil, ts, nil
}
e = &logEntry{} e = &logEntry{}
decodeLogEntry(e, line) decodeLogEntry(e, line)

View File

@ -6,15 +6,15 @@ import (
"github.com/AdguardTeam/AdGuardHome/internal/dnsfilter" "github.com/AdguardTeam/AdGuardHome/internal/dnsfilter"
) )
type criteriaType int type criterionType int
const ( const (
// ctDomainOrClient is for searching by the domain name, the client's IP // ctDomainOrClient is for searching by the domain name, the client's IP
// address, or the clinet's ID. // address, or the clinet's ID.
ctDomainOrClient criteriaType = iota ctDomainOrClient criterionType = iota
// ctFilteringStatus is for searching by the filtering status. // ctFilteringStatus is for searching by the filtering status.
// //
// See (*searchCriteria).ctFilteringStatusCase for details. // See (*searchCriterion).ctFilteringStatusCase for details.
ctFilteringStatus ctFilteringStatus
) )
@ -40,17 +40,83 @@ var filteringStatusValues = []string{
filteringStatusProcessed, filteringStatusProcessed,
} }
// searchCriteria - every search request may contain a list of different search criteria // searchCriterion is a search criterion that is used to match a record.
// we use each of them to match the query type searchCriterion struct {
type searchCriteria struct { value string
value string // search criteria value criterionType criterionType
criteriaType criteriaType // type of the criteria // strict, if true, means that the criterion must be applied to the
strict bool // should we strictly match (equality) or not (indexOf) // whole value rather than the part of it. That is, equality and not
// containment.
strict bool
} }
// match - checks if the log entry matches this search criteria func (c *searchCriterion) ctDomainOrClientCaseStrict(
func (c *searchCriteria) match(entry *logEntry) bool { term string,
switch c.criteriaType { clientID string,
name string,
host string,
ip string,
) (ok bool) {
return strings.EqualFold(host, term) ||
strings.EqualFold(clientID, term) ||
strings.EqualFold(ip, term) ||
strings.EqualFold(name, term)
}
func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
term string,
clientID string,
name string,
host string,
ip string,
) (ok bool) {
// TODO(a.garipov): Write a performant, case-insensitive version of
// strings.Contains instead of generating garbage. Or, perhaps in the
// future, use a locale-appropriate matcher from golang.org/x/text.
clientID = strings.ToLower(clientID)
host = strings.ToLower(host)
ip = strings.ToLower(ip)
name = strings.ToLower(name)
term = strings.ToLower(term)
return strings.Contains(clientID, term) ||
strings.Contains(host, term) ||
strings.Contains(ip, term) ||
strings.Contains(name, term)
}
// quickMatch quickly checks if the line matches the given search criterion.
// It returns false if the like doesn't match. This method is only here for
// optimisation purposes.
func (c *searchCriterion) quickMatch(line string, findClient quickMatchClientFunc) (ok bool) {
switch c.criterionType {
case ctDomainOrClient:
host := readJSONValue(line, `"QH":"`)
ip := readJSONValue(line, `"IP":"`)
clientID := readJSONValue(line, `"CID":"`)
var name string
if cli := findClient(clientID, ip); cli != nil {
name = cli.Name
}
if c.strict {
return c.ctDomainOrClientCaseStrict(c.value, clientID, name, host, ip)
}
return c.ctDomainOrClientCaseNonStrict(c.value, clientID, name, host, ip)
case ctFilteringStatus:
// Go on, as we currently don't do quick matches against
// filtering statuses.
return true
default:
return true
}
}
// match checks if the log entry matches this search criterion.
func (c *searchCriterion) match(entry *logEntry) bool {
switch c.criterionType {
case ctDomainOrClient: case ctDomainOrClient:
return c.ctDomainOrClientCase(entry) return c.ctDomainOrClientCase(entry)
case ctFilteringStatus: case ctFilteringStatus:
@ -60,14 +126,7 @@ func (c *searchCriteria) match(entry *logEntry) bool {
return false return false
} }
func (c *searchCriteria) ctDomainOrClientCaseStrict(term, clientID, name, host, ip string) bool { func (c *searchCriterion) ctDomainOrClientCase(e *logEntry) bool {
return strings.EqualFold(host, term) ||
strings.EqualFold(clientID, term) ||
strings.EqualFold(ip, term) ||
strings.EqualFold(name, term)
}
func (c *searchCriteria) ctDomainOrClientCase(e *logEntry) bool {
clientID := e.ClientID clientID := e.ClientID
host := e.QHost host := e.QHost
@ -82,22 +141,10 @@ func (c *searchCriteria) ctDomainOrClientCase(e *logEntry) bool {
return c.ctDomainOrClientCaseStrict(term, clientID, name, host, ip) return c.ctDomainOrClientCaseStrict(term, clientID, name, host, ip)
} }
// TODO(a.garipov): Write a case-insensitive version of strings.Contains return c.ctDomainOrClientCaseNonStrict(term, clientID, name, host, ip)
// instead of generating garbage. Or, perhaps in the future, use
// a locale-appropriate matcher from golang.org/x/text.
clientID = strings.ToLower(clientID)
host = strings.ToLower(host)
ip = strings.ToLower(ip)
name = strings.ToLower(name)
term = strings.ToLower(term)
return strings.Contains(clientID, term) ||
strings.Contains(host, term) ||
strings.Contains(ip, term) ||
strings.Contains(name, term)
} }
func (c *searchCriteria) ctFilteringStatusCase(res dnsfilter.Result) bool { func (c *searchCriterion) ctFilteringStatusCase(res dnsfilter.Result) bool {
switch c.value { switch c.value {
case filteringStatusAll: case filteringStatusAll:
return true return true

View File

@ -5,7 +5,7 @@ import "time"
// searchParams represent the search query sent by the client // searchParams represent the search query sent by the client
type searchParams struct { type searchParams struct {
// searchCriteria - list of search criteria that we use to get filter results // searchCriteria - list of search criteria that we use to get filter results
searchCriteria []searchCriteria searchCriteria []searchCriterion
// olderThen - return entries that are older than this value // olderThen - return entries that are older than this value
// if not set - disregard it and return any value // if not set - disregard it and return any value
@ -27,6 +27,22 @@ func newSearchParams() *searchParams {
} }
} }
// quickMatchClientFunc is a simplified client finder for quick matches.
type quickMatchClientFunc = func(clientID, ip string) (c *Client)
// quickMatch quickly checks if the line matches the given search parameters.
// It returns false if the line doesn't match. This method is only here for
// optimisation purposes.
func (s *searchParams) quickMatch(line string, findClient quickMatchClientFunc) (ok bool) {
for _, c := range s.searchCriteria {
if !c.quickMatch(line, findClient) {
return false
}
}
return true
}
// match - checks if the logEntry matches the searchParams // match - checks if the logEntry matches the searchParams
func (s *searchParams) match(entry *logEntry) bool { func (s *searchParams) match(entry *logEntry) bool {
if !s.olderThan.IsZero() && entry.Time.UnixNano() >= s.olderThan.UnixNano() { if !s.olderThan.IsZero() && entry.Time.UnixNano() >= s.olderThan.UnixNano() {