7c35d208b1
Updates #1273. Squashed commit of the following: commit 55b78153b1b775c855e759011141bbbe6d4b962c Author: Artem Baskal <a.baskal@adguard.com> Date: Fri Apr 2 16:55:39 2021 +0300 Update client_info in case of null commit 5c80c1438ed9d961af11617831b704d6ae15cc34 Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Fri Apr 2 16:24:14 2021 +0300 querylog: always set client_info commit b48efd64d757cc0bcf5b34de22fdd0b0464d98a6 Merge: 4ed7eab523c9f528
Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Fri Apr 2 16:22:08 2021 +0300 Merge branch 'master' into 1273-querylog-client-name commit 4ed7eab52b6b5b0c0ddb5aa5a3225a62d1f9265b Merge: dbf990eb70d4c70e
Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Fri Apr 2 12:57:17 2021 +0300 Merge branch 'master' into 1273-querylog-client-name commit dbf990eb881116754554270e7b691b5db8e9ee34 Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Fri Apr 2 12:56:13 2021 +0300 home: imp names commit c2cfdef494ca26fff62b9fa008f1b389d9d4d46b Author: Artem Baskal <a.baskal@adguard.com> Date: Thu Apr 1 19:26:04 2021 +0300 Rename to whois commit e3cc4a68ee576770b1922680155308e33bed31e8 Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Thu Apr 1 19:03:42 2021 +0300 home: imp whois more commit 3b8ef8691c298aff35946b35923ef2e5b1f9bbbe Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Thu Apr 1 18:51:14 2021 +0300 home: imp whois resp commit fb97e0d74976723a512d6ff4c69e830fe59c8df8 Author: Artem Baskal <a.baskal@adguard.com> Date: Thu Apr 1 18:00:03 2021 +0300 Fix client_info ids prop types commit 298005189e372651ceff453e88aca19ee925a138 Author: Artem Baskal <a.baskal@adguard.com> Date: Thu Apr 1 17:58:14 2021 +0300 Adapt changes on client commit aa1769f64197d865478a66271da483babfc5dfd0 Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Thu Apr 1 17:18:36 2021 +0300 all: add more fields to querylog client commit 4b2a2dbd380ec410f3068d15ea16430912e03e33 Merge: cda92c3f2e4e2f62
Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Thu Apr 1 16:57:26 2021 +0300 Merge branch 'master' into 1273-querylog-client-name commit cda92c3f0331cbac252f3163d31457f716bc7f2c Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Mon Mar 29 18:03:51 2021 +0300 querylog: fix windows tests commit 5a56f0a32608869ed93a38f18f63ea3a20f7bde2 Merge: 627e4958e710ce11
Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Mon Mar 29 17:45:53 2021 +0300 Merge branch 'master' into 1273-querylog-client-name commit 627e495828e82d44cc77aa393536479f23cc68b7 Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Mon Mar 29 17:44:49 2021 +0300 querylog: add tests, imp code, docs commit 6dec468a2f0c29357875ff99458e0e8f8e580e6d Author: Ainar Garipov <A.Garipov@AdGuard.COM> Date: Fri Mar 26 16:10:47 2021 +0300 querylog: search clients by name, enrich http resp
238 lines
5.8 KiB
Go
238 lines
5.8 KiB
Go
package querylog
|
|
|
|
import (
|
|
"io"
|
|
"sort"
|
|
"time"
|
|
|
|
"github.com/AdguardTeam/golibs/log"
|
|
)
|
|
|
|
// client finds the client info, if any, by its client ID and IP address,
|
|
// optionally checking the provided cache. It will use the IP address
|
|
// regardless of if the IP anonymization is enabled now, because the
|
|
// anonymization could have been disabled in the past, and client will try to
|
|
// find those records as well.
|
|
func (l *queryLog) client(clientID, ip string, cache clientCache) (c *Client, err error) {
|
|
cck := clientCacheKey{clientID: clientID, ip: ip}
|
|
if c = cache[cck]; c != nil {
|
|
return c, nil
|
|
}
|
|
|
|
var ids []string
|
|
if clientID != "" {
|
|
ids = append(ids, clientID)
|
|
}
|
|
|
|
if ip != "" {
|
|
ids = append(ids, ip)
|
|
}
|
|
|
|
c, err = l.findClient(ids)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if cache != nil {
|
|
cache[cck] = c
|
|
}
|
|
|
|
return c, nil
|
|
}
|
|
|
|
// searchMemory looks up log records which are currently in the in-memory
|
|
// buffer. It optionally uses the client cache, if provided. It also returns
|
|
// the total amount of records in the buffer at the moment of searching.
|
|
func (l *queryLog) searchMemory(params *searchParams, cache clientCache) (entries []*logEntry, total int) {
|
|
l.bufferLock.Lock()
|
|
defer l.bufferLock.Unlock()
|
|
|
|
// Go through the buffer in the reverse order, from newer to older.
|
|
var err error
|
|
for i := len(l.buffer) - 1; i >= 0; i-- {
|
|
e := l.buffer[i]
|
|
|
|
e.client, err = l.client(e.ClientID, e.IP.String(), cache)
|
|
if err != nil {
|
|
msg := "querylog: enriching memory record at time %s" +
|
|
" for client %q (client id %q): %s"
|
|
log.Error(msg, e.Time, e.IP, e.ClientID, err)
|
|
|
|
// Go on and try to match anyway.
|
|
}
|
|
|
|
if params.match(e) {
|
|
entries = append(entries, e)
|
|
}
|
|
}
|
|
|
|
return entries, len(l.buffer)
|
|
}
|
|
|
|
// search - searches log entries in the query log using specified parameters
|
|
// returns the list of entries found + time of the oldest entry
|
|
func (l *queryLog) search(params *searchParams) ([]*logEntry, time.Time) {
|
|
now := time.Now()
|
|
|
|
if params.limit == 0 {
|
|
return []*logEntry{}, time.Time{}
|
|
}
|
|
|
|
cache := clientCache{}
|
|
fileEntries, oldest, total := l.searchFiles(params, cache)
|
|
memoryEntries, bufLen := l.searchMemory(params, cache)
|
|
total += bufLen
|
|
|
|
totalLimit := params.offset + params.limit
|
|
|
|
// now let's get a unified collection
|
|
entries := append(memoryEntries, fileEntries...)
|
|
if len(entries) > totalLimit {
|
|
// remove extra records
|
|
entries = entries[:totalLimit]
|
|
}
|
|
|
|
// Resort entries on start time to partially mitigate query log looking
|
|
// weird on the frontend.
|
|
//
|
|
// See https://github.com/AdguardTeam/AdGuardHome/issues/2293.
|
|
sort.SliceStable(entries, func(i, j int) (less bool) {
|
|
return entries[i].Time.After(entries[j].Time)
|
|
})
|
|
|
|
if params.offset > 0 {
|
|
if len(entries) > params.offset {
|
|
entries = entries[params.offset:]
|
|
} else {
|
|
entries = make([]*logEntry, 0)
|
|
oldest = time.Time{}
|
|
}
|
|
}
|
|
|
|
if len(entries) > 0 && len(entries) <= totalLimit {
|
|
// Update oldest after merging in the memory buffer.
|
|
oldest = entries[len(entries)-1].Time
|
|
}
|
|
|
|
log.Debug("QueryLog: prepared data (%d/%d) older than %s in %s",
|
|
len(entries), total, params.olderThan, time.Since(now))
|
|
|
|
return entries, oldest
|
|
}
|
|
|
|
// searchFiles looks up log records from all log files. It optionally uses the
|
|
// client cache, if provided. searchFiles does not scan more than
|
|
// maxFileScanEntries so callers may need to call it several times to get all
|
|
// results. oldset and total are the time of the oldest processed entry and the
|
|
// total number of processed entries, including discarded ones, correspondingly.
|
|
func (l *queryLog) searchFiles(
|
|
params *searchParams,
|
|
cache clientCache,
|
|
) (entries []*logEntry, oldest time.Time, total int) {
|
|
files := []string{
|
|
l.logFile + ".1",
|
|
l.logFile,
|
|
}
|
|
|
|
r, err := NewQLogReader(files)
|
|
if err != nil {
|
|
log.Error("querylog: failed to open qlog reader: %s", err)
|
|
|
|
return entries, oldest, 0
|
|
}
|
|
defer r.Close()
|
|
|
|
if params.olderThan.IsZero() {
|
|
err = r.SeekStart()
|
|
} else {
|
|
err = r.SeekTS(params.olderThan.UnixNano())
|
|
if err == nil {
|
|
// Read to the next record, because we only need the one
|
|
// that goes after it.
|
|
_, err = r.ReadNext()
|
|
}
|
|
}
|
|
|
|
if err != nil {
|
|
log.Debug("querylog: cannot seek to %s: %s", params.olderThan, err)
|
|
|
|
return entries, oldest, 0
|
|
}
|
|
|
|
totalLimit := params.offset + params.limit
|
|
oldestNano := int64(0)
|
|
|
|
// By default, we do not scan more than maxFileScanEntries at once.
|
|
// The idea is to make search calls faster so that the UI could handle
|
|
// it and show something quicker. This behavior can be overridden if
|
|
// maxFileScanEntries is set to 0.
|
|
for total < params.maxFileScanEntries || params.maxFileScanEntries <= 0 {
|
|
var e *logEntry
|
|
var ts int64
|
|
e, ts, err = l.readNextEntry(r, params, cache)
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
|
|
log.Error("querylog: reading next entry: %s", err)
|
|
}
|
|
|
|
oldestNano = ts
|
|
total++
|
|
|
|
if e != nil {
|
|
entries = append(entries, e)
|
|
if len(entries) == totalLimit {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
if oldestNano != 0 {
|
|
oldest = time.Unix(0, oldestNano)
|
|
}
|
|
|
|
return entries, oldest, total
|
|
}
|
|
|
|
// readNextEntry reads the next log entry and checks if it matches the search
|
|
// criteria. It optionally uses the client cache, if provided. e is nil if the
|
|
// entry doesn't match the search criteria. ts is the timestamp of the
|
|
// processed entry.
|
|
func (l *queryLog) readNextEntry(
|
|
r *QLogReader,
|
|
params *searchParams,
|
|
cache clientCache,
|
|
) (e *logEntry, ts int64, err error) {
|
|
var line string
|
|
line, err = r.ReadNext()
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
|
|
e = &logEntry{}
|
|
decodeLogEntry(e, line)
|
|
|
|
e.client, err = l.client(e.ClientID, e.IP.String(), cache)
|
|
if err != nil {
|
|
log.Error(
|
|
"querylog: enriching file record at time %s"+
|
|
" for client %q (client id %q): %s",
|
|
e.Time,
|
|
e.IP,
|
|
e.ClientID,
|
|
err,
|
|
)
|
|
|
|
// Go on and try to match anyway.
|
|
}
|
|
|
|
ts = e.Time.UnixNano()
|
|
if !params.match(e) {
|
|
return nil, ts, nil
|
|
}
|
|
|
|
return e, ts, nil
|
|
}
|