* querylog: POST /control/querylog

This commit is contained in:
Simon Zolin 2019-09-16 17:07:18 +03:00
parent 215a488a64
commit 81828c87c1
6 changed files with 579 additions and 113 deletions

View File

@ -43,6 +43,7 @@ Contents:
* API: Set statistics parameters
* API: Get statistics parameters
* Query logs
* API: Get query log
* API: Set querylog parameters
* API: Get querylog parameters
* Filtering
@ -1007,6 +1008,92 @@ Response:
## Query logs
When a new DNS request is received and processed, we store information about this event in "query log". It is a file on disk in JSON format:
{
"Question":"...","
Answer":"...",
"Result":{
"IsFiltered":true,
"Reason":3,
"Rule":"...",
"FilterID":1
},
"Time":"...",
"Elapsed":12345,
"IP":"127.0.0.1"
}
### Adding new data
First, new data is stored in a memory region. When this array is filled to a particular amount of entries (e.g. 5000), we flush this data to a file and clear the array.
### Getting data
When UI asks for data from query log (see "API: Get query log"), server reads the newest entries from memory array and the file. The maximum number of items returned per one request is limited by configuration.
### Removing old data
We store data for a limited amount of time - the log file is automatically rotated.
### API: Get query log
Request:
POST /control/querylog
{
older_than: "2006-01-02T15:04:05.999999999Z07:00" // must be "" for the first request
filter:{
domain: "..."
client: "..."
question_type: "A" | "AAAA"
response_status: "" | "filtered"
}
}
If `older_than` value is set, server returns the next chunk of entries that are older than this time stamp. This setting is used for paging. UI sets this value to `""` on the first request and gets the latest log entries. To get the older entries, UI sets this value to the timestamp of the last (the oldest) entry from the previous response from Server.
If "filter" settings are set, server returns only entries that match the specified request.
For `filter.domain` and `filter.client` the server matches substrings by default: `adguard.com` matches `www.adguard.com`. Strict matching can be enabled by enclosing the value in double quotes: `"adguard.com"` matches `adguard.com` but doesn't match `www.adguard.com`.
Response:
[
{
"answer":[
{
"ttl":10,
"type":"AAAA",
"value":"::"
}
...
],
"client":"127.0.0.1",
"elapsedMs":"0.098403",
"filterId":1,
"question":{
"class":"IN",
"host":"doubleclick.net",
"type":"AAAA"
},
"reason":"FilteredBlackList",
"rule":"||doubleclick.net^",
"status":"NOERROR",
"time":"2006-01-02T15:04:05.999999999Z07:00"
}
...
]
The most recent entries are at the top of list.
### API: Set querylog parameters
Request:

View File

@ -3,12 +3,81 @@ package home
import (
"encoding/json"
"net/http"
"time"
"github.com/AdguardTeam/AdGuardHome/querylog"
"github.com/miekg/dns"
)
type qlogFilterJSON struct {
Domain string `json:"domain"`
Client string `json:"client"`
QuestionType string `json:"question_type"`
ResponseStatus string `json:"response_status"`
}
type queryLogRequest struct {
OlderThan string `json:"older_than"`
Filter qlogFilterJSON `json:"filter"`
}
// "value" -> value, return TRUE
func getDoubleQuotesEnclosedValue(s *string) bool {
t := *s
if len(t) >= 2 && t[0] == '"' && t[len(t)-1] == '"' {
*s = t[1 : len(t)-1]
return true
}
return false
}
func handleQueryLog(w http.ResponseWriter, r *http.Request) {
data := config.queryLog.GetData()
req := queryLogRequest{}
err := json.NewDecoder(r.Body).Decode(&req)
if err != nil {
httpError(w, http.StatusBadRequest, "json decode: %s", err)
return
}
params := querylog.GetDataParams{
Domain: req.Filter.Domain,
Client: req.Filter.Client,
}
if len(req.OlderThan) != 0 {
params.OlderThan, err = time.Parse(time.RFC3339Nano, req.OlderThan)
if err != nil {
httpError(w, http.StatusBadRequest, "invalid time stamp: %s", err)
return
}
}
if getDoubleQuotesEnclosedValue(&params.Domain) {
params.StrictMatchDomain = true
}
if getDoubleQuotesEnclosedValue(&params.Client) {
params.StrictMatchClient = true
}
if len(req.Filter.QuestionType) != 0 {
qtype, ok := dns.StringToType[req.Filter.QuestionType]
if !ok {
httpError(w, http.StatusBadRequest, "invalid question_type")
return
}
params.QuestionType = qtype
}
if len(req.Filter.ResponseStatus) != 0 {
switch req.Filter.ResponseStatus {
case "filtered":
params.ResponseStatus = querylog.ResponseStatusFiltered
default:
httpError(w, http.StatusBadRequest, "invalid response_status")
return
}
}
data := config.queryLog.GetData(params)
jsonVal, err := json.Marshal(data)
if err != nil {
@ -84,7 +153,7 @@ func checkQueryLogInterval(i uint32) bool {
// RegisterQueryLogHandlers - register handlers
func RegisterQueryLogHandlers() {
httpRegister(http.MethodGet, "/control/querylog", handleQueryLog)
httpRegister("POST", "/control/querylog", handleQueryLog)
httpRegister(http.MethodGet, "/control/querylog_info", handleQueryLogInfo)
httpRegister(http.MethodPost, "/control/querylog_clear", handleQueryLogClear)
httpRegister(http.MethodPost, "/control/querylog_config", handleQueryLogConfig)

View File

@ -18,7 +18,10 @@ import (
const (
logBufferCap = 5000 // maximum capacity of logBuffer before it's flushed to disk
queryLogFileName = "querylog.json" // .gz added during compression
queryLogSize = 5000 // maximum API response for /querylog
getDataLimit = 500 // GetData(): maximum log entries to return
// maximum data chunks to parse when filtering entries
maxFilteringChunks = 10
)
// queryLog is a structure that writes and reads the DNS query log
@ -30,9 +33,6 @@ type queryLog struct {
logBuffer []*logEntry
fileFlushLock sync.Mutex // synchronize a file-flushing goroutine and main thread
flushPending bool // don't start another goroutine while the previous one is still running
cache []*logEntry
lock sync.RWMutex
}
// create a new instance of the query log
@ -41,7 +41,6 @@ func newQueryLog(conf Config) *queryLog {
l.logFile = filepath.Join(conf.BaseDir, queryLogFileName)
l.conf = conf
go l.periodicQueryLogRotate()
go l.fillFromFile()
return &l
}
@ -62,10 +61,6 @@ func (l *queryLog) Clear() {
l.flushPending = false
l.logBufferLock.Unlock()
l.lock.Lock()
l.cache = nil
l.lock.Unlock()
err := os.Remove(l.logFile + ".1")
if err != nil {
log.Error("file remove: %s: %s", l.logFile+".1", err)
@ -147,13 +142,6 @@ func (l *queryLog) Add(question *dns.Msg, answer *dns.Msg, result *dnsfilter.Res
}
}
l.logBufferLock.Unlock()
l.lock.Lock()
l.cache = append(l.cache, &entry)
if len(l.cache) > queryLogSize {
toremove := len(l.cache) - queryLogSize
l.cache = l.cache[toremove:]
}
l.lock.Unlock()
// if buffer needs to be flushed to disk, do it now
if needFlush {
@ -163,20 +151,143 @@ func (l *queryLog) Add(question *dns.Msg, answer *dns.Msg, result *dnsfilter.Res
}
}
func (l *queryLog) GetData() []map[string]interface{} {
l.lock.RLock()
values := make([]*logEntry, len(l.cache))
copy(values, l.cache)
l.lock.RUnlock()
// reverse it so that newest is first
for left, right := 0, len(values)-1; left < right; left, right = left+1, right-1 {
values[left], values[right] = values[right], values[left]
// Return TRUE if this entry is needed
func isNeeded(entry *logEntry, params GetDataParams) bool {
if params.ResponseStatus != 0 {
if params.ResponseStatus == ResponseStatusFiltered && !entry.Result.IsFiltered {
return false
}
}
// iterate
if len(params.Domain) != 0 || params.QuestionType != 0 {
m := dns.Msg{}
_ = m.Unpack(entry.Question)
if params.QuestionType != 0 {
if m.Question[0].Qtype != params.QuestionType {
return false
}
}
if len(params.Domain) != 0 && params.StrictMatchDomain {
if m.Question[0].Name != params.Domain {
return false
}
} else if len(params.Domain) != 0 {
if strings.Index(m.Question[0].Name, params.Domain) == -1 {
return false
}
}
}
if len(params.Client) != 0 && params.StrictMatchClient {
if entry.IP != params.Client {
return false
}
} else if len(params.Client) != 0 {
if strings.Index(entry.IP, params.Client) == -1 {
return false
}
}
return true
}
func (l *queryLog) readFromFile(params GetDataParams) ([]*logEntry, int) {
entries := []*logEntry{}
olderThan := params.OlderThan
totalChunks := 0
total := 0
r := l.OpenReader()
if r == nil {
return entries, 0
}
r.BeginRead(olderThan, getDataLimit)
for totalChunks < maxFilteringChunks {
first := true
newEntries := []*logEntry{}
for {
entry := r.Next()
if entry == nil {
break
}
total++
if first {
first = false
olderThan = entry.Time
}
if !isNeeded(entry, params) {
continue
}
if len(newEntries) == getDataLimit {
newEntries = newEntries[1:]
}
newEntries = append(newEntries, entry)
}
log.Debug("entries: +%d (%d) older-than:%s", len(newEntries), len(entries), olderThan)
entries = append(newEntries, entries...)
if len(entries) > getDataLimit {
toremove := len(entries) - getDataLimit
entries = entries[toremove:]
break
}
if first || len(entries) == getDataLimit {
break
}
totalChunks++
r.BeginReadPrev(olderThan, getDataLimit)
}
r.Close()
return entries, total
}
func (l *queryLog) GetData(params GetDataParams) []map[string]interface{} {
var data = []map[string]interface{}{}
for _, entry := range values {
if len(params.Domain) != 0 && params.StrictMatchDomain {
params.Domain = params.Domain + "."
}
now := time.Now()
entries := []*logEntry{}
total := 0
// add from file
entries, total = l.readFromFile(params)
if params.OlderThan.IsZero() {
params.OlderThan = now
}
// add from memory buffer
l.logBufferLock.Lock()
total += len(l.logBuffer)
for _, entry := range l.logBuffer {
if !isNeeded(entry, params) {
continue
}
if entry.Time.UnixNano() >= params.OlderThan.UnixNano() {
break
}
if len(entries) == getDataLimit {
entries = entries[1:]
}
entries = append(entries, entry)
}
l.logBufferLock.Unlock()
// process the elements from latest to oldest
for i := len(entries) - 1; i >= 0; i-- {
entry := entries[i]
var q *dns.Msg
var a *dns.Msg
@ -200,7 +311,7 @@ func (l *queryLog) GetData() []map[string]interface{} {
jsonEntry := map[string]interface{}{
"reason": entry.Result.Reason.String(),
"elapsedMs": strconv.FormatFloat(entry.Elapsed.Seconds()*1000, 'f', -1, 64),
"time": entry.Time.Format(time.RFC3339),
"time": entry.Time.Format(time.RFC3339Nano),
"client": entry.IP,
}
if q != nil {
@ -231,6 +342,8 @@ func (l *queryLog) GetData() []map[string]interface{} {
data = append(data, jsonEntry)
}
log.Debug("QueryLog: prepared data (%d/%d) older than %s in %s",
len(entries), total, params.OlderThan, time.Since(now))
return data
}

View File

@ -21,7 +21,7 @@ type QueryLog interface {
Add(question *dns.Msg, answer *dns.Msg, result *dnsfilter.Result, elapsed time.Duration, addr net.Addr, upstream string)
// Get log entries
GetData() []map[string]interface{}
GetData(params GetDataParams) []map[string]interface{}
// Clear memory buffer and remove log files
Clear()
@ -37,3 +37,23 @@ type Config struct {
func New(conf Config) QueryLog {
return newQueryLog(conf)
}
// GetDataParams - parameters for GetData()
type GetDataParams struct {
OlderThan time.Time // return entries that are older than this value
Domain string // filter by domain name in question
Client string // filter by client IP
QuestionType uint16 // filter by question type
ResponseStatus ResponseStatusType // filter by response status
StrictMatchDomain bool // if Domain value must be matched strictly
StrictMatchClient bool // if Client value must be matched strictly
}
// ResponseStatusType - response status
type ResponseStatusType int32
// Response status constants
const (
ResponseStatusAll ResponseStatusType = iota + 1
ResponseStatusFiltered
)

View File

@ -5,13 +5,13 @@ import (
"compress/gzip"
"encoding/json"
"fmt"
"io"
"os"
"sync"
"time"
"github.com/AdguardTeam/golibs/log"
"github.com/go-test/deep"
"github.com/miekg/dns"
)
var (
@ -19,6 +19,7 @@ var (
)
const enableGzip = false
const maxEntrySize = 1000
// flushLogBuffer flushes the current buffer to file and resets the current buffer
func (l *queryLog) flushLogBuffer(fullFlush bool) error {
@ -182,50 +183,232 @@ func (l *queryLog) periodicQueryLogRotate() {
// Reader is the DB reader context
type Reader struct {
f *os.File
jd *json.Decoder
now time.Time
ql *queryLog
ql *queryLog
f *os.File
jd *json.Decoder
now time.Time
validFrom int64 // UNIX time (ns)
olderThan int64 // UNIX time (ns)
files []string
ifile int
count uint64 // returned elements counter
limit uint64
count uint64 // counter for returned elements
latest bool // return the latest entries
filePrepared bool
searching bool // we're seaching for an entry with exact time stamp
fseeker fileSeeker // file seeker object
fpos uint64 // current file offset
nSeekRequests uint32 // number of Seek() requests made (finding a new line doesn't count)
}
// OpenReader locks the file and returns reader object or nil on error
type fileSeeker struct {
target uint64 // target value
pos uint64 // current offset, may be adjusted by user for increased accuracy
lastpos uint64 // the last offset returned
lo uint64 // low boundary offset
hi uint64 // high boundary offset
}
// OpenReader - return reader object
func (l *queryLog) OpenReader() *Reader {
r := Reader{}
r.ql = l
r.now = time.Now()
r.validFrom = r.now.Unix() - int64(l.conf.Interval*60*60)
r.validFrom *= 1000000000
r.files = []string{
r.ql.logFile,
r.ql.logFile + ".1",
}
return &r
}
// Close closes the reader
// Close - close the reader
func (r *Reader) Close() {
elapsed := time.Since(r.now)
var perunit time.Duration
if r.count > 0 {
perunit = elapsed / time.Duration(r.count)
}
log.Debug("querylog: read %d entries in %v, %v/entry",
r.count, elapsed, perunit)
log.Debug("querylog: read %d entries in %v, %v/entry, seek-reqs:%d",
r.count, elapsed, perunit, r.nSeekRequests)
if r.f != nil {
r.f.Close()
}
}
// BeginRead starts reading
func (r *Reader) BeginRead() {
r.files = []string{
r.ql.logFile,
r.ql.logFile + ".1",
// BeginRead - start reading
// olderThan: stop returning entries when an entry with this time is reached
// count: minimum number of entries to return
func (r *Reader) BeginRead(olderThan time.Time, count uint64) {
r.olderThan = olderThan.UnixNano()
r.latest = olderThan.IsZero()
r.limit = count
if r.latest {
r.olderThan = r.now.UnixNano()
}
r.filePrepared = false
r.searching = false
r.jd = nil
}
// Next returns the next entry or nil if reading is finished
// BeginReadPrev - start reading the previous data chunk
func (r *Reader) BeginReadPrev(olderThan time.Time, count uint64) {
r.olderThan = olderThan.UnixNano()
r.latest = olderThan.IsZero()
r.limit = count
if r.latest {
r.olderThan = r.now.UnixNano()
}
off := r.fpos - maxEntrySize*(r.limit+1)
if int64(off) < maxEntrySize {
off = 0
}
r.fpos = uint64(off)
log.Debug("QueryLog: seek: %x", off)
_, err := r.f.Seek(int64(off), io.SeekStart)
if err != nil {
log.Error("file.Seek: %s: %s", r.files[r.ifile], err)
return
}
r.nSeekRequests++
r.seekToNewLine()
r.fseeker.pos = r.fpos
r.filePrepared = true
r.searching = false
r.jd = nil
}
// Perform binary seek
// Return 0: success; 1: seek reqiured; -1: error
func (fs *fileSeeker) seekBinary(cur uint64) int32 {
log.Debug("QueryLog: seek: tgt=%x cur=%x, %x: [%x..%x]", fs.target, cur, fs.pos, fs.lo, fs.hi)
off := uint64(0)
if fs.pos >= fs.lo && fs.pos < fs.hi {
if cur == fs.target {
return 0
} else if cur < fs.target {
fs.lo = fs.pos + 1
} else {
fs.hi = fs.pos
}
off = fs.lo + (fs.hi-fs.lo)/2
} else {
// we didn't find another entry from the last file offset: now return the boundary beginning
off = fs.lo
}
if off == fs.lastpos {
return -1
}
fs.lastpos = off
fs.pos = off
return 1
}
// Seek to a new line
func (r *Reader) seekToNewLine() bool {
b := make([]byte, maxEntrySize*2)
_, err := r.f.Read(b)
if err != nil {
log.Error("QueryLog: file.Read: %s: %s", r.files[r.ifile], err)
return false
}
off := bytes.IndexByte(b, '\n') + 1
if off == 0 {
log.Error("QueryLog: Can't find a new line: %s", r.files[r.ifile])
return false
}
r.fpos += uint64(off)
log.Debug("QueryLog: seek: %x (+%d)", r.fpos, off)
_, err = r.f.Seek(int64(r.fpos), io.SeekStart)
if err != nil {
log.Error("QueryLog: file.Seek: %s: %s", r.files[r.ifile], err)
return false
}
return true
}
// Open a file
func (r *Reader) openFile() bool {
var err error
fn := r.files[r.ifile]
r.f, err = os.Open(fn)
if err != nil {
if !os.IsNotExist(err) {
log.Error("QueryLog: Failed to open file \"%s\": %s", fn, err)
}
return false
}
return true
}
// Seek to the needed position
func (r *Reader) prepareRead() bool {
fn := r.files[r.ifile]
fi, err := r.f.Stat()
if err != nil {
log.Error("QueryLog: file.Stat: %s: %s", fn, err)
return false
}
fsize := uint64(fi.Size())
off := uint64(0)
if r.latest {
// read data from the end of file
off = fsize - maxEntrySize*(r.limit+1)
if int64(off) < maxEntrySize {
off = 0
}
r.fpos = uint64(off)
log.Debug("QueryLog: seek: %x", off)
_, err = r.f.Seek(int64(off), io.SeekStart)
if err != nil {
log.Error("QueryLog: file.Seek: %s: %s", fn, err)
return false
}
} else {
// start searching in file: we'll read the first chunk of data from the middle of file
r.searching = true
r.fseeker = fileSeeker{}
r.fseeker.target = uint64(r.olderThan)
r.fseeker.hi = fsize
rc := r.fseeker.seekBinary(0)
r.fpos = r.fseeker.pos
if rc == 1 {
_, err = r.f.Seek(int64(r.fpos), io.SeekStart)
if err != nil {
log.Error("QueryLog: file.Seek: %s: %s", fn, err)
return false
}
}
}
r.nSeekRequests++
if !r.seekToNewLine() {
return false
}
r.fseeker.pos = r.fpos
return true
}
// Next - return the next entry or nil if reading is finished
func (r *Reader) Next() *logEntry { // nolint
var err error
for {
@ -234,15 +417,19 @@ func (r *Reader) Next() *logEntry { // nolint
if r.ifile == len(r.files) {
return nil
}
fn := r.files[r.ifile]
r.f, err = os.Open(fn)
if err != nil {
log.Error("Failed to open file \"%s\": %s", fn, err)
if !r.openFile() {
r.ifile++
continue
}
}
if !r.filePrepared {
if !r.prepareRead() {
return nil
}
r.filePrepared = true
}
// open decoder if needed
if r.jd == nil {
r.jd = json.NewDecoder(r.f)
@ -251,20 +438,60 @@ func (r *Reader) Next() *logEntry { // nolint
// check if there's data
if !r.jd.More() {
r.jd = nil
r.f.Close()
r.f = nil
r.ifile++
continue
return nil
}
// read data
var entry logEntry
err = r.jd.Decode(&entry)
if err != nil {
log.Error("Failed to decode: %s", err)
// next entry can be fine, try more
log.Error("QueryLog: Failed to decode: %s", err)
r.jd = nil
return nil
}
t := entry.Time.UnixNano()
if r.searching {
r.jd = nil
rr := r.fseeker.seekBinary(uint64(t))
r.fpos = r.fseeker.pos
if rr < 0 {
log.Error("QueryLog: File seek error: can't find the target entry: %s", r.files[r.ifile])
return nil
} else if rr == 0 {
// We found the target entry.
// We'll start reading the previous chunk of data.
r.searching = false
off := r.fpos - (maxEntrySize * (r.limit + 1))
if int64(off) < maxEntrySize {
off = 0
}
r.fpos = off
}
_, err = r.f.Seek(int64(r.fpos), io.SeekStart)
if err != nil {
log.Error("QueryLog: file.Seek: %s: %s", r.files[r.ifile], err)
return nil
}
r.nSeekRequests++
if !r.seekToNewLine() {
return nil
}
r.fseeker.pos = r.fpos
continue
}
if t < r.validFrom {
continue
}
if t >= r.olderThan {
return nil
}
r.count++
return &entry
}
@ -274,57 +501,3 @@ func (r *Reader) Next() *logEntry { // nolint
func (r *Reader) Total() int {
return 0
}
// Fill cache from file
func (l *queryLog) fillFromFile() {
now := time.Now()
validFrom := now.Unix() - int64(l.conf.Interval*60*60)
r := l.OpenReader()
if r == nil {
return
}
r.BeginRead()
for {
entry := r.Next()
if entry == nil {
break
}
if entry.Time.Unix() < validFrom {
continue
}
if len(entry.Question) == 0 {
log.Printf("entry question is absent, skipping")
continue
}
if entry.Time.After(now) {
log.Printf("t %v vs %v is in the future, ignoring", entry.Time, now)
continue
}
q := new(dns.Msg)
if err := q.Unpack(entry.Question); err != nil {
log.Printf("failed to unpack dns message question: %s", err)
continue
}
if len(q.Question) != 1 {
log.Printf("malformed dns message, has no questions, skipping")
continue
}
l.lock.Lock()
l.cache = append(l.cache, entry)
if len(l.cache) > queryLogSize {
toremove := len(l.cache) - queryLogSize
l.cache = l.cache[toremove:]
}
l.lock.Unlock()
}
r.Close()
}

View File

@ -3,6 +3,7 @@ package querylog
import (
"net"
"testing"
"time"
"github.com/AdguardTeam/AdGuardHome/dnsfilter"
"github.com/miekg/dns"
@ -36,7 +37,10 @@ func TestQueryLog(t *testing.T) {
res := dnsfilter.Result{}
l.Add(&q, &a, &res, 0, nil, "upstream")
d := l.GetData()
params := GetDataParams{
OlderThan: time.Now(),
}
d := l.GetData(params)
m := d[0]
mq := m["question"].(map[string]interface{})
assert.True(t, mq["host"].(string) == "example.org")