Pull request: 3567 filters update

Merge in DNS/adguard-home from 3567-old-filters to master

Updates #3567.

Squashed commit of the following:

commit d5cc419f1b01f89b2cbf40ff98b562d3498c15c2
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Wed Sep 15 19:22:49 2021 +0300

    home: lock doc

commit 54edba6b3bd87a5e6a46c626db8eca9f4cd50858
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Wed Sep 15 14:16:20 2021 +0300

    home: imp code, docs

commit e6dde1d3b3e3e0b196361806e77708bb797f5d29
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Wed Sep 15 13:53:50 2021 +0300

    home: imp code, logic

commit b258b62948504e62d0e6366605dbd288f4584ada
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Tue Sep 14 19:35:14 2021 +0300

    all: imp log of changes

commit 9b66cde852ae1741d10e54fcb1d13d9676b42436
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Tue Sep 14 18:56:52 2021 +0300

    home: imp filter upd
This commit is contained in:
Eugene Burkov 2021-09-15 20:09:32 +03:00
parent 48b0cefb29
commit 176a344aee
4 changed files with 191 additions and 109 deletions

View File

@ -10,7 +10,7 @@ and this project adheres to
## [Unreleased] ## [Unreleased]
<!-- <!--
## [v0.107.0] - 2021-09-14 (APPROX.) ## [v0.107.0] - 2021-09-28 (APPROX.)
--> -->
### Added ### Added
@ -115,6 +115,7 @@ In this release, the schema version has changed from 10 to 12.
### Fixed ### Fixed
- Removal of temporary filter files ([#3567]).
- Panic when an upstream server responds with an empty question section - Panic when an upstream server responds with an empty question section
([#3551]). ([#3551]).
- 9GAG blocking ([#3564]). - 9GAG blocking ([#3564]).
@ -195,6 +196,7 @@ In this release, the schema version has changed from 10 to 12.
[#3538]: https://github.com/AdguardTeam/AdGuardHome/issues/3538 [#3538]: https://github.com/AdguardTeam/AdGuardHome/issues/3538
[#3551]: https://github.com/AdguardTeam/AdGuardHome/issues/3551 [#3551]: https://github.com/AdguardTeam/AdGuardHome/issues/3551
[#3564]: https://github.com/AdguardTeam/AdGuardHome/issues/3564 [#3564]: https://github.com/AdguardTeam/AdGuardHome/issues/3564
[#3567]: https://github.com/AdguardTeam/AdGuardHome/issues/3567
[#3568]: https://github.com/AdguardTeam/AdGuardHome/issues/3568 [#3568]: https://github.com/AdguardTeam/AdGuardHome/issues/3568
[#3579]: https://github.com/AdguardTeam/AdGuardHome/issues/3579 [#3579]: https://github.com/AdguardTeam/AdGuardHome/issues/3579

View File

@ -254,13 +254,21 @@ func (f *Filtering) handleFilteringRefresh(w http.ResponseWriter, r *http.Reques
return return
} }
Context.controlLock.Unlock()
flags := filterRefreshBlocklists flags := filterRefreshBlocklists
if req.White { if req.White {
flags = filterRefreshAllowlists flags = filterRefreshAllowlists
} }
func() {
// Temporarily unlock the Context.controlLock because the
// f.refreshFilters waits for it to be unlocked but it's
// actually locked in ensure wrapper.
//
// TODO(e.burkov): Reconsider this messy syncing process.
Context.controlLock.Unlock()
defer Context.controlLock.Lock()
resp.Updated, err = f.refreshFilters(flags|filterRefreshForce, false) resp.Updated, err = f.refreshFilters(flags|filterRefreshForce, false)
Context.controlLock.Lock() }()
if err != nil { if err != nil {
httpError(w, http.StatusInternalServerError, "%s", err) httpError(w, http.StatusInternalServerError, "%s", err)
return return

View File

@ -18,6 +18,7 @@ import (
"github.com/AdguardTeam/AdGuardHome/internal/filtering" "github.com/AdguardTeam/AdGuardHome/internal/filtering"
"github.com/AdguardTeam/golibs/errors" "github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/log" "github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/stringutil"
) )
var nextFilterID = time.Now().Unix() // semi-stable way to generate an unique ID var nextFilterID = time.Now().Unix() // semi-stable way to generate an unique ID
@ -535,26 +536,85 @@ func (f *Filtering) read(reader io.Reader, tmpFile *os.File, filter *filter) (in
} }
} }
// updateIntl returns true if filter update performed successfully. // finalizeUpdate closes and gets rid of temporary file f with filter's content
func (f *Filtering) updateIntl(filter *filter) (updated bool, err error) { // according to updated. It also saves new values of flt's name, rules number
updated = false // and checksum if sucсeeded.
log.Tracef("Downloading update for filter %d from %s", filter.ID, filter.URL) func finalizeUpdate(
f *os.File,
flt *filter,
updated bool,
name string,
rnum int,
cs uint32,
) (err error) {
tmpFileName := f.Name()
tmpFile, err := os.CreateTemp(filepath.Join(Context.getDataDir(), filterDir), "") // Close the file before renaming it because it's required on Windows.
//
// See https://github.com/adguardTeam/adGuardHome/issues/1553.
if err = f.Close(); err != nil {
return fmt.Errorf("closing temporary file: %w", err)
}
if !updated {
log.Tracef("filter #%d from %s has no changes, skip", flt.ID, flt.URL)
return os.Remove(tmpFileName)
}
log.Printf("saving filter %d contents to: %s", flt.ID, flt.Path())
if err = os.Rename(tmpFileName, flt.Path()); err != nil {
return errors.WithDeferred(err, os.Remove(tmpFileName))
}
flt.Name = stringutil.Coalesce(flt.Name, name)
flt.checksum = cs
flt.RulesCount = rnum
return nil
}
// processUpdate copies filter's content from src to dst and returns the name,
// rules number, and checksum for it. It also returns the number of bytes read
// from src.
func (f *Filtering) processUpdate(
src io.Reader,
dst *os.File,
flt *filter,
) (name string, rnum int, cs uint32, n int, err error) {
if n, err = f.read(src, dst, flt); err != nil {
return "", 0, 0, 0, err
}
if _, err = dst.Seek(0, io.SeekStart); err != nil {
return "", 0, 0, 0, err
}
rnum, cs, name = f.parseFilterContents(dst)
return name, rnum, cs, n, nil
}
// updateIntl updates the flt rewriting it's actual file. It returns true if
// the actual update has been performed.
func (f *Filtering) updateIntl(flt *filter) (ok bool, err error) {
log.Tracef("downloading update for filter %d from %s", flt.ID, flt.URL)
var name string
var rnum, n int
var cs uint32
var tmpFile *os.File
tmpFile, err = os.CreateTemp(filepath.Join(Context.getDataDir(), filterDir), "")
if err != nil { if err != nil {
return updated, err return false, err
} }
defer func() { defer func() {
var derr error err = errors.WithDeferred(err, finalizeUpdate(tmpFile, flt, ok, name, rnum, cs))
if tmpFile != nil { ok = ok && err == nil
if derr = tmpFile.Close(); derr != nil { if ok {
log.Printf("Couldn't close temporary file: %s", derr) log.Printf("updated filter %d: %d bytes, %d rules", flt.ID, n, rnum)
}
tmpFileName := tmpFile.Name()
if derr = os.Remove(tmpFileName); derr != nil {
log.Printf("Couldn't delete temporary file %s: %s", tmpFileName, derr)
}
} }
}() }()
@ -562,72 +622,42 @@ func (f *Filtering) updateIntl(filter *filter) (updated bool, err error) {
// end users. // end users.
// //
// See https://github.com/AdguardTeam/AdGuardHome/issues/3198. // See https://github.com/AdguardTeam/AdGuardHome/issues/3198.
err = tmpFile.Chmod(0o644) if err = tmpFile.Chmod(0o644); err != nil {
if err != nil { return false, fmt.Errorf("changing file mode: %w", err)
return updated, fmt.Errorf("changing file mode: %w", err)
} }
var reader io.Reader var r io.Reader
if filepath.IsAbs(filter.URL) { if filepath.IsAbs(flt.URL) {
var f io.ReadCloser var file io.ReadCloser
f, err = os.Open(filter.URL) file, err = os.Open(flt.URL)
if err != nil { if err != nil {
return updated, fmt.Errorf("open file: %w", err) return false, fmt.Errorf("open file: %w", err)
} }
defer func() { err = errors.WithDeferred(err, f.Close()) }() defer func() { err = errors.WithDeferred(err, file.Close()) }()
reader = f r = file
} else { } else {
var resp *http.Response var resp *http.Response
resp, err = Context.client.Get(filter.URL) resp, err = Context.client.Get(flt.URL)
if err != nil { if err != nil {
log.Printf("Couldn't request filter from URL %s, skipping: %s", filter.URL, err) log.Printf("requesting filter from %s, skip: %s", flt.URL, err)
return updated, err return false, err
} }
defer func() { err = errors.WithDeferred(err, resp.Body.Close()) }() defer func() { err = errors.WithDeferred(err, resp.Body.Close()) }()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
log.Printf("Got status code %d from URL %s, skipping", resp.StatusCode, filter.URL) log.Printf("got status code %d from %s, skip", resp.StatusCode, flt.URL)
return updated, fmt.Errorf("got status code != 200: %d", resp.StatusCode)
} return false, fmt.Errorf("got status code != 200: %d", resp.StatusCode)
reader = resp.Body
} }
total, err := f.read(reader, tmpFile, filter) r = resp.Body
if err != nil {
return updated, err
} }
// Extract filter name and count number of rules name, rnum, cs, n, err = f.processUpdate(r, tmpFile, flt)
_, _ = tmpFile.Seek(0, io.SeekStart)
rulesCount, checksum, filterName := f.parseFilterContents(tmpFile)
// Check if the filter has been really changed
if filter.checksum == checksum {
log.Tracef("Filter #%d at URL %s hasn't changed, not updating it", filter.ID, filter.URL)
return updated, nil
}
log.Printf("Filter %d has been updated: %d bytes, %d rules", return cs != flt.checksum, err
filter.ID, total, rulesCount)
if len(filter.Name) == 0 {
filter.Name = filterName
}
filter.RulesCount = rulesCount
filter.checksum = checksum
filterFilePath := filter.Path()
log.Printf("Saving filter %d contents to: %s", filter.ID, filterFilePath)
// Closing the file before renaming it is necessary on Windows
_ = tmpFile.Close()
err = os.Rename(tmpFile.Name(), filterFilePath)
if err != nil {
return updated, err
}
tmpFile = nil
updated = true
return updated, nil
} }
// loads filter contents from the file in dataDir // loads filter contents from the file in dataDir

View File

@ -1,76 +1,118 @@
package home package home
import ( import (
"fmt" "io/fs"
"net" "net"
"net/http" "net/http"
"net/url"
"os" "os"
"path"
"path/filepath"
"testing" "testing"
"time" "time"
"github.com/AdguardTeam/golibs/netutil"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func testStartFilterListener(t *testing.T) net.Listener { const testFltsFileName = "1.txt"
func testStartFilterListener(t *testing.T, fltContent *[]byte) (l net.Listener) {
t.Helper() t.Helper()
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
n, werr := w.Write(*fltContent)
require.NoError(t, werr)
require.Equal(t, len(*fltContent), n)
})
var err error
l, err = net.Listen("tcp", ":0")
require.NoError(t, err)
go func() {
_ = http.Serve(l, h)
}()
t.Cleanup(func() {
require.NoError(t, l.Close())
})
return l
}
func TestFilters(t *testing.T) {
const content = `||example.org^$third-party const content = `||example.org^$third-party
# Inline comment example # Inline comment example
||example.com^$third-party ||example.com^$third-party
0.0.0.0 example.com 0.0.0.0 example.com
` `
mux := http.NewServeMux() fltContent := []byte(content)
mux.HandleFunc("/filters/1.txt", func(w http.ResponseWriter, r *http.Request) {
_, werr := w.Write([]byte(content))
assert.Nil(t, werr)
})
listener, err := net.Listen("tcp", ":0") l := testStartFilterListener(t, &fltContent)
require.Nil(t, err)
go func() {
_ = http.Serve(listener, mux)
}()
t.Cleanup(func() {
assert.Nil(t, listener.Close())
})
return listener
}
func TestFilters(t *testing.T) {
l := testStartFilterListener(t)
dir := t.TempDir()
Context = homeContext{ Context = homeContext{
workDir: dir, workDir: t.TempDir(),
client: &http.Client{ client: &http.Client{
Timeout: 5 * time.Second, Timeout: 5 * time.Second,
}, },
} }
Context.filters.Init() Context.filters.Init()
f := filter{ f := &filter{
URL: fmt.Sprintf("http://127.0.0.1:%d/filters/1.txt", l.Addr().(*net.TCPAddr).Port), URL: (&url.URL{
Scheme: "http",
Host: (&netutil.IPPort{
IP: net.IP{127, 0, 0, 1},
Port: l.Addr().(*net.TCPAddr).Port,
}).String(),
Path: path.Join(filterDir, testFltsFileName),
}).String(),
} }
// Download. updateAndAssert := func(t *testing.T, want require.BoolAssertionFunc, wantRulesCount int) {
ok, err := Context.filters.update(&f) ok, err := Context.filters.update(f)
require.Nil(t, err) require.NoError(t, err)
require.True(t, ok) want(t, ok)
assert.Equal(t, 3, f.RulesCount)
// Refresh. assert.Equal(t, wantRulesCount, f.RulesCount)
ok, err = Context.filters.update(&f)
require.Nil(t, err)
require.False(t, ok)
err = Context.filters.load(&f) var dir []fs.DirEntry
require.Nil(t, err) dir, err = os.ReadDir(filepath.Join(Context.getDataDir(), filterDir))
require.NoError(t, err)
assert.Len(t, dir, 1)
require.FileExists(t, f.Path())
err = Context.filters.load(f)
require.NoError(t, err)
}
t.Run("download", func(t *testing.T) {
updateAndAssert(t, require.True, 3)
})
t.Run("refresh_idle", func(t *testing.T) {
updateAndAssert(t, require.False, 3)
})
t.Run("refresh_actually", func(t *testing.T) {
fltContent = []byte(`||example.com^`)
t.Cleanup(func() {
fltContent = []byte(content)
})
updateAndAssert(t, require.True, 1)
})
t.Run("load_unload", func(t *testing.T) {
err := Context.filters.load(f)
require.NoError(t, err)
f.unload() f.unload()
require.Nil(t, os.Remove(f.Path())) })
require.NoError(t, os.Remove(f.Path()))
} }