Merge: * filter update: ensure filter data is text

Close #527

* commit 'b078b82a89940bd96c7e9157c2f5c758224a72ae':
  * filter update: ensure filter data is text (but not html)
This commit is contained in:
Simon Zolin 2019-09-11 16:45:56 +03:00
commit 2356ae5bdd
1 changed file with 21 additions and 6 deletions

View File

@ -275,6 +275,17 @@ func refreshFiltersIfNecessary(force bool) int {
return updateCount return updateCount
} }
// isPrintableText reports whether data contains no ASCII control bytes other
// than TAB, LF and CR.
//
// The check is byte-wise: any byte below 0x20 (except '\t', '\n', '\r') and
// the DEL byte 0x7f cause a false result. Bytes >= 0x80 are accepted as-is,
// so multi-byte UTF-8 sequences pass, but the input is not validated as
// well-formed UTF-8. Empty or nil input yields true.
func isPrintableText(data []byte) bool {
	for i := 0; i < len(data); i++ {
		switch b := data[i]; {
		case b == '\t', b == '\n', b == '\r':
			// Explicitly permitted whitespace control characters.
		case b < ' ', b == 0x7f:
			// Any other control character (or DEL) marks the data as non-text.
			return false
		}
	}
	return true
}
// A helper function that parses filter contents and returns a number of rules and a filter name (if there's any) // A helper function that parses filter contents and returns a number of rules and a filter name (if there's any)
func parseFilterContents(contents []byte) (int, string) { func parseFilterContents(contents []byte) (int, string) {
lines := strings.Split(string(contents), "\n") lines := strings.Split(string(contents), "\n")
@ -322,12 +333,6 @@ func (filter *filter) update() (bool, error) {
return false, fmt.Errorf("got status code != 200: %d", resp.StatusCode) return false, fmt.Errorf("got status code != 200: %d", resp.StatusCode)
} }
contentType := strings.ToLower(resp.Header.Get("content-type"))
if !strings.HasPrefix(contentType, "text/plain") {
log.Printf("Non-text response %s from %s, skipping", contentType, filter.URL)
return false, fmt.Errorf("non-text response %s", contentType)
}
body, err := ioutil.ReadAll(resp.Body) body, err := ioutil.ReadAll(resp.Body)
if err != nil { if err != nil {
log.Printf("Couldn't fetch filter contents from URL %s, skipping: %s", filter.URL, err) log.Printf("Couldn't fetch filter contents from URL %s, skipping: %s", filter.URL, err)
@ -341,6 +346,16 @@ func (filter *filter) update() (bool, error) {
return false, nil return false, nil
} }
if !isPrintableText(body[:4096]) {
return false, fmt.Errorf("Data contains non-printable characters")
}
s := strings.ToLower(string(body[:4096]))
if strings.Index(s, "<html") >= 0 ||
strings.Index(s, "<!doctype") >= 0 {
return false, fmt.Errorf("Data is HTML, not plain text")
}
// Extract filter name and count number of rules // Extract filter name and count number of rules
rulesCount, filterName := parseFilterContents(body) rulesCount, filterName := parseFilterContents(body)
log.Printf("Filter %d has been updated: %d bytes, %d rules", filter.ID, len(body), rulesCount) log.Printf("Filter %d has been updated: %d bytes, %d rules", filter.ID, len(body), rulesCount)