optimise ua tracking

reduce number of bytes used when we don't know the lang
add more spammy domain strings
This commit is contained in:
Azareal 2020-03-08 14:15:27 +10:00
parent f55a7a23ba
commit 7d2be466b3
7 changed files with 139 additions and 65 deletions

View File

@ -49,12 +49,14 @@ var IsDBDown int32 = 0 // 0 = false, 1 = true. this is value w
var ErrNoRows = sql.ErrNoRows
// ? - Make this more customisable?
var ExternalSites = map[string]string{
/*var ExternalSites = map[string]string{
"YT": "https://www.youtube.com/",
}
}*/
// TODO: Make this more customisable
var SpammyDomainBits = []string{"porn", "sex", "lesbian", "acup", "nude", "milf", "tits", "vape", "busty", "kink", "lingerie", "problog", "fet", "xblog", "blogin", "blognetwork"}
var SpammyDomainBits = []string{"porn", "sex", "lesbian", "acup", "nude", "milf", "tits", "vape", "busty", "kink", "lingerie", "strapon", "problog", "fet", "xblog", "blogin", "blognetwork", "relayblog"}
var Chrome, Firefox int // ! Temporary Hack for http push
type StringList []string

View File

@ -157,7 +157,7 @@ func (co *DefaultLangViewCounter) Bump(langCode string) (validCode bool) {
}
// TODO: Test this check
c.DebugDetail("buckets[", id, "]: ", co.buckets[id])
c.DebugDetail("buckets ", id, ": ", co.buckets[id])
if len(co.buckets) <= id || id < 0 {
return validCode
}
@ -165,3 +165,12 @@ func (co *DefaultLangViewCounter) Bump(langCode string) (validCode bool) {
return validCode
}
// Bump2 increments the view counter of the bucket at index id by one.
// An id outside the range [0, len(co.buckets)) is ignored.
func (co *DefaultLangViewCounter) Bump2(id int) {
	// TODO: Test this check
	// The range check has to run before anything indexes co.buckets: the debug
	// log below reads co.buckets[id], which panics for an out-of-range id.
	if id < 0 || id >= len(co.buckets) {
		return
	}
	c.DebugDetail("bucket ", id, ": ", co.buckets[id])
	atomic.AddInt64(&co.buckets[id], 1)
}

View File

@ -93,14 +93,14 @@ var Template_account_handle = genIntTmpl("account")
func tmplInitUsers() (User, User, User) {
avatar, microAvatar := BuildAvatar(62, "")
user := User{62, BuildProfileURL("fake-user", 62), "Fake User", "compiler@localhost", 0, false, false, false, false, false, false, GuestPerms, make(map[string]bool), "", false, "", avatar, microAvatar, "", "", 0, 0, 0, 0, StartTime, "0.0.0.0.0", "", 0, nil}
user := User{62, BuildProfileURL("fake-user", 62), "Fake User", "compiler@localhost", 0, false, false, false, false, false, false, GuestPerms, make(map[string]bool), "", false, "", avatar, microAvatar, "", "", 0, 0, 0, 0, StartTime, "0.0.0.0.0", 0, 0, nil}
// TODO: Do a more accurate level calculation for this?
avatar, microAvatar = BuildAvatar(1, "")
user2 := User{1, BuildProfileURL("admin-alice", 1), "Admin Alice", "alice@localhost", 1, true, true, true, true, false, false, AllPerms, make(map[string]bool), "", true, "", avatar, microAvatar, "", "", 58, 1000, 0, 1000, StartTime, "127.0.0.1", "", 0, nil}
user2 := User{1, BuildProfileURL("admin-alice", 1), "Admin Alice", "alice@localhost", 1, true, true, true, true, false, false, AllPerms, make(map[string]bool), "", true, "", avatar, microAvatar, "", "", 58, 1000, 0, 1000, StartTime, "127.0.0.1", 0, 0, nil}
avatar, microAvatar = BuildAvatar(2, "")
user3 := User{2, BuildProfileURL("admin-fred", 62), "Admin Fred", "fred@localhost", 1, true, true, true, true, false, false, AllPerms, make(map[string]bool), "", true, "", avatar, microAvatar, "", "", 42, 900, 0, 900, StartTime, "::1", "", 0, nil}
user3 := User{2, BuildProfileURL("admin-fred", 62), "Admin Fred", "fred@localhost", 1, true, true, true, true, false, false, AllPerms, make(map[string]bool), "", true, "", avatar, microAvatar, "", "", 42, 900, 0, 900, StartTime, "::1", 0, 0, nil}
return user, user2, user3
}

View File

@ -58,7 +58,7 @@ type User struct {
Liked int
CreatedAt time.Time
LastIP string // ! This part of the UserCache data might fall out of date
LastAgent string // ! Temporary hack, don't use
LastAgent int // ! Temporary hack for http push, don't use
TempGroup int
ParseSettings *ParseSettings

View File

@ -708,6 +708,50 @@ var markToAgent = map[string]string{
"RU_Bot": "mail_ru",
"zgrab": "zgrab",
}
// markToID maps a marker token found in a User-Agent string to the numeric
// agent ID that is handed to the agent view counter. Entries are listed in
// ascending ID order; markers that share an ID are aliases of the same agent
// and are kept next to each other.
var markToID = map[string]int{
	"Firefox": 1,
	"Chrome":  2,
	"OPR":     3,
	"Edge":    5,
	"MSIE":    6,
	"Trident": 7,

	"SamsungBrowser": 10,
	"UCBrowser":      11,

	"Google":        12,
	"Googlebot":     12,
	"yandex":        13,
	"bingbot":       14,
	"BingPreview":   14,
	"Slurp":         15,
	"Exabot":        16,
	"Baiduspider":   17,
	"Sogou":         18,
	"ToutiaoSpider": 19,
	"360Spider":     20,
	"DuckDuckBot":   21,
	"SeznamBot":     22,

	"Discordbot":          23,
	"Twitterbot":          24,
	"facebookexternalhit": 25,
	"Facebot":             25,
	"CloudFlare":          26,
	"archive":             27,
	"Uptimebot":           28,
	"Slackbot":            29,
	"Slack":               29,
	"Applebot":            30,
	"Discourse":           31,
	"ia_archiver":         32,
	"Lynx":                33,

	"SemrushBot":  37,
	"DotBot":      38,
	"AhrefsBot":   39,
	"proximic":    40,
	"MJ12bot":     41,
	"AspiegelBot": 42,
	"RU_Bot":      43,
	"zgrab":       44,
}
/*var agentRank = map[string]int{
"opera":9,
"chrome":8,
@ -723,6 +767,8 @@ func init() {
co.SetReverseAgentMapEnum(reverseAgentMapEnum)
co.SetOSMapEnum(osMapEnum)
co.SetReverseOSMapEnum(reverseOSMapEnum)
c.Chrome = agentMapEnum["chrome"]
c.Firefox = agentMapEnum["firefox"]
}
type WriterIntercept struct {
@ -808,7 +854,8 @@ func (r *GenRouter) RemoveFunc(pattern string) error {
return nil
}
func (r *GenRouter) DumpRequest(req *http.Request, prepend string) {
// TODO: Use strings builder?
func (r *GenRouter) DumpRequest(req *http.Request, pre string) {
var heads string
for key, value := range req.Header {
for _, vvalue := range value {
@ -816,7 +863,7 @@ func (r *GenRouter) DumpRequest(req *http.Request, prepend string) {
}
}
r.requestLogger.Print(prepend +
r.requestLogger.Print(pre +
"\nUA: " + c.SanitiseSingleLine(req.UserAgent()) + "\n" +
"Method: " + c.SanitiseSingleLine(req.Method) + "\n" + heads +
"Host: " + c.SanitiseSingleLine(req.Host) + "\n" +
@ -826,11 +873,11 @@ func (r *GenRouter) DumpRequest(req *http.Request, prepend string) {
"IP: " + req.RemoteAddr + "\n")
}
func (r *GenRouter) SuspiciousRequest(req *http.Request, prepend string) {
if prepend != "" {
prepend += "\n"
func (r *GenRouter) SuspiciousRequest(req *http.Request, pre string) {
if pre != "" {
pre += "\n"
}
r.DumpRequest(req,prepend+"Suspicious Request")
r.DumpRequest(req,pre+"Suspicious Request")
co.AgentViewCounter.Bump(36)
}
@ -983,18 +1030,19 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like.
// TODO: Add a setting to disable this?
// TODO: Use a more efficient detector instead of smashing every possible combination in
var agent string
//var agent string
var agent int
if !c.Config.DisableAnalytics {
ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another
if ua == "" {
co.AgentViewCounter.Bump(34)
if c.Dev.DebugMode {
var prepend string
var pre string
for _, char := range req.UserAgent() {
prepend += strconv.Itoa(int(char)) + " "
pre += strconv.Itoa(int(char)) + " "
}
r.DumpRequest(req,"Blank UA: " + prepend)
r.DumpRequest(req,"Blank UA: " + pre)
}
} else {
// WIP UA Parser
@ -1039,10 +1087,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// Iterate over this in reverse as the real UA tends to be on the right side
for i := len(items) - 1; i >= 0; i-- {
fAgent, ok := markToAgent[items[i]]
//fAgent, ok := markToAgent[items[i]]
fAgent, ok := markToID[items[i]]
if ok {
agent = fAgent
if agent != "safari" {
if agent != 4 {
break
}
}
@ -1055,42 +1104,43 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// Special handling
switch(agent) {
case "chrome":
case 2:
if os == 4 {
agent = "androidchrome"
agent = 8
}
case "safari":
case 4:
if os == 5 {
agent = "mobilesafari"
agent = 9
}
case "trident":
case 7:
// Hack to support IE11, change this after we start logging versions
if strings.Contains(ua,"rv:11") {
agent = "internetexplorer"
agent = 6
}
case "zgrab":
case 44:
r.SuspiciousRequest(req,"Vulnerability Scanner")
}
if agent == "" {
if agent == 0 {
co.AgentViewCounter.Bump(0)
if c.Dev.DebugMode {
var prepend string
var pre string
for _, char := range req.UserAgent() {
prepend += strconv.Itoa(int(char)) + " "
pre += strconv.Itoa(int(char)) + " "
}
r.DumpRequest(req,"Blank UA: " + prepend)
r.DumpRequest(req,"Blank UA: " + pre)
} else {
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(ua))
}
} else {
co.AgentViewCounter.Bump(agentMapEnum[agent])
//co.AgentViewCounter.Bump(agentMapEnum[agent])
co.AgentViewCounter.Bump(agent)
}
//co.OSViewCounter.Bump(osMapEnum[os])
co.OSViewCounter.Bump(os)
}
// TODO: Do we want to track missing language headers too? Maybe as its own type, e.g. "noheader"?
// TODO: Default to anything other than en, if anything else is present, to avoid over-representing it for multi-linguals?
lang := req.Header.Get("Accept-Language")
if lang != "" {
lLang := strings.Split(strings.TrimSpace(lang),"-")
@ -1105,12 +1155,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
break
}
c.DebugDetail("llLang:", llLang)
validCode := co.LangViewCounter.Bump(llLang)
if !validCode {
if !co.LangViewCounter.Bump(llLang) {
r.DumpRequest(req,"Invalid ISO Code")
}
} else {
co.LangViewCounter.Bump("")
co.LangViewCounter.Bump2(0)
}
if !c.Config.RefNoTrack {

View File

@ -19,6 +19,7 @@ type TmplVars struct {
AllAgentMap map[string]int
AllAgentMarkNames []string
AllAgentMarks map[string]string
AllAgentMarkIDs map[string]int
AllOSNames []string
AllOSMap map[string]int
}
@ -376,6 +377,11 @@ func main() {
"zgrab": "zgrab",
}
tmplVars.AllAgentMarkIDs = make(map[string]int)
for mark, agent := range tmplVars.AllAgentMarks {
tmplVars.AllAgentMarkIDs[mark] = tmplVars.AllAgentMap[agent]
}
fileData := `// Code generated by Gosora's Router Generator. DO NOT EDIT.
/* This file was automatically generated by the software. Please don't edit it as your changes may be overwritten at any moment. */
package main
@ -428,6 +434,9 @@ var reverseAgentMapEnum = map[int]string{ {{range $index, $element := .AllAgentN
var markToAgent = map[string]string{ {{range $index, $element := .AllAgentMarkNames}}
"{{$element}}": "{{index $.AllAgentMarks $element}}",{{end}}
}
var markToID = map[string]int{ {{range $index, $element := .AllAgentMarkNames}}
"{{$element}}": {{index $.AllAgentMarkIDs $element}},{{end}}
}
/*var agentRank = map[string]int{
"opera":9,
"chrome":8,
@ -443,6 +452,8 @@ func init() {
co.SetReverseAgentMapEnum(reverseAgentMapEnum)
co.SetOSMapEnum(osMapEnum)
co.SetReverseOSMapEnum(reverseOSMapEnum)
c.Chrome = agentMapEnum["chrome"]
c.Firefox = agentMapEnum["firefox"]
}
type WriterIntercept struct {
@ -528,7 +539,8 @@ func (r *GenRouter) RemoveFunc(pattern string) error {
return nil
}
func (r *GenRouter) DumpRequest(req *http.Request, prepend string) {
// TODO: Use strings builder?
func (r *GenRouter) DumpRequest(req *http.Request, pre string) {
var heads string
for key, value := range req.Header {
for _, vvalue := range value {
@ -536,7 +548,7 @@ func (r *GenRouter) DumpRequest(req *http.Request, prepend string) {
}
}
r.requestLogger.Print(prepend +
r.requestLogger.Print(pre +
"\nUA: " + c.SanitiseSingleLine(req.UserAgent()) + "\n" +
"Method: " + c.SanitiseSingleLine(req.Method) + "\n" + heads +
"Host: " + c.SanitiseSingleLine(req.Host) + "\n" +
@ -546,11 +558,11 @@ func (r *GenRouter) DumpRequest(req *http.Request, prepend string) {
"IP: " + req.RemoteAddr + "\n")
}
func (r *GenRouter) SuspiciousRequest(req *http.Request, prepend string) {
if prepend != "" {
prepend += "\n"
func (r *GenRouter) SuspiciousRequest(req *http.Request, pre string) {
if pre != "" {
pre += "\n"
}
r.DumpRequest(req,prepend+"Suspicious Request")
r.DumpRequest(req,pre+"Suspicious Request")
co.AgentViewCounter.Bump({{.AllAgentMap.suspicious}})
}
@ -703,18 +715,19 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like.
// TODO: Add a setting to disable this?
// TODO: Use a more efficient detector instead of smashing every possible combination in
var agent string
//var agent string
var agent int
if !c.Config.DisableAnalytics {
ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another
if ua == "" {
co.AgentViewCounter.Bump({{.AllAgentMap.blank}})
if c.Dev.DebugMode {
var prepend string
var pre string
for _, char := range req.UserAgent() {
prepend += strconv.Itoa(int(char)) + " "
pre += strconv.Itoa(int(char)) + " "
}
r.DumpRequest(req,"Blank UA: " + prepend)
r.DumpRequest(req,"Blank UA: " + pre)
}
} else {
// WIP UA Parser
@ -759,10 +772,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// Iterate over this in reverse as the real UA tends to be on the right side
for i := len(items) - 1; i >= 0; i-- {
fAgent, ok := markToAgent[items[i]]
//fAgent, ok := markToAgent[items[i]]
fAgent, ok := markToID[items[i]]
if ok {
agent = fAgent
if agent != "safari" {
if agent != {{.AllAgentMap.safari}} {
break
}
}
@ -775,42 +789,43 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// Special handling
switch(agent) {
case "chrome":
case {{.AllAgentMap.chrome}}:
if os == {{.AllOSMap.android}} {
agent = "androidchrome"
agent = {{.AllAgentMap.androidchrome}}
}
case "safari":
case {{.AllAgentMap.safari}}:
if os == {{.AllOSMap.iphone}} {
agent = "mobilesafari"
agent = {{.AllAgentMap.mobilesafari}}
}
case "trident":
case {{.AllAgentMap.trident}}:
// Hack to support IE11, change this after we start logging versions
if strings.Contains(ua,"rv:11") {
agent = "internetexplorer"
agent = {{.AllAgentMap.internetexplorer}}
}
case "zgrab":
case {{.AllAgentMap.zgrab}}:
r.SuspiciousRequest(req,"Vulnerability Scanner")
}
if agent == "" {
if agent == 0 {
co.AgentViewCounter.Bump({{.AllAgentMap.unknown}})
if c.Dev.DebugMode {
var prepend string
var pre string
for _, char := range req.UserAgent() {
prepend += strconv.Itoa(int(char)) + " "
pre += strconv.Itoa(int(char)) + " "
}
r.DumpRequest(req,"Blank UA: " + prepend)
r.DumpRequest(req,"Blank UA: " + pre)
} else {
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(ua))
}
} else {
co.AgentViewCounter.Bump(agentMapEnum[agent])
//co.AgentViewCounter.Bump(agentMapEnum[agent])
co.AgentViewCounter.Bump(agent)
}
//co.OSViewCounter.Bump(osMapEnum[os])
co.OSViewCounter.Bump(os)
}
// TODO: Do we want to track missing language headers too? Maybe as its own type, e.g. "noheader"?
// TODO: Default to anything other than en, if anything else is present, to avoid over-representing it for multi-linguals?
lang := req.Header.Get("Accept-Language")
if lang != "" {
lLang := strings.Split(strings.TrimSpace(lang),"-")
@ -825,12 +840,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
break
}
c.DebugDetail("llLang:", llLang)
validCode := co.LangViewCounter.Bump(llLang)
if !validCode {
if !co.LangViewCounter.Bump(llLang) {
r.DumpRequest(req,"Invalid ISO Code")
}
} else {
co.LangViewCounter.Bump("")
co.LangViewCounter.Bump2(0)
}
if !c.Config.RefNoTrack {

View File

@ -111,7 +111,7 @@ func FootHeaders(w http.ResponseWriter, header *c.Header) {
// Server pushes can backfire on certain browsers, so we want to make sure it's only triggered for ones where it'll help
lastAgent := header.CurrentUser.LastAgent
//fmt.Println("lastAgent:", lastAgent)
if lastAgent == "chrome" || lastAgent == "firefox" {
if lastAgent == c.Chrome || lastAgent == c.Firefox {
doPush(w, header)
}
}