From 7d2be466b3f83affd8e8fbd1b19fe19e4d06e3c2 Mon Sep 17 00:00:00 2001 From: Azareal Date: Sun, 8 Mar 2020 14:15:27 +1000 Subject: [PATCH] optimise ua tracking reduce number of bytes used when we don't know the lang add more spammy domain strings --- common/common.go | 8 +-- common/counters/langs.go | 11 +++- common/template_init.go | 6 +-- common/user.go | 2 +- gen_router.go | 105 ++++++++++++++++++++++++++++----------- router_gen/main.go | 70 +++++++++++++++----------- routes/common.go | 2 +- 7 files changed, 139 insertions(+), 65 deletions(-) diff --git a/common/common.go b/common/common.go index 064b3f96..6180a8ff 100644 --- a/common/common.go +++ b/common/common.go @@ -49,12 +49,14 @@ var IsDBDown int32 = 0 // 0 = false, 1 = true. this is value w var ErrNoRows = sql.ErrNoRows // ? - Make this more customisable? -var ExternalSites = map[string]string{ +/*var ExternalSites = map[string]string{ "YT": "https://www.youtube.com/", -} +}*/ // TODO: Make this more customisable -var SpammyDomainBits = []string{"porn", "sex", "lesbian", "acup", "nude", "milf", "tits", "vape", "busty", "kink", "lingerie", "problog", "fet", "xblog", "blogin", "blognetwork"} +var SpammyDomainBits = []string{"porn", "sex", "lesbian", "acup", "nude", "milf", "tits", "vape", "busty", "kink", "lingerie", "strapon", "problog", "fet", "xblog", "blogin", "blognetwork", "relayblog"} + +var Chrome, Firefox int // ! Temporary Hack for http push type StringList []string diff --git a/common/counters/langs.go b/common/counters/langs.go index 729eae10..4e052dfc 100644 --- a/common/counters/langs.go +++ b/common/counters/langs.go @@ -157,7 +157,7 @@ func (co *DefaultLangViewCounter) Bump(langCode string) (validCode bool) { } // TODO: Test this check - c.DebugDetail("buckets[", id, "]: ", co.buckets[id]) + c.DebugDetail("buckets ", id, ": ", co.buckets[id]) if len(co.buckets) <= id || id < 0 { return validCode } @@ -165,3 +165,12 @@ func (co *DefaultLangViewCounter) Bump(langCode string) (validCode bool) { return validCode } + +func (co *DefaultLangViewCounter) Bump2(id int) { + // TODO: Test this check + c.DebugDetail("bucket ", id, ": ", co.buckets[id]) + if len(co.buckets) <= id || id < 0 { + return + } + atomic.AddInt64(&co.buckets[id], 1) +} diff --git a/common/template_init.go b/common/template_init.go index c99be523..b946a7bd 100644 --- a/common/template_init.go +++ b/common/template_init.go @@ -93,14 +93,14 @@ var Template_account_handle = genIntTmpl("account") func tmplInitUsers() (User, User, User) { avatar, microAvatar := BuildAvatar(62, "") - user := User{62, BuildProfileURL("fake-user", 62), "Fake User", "compiler@localhost", 0, false, false, false, false, false, false, GuestPerms, make(map[string]bool), "", false, "", avatar, microAvatar, "", "", 0, 0, 0, 0, StartTime, "0.0.0.0.0", "", 0, nil} + user := User{62, BuildProfileURL("fake-user", 62), "Fake User", "compiler@localhost", 0, false, false, false, false, false, false, GuestPerms, make(map[string]bool), "", false, "", avatar, microAvatar, "", "", 0, 0, 0, 0, StartTime, "0.0.0.0.0", 0, 0, nil} // TODO: Do a more accurate level calculation for this? avatar, microAvatar = BuildAvatar(1, "") - user2 := User{1, BuildProfileURL("admin-alice", 1), "Admin Alice", "alice@localhost", 1, true, true, true, true, false, false, AllPerms, make(map[string]bool), "", true, "", avatar, microAvatar, "", "", 58, 1000, 0, 1000, StartTime, "127.0.0.1", "", 0, nil} + user2 := User{1, BuildProfileURL("admin-alice", 1), "Admin Alice", "alice@localhost", 1, true, true, true, true, false, false, AllPerms, make(map[string]bool), "", true, "", avatar, microAvatar, "", "", 58, 1000, 0, 1000, StartTime, "127.0.0.1", 0, 0, nil} avatar, microAvatar = BuildAvatar(2, "") - user3 := User{2, BuildProfileURL("admin-fred", 62), "Admin Fred", "fred@localhost", 1, true, true, true, true, false, false, AllPerms, make(map[string]bool), "", true, "", avatar, microAvatar, "", "", 42, 900, 0, 900, StartTime, "::1", "", 0, nil} + user3 := User{2, BuildProfileURL("admin-fred", 62), "Admin Fred", "fred@localhost", 1, true, true, true, true, false, false, AllPerms, make(map[string]bool), "", true, "", avatar, microAvatar, "", "", 42, 900, 0, 900, StartTime, "::1", 0, 0, nil} return user, user2, user3 } diff --git a/common/user.go b/common/user.go index 16861c3c..cd1c4462 100644 --- a/common/user.go +++ b/common/user.go @@ -58,7 +58,7 @@ type User struct { Liked int CreatedAt time.Time LastIP string // ! This part of the UserCache data might fall out of date - LastAgent string // ! Temporary hack, don't use + LastAgent int // ! Temporary hack for http push, don't use TempGroup int ParseSettings *ParseSettings diff --git a/gen_router.go b/gen_router.go index a0625453..52b50983 100644 --- a/gen_router.go +++ b/gen_router.go @@ -708,6 +708,50 @@ var markToAgent = map[string]string{ "RU_Bot": "mail_ru", "zgrab": "zgrab", } +var markToID = map[string]int{ + "OPR": 3, + "Chrome": 2, + "Firefox": 1, + "MSIE": 6, + "Trident": 7, + "Edge": 5, + "Lynx": 33, + "SamsungBrowser": 10, + "UCBrowser": 11, + "Google": 12, + "Googlebot": 12, + "yandex": 13, + "DuckDuckBot": 21, + "Baiduspider": 17, + "Sogou": 18, + "ToutiaoSpider": 19, + "360Spider": 20, + "bingbot": 14, + "BingPreview": 14, + "Slurp": 15, + "Exabot": 16, + "SeznamBot": 22, + "CloudFlare": 26, + "archive": 27, + "Uptimebot": 28, + "Slackbot": 29, + "Slack": 29, + "Discordbot": 23, + "Twitterbot": 24, + "facebookexternalhit": 25, + "Facebot": 25, + "Applebot": 30, + "Discourse": 31, + "ia_archiver": 32, + "SemrushBot": 37, + "DotBot": 38, + "AhrefsBot": 39, + "proximic": 40, + "MJ12bot": 41, + "AspiegelBot": 42, + "RU_Bot": 43, + "zgrab": 44, +} /*var agentRank = map[string]int{ "opera":9, "chrome":8, @@ -723,6 +767,8 @@ func init() { co.SetReverseAgentMapEnum(reverseAgentMapEnum) co.SetOSMapEnum(osMapEnum) co.SetReverseOSMapEnum(reverseOSMapEnum) + c.Chrome = agentMapEnum["chrome"] + c.Firefox = agentMapEnum["firefox"] } type WriterIntercept struct { @@ -808,7 +854,8 @@ func (r *GenRouter) RemoveFunc(pattern string) error { return nil } -func (r *GenRouter) DumpRequest(req *http.Request, prepend string) { +// TODO: Use strings builder? +func (r *GenRouter) DumpRequest(req *http.Request, pre string) { var heads string for key, value := range req.Header { for _, vvalue := range value { @@ -816,7 +863,7 @@ func (r *GenRouter) DumpRequest(req *http.Request, prepend string) { } } - r.requestLogger.Print(prepend + + r.requestLogger.Print(pre + "\nUA: " + c.SanitiseSingleLine(req.UserAgent()) + "\n" + "Method: " + c.SanitiseSingleLine(req.Method) + "\n" + heads + "Host: " + c.SanitiseSingleLine(req.Host) + "\n" + @@ -826,11 +873,11 @@ func (r *GenRouter) DumpRequest(req *http.Request, prepend string) { "IP: " + req.RemoteAddr + "\n") } -func (r *GenRouter) SuspiciousRequest(req *http.Request, prepend string) { - if prepend != "" { - prepend += "\n" +func (r *GenRouter) SuspiciousRequest(req *http.Request, pre string) { + if pre != "" { + pre += "\n" } - r.DumpRequest(req,prepend+"Suspicious Request") + r.DumpRequest(req,pre+"Suspicious Request") co.AgentViewCounter.Bump(36) } @@ -983,18 +1030,19 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { // Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like. // TODO: Add a setting to disable this? // TODO: Use a more efficient detector instead of smashing every possible combination in - var agent string + //var agent string + var agent int if !c.Config.DisableAnalytics { ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another if ua == "" { co.AgentViewCounter.Bump(34) if c.Dev.DebugMode { - var prepend string + var pre string for _, char := range req.UserAgent() { - prepend += strconv.Itoa(int(char)) + " " + pre += strconv.Itoa(int(char)) + " " } - r.DumpRequest(req,"Blank UA: " + prepend) + r.DumpRequest(req,"Blank UA: " + pre) } } else { // WIP UA Parser @@ -1039,10 +1087,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { // Iterate over this in reverse as the real UA tends to be on the right side for i := len(items) - 1; i >= 0; i-- { - fAgent, ok := markToAgent[items[i]] + //fAgent, ok := markToAgent[items[i]] + fAgent, ok := markToID[items[i]] if ok { agent = fAgent - if agent != "safari" { + if agent != 4 { break } } @@ -1055,42 +1104,43 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { // Special handling switch(agent) { - case "chrome": + case 2: if os == 4 { - agent = "androidchrome" + agent = 8 } - case "safari": + case 4: if os == 5 { - agent = "mobilesafari" + agent = 9 } - case "trident": + case 7: // Hack to support IE11, change this after we start logging versions if strings.Contains(ua,"rv:11") { - agent = "internetexplorer" + agent = 6 } - case "zgrab": + case 44: r.SuspiciousRequest(req,"Vulnerability Scanner") } - if agent == "" { + if agent == 0 { co.AgentViewCounter.Bump(0) if c.Dev.DebugMode { - var prepend string + var pre string for _, char := range req.UserAgent() { - prepend += strconv.Itoa(int(char)) + " " + pre += strconv.Itoa(int(char)) + " " } - r.DumpRequest(req,"Blank UA: " + prepend) + r.DumpRequest(req,"Blank UA: " + pre) } else { r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(ua)) } } else { - co.AgentViewCounter.Bump(agentMapEnum[agent]) + //co.AgentViewCounter.Bump(agentMapEnum[agent]) + co.AgentViewCounter.Bump(agent) } - //co.OSViewCounter.Bump(osMapEnum[os]) co.OSViewCounter.Bump(os) } // TODO: Do we want to track missing language headers too? Maybe as it's own type, e.g. "noheader"? + // TODO: Default to anything other than en, if anything else is present, to avoid over-representing it for multi-linguals? lang := req.Header.Get("Accept-Language") if lang != "" { lLang := strings.Split(strings.TrimSpace(lang),"-") @@ -1105,12 +1155,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { break } c.DebugDetail("llLang:", llLang) - validCode := co.LangViewCounter.Bump(llLang) - if !validCode { + if !co.LangViewCounter.Bump(llLang) { r.DumpRequest(req,"Invalid ISO Code") } } else { - co.LangViewCounter.Bump("") + co.LangViewCounter.Bump2(0) } if !c.Config.RefNoTrack { diff --git a/router_gen/main.go b/router_gen/main.go index d519c776..43fecb86 100644 --- a/router_gen/main.go +++ b/router_gen/main.go @@ -19,6 +19,7 @@ type TmplVars struct { AllAgentMap map[string]int AllAgentMarkNames []string AllAgentMarks map[string]string + AllAgentMarkIDs map[string]int AllOSNames []string AllOSMap map[string]int } @@ -376,6 +377,11 @@ func main() { "zgrab": "zgrab", } + tmplVars.AllAgentMarkIDs = make(map[string]int) + for mark, agent := range tmplVars.AllAgentMarks { + tmplVars.AllAgentMarkIDs[mark] = tmplVars.AllAgentMap[agent] + } + fileData := `// Code generated by Gosora's Router Generator. DO NOT EDIT. /* This file was automatically generated by the software. Please don't edit it as your changes may be overwritten at any moment. */ package main @@ -428,6 +434,9 @@ var reverseAgentMapEnum = map[int]string{ {{range $index, $element := .AllAgentN var markToAgent = map[string]string{ {{range $index, $element := .AllAgentMarkNames}} "{{$element}}": "{{index $.AllAgentMarks $element}}",{{end}} } +var markToID = map[string]int{ {{range $index, $element := .AllAgentMarkNames}} + "{{$element}}": {{index $.AllAgentMarkIDs $element}},{{end}} +} /*var agentRank = map[string]int{ "opera":9, "chrome":8, @@ -443,6 +452,8 @@ func init() { co.SetReverseAgentMapEnum(reverseAgentMapEnum) co.SetOSMapEnum(osMapEnum) co.SetReverseOSMapEnum(reverseOSMapEnum) + c.Chrome = agentMapEnum["chrome"] + c.Firefox = agentMapEnum["firefox"] } type WriterIntercept struct { @@ -528,7 +539,8 @@ func (r *GenRouter) RemoveFunc(pattern string) error { return nil } -func (r *GenRouter) DumpRequest(req *http.Request, prepend string) { +// TODO: Use strings builder? +func (r *GenRouter) DumpRequest(req *http.Request, pre string) { var heads string for key, value := range req.Header { for _, vvalue := range value { @@ -536,7 +548,7 @@ func (r *GenRouter) DumpRequest(req *http.Request, prepend string) { } } - r.requestLogger.Print(prepend + + r.requestLogger.Print(pre + "\nUA: " + c.SanitiseSingleLine(req.UserAgent()) + "\n" + "Method: " + c.SanitiseSingleLine(req.Method) + "\n" + heads + "Host: " + c.SanitiseSingleLine(req.Host) + "\n" + @@ -546,11 +558,11 @@ func (r *GenRouter) DumpRequest(req *http.Request, prepend string) { "IP: " + req.RemoteAddr + "\n") } -func (r *GenRouter) SuspiciousRequest(req *http.Request, prepend string) { - if prepend != "" { - prepend += "\n" +func (r *GenRouter) SuspiciousRequest(req *http.Request, pre string) { + if pre != "" { + pre += "\n" } - r.DumpRequest(req,prepend+"Suspicious Request") + r.DumpRequest(req,pre+"Suspicious Request") co.AgentViewCounter.Bump({{.AllAgentMap.suspicious}}) } @@ -703,18 +715,19 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { // Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like. // TODO: Add a setting to disable this? // TODO: Use a more efficient detector instead of smashing every possible combination in - var agent string + //var agent string + var agent int if !c.Config.DisableAnalytics { ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another if ua == "" { co.AgentViewCounter.Bump({{.AllAgentMap.blank}}) if c.Dev.DebugMode { - var prepend string + var pre string for _, char := range req.UserAgent() { - prepend += strconv.Itoa(int(char)) + " " + pre += strconv.Itoa(int(char)) + " " } - r.DumpRequest(req,"Blank UA: " + prepend) + r.DumpRequest(req,"Blank UA: " + pre) } } else { // WIP UA Parser @@ -759,10 +772,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { // Iterate over this in reverse as the real UA tends to be on the right side for i := len(items) - 1; i >= 0; i-- { - fAgent, ok := markToAgent[items[i]] + //fAgent, ok := markToAgent[items[i]] + fAgent, ok := markToID[items[i]] if ok { agent = fAgent - if agent != "safari" { + if agent != {{.AllAgentMap.safari}} { break } } @@ -775,42 +789,43 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { // Special handling switch(agent) { - case "chrome": + case {{.AllAgentMap.chrome}}: if os == {{.AllOSMap.android}} { - agent = "androidchrome" + agent = {{.AllAgentMap.androidchrome}} } - case "safari": + case {{.AllAgentMap.safari}}: if os == {{.AllOSMap.iphone}} { - agent = "mobilesafari" + agent = {{.AllAgentMap.mobilesafari}} } - case "trident": + case {{.AllAgentMap.trident}}: // Hack to support IE11, change this after we start logging versions if strings.Contains(ua,"rv:11") { - agent = "internetexplorer" + agent = {{.AllAgentMap.internetexplorer}} } - case "zgrab": + case {{.AllAgentMap.zgrab}}: r.SuspiciousRequest(req,"Vulnerability Scanner") } - if agent == "" { + if agent == 0 { co.AgentViewCounter.Bump({{.AllAgentMap.unknown}}) if c.Dev.DebugMode { - var prepend string + var pre string for _, char := range req.UserAgent() { - prepend += strconv.Itoa(int(char)) + " " + pre += strconv.Itoa(int(char)) + " " } - r.DumpRequest(req,"Blank UA: " + prepend) + r.DumpRequest(req,"Blank UA: " + pre) } else { r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(ua)) } } else { - co.AgentViewCounter.Bump(agentMapEnum[agent]) + //co.AgentViewCounter.Bump(agentMapEnum[agent]) + co.AgentViewCounter.Bump(agent) } - //co.OSViewCounter.Bump(osMapEnum[os]) co.OSViewCounter.Bump(os) } // TODO: Do we want to track missing language headers too? Maybe as it's own type, e.g. "noheader"? + // TODO: Default to anything other than en, if anything else is present, to avoid over-representing it for multi-linguals? lang := req.Header.Get("Accept-Language") if lang != "" { lLang := strings.Split(strings.TrimSpace(lang),"-") @@ -825,12 +840,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { break } c.DebugDetail("llLang:", llLang) - validCode := co.LangViewCounter.Bump(llLang) - if !validCode { + if !co.LangViewCounter.Bump(llLang) { r.DumpRequest(req,"Invalid ISO Code") } } else { - co.LangViewCounter.Bump("") + co.LangViewCounter.Bump2(0) } if !c.Config.RefNoTrack { diff --git a/routes/common.go b/routes/common.go index 7df1c58e..bdb73050 100644 --- a/routes/common.go +++ b/routes/common.go @@ -111,7 +111,7 @@ func FootHeaders(w http.ResponseWriter, header *c.Header) { // Server pushes can backfire on certain browsers, so we want to make sure it's only triggered for ones where it'll help lastAgent := header.CurrentUser.LastAgent //fmt.Println("lastAgent:", lastAgent) - if lastAgent == "chrome" || lastAgent == "firefox" { + if lastAgent == c.Chrome || lastAgent == c.Firefox { doPush(w, header) } }