From bfea6b740f0c28ee5dfe5daa0817f9a451749b57 Mon Sep 17 00:00:00 2001 From: Azareal Date: Fri, 13 Mar 2020 16:37:18 +1000 Subject: [PATCH] add cliqz, blexbot, curl user agents add a /static/ redirect for the slower bots x.x tweak slack, facebook ua phrases avoid more allocs for ajax topic --- gen_router.go | 192 ++++++++++++++++++++++++--------------------- langs/english.json | 7 +- router_gen/main.go | 13 +++ routes/common.go | 14 ++-- 4 files changed, 129 insertions(+), 97 deletions(-) diff --git a/gen_router.go b/gen_router.go index 697c35fe..0cb4f6f3 100644 --- a/gen_router.go +++ b/gen_router.go @@ -589,34 +589,37 @@ var agentMapEnum = map[string]int{ "slurp": 15, "exabot": 16, "mojeek": 17, - "baidu": 18, - "sogou": 19, - "toutiao": 20, - "haosou": 21, - "duckduckgo": 22, - "seznambot": 23, - "discord": 24, - "twitter": 25, - "facebook": 26, - "cloudflare": 27, - "archive_org": 28, - "uptimebot": 29, - "slackbot": 30, - "apple": 31, - "discourse": 32, - "alexa": 33, - "lynx": 34, - "blank": 35, - "malformed": 36, - "suspicious": 37, - "semrush": 38, - "dotbot": 39, - "ahrefs": 40, - "proximic": 41, - "majestic": 42, - "aspiegel": 43, - "mail_ru": 44, - "zgrab": 45, + "cliqz": 18, + "baidu": 19, + "sogou": 20, + "toutiao": 21, + "haosou": 22, + "duckduckgo": 23, + "seznambot": 24, + "discord": 25, + "twitter": 26, + "facebook": 27, + "cloudflare": 28, + "archive_org": 29, + "uptimebot": 30, + "slackbot": 31, + "apple": 32, + "discourse": 33, + "alexa": 34, + "lynx": 35, + "blank": 36, + "malformed": 37, + "suspicious": 38, + "semrush": 39, + "dotbot": 40, + "ahrefs": 41, + "proximic": 42, + "majestic": 43, + "blexbot": 44, + "aspiegel": 45, + "mail_ru": 46, + "zgrab": 47, + "curl": 48, } var reverseAgentMapEnum = map[int]string{ 0: "unknown", @@ -637,34 +640,37 @@ var reverseAgentMapEnum = map[int]string{ 15: "slurp", 16: "exabot", 17: "mojeek", - 18: "baidu", - 19: "sogou", - 20: "toutiao", - 21: "haosou", - 22: "duckduckgo", - 23: "seznambot", - 24: "discord", - 25: "twitter", - 26: "facebook", - 27: "cloudflare", - 28: "archive_org", - 29: "uptimebot", - 30: "slackbot", - 31: "apple", - 32: "discourse", - 33: "alexa", - 34: "lynx", - 35: "blank", - 36: "malformed", - 37: "suspicious", - 38: "semrush", - 39: "dotbot", - 40: "ahrefs", - 41: "proximic", - 42: "majestic", - 43: "aspiegel", - 44: "mail_ru", - 45: "zgrab", + 18: "cliqz", + 19: "baidu", + 20: "sogou", + 21: "toutiao", + 22: "haosou", + 23: "duckduckgo", + 24: "seznambot", + 25: "discord", + 26: "twitter", + 27: "facebook", + 28: "cloudflare", + 29: "archive_org", + 30: "uptimebot", + 31: "slackbot", + 32: "apple", + 33: "discourse", + 34: "alexa", + 35: "lynx", + 36: "blank", + 37: "malformed", + 38: "suspicious", + 39: "semrush", + 40: "dotbot", + 41: "ahrefs", + 42: "proximic", + 43: "majestic", + 44: "blexbot", + 45: "aspiegel", + 46: "mail_ru", + 47: "zgrab", + 48: "curl", } var markToAgent = map[string]string{ "OPR": "opera", @@ -691,6 +697,7 @@ var markToAgent = map[string]string{ "Slurp": "slurp", "Exabot": "exabot", "MojeekBot": "mojeek", + "Cliqzbot": "cliqz", "SeznamBot": "seznambot", "CloudFlare": "cloudflare", "archive": "archive_org", @@ -709,9 +716,11 @@ var markToAgent = map[string]string{ "AhrefsBot": "ahrefs", "proximic": "proximic", "MJ12bot": "majestic", + "BLEXBot": "blexbot", "AspiegelBot": "aspiegel", "RU_Bot": "mail_ru", "zgrab": "zgrab", + "curl": "curl", } var markToID = map[string]int{ "OPR": 3, @@ -721,44 +730,47 @@ var markToID = map[string]int{ "MSIE": 6, "Trident": 7, "Edge": 5, - "Lynx": 34, + "Lynx": 35, "SamsungBrowser": 10, "UCBrowser": 11, "Google": 12, "Googlebot": 12, "yandex": 13, - "DuckDuckBot": 22, - "DuckDuckGo": 22, - "Baiduspider": 18, - "Sogou": 19, - "ToutiaoSpider": 20, - "360Spider": 21, + "DuckDuckBot": 23, + "DuckDuckGo": 23, + "Baiduspider": 19, + "Sogou": 20, + "ToutiaoSpider": 21, + "360Spider": 22, "bingbot": 14, "BingPreview": 14, "Slurp": 15, "Exabot": 16, "MojeekBot": 17, - "SeznamBot": 23, - "CloudFlare": 27, - "archive": 28, - "Uptimebot": 29, - "Slackbot": 30, - "Slack": 30, - "Discordbot": 24, - "Twitterbot": 25, - "facebookexternalhit": 26, - "Facebot": 26, - "Applebot": 31, - "Discourse": 32, - "ia_archiver": 33, - "SemrushBot": 38, - "DotBot": 39, - "AhrefsBot": 40, - "proximic": 41, - "MJ12bot": 42, - "AspiegelBot": 43, - "RU_Bot": 44, - "zgrab": 45, + "Cliqzbot": 18, + "SeznamBot": 24, + "CloudFlare": 28, + "archive": 29, + "Uptimebot": 30, + "Slackbot": 31, + "Slack": 31, + "Discordbot": 25, + "Twitterbot": 26, + "facebookexternalhit": 27, + "Facebot": 27, + "Applebot": 32, + "Discourse": 33, + "ia_archiver": 34, + "SemrushBot": 39, + "DotBot": 40, + "AhrefsBot": 41, + "proximic": 42, + "MJ12bot": 43, + "BLEXBot": 44, + "AspiegelBot": 45, + "RU_Bot": 46, + "zgrab": 47, + "curl": 48, } /*var agentRank = map[string]int{ "opera":9, @@ -886,7 +898,7 @@ func (r *GenRouter) SuspiciousRequest(req *http.Request, pre string) { pre += "\n" } r.DumpRequest(req,pre+"Suspicious Request") - co.AgentViewCounter.Bump(37) + co.AgentViewCounter.Bump(38) } func isLocalHost(h string) bool { @@ -901,7 +913,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { w.WriteHeader(200) // 400 w.Write([]byte("")) r.DumpRequest(req,"Malformed Request T"+strconv.Itoa(typ)) - co.AgentViewCounter.Bump(36) + co.AgentViewCounter.Bump(37) } // Split the Host and Port string @@ -1044,7 +1056,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another if ua == "" { - co.AgentViewCounter.Bump(35) + co.AgentViewCounter.Bump(36) if c.Dev.DebugMode { var pre string for _, char := range req.UserAgent() { @@ -1125,7 +1137,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { if strings.Contains(ua,"rv:11") { agent = 6 } - case 45: + case 47: r.SuspiciousRequest(req,"Vulnerability Scanner") } @@ -2722,6 +2734,10 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user c /*case "/sitemaps": // TODO: Count these views req.URL.Path += extraData err = sitemapSwitch(w,req)*/ + // ! Temporary fix for certain bots + case "/static": + w.Header().Set("Connection", "close") + http.Redirect(w, req, "/s/"+extraData, http.StatusTemporaryRedirect) case "/uploads": if extraData == "" { co.RouteViewCounter.Bump3(166, cn) diff --git a/langs/english.json b/langs/english.json index 95daf661..0e170a52 100644 --- a/langs/english.json +++ b/langs/english.json @@ -203,6 +203,7 @@ "slurp":"Yahoo! Slurp", "exabot":"Exabot", "mojeek":"MojeekBot", + "cliqz":"Cliqzbot", "sogou":"Sogou", "toutiao":"Toutiao", "haosou":"Qihoo 360 Search", @@ -214,8 +215,8 @@ "cloudflare":"Cloudflare Alwayson", "archive_org":"Archive.org", "uptimebot":"Uptimebot", - "slackbot":"Slackbot", - "facebook":"FacebookBot", + "slackbot":"Slack", + "facebook":"Facebook", "apple":"AppleBot", "discourse":"Discourse Forum Onebox", "alexa":"Alexa", @@ -226,9 +227,11 @@ "ahrefs":"Ahrefs", "proximic":"Comscore", "majestic":"MJ12bot", + "blexbot":"BLEXBot", "aspiegel":"AspiegelBot", "mail_ru":"Mail.ru bot", "zgrab":"Zgrab App Scanner", + "curl":"curl", "suspicious":"Suspicious", "unknown":"Unknown", "blank":"Blank", diff --git a/router_gen/main.go b/router_gen/main.go index d122dc6b..4d11b1e3 100644 --- a/router_gen/main.go +++ b/router_gen/main.go @@ -249,6 +249,7 @@ func main() { "slurp", "exabot", "mojeek", + "cliqz", "baidu", "sogou", "toutiao", @@ -274,9 +275,11 @@ func main() { "ahrefs", "proximic", "majestic", + "blexbot", "aspiegel", "mail_ru", "zgrab", + "curl", } tmplVars.AllAgentMap = make(map[string]int) @@ -310,6 +313,7 @@ func main() { "Slurp", "Exabot", "MojeekBot", + "Cliqzbot", "SeznamBot", "CloudFlare", "archive", //archive.org_bot @@ -329,9 +333,11 @@ func main() { "AhrefsBot", "proximic", "MJ12bot", + "BLEXBot", "AspiegelBot", "RU_Bot", // Mail.RU_Bot "zgrab", + "curl", } tmplVars.AllAgentMarks = map[string]string{ @@ -360,6 +366,7 @@ func main() { "Slurp": "slurp", "Exabot": "exabot", "MojeekBot": "mojeek", + "Cliqzbot": "cliqz", "SeznamBot": "seznambot", "CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots? "archive": "archive_org", //archive.org_bot @@ -379,9 +386,11 @@ func main() { "AhrefsBot": "ahrefs", "proximic": "proximic", "MJ12bot": "majestic", + "BLEXBot": "blexbot", "AspiegelBot": "aspiegel", "RU_Bot": "mail_ru", // Mail.RU_Bot "zgrab": "zgrab", + "curl": "curl", } tmplVars.AllAgentMarkIDs = make(map[string]int) @@ -918,6 +927,10 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user c /*case "/sitemaps": // TODO: Count these views req.URL.Path += extraData err = sitemapSwitch(w,req)*/ + // ! Temporary fix for certain bots + case "/static": + w.Header().Set("Connection", "close") + http.Redirect(w, req, "/s/"+extraData, http.StatusTemporaryRedirect) case "/uploads": if extraData == "" { co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.UploadedFile"}}, cn) diff --git a/routes/common.go b/routes/common.go index fd2217c4..912276eb 100644 --- a/routes/common.go +++ b/routes/common.go @@ -121,9 +121,13 @@ func renderTemplate3(tmplName, hookName string, w http.ResponseWriter, r *http.R h.Stylesheets = nil if r.FormValue("i") != "1" { c.PrepResources(&h.CurrentUser, h, h.Theme) - } - for _, ss := range s { - h.Stylesheets = append(h.Stylesheets, ss) + for _, ss := range s { + h.Stylesheets = append(h.Stylesheets, ss) + } + h.AddScript("global.js") + if h.CurrentUser.Loggedin { + h.AddScriptAsync("member.js") + } } if h.CurrentUser.Loggedin { @@ -132,10 +136,6 @@ func renderTemplate3(tmplName, hookName string, w http.ResponseWriter, r *http.R } else if h.MetaDesc != "" && h.OGDesc == "" { h.OGDesc = h.MetaDesc } - h.AddScript("global.js") - if h.CurrentUser.Loggedin { - h.AddScriptAsync("member.js") - } FootHeaders(w, h) if h.Zone != "error" {