From 0740b3bf14896f9bf51474b6d5a24e5c511d4db2 Mon Sep 17 00:00:00 2001 From: Azareal Date: Wed, 17 Feb 2021 10:07:11 +1000 Subject: [PATCH] Reduce user agent mark boilerplate in router gen. Fix PetalBot detection. --- gen_router.go | 10 ++- router_gen/main.go | 215 ++++++++++++++++----------------------------- 2 files changed, 81 insertions(+), 144 deletions(-) diff --git a/gen_router.go b/gen_router.go index c04c5fa7..e35ad717 100644 --- a/gen_router.go +++ b/gen_router.go @@ -772,6 +772,7 @@ var markToAgent = map[string]string{ "BLEXBot": "blexbot", "Burf": "burf", "AspiegelBot": "aspiegel", + "PetalBot": "aspiegel", "RU_Bot": "mail_ru", "CCBot": "ccbot", "zgrab": "zgrab", @@ -841,6 +842,7 @@ var markToID = map[string]int{ "BLEXBot": 53, "Burf": 54, "AspiegelBot": 55, + "PetalBot": 55, "RU_Bot": 56, "CCBot": 57, "zgrab": 58, @@ -1437,8 +1439,8 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user * return err } - - if gzw, ok := w.(c.GzipResponseWriter); ok { + gzw, ok := w.(c.GzipResponseWriter) + if ok { w = gzw.ResponseWriter w.Header().Del("Content-Encoding") } @@ -2010,8 +2012,8 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user * return err } - - if gzw, ok := w.(c.GzipResponseWriter); ok { + gzw, ok := w.(c.GzipResponseWriter) + if ok { w = gzw.ResponseWriter w.Header().Del("Content-Encoding") } diff --git a/router_gen/main.go b/router_gen/main.go index 6a7697da..b522bf8c 100644 --- a/router_gen/main.go +++ b/router_gen/main.go @@ -304,150 +304,85 @@ func main() { tmplVars.AllAgentMap[agent] = id } - tmplVars.AllAgentMarkNames = []string{ - "OPR", - "Chrome", - "Firefox", - "Safari", - "MSIE", - "Trident", - "Edge", - "Lynx", - "SamsungBrowser", - "UCBrowser", + tmplVars.AllAgentMarkNames = []string{} + tmplVars.AllAgentMarks = map[string]string{} - "Google", - "Googlebot", - "yandex", - "DuckDuckBot", - "DuckDuckGo", - "Baiduspider", - "Sogou", - "ToutiaoSpider", - "360Spider", - "bingbot", - "BingPreview", - "msnbot", - "Slurp", - "Exabot", - "MojeekBot", - "Cliqzbot", - "netEstate", - "SeznamBot", - "CloudFlare", - "archive", //archive.org_bot - "Uptimebot", - "Slackbot", - "Slack", - "Discordbot", - "TelegramBot", - "Twitterbot", - "facebookexternalhit", - "Facebot", - "Applebot", - "Discourse", - "mattermost", - "ia_archiver", - - "SemrushBot", - "DotBot", - "AhrefsBot", - "proximic", - "MegaIndex", - "MJ12bot", - "mj12bot", - "Cocolyzebot", - "Barkrowler", - "SurdotlyBot", - "DomCopBot", - "NetcraftSurveyAgent", - "BLEXBot", - "Burf", - "AspiegelBot", - "RU_Bot", // Mail.RU_Bot - "CCBot", - "zgrab", - "Nimbostratus", - "MauiBot", - "curl", - "python", - "Go", - "HeadlessChrome", - "awesome_bot", + // Add agent marks + a := func(mark, agent string) { + tmplVars.AllAgentMarkNames = append(tmplVars.AllAgentMarkNames, mark) + tmplVars.AllAgentMarks[mark] = agent } + a("OPR", "opera") + a("Chrome", "chrome") + a("Firefox", "firefox") + a("Safari", "safari") + a("MSIE", "internetexplorer") + a("Trident", "trident") // Hack to support IE11 + a("Edge", "edge") + a("Lynx", "lynx") // There's a rare android variant of lynx which isn't covered by this + a("SamsungBrowser", "samsung") + a("UCBrowser", "ucbrowser") - tmplVars.AllAgentMarks = map[string]string{ - "OPR": "opera", - "Chrome": "chrome", - "Firefox": "firefox", - "Safari": "safari", - "MSIE": "internetexplorer", - "Trident": "trident", // Hack to support IE11 - "Edge": "edge", - "Lynx": "lynx", // There's a rare android variant of lynx which isn't covered by this - "SamsungBrowser": "samsung", - "UCBrowser": "ucbrowser", + a("Google", "googlebot") + a("Googlebot", "googlebot") + a("yandex", "yandex") // from the URL + a("DuckDuckBot", "duckduckgo") + a("DuckDuckGo", "duckduckgo") + a("Baiduspider", "baidu") + a("Sogou", "sogou") + a("ToutiaoSpider", "toutiao") + a("360Spider", "haosou") + a("bingbot", "bing") + a("BingPreview", "bing") + a("msnbot", "bing") + a("Slurp", "slurp") + a("Exabot", "exabot") + a("MojeekBot", "mojeek") + a("Cliqzbot", "cliqz") + a("netEstate", "datenbank") + a("SeznamBot", "seznambot") + a("CloudFlare", "cloudflare") // Track alwayson specifically in case there are other bots? + a("archive", "archive_org") //archive.org_bot + a("Uptimebot", "uptimebot") + a("Slackbot", "slackbot") + a("Slack", "slackbot") + a("Discordbot", "discord") + a("TelegramBot", "telegram") + a("Twitterbot", "twitter") + a("facebookexternalhit", "facebook") + a("Facebot", "facebook") + a("Applebot", "apple") + a("Discourse", "discourse") + a("mattermost", "mattermost") + a("ia_archiver", "alexa") - "Google": "googlebot", - "Googlebot": "googlebot", - "yandex": "yandex", // from the URL - "DuckDuckBot": "duckduckgo", - "DuckDuckGo": "duckduckgo", - "Baiduspider": "baidu", - "Sogou": "sogou", - "ToutiaoSpider": "toutiao", - "360Spider": "haosou", - "bingbot": "bing", - "BingPreview": "bing", - "msnbot": "bing", - "Slurp": "slurp", - "Exabot": "exabot", - "MojeekBot": "mojeek", - "Cliqzbot": "cliqz", - "netEstate": "datenbank", - "SeznamBot": "seznambot", - "CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots? - "archive": "archive_org", //archive.org_bot - "Uptimebot": "uptimebot", - "Slackbot": "slackbot", - "Slack": "slackbot", - "Discordbot": "discord", - "TelegramBot": "telegram", - "Twitterbot": "twitter", - "facebookexternalhit": "facebook", - "Facebot": "facebook", - "Applebot": "apple", - "Discourse": "discourse", - "mattermost": "mattermost", - "ia_archiver": "alexa", - - "SemrushBot": "semrush", - "DotBot": "dotbot", - "AhrefsBot": "ahrefs", - "proximic": "proximic", - "MegaIndex": "megaindex", - "MJ12bot": "majestic", - "mj12bot": "majestic", - "Cocolyzebot": "cocolyze", - "Barkrowler": "babbar", - "SurdotlyBot": "surdotly", - "DomCopBot": "domcop", - "NetcraftSurveyAgent": "netcraft", - "BLEXBot": "blexbot", - "Burf": "burf", - "AspiegelBot": "aspiegel", - "PetalBot": "aspiegel", - "RU_Bot": "mail_ru", // Mail.RU_Bot - "CCBot": "ccbot", - "zgrab": "zgrab", - "Nimbostratus": "cloudsystemnetworks", - "MauiBot": "maui", - "curl": "curl", - "python": "python", - "Go": "go", - "HeadlessChrome": "headlesschrome", - "awesome_bot": "awesome_bot", - } + a("SemrushBot", "semrush") + a("DotBot", "dotbot") + a("AhrefsBot", "ahrefs") + a("proximic", "proximic") + a("MegaIndex", "megaindex") + a("MJ12bot", "majestic") // TODO: This isn't matching bots out in the wild + a("mj12bot", "majestic") + a("Cocolyzebot", "cocolyze") + a("Barkrowler", "babbar") + a("SurdotlyBot", "surdotly") + a("DomCopBot", "domcop") + a("NetcraftSurveyAgent", "netcraft") + a("BLEXBot", "blexbot") + a("Burf", "burf") + a("AspiegelBot", "aspiegel") + a("PetalBot", "aspiegel") + a("RU_Bot", "mail_ru") // Mail.RU_Bot + a("CCBot", "ccbot") + a("zgrab", "zgrab") + a("Nimbostratus", "cloudsystemnetworks") + a("MauiBot", "maui") + a("curl", "curl") + a("python", "python") + a("Go", "go") + a("HeadlessChrome", "headlesschrome") + a("awesome_bot", "awesome_bot") + // TODO: Detect Adsbot/3.1, it has a similar user agent to Google's Adsbot, but it is different. No Google fragments. tmplVars.AllAgentMarkIDs = make(map[string]int) for mark, agent := range tmplVars.AllAgentMarks {