Reduce user agent mark boilerplate in router gen.

Fix PetalBot detection.
This commit is contained in:
Azareal 2021-02-17 10:07:11 +10:00
parent 3d2200bd18
commit 0740b3bf14
2 changed files with 81 additions and 144 deletions

View File

@@ -772,6 +772,7 @@ var markToAgent = map[string]string{
"BLEXBot": "blexbot", "BLEXBot": "blexbot",
"Burf": "burf", "Burf": "burf",
"AspiegelBot": "aspiegel", "AspiegelBot": "aspiegel",
"PetalBot": "aspiegel",
"RU_Bot": "mail_ru", "RU_Bot": "mail_ru",
"CCBot": "ccbot", "CCBot": "ccbot",
"zgrab": "zgrab", "zgrab": "zgrab",
@@ -841,6 +842,7 @@ var markToID = map[string]int{
"BLEXBot": 53, "BLEXBot": 53,
"Burf": 54, "Burf": 54,
"AspiegelBot": 55, "AspiegelBot": 55,
"PetalBot": 55,
"RU_Bot": 56, "RU_Bot": 56,
"CCBot": 57, "CCBot": 57,
"zgrab": 58, "zgrab": 58,
@@ -1437,8 +1439,8 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
return err return err
} }
gzw, ok := w.(c.GzipResponseWriter)
if gzw, ok := w.(c.GzipResponseWriter); ok { if ok {
w = gzw.ResponseWriter w = gzw.ResponseWriter
w.Header().Del("Content-Encoding") w.Header().Del("Content-Encoding")
} }
@@ -2010,8 +2012,8 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
return err return err
} }
gzw, ok := w.(c.GzipResponseWriter)
if gzw, ok := w.(c.GzipResponseWriter); ok { if ok {
w = gzw.ResponseWriter w = gzw.ResponseWriter
w.Header().Del("Content-Encoding") w.Header().Del("Content-Encoding")
} }

View File

@@ -304,150 +304,85 @@ func main() {
tmplVars.AllAgentMap[agent] = id tmplVars.AllAgentMap[agent] = id
} }
tmplVars.AllAgentMarkNames = []string{ tmplVars.AllAgentMarkNames = []string{}
"OPR", tmplVars.AllAgentMarks = map[string]string{}
"Chrome",
"Firefox",
"Safari",
"MSIE",
"Trident",
"Edge",
"Lynx",
"SamsungBrowser",
"UCBrowser",
"Google", // Add agent marks
"Googlebot", a := func(mark, agent string) {
"yandex", tmplVars.AllAgentMarkNames = append(tmplVars.AllAgentMarkNames, mark)
"DuckDuckBot", tmplVars.AllAgentMarks[mark] = agent
"DuckDuckGo",
"Baiduspider",
"Sogou",
"ToutiaoSpider",
"360Spider",
"bingbot",
"BingPreview",
"msnbot",
"Slurp",
"Exabot",
"MojeekBot",
"Cliqzbot",
"netEstate",
"SeznamBot",
"CloudFlare",
"archive", //archive.org_bot
"Uptimebot",
"Slackbot",
"Slack",
"Discordbot",
"TelegramBot",
"Twitterbot",
"facebookexternalhit",
"Facebot",
"Applebot",
"Discourse",
"mattermost",
"ia_archiver",
"SemrushBot",
"DotBot",
"AhrefsBot",
"proximic",
"MegaIndex",
"MJ12bot",
"mj12bot",
"Cocolyzebot",
"Barkrowler",
"SurdotlyBot",
"DomCopBot",
"NetcraftSurveyAgent",
"BLEXBot",
"Burf",
"AspiegelBot",
"RU_Bot", // Mail.RU_Bot
"CCBot",
"zgrab",
"Nimbostratus",
"MauiBot",
"curl",
"python",
"Go",
"HeadlessChrome",
"awesome_bot",
} }
a("OPR", "opera")
a("Chrome", "chrome")
a("Firefox", "firefox")
a("Safari", "safari")
a("MSIE", "internetexplorer")
a("Trident", "trident") // Hack to support IE11
a("Edge", "edge")
a("Lynx", "lynx") // There's a rare android variant of lynx which isn't covered by this
a("SamsungBrowser", "samsung")
a("UCBrowser", "ucbrowser")
tmplVars.AllAgentMarks = map[string]string{ a("Google", "googlebot")
"OPR": "opera", a("Googlebot", "googlebot")
"Chrome": "chrome", a("yandex", "yandex") // from the URL
"Firefox": "firefox", a("DuckDuckBot", "duckduckgo")
"Safari": "safari", a("DuckDuckGo", "duckduckgo")
"MSIE": "internetexplorer", a("Baiduspider", "baidu")
"Trident": "trident", // Hack to support IE11 a("Sogou", "sogou")
"Edge": "edge", a("ToutiaoSpider", "toutiao")
"Lynx": "lynx", // There's a rare android variant of lynx which isn't covered by this a("360Spider", "haosou")
"SamsungBrowser": "samsung", a("bingbot", "bing")
"UCBrowser": "ucbrowser", a("BingPreview", "bing")
a("msnbot", "bing")
a("Slurp", "slurp")
a("Exabot", "exabot")
a("MojeekBot", "mojeek")
a("Cliqzbot", "cliqz")
a("netEstate", "datenbank")
a("SeznamBot", "seznambot")
a("CloudFlare", "cloudflare") // Track alwayson specifically in case there are other bots?
a("archive", "archive_org") //archive.org_bot
a("Uptimebot", "uptimebot")
a("Slackbot", "slackbot")
a("Slack", "slackbot")
a("Discordbot", "discord")
a("TelegramBot", "telegram")
a("Twitterbot", "twitter")
a("facebookexternalhit", "facebook")
a("Facebot", "facebook")
a("Applebot", "apple")
a("Discourse", "discourse")
a("mattermost", "mattermost")
a("ia_archiver", "alexa")
"Google": "googlebot", a("SemrushBot", "semrush")
"Googlebot": "googlebot", a("DotBot", "dotbot")
"yandex": "yandex", // from the URL a("AhrefsBot", "ahrefs")
"DuckDuckBot": "duckduckgo", a("proximic", "proximic")
"DuckDuckGo": "duckduckgo", a("MegaIndex", "megaindex")
"Baiduspider": "baidu", a("MJ12bot", "majestic") // TODO: This isn't matching bots out in the wild
"Sogou": "sogou", a("mj12bot", "majestic")
"ToutiaoSpider": "toutiao", a("Cocolyzebot", "cocolyze")
"360Spider": "haosou", a("Barkrowler", "babbar")
"bingbot": "bing", a("SurdotlyBot", "surdotly")
"BingPreview": "bing", a("DomCopBot", "domcop")
"msnbot": "bing", a("NetcraftSurveyAgent", "netcraft")
"Slurp": "slurp", a("BLEXBot", "blexbot")
"Exabot": "exabot", a("Burf", "burf")
"MojeekBot": "mojeek", a("AspiegelBot", "aspiegel")
"Cliqzbot": "cliqz", a("PetalBot", "aspiegel")
"netEstate": "datenbank", a("RU_Bot", "mail_ru") // Mail.RU_Bot
"SeznamBot": "seznambot", a("CCBot", "ccbot")
"CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots? a("zgrab", "zgrab")
"archive": "archive_org", //archive.org_bot a("Nimbostratus", "cloudsystemnetworks")
"Uptimebot": "uptimebot", a("MauiBot", "maui")
"Slackbot": "slackbot", a("curl", "curl")
"Slack": "slackbot", a("python", "python")
"Discordbot": "discord", a("Go", "go")
"TelegramBot": "telegram", a("HeadlessChrome", "headlesschrome")
"Twitterbot": "twitter", a("awesome_bot", "awesome_bot")
"facebookexternalhit": "facebook", // TODO: Detect Adsbot/3.1, it has a similar user agent to Google's Adsbot, but it is different. No Google fragments.
"Facebot": "facebook",
"Applebot": "apple",
"Discourse": "discourse",
"mattermost": "mattermost",
"ia_archiver": "alexa",
"SemrushBot": "semrush",
"DotBot": "dotbot",
"AhrefsBot": "ahrefs",
"proximic": "proximic",
"MegaIndex": "megaindex",
"MJ12bot": "majestic",
"mj12bot": "majestic",
"Cocolyzebot": "cocolyze",
"Barkrowler": "babbar",
"SurdotlyBot": "surdotly",
"DomCopBot": "domcop",
"NetcraftSurveyAgent": "netcraft",
"BLEXBot": "blexbot",
"Burf": "burf",
"AspiegelBot": "aspiegel",
"PetalBot": "aspiegel",
"RU_Bot": "mail_ru", // Mail.RU_Bot
"CCBot": "ccbot",
"zgrab": "zgrab",
"Nimbostratus": "cloudsystemnetworks",
"MauiBot": "maui",
"curl": "curl",
"python": "python",
"Go": "go",
"HeadlessChrome": "headlesschrome",
"awesome_bot": "awesome_bot",
}
tmplVars.AllAgentMarkIDs = make(map[string]int) tmplVars.AllAgentMarkIDs = make(map[string]int)
for mark, agent := range tmplVars.AllAgentMarks { for mark, agent := range tmplVars.AllAgentMarks {