add cliqz, blexbot, curl user agents

add a temporary /static/ to /s/ redirect for the slower bots
tweak the Slack and Facebook user agent phrases
avoid more allocations for ajax topic
This commit is contained in:
Azareal 2020-03-13 16:37:18 +10:00
parent 372508bf98
commit bfea6b740f
4 changed files with 129 additions and 97 deletions

View File

@ -589,34 +589,37 @@ var agentMapEnum = map[string]int{
"slurp": 15, "slurp": 15,
"exabot": 16, "exabot": 16,
"mojeek": 17, "mojeek": 17,
"baidu": 18, "cliqz": 18,
"sogou": 19, "baidu": 19,
"toutiao": 20, "sogou": 20,
"haosou": 21, "toutiao": 21,
"duckduckgo": 22, "haosou": 22,
"seznambot": 23, "duckduckgo": 23,
"discord": 24, "seznambot": 24,
"twitter": 25, "discord": 25,
"facebook": 26, "twitter": 26,
"cloudflare": 27, "facebook": 27,
"archive_org": 28, "cloudflare": 28,
"uptimebot": 29, "archive_org": 29,
"slackbot": 30, "uptimebot": 30,
"apple": 31, "slackbot": 31,
"discourse": 32, "apple": 32,
"alexa": 33, "discourse": 33,
"lynx": 34, "alexa": 34,
"blank": 35, "lynx": 35,
"malformed": 36, "blank": 36,
"suspicious": 37, "malformed": 37,
"semrush": 38, "suspicious": 38,
"dotbot": 39, "semrush": 39,
"ahrefs": 40, "dotbot": 40,
"proximic": 41, "ahrefs": 41,
"majestic": 42, "proximic": 42,
"aspiegel": 43, "majestic": 43,
"mail_ru": 44, "blexbot": 44,
"zgrab": 45, "aspiegel": 45,
"mail_ru": 46,
"zgrab": 47,
"curl": 48,
} }
var reverseAgentMapEnum = map[int]string{ var reverseAgentMapEnum = map[int]string{
0: "unknown", 0: "unknown",
@ -637,34 +640,37 @@ var reverseAgentMapEnum = map[int]string{
15: "slurp", 15: "slurp",
16: "exabot", 16: "exabot",
17: "mojeek", 17: "mojeek",
18: "baidu", 18: "cliqz",
19: "sogou", 19: "baidu",
20: "toutiao", 20: "sogou",
21: "haosou", 21: "toutiao",
22: "duckduckgo", 22: "haosou",
23: "seznambot", 23: "duckduckgo",
24: "discord", 24: "seznambot",
25: "twitter", 25: "discord",
26: "facebook", 26: "twitter",
27: "cloudflare", 27: "facebook",
28: "archive_org", 28: "cloudflare",
29: "uptimebot", 29: "archive_org",
30: "slackbot", 30: "uptimebot",
31: "apple", 31: "slackbot",
32: "discourse", 32: "apple",
33: "alexa", 33: "discourse",
34: "lynx", 34: "alexa",
35: "blank", 35: "lynx",
36: "malformed", 36: "blank",
37: "suspicious", 37: "malformed",
38: "semrush", 38: "suspicious",
39: "dotbot", 39: "semrush",
40: "ahrefs", 40: "dotbot",
41: "proximic", 41: "ahrefs",
42: "majestic", 42: "proximic",
43: "aspiegel", 43: "majestic",
44: "mail_ru", 44: "blexbot",
45: "zgrab", 45: "aspiegel",
46: "mail_ru",
47: "zgrab",
48: "curl",
} }
var markToAgent = map[string]string{ var markToAgent = map[string]string{
"OPR": "opera", "OPR": "opera",
@ -691,6 +697,7 @@ var markToAgent = map[string]string{
"Slurp": "slurp", "Slurp": "slurp",
"Exabot": "exabot", "Exabot": "exabot",
"MojeekBot": "mojeek", "MojeekBot": "mojeek",
"Cliqzbot": "cliqz",
"SeznamBot": "seznambot", "SeznamBot": "seznambot",
"CloudFlare": "cloudflare", "CloudFlare": "cloudflare",
"archive": "archive_org", "archive": "archive_org",
@ -709,9 +716,11 @@ var markToAgent = map[string]string{
"AhrefsBot": "ahrefs", "AhrefsBot": "ahrefs",
"proximic": "proximic", "proximic": "proximic",
"MJ12bot": "majestic", "MJ12bot": "majestic",
"BLEXBot": "blexbot",
"AspiegelBot": "aspiegel", "AspiegelBot": "aspiegel",
"RU_Bot": "mail_ru", "RU_Bot": "mail_ru",
"zgrab": "zgrab", "zgrab": "zgrab",
"curl": "curl",
} }
var markToID = map[string]int{ var markToID = map[string]int{
"OPR": 3, "OPR": 3,
@ -721,44 +730,47 @@ var markToID = map[string]int{
"MSIE": 6, "MSIE": 6,
"Trident": 7, "Trident": 7,
"Edge": 5, "Edge": 5,
"Lynx": 34, "Lynx": 35,
"SamsungBrowser": 10, "SamsungBrowser": 10,
"UCBrowser": 11, "UCBrowser": 11,
"Google": 12, "Google": 12,
"Googlebot": 12, "Googlebot": 12,
"yandex": 13, "yandex": 13,
"DuckDuckBot": 22, "DuckDuckBot": 23,
"DuckDuckGo": 22, "DuckDuckGo": 23,
"Baiduspider": 18, "Baiduspider": 19,
"Sogou": 19, "Sogou": 20,
"ToutiaoSpider": 20, "ToutiaoSpider": 21,
"360Spider": 21, "360Spider": 22,
"bingbot": 14, "bingbot": 14,
"BingPreview": 14, "BingPreview": 14,
"Slurp": 15, "Slurp": 15,
"Exabot": 16, "Exabot": 16,
"MojeekBot": 17, "MojeekBot": 17,
"SeznamBot": 23, "Cliqzbot": 18,
"CloudFlare": 27, "SeznamBot": 24,
"archive": 28, "CloudFlare": 28,
"Uptimebot": 29, "archive": 29,
"Slackbot": 30, "Uptimebot": 30,
"Slack": 30, "Slackbot": 31,
"Discordbot": 24, "Slack": 31,
"Twitterbot": 25, "Discordbot": 25,
"facebookexternalhit": 26, "Twitterbot": 26,
"Facebot": 26, "facebookexternalhit": 27,
"Applebot": 31, "Facebot": 27,
"Discourse": 32, "Applebot": 32,
"ia_archiver": 33, "Discourse": 33,
"SemrushBot": 38, "ia_archiver": 34,
"DotBot": 39, "SemrushBot": 39,
"AhrefsBot": 40, "DotBot": 40,
"proximic": 41, "AhrefsBot": 41,
"MJ12bot": 42, "proximic": 42,
"AspiegelBot": 43, "MJ12bot": 43,
"RU_Bot": 44, "BLEXBot": 44,
"zgrab": 45, "AspiegelBot": 45,
"RU_Bot": 46,
"zgrab": 47,
"curl": 48,
} }
/*var agentRank = map[string]int{ /*var agentRank = map[string]int{
"opera":9, "opera":9,
@ -886,7 +898,7 @@ func (r *GenRouter) SuspiciousRequest(req *http.Request, pre string) {
pre += "\n" pre += "\n"
} }
r.DumpRequest(req,pre+"Suspicious Request") r.DumpRequest(req,pre+"Suspicious Request")
co.AgentViewCounter.Bump(37) co.AgentViewCounter.Bump(38)
} }
func isLocalHost(h string) bool { func isLocalHost(h string) bool {
@ -901,7 +913,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
w.WriteHeader(200) // 400 w.WriteHeader(200) // 400
w.Write([]byte("")) w.Write([]byte(""))
r.DumpRequest(req,"Malformed Request T"+strconv.Itoa(typ)) r.DumpRequest(req,"Malformed Request T"+strconv.Itoa(typ))
co.AgentViewCounter.Bump(36) co.AgentViewCounter.Bump(37)
} }
// Split the Host and Port string // Split the Host and Port string
@ -1044,7 +1056,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another
if ua == "" { if ua == "" {
co.AgentViewCounter.Bump(35) co.AgentViewCounter.Bump(36)
if c.Dev.DebugMode { if c.Dev.DebugMode {
var pre string var pre string
for _, char := range req.UserAgent() { for _, char := range req.UserAgent() {
@ -1125,7 +1137,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
if strings.Contains(ua,"rv:11") { if strings.Contains(ua,"rv:11") {
agent = 6 agent = 6
} }
case 45: case 47:
r.SuspiciousRequest(req,"Vulnerability Scanner") r.SuspiciousRequest(req,"Vulnerability Scanner")
} }
@ -2722,6 +2734,10 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user c
/*case "/sitemaps": // TODO: Count these views /*case "/sitemaps": // TODO: Count these views
req.URL.Path += extraData req.URL.Path += extraData
err = sitemapSwitch(w,req)*/ err = sitemapSwitch(w,req)*/
// ! Temporary fix for certain bots
case "/static":
w.Header().Set("Connection", "close")
http.Redirect(w, req, "/s/"+extraData, http.StatusTemporaryRedirect)
case "/uploads": case "/uploads":
if extraData == "" { if extraData == "" {
co.RouteViewCounter.Bump3(166, cn) co.RouteViewCounter.Bump3(166, cn)

View File

@ -203,6 +203,7 @@
"slurp":"Yahoo! Slurp", "slurp":"Yahoo! Slurp",
"exabot":"Exabot", "exabot":"Exabot",
"mojeek":"MojeekBot", "mojeek":"MojeekBot",
"cliqz":"Cliqzbot",
"sogou":"Sogou", "sogou":"Sogou",
"toutiao":"Toutiao", "toutiao":"Toutiao",
"haosou":"Qihoo 360 Search", "haosou":"Qihoo 360 Search",
@ -214,8 +215,8 @@
"cloudflare":"Cloudflare Alwayson", "cloudflare":"Cloudflare Alwayson",
"archive_org":"Archive.org", "archive_org":"Archive.org",
"uptimebot":"Uptimebot", "uptimebot":"Uptimebot",
"slackbot":"Slackbot", "slackbot":"Slack",
"facebook":"FacebookBot", "facebook":"Facebook",
"apple":"AppleBot", "apple":"AppleBot",
"discourse":"Discourse Forum Onebox", "discourse":"Discourse Forum Onebox",
"alexa":"Alexa", "alexa":"Alexa",
@ -226,9 +227,11 @@
"ahrefs":"Ahrefs", "ahrefs":"Ahrefs",
"proximic":"Comscore", "proximic":"Comscore",
"majestic":"MJ12bot", "majestic":"MJ12bot",
"blexbot":"BLEXBot",
"aspiegel":"AspiegelBot", "aspiegel":"AspiegelBot",
"mail_ru":"Mail.ru bot", "mail_ru":"Mail.ru bot",
"zgrab":"Zgrab App Scanner", "zgrab":"Zgrab App Scanner",
"curl":"curl",
"suspicious":"Suspicious", "suspicious":"Suspicious",
"unknown":"Unknown", "unknown":"Unknown",
"blank":"Blank", "blank":"Blank",

View File

@ -249,6 +249,7 @@ func main() {
"slurp", "slurp",
"exabot", "exabot",
"mojeek", "mojeek",
"cliqz",
"baidu", "baidu",
"sogou", "sogou",
"toutiao", "toutiao",
@ -274,9 +275,11 @@ func main() {
"ahrefs", "ahrefs",
"proximic", "proximic",
"majestic", "majestic",
"blexbot",
"aspiegel", "aspiegel",
"mail_ru", "mail_ru",
"zgrab", "zgrab",
"curl",
} }
tmplVars.AllAgentMap = make(map[string]int) tmplVars.AllAgentMap = make(map[string]int)
@ -310,6 +313,7 @@ func main() {
"Slurp", "Slurp",
"Exabot", "Exabot",
"MojeekBot", "MojeekBot",
"Cliqzbot",
"SeznamBot", "SeznamBot",
"CloudFlare", "CloudFlare",
"archive", //archive.org_bot "archive", //archive.org_bot
@ -329,9 +333,11 @@ func main() {
"AhrefsBot", "AhrefsBot",
"proximic", "proximic",
"MJ12bot", "MJ12bot",
"BLEXBot",
"AspiegelBot", "AspiegelBot",
"RU_Bot", // Mail.RU_Bot "RU_Bot", // Mail.RU_Bot
"zgrab", "zgrab",
"curl",
} }
tmplVars.AllAgentMarks = map[string]string{ tmplVars.AllAgentMarks = map[string]string{
@ -360,6 +366,7 @@ func main() {
"Slurp": "slurp", "Slurp": "slurp",
"Exabot": "exabot", "Exabot": "exabot",
"MojeekBot": "mojeek", "MojeekBot": "mojeek",
"Cliqzbot": "cliqz",
"SeznamBot": "seznambot", "SeznamBot": "seznambot",
"CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots? "CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots?
"archive": "archive_org", //archive.org_bot "archive": "archive_org", //archive.org_bot
@ -379,9 +386,11 @@ func main() {
"AhrefsBot": "ahrefs", "AhrefsBot": "ahrefs",
"proximic": "proximic", "proximic": "proximic",
"MJ12bot": "majestic", "MJ12bot": "majestic",
"BLEXBot": "blexbot",
"AspiegelBot": "aspiegel", "AspiegelBot": "aspiegel",
"RU_Bot": "mail_ru", // Mail.RU_Bot "RU_Bot": "mail_ru", // Mail.RU_Bot
"zgrab": "zgrab", "zgrab": "zgrab",
"curl": "curl",
} }
tmplVars.AllAgentMarkIDs = make(map[string]int) tmplVars.AllAgentMarkIDs = make(map[string]int)
@ -918,6 +927,10 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user c
/*case "/sitemaps": // TODO: Count these views /*case "/sitemaps": // TODO: Count these views
req.URL.Path += extraData req.URL.Path += extraData
err = sitemapSwitch(w,req)*/ err = sitemapSwitch(w,req)*/
// ! Temporary fix for certain bots
case "/static":
w.Header().Set("Connection", "close")
http.Redirect(w, req, "/s/"+extraData, http.StatusTemporaryRedirect)
case "/uploads": case "/uploads":
if extraData == "" { if extraData == "" {
co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.UploadedFile"}}, cn) co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.UploadedFile"}}, cn)

View File

@ -121,10 +121,14 @@ func renderTemplate3(tmplName, hookName string, w http.ResponseWriter, r *http.R
h.Stylesheets = nil h.Stylesheets = nil
if r.FormValue("i") != "1" { if r.FormValue("i") != "1" {
c.PrepResources(&h.CurrentUser, h, h.Theme) c.PrepResources(&h.CurrentUser, h, h.Theme)
}
for _, ss := range s { for _, ss := range s {
h.Stylesheets = append(h.Stylesheets, ss) h.Stylesheets = append(h.Stylesheets, ss)
} }
h.AddScript("global.js")
if h.CurrentUser.Loggedin {
h.AddScriptAsync("member.js")
}
}
if h.CurrentUser.Loggedin { if h.CurrentUser.Loggedin {
h.MetaDesc = "" h.MetaDesc = ""
@ -132,10 +136,6 @@ func renderTemplate3(tmplName, hookName string, w http.ResponseWriter, r *http.R
} else if h.MetaDesc != "" && h.OGDesc == "" { } else if h.MetaDesc != "" && h.OGDesc == "" {
h.OGDesc = h.MetaDesc h.OGDesc = h.MetaDesc
} }
h.AddScript("global.js")
if h.CurrentUser.Loggedin {
h.AddScriptAsync("member.js")
}
FootHeaders(w, h) FootHeaders(w, h)
if h.Zone != "error" { if h.Zone != "error" {