add MojeekBot user agent

This commit is contained in:
Azareal 2020-03-11 18:54:11 +10:00
parent d3336245d5
commit 64ec820453
3 changed files with 96 additions and 88 deletions

View File

@ -588,34 +588,35 @@ var agentMapEnum = map[string]int{
"bing": 14, "bing": 14,
"slurp": 15, "slurp": 15,
"exabot": 16, "exabot": 16,
"baidu": 17, "mojeek": 17,
"sogou": 18, "baidu": 18,
"toutiao": 19, "sogou": 19,
"haosou": 20, "toutiao": 20,
"duckduckgo": 21, "haosou": 21,
"seznambot": 22, "duckduckgo": 22,
"discord": 23, "seznambot": 23,
"twitter": 24, "discord": 24,
"facebook": 25, "twitter": 25,
"cloudflare": 26, "facebook": 26,
"archive_org": 27, "cloudflare": 27,
"uptimebot": 28, "archive_org": 28,
"slackbot": 29, "uptimebot": 29,
"apple": 30, "slackbot": 30,
"discourse": 31, "apple": 31,
"alexa": 32, "discourse": 32,
"lynx": 33, "alexa": 33,
"blank": 34, "lynx": 34,
"malformed": 35, "blank": 35,
"suspicious": 36, "malformed": 36,
"semrush": 37, "suspicious": 37,
"dotbot": 38, "semrush": 38,
"ahrefs": 39, "dotbot": 39,
"proximic": 40, "ahrefs": 40,
"majestic": 41, "proximic": 41,
"aspiegel": 42, "majestic": 42,
"mail_ru": 43, "aspiegel": 43,
"zgrab": 44, "mail_ru": 44,
"zgrab": 45,
} }
var reverseAgentMapEnum = map[int]string{ var reverseAgentMapEnum = map[int]string{
0: "unknown", 0: "unknown",
@ -635,34 +636,35 @@ var reverseAgentMapEnum = map[int]string{
14: "bing", 14: "bing",
15: "slurp", 15: "slurp",
16: "exabot", 16: "exabot",
17: "baidu", 17: "mojeek",
18: "sogou", 18: "baidu",
19: "toutiao", 19: "sogou",
20: "haosou", 20: "toutiao",
21: "duckduckgo", 21: "haosou",
22: "seznambot", 22: "duckduckgo",
23: "discord", 23: "seznambot",
24: "twitter", 24: "discord",
25: "facebook", 25: "twitter",
26: "cloudflare", 26: "facebook",
27: "archive_org", 27: "cloudflare",
28: "uptimebot", 28: "archive_org",
29: "slackbot", 29: "uptimebot",
30: "apple", 30: "slackbot",
31: "discourse", 31: "apple",
32: "alexa", 32: "discourse",
33: "lynx", 33: "alexa",
34: "blank", 34: "lynx",
35: "malformed", 35: "blank",
36: "suspicious", 36: "malformed",
37: "semrush", 37: "suspicious",
38: "dotbot", 38: "semrush",
39: "ahrefs", 39: "dotbot",
40: "proximic", 40: "ahrefs",
41: "majestic", 41: "proximic",
42: "aspiegel", 42: "majestic",
43: "mail_ru", 43: "aspiegel",
44: "zgrab", 44: "mail_ru",
45: "zgrab",
} }
var markToAgent = map[string]string{ var markToAgent = map[string]string{
"OPR": "opera", "OPR": "opera",
@ -688,6 +690,7 @@ var markToAgent = map[string]string{
"BingPreview": "bing", "BingPreview": "bing",
"Slurp": "slurp", "Slurp": "slurp",
"Exabot": "exabot", "Exabot": "exabot",
"MojeekBot": "mojeek",
"SeznamBot": "seznambot", "SeznamBot": "seznambot",
"CloudFlare": "cloudflare", "CloudFlare": "cloudflare",
"archive": "archive_org", "archive": "archive_org",
@ -718,43 +721,44 @@ var markToID = map[string]int{
"MSIE": 6, "MSIE": 6,
"Trident": 7, "Trident": 7,
"Edge": 5, "Edge": 5,
"Lynx": 33, "Lynx": 34,
"SamsungBrowser": 10, "SamsungBrowser": 10,
"UCBrowser": 11, "UCBrowser": 11,
"Google": 12, "Google": 12,
"Googlebot": 12, "Googlebot": 12,
"yandex": 13, "yandex": 13,
"DuckDuckBot": 21, "DuckDuckBot": 22,
"DuckDuckGo": 21, "DuckDuckGo": 22,
"Baiduspider": 17, "Baiduspider": 18,
"Sogou": 18, "Sogou": 19,
"ToutiaoSpider": 19, "ToutiaoSpider": 20,
"360Spider": 20, "360Spider": 21,
"bingbot": 14, "bingbot": 14,
"BingPreview": 14, "BingPreview": 14,
"Slurp": 15, "Slurp": 15,
"Exabot": 16, "Exabot": 16,
"SeznamBot": 22, "MojeekBot": 17,
"CloudFlare": 26, "SeznamBot": 23,
"archive": 27, "CloudFlare": 27,
"Uptimebot": 28, "archive": 28,
"Slackbot": 29, "Uptimebot": 29,
"Slack": 29, "Slackbot": 30,
"Discordbot": 23, "Slack": 30,
"Twitterbot": 24, "Discordbot": 24,
"facebookexternalhit": 25, "Twitterbot": 25,
"Facebot": 25, "facebookexternalhit": 26,
"Applebot": 30, "Facebot": 26,
"Discourse": 31, "Applebot": 31,
"ia_archiver": 32, "Discourse": 32,
"SemrushBot": 37, "ia_archiver": 33,
"DotBot": 38, "SemrushBot": 38,
"AhrefsBot": 39, "DotBot": 39,
"proximic": 40, "AhrefsBot": 40,
"MJ12bot": 41, "proximic": 41,
"AspiegelBot": 42, "MJ12bot": 42,
"RU_Bot": 43, "AspiegelBot": 43,
"zgrab": 44, "RU_Bot": 44,
"zgrab": 45,
} }
/*var agentRank = map[string]int{ /*var agentRank = map[string]int{
"opera":9, "opera":9,
@ -882,7 +886,7 @@ func (r *GenRouter) SuspiciousRequest(req *http.Request, pre string) {
pre += "\n" pre += "\n"
} }
r.DumpRequest(req,pre+"Suspicious Request") r.DumpRequest(req,pre+"Suspicious Request")
co.AgentViewCounter.Bump(36) co.AgentViewCounter.Bump(37)
} }
func isLocalHost(h string) bool { func isLocalHost(h string) bool {
@ -897,7 +901,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
w.WriteHeader(200) // 400 w.WriteHeader(200) // 400
w.Write([]byte("")) w.Write([]byte(""))
r.DumpRequest(req,"Malformed Request T"+strconv.Itoa(typ)) r.DumpRequest(req,"Malformed Request T"+strconv.Itoa(typ))
co.AgentViewCounter.Bump(35) co.AgentViewCounter.Bump(36)
} }
// Split the Host and Port string // Split the Host and Port string
@ -1040,7 +1044,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another
if ua == "" { if ua == "" {
co.AgentViewCounter.Bump(34) co.AgentViewCounter.Bump(35)
if c.Dev.DebugMode { if c.Dev.DebugMode {
var pre string var pre string
for _, char := range req.UserAgent() { for _, char := range req.UserAgent() {
@ -1121,7 +1125,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
if strings.Contains(ua,"rv:11") { if strings.Contains(ua,"rv:11") {
agent = 6 agent = 6
} }
case 44: case 45:
r.SuspiciousRequest(req,"Vulnerability Scanner") r.SuspiciousRequest(req,"Vulnerability Scanner")
} }

View File

@ -202,6 +202,7 @@
"bing":"Bing", "bing":"Bing",
"slurp":"Yahoo! Slurp", "slurp":"Yahoo! Slurp",
"exabot":"Exabot", "exabot":"Exabot",
"mojeek":"MojeekBot",
"sogou":"Sogou", "sogou":"Sogou",
"toutiao":"Toutiao", "toutiao":"Toutiao",
"haosou":"Qihoo 360 Search", "haosou":"Qihoo 360 Search",

View File

@ -248,6 +248,7 @@ func main() {
"bing", "bing",
"slurp", "slurp",
"exabot", "exabot",
"mojeek",
"baidu", "baidu",
"sogou", "sogou",
"toutiao", "toutiao",
@ -308,6 +309,7 @@ func main() {
"BingPreview", "BingPreview",
"Slurp", "Slurp",
"Exabot", "Exabot",
"MojeekBot",
"SeznamBot", "SeznamBot",
"CloudFlare", "CloudFlare",
"archive", //archive.org_bot "archive", //archive.org_bot
@ -357,6 +359,7 @@ func main() {
"BingPreview": "bing", "BingPreview": "bing",
"Slurp": "slurp", "Slurp": "slurp",
"Exabot": "exabot", "Exabot": "exabot",
"MojeekBot": "mojeek",
"SeznamBot": "seznambot", "SeznamBot": "seznambot",
"CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots? "CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots?
"archive": "archive_org", //archive.org_bot "archive": "archive_org", //archive.org_bot