add slurp, exabot, sogou and alexa user agents

try again to add aspiegelbot (register the "AspiegelBot" user-agent mark so it maps to the existing "aspiegel" agent)
This commit is contained in:
Azareal 2020-03-07 13:25:50 +10:00
parent f3bdfffbed
commit ce306d2a4b
3 changed files with 71 additions and 41 deletions

View File

@ -586,25 +586,29 @@ var agentMapEnum = map[string]int{
"googlebot": 12, "googlebot": 12,
"yandex": 13, "yandex": 13,
"bing": 14, "bing": 14,
"baidu": 15, "slurp": 15,
"duckduckgo": 16, "exabot": 16,
"seznambot": 17, "baidu": 17,
"discord": 18, "sogou": 18,
"twitter": 19, "duckduckgo": 19,
"facebook": 20, "seznambot": 20,
"cloudflare": 21, "discord": 21,
"uptimebot": 22, "twitter": 22,
"slackbot": 23, "facebook": 23,
"apple": 24, "cloudflare": 24,
"discourse": 25, "uptimebot": 25,
"lynx": 26, "slackbot": 26,
"blank": 27, "apple": 27,
"malformed": 28, "discourse": 28,
"suspicious": 29, "alexa": 29,
"semrush": 30, "lynx": 30,
"dotbot": 31, "blank": 31,
"aspiegel": 32, "malformed": 32,
"zgrab": 33, "suspicious": 33,
"semrush": 34,
"dotbot": 35,
"aspiegel": 36,
"zgrab": 37,
} }
var reverseAgentMapEnum = map[int]string{ var reverseAgentMapEnum = map[int]string{
0: "unknown", 0: "unknown",
@ -622,25 +626,29 @@ var reverseAgentMapEnum = map[int]string{
12: "googlebot", 12: "googlebot",
13: "yandex", 13: "yandex",
14: "bing", 14: "bing",
15: "baidu", 15: "slurp",
16: "duckduckgo", 16: "exabot",
17: "seznambot", 17: "baidu",
18: "discord", 18: "sogou",
19: "twitter", 19: "duckduckgo",
20: "facebook", 20: "seznambot",
21: "cloudflare", 21: "discord",
22: "uptimebot", 22: "twitter",
23: "slackbot", 23: "facebook",
24: "apple", 24: "cloudflare",
25: "discourse", 25: "uptimebot",
26: "lynx", 26: "slackbot",
27: "blank", 27: "apple",
28: "malformed", 28: "discourse",
29: "suspicious", 29: "alexa",
30: "semrush", 30: "lynx",
31: "dotbot", 31: "blank",
32: "aspiegel", 32: "malformed",
33: "zgrab", 33: "suspicious",
34: "semrush",
35: "dotbot",
36: "aspiegel",
37: "zgrab",
} }
var markToAgent = map[string]string{ var markToAgent = map[string]string{
"OPR": "opera", "OPR": "opera",
@ -657,8 +665,11 @@ var markToAgent = map[string]string{
"yandex": "yandex", "yandex": "yandex",
"DuckDuckBot": "duckduckgo", "DuckDuckBot": "duckduckgo",
"Baiduspider": "baidu", "Baiduspider": "baidu",
"Sogou": "sogou",
"bingbot": "bing", "bingbot": "bing",
"BingPreview": "bing", "BingPreview": "bing",
"Slurp": "slurp",
"Exabot": "exabot",
"SeznamBot": "seznambot", "SeznamBot": "seznambot",
"CloudFlare": "cloudflare", "CloudFlare": "cloudflare",
"Uptimebot": "uptimebot", "Uptimebot": "uptimebot",
@ -669,8 +680,10 @@ var markToAgent = map[string]string{
"Facebot": "facebook", "Facebot": "facebook",
"Applebot": "apple", "Applebot": "apple",
"Discourse": "discourse", "Discourse": "discourse",
"ia_archiver": "alexa",
"SemrushBot": "semrush", "SemrushBot": "semrush",
"DotBot": "dotbot", "DotBot": "dotbot",
"AspiegelBot": "aspiegel",
"zgrab": "zgrab", "zgrab": "zgrab",
} }
/*var agentRank = map[string]int{ /*var agentRank = map[string]int{
@ -796,7 +809,7 @@ func (r *GenRouter) SuspiciousRequest(req *http.Request, prepend string) {
prepend += "\n" prepend += "\n"
} }
r.DumpRequest(req,prepend+"Suspicious Request") r.DumpRequest(req,prepend+"Suspicious Request")
co.AgentViewCounter.Bump(29) co.AgentViewCounter.Bump(33)
} }
func isLocalHost(h string) bool { func isLocalHost(h string) bool {
@ -811,7 +824,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
w.WriteHeader(200) // 400 w.WriteHeader(200) // 400
w.Write([]byte("")) w.Write([]byte(""))
r.DumpRequest(req,"Malformed Request T"+strconv.Itoa(typ)) r.DumpRequest(req,"Malformed Request T"+strconv.Itoa(typ))
co.AgentViewCounter.Bump(28) co.AgentViewCounter.Bump(32)
} }
// Split the Host and Port string // Split the Host and Port string
@ -953,7 +966,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another
if ua == "" { if ua == "" {
co.AgentViewCounter.Bump(27) co.AgentViewCounter.Bump(31)
if c.Dev.DebugMode { if c.Dev.DebugMode {
var prepend string var prepend string
for _, char := range req.UserAgent() { for _, char := range req.UserAgent() {

View File

@ -200,6 +200,9 @@
"googlebot":"Googlebot", "googlebot":"Googlebot",
"yandex":"Yandex", "yandex":"Yandex",
"bing":"Bing", "bing":"Bing",
"slurp":"Yahoo! Slurp",
"exabot":"Exabot",
"sogou":"Sogou",
"baidu":"Baidu", "baidu":"Baidu",
"duckduckgo":"DuckDuckBot", "duckduckgo":"DuckDuckBot",
"seznambot":"SeznamBot", "seznambot":"SeznamBot",
@ -211,6 +214,7 @@
"facebook":"FacebookBot", "facebook":"FacebookBot",
"apple":"AppleBot", "apple":"AppleBot",
"discourse":"Discourse Forum Onebox", "discourse":"Discourse Forum Onebox",
"alexa":"Alexa",
"lynx":"Lynx", "lynx":"Lynx",
"semrush":"SemrushBot", "semrush":"SemrushBot",

View File

@ -245,7 +245,10 @@ func main() {
"googlebot", "googlebot",
"yandex", "yandex",
"bing", "bing",
"slurp",
"exabot",
"baidu", "baidu",
"sogou",
"duckduckgo", "duckduckgo",
"seznambot", "seznambot",
"discord", "discord",
@ -256,6 +259,7 @@ func main() {
"slackbot", "slackbot",
"apple", "apple",
"discourse", "discourse",
"alexa",
"lynx", "lynx",
"blank", "blank",
"malformed", "malformed",
@ -287,8 +291,11 @@ func main() {
"yandex", "yandex",
"DuckDuckBot", "DuckDuckBot",
"Baiduspider", "Baiduspider",
"Sogou",
"bingbot", "bingbot",
"BingPreview", "BingPreview",
"Slurp",
"Exabot",
"SeznamBot", "SeznamBot",
"CloudFlare", "CloudFlare",
"Uptimebot", "Uptimebot",
@ -299,9 +306,11 @@ func main() {
"Facebot", "Facebot",
"Applebot", "Applebot",
"Discourse", "Discourse",
"ia_archiver",
"SemrushBot", "SemrushBot",
"DotBot", "DotBot",
"AspiegelBot",
"zgrab", "zgrab",
} }
@ -321,8 +330,11 @@ func main() {
"yandex": "yandex", // from the URL "yandex": "yandex", // from the URL
"DuckDuckBot": "duckduckgo", "DuckDuckBot": "duckduckgo",
"Baiduspider": "baidu", "Baiduspider": "baidu",
"Sogou": "sogou",
"bingbot": "bing", "bingbot": "bing",
"BingPreview": "bing", "BingPreview": "bing",
"Slurp": "slurp",
"Exabot": "exabot",
"SeznamBot": "seznambot", "SeznamBot": "seznambot",
"CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots? "CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots?
"Uptimebot": "uptimebot", "Uptimebot": "uptimebot",
@ -333,6 +345,7 @@ func main() {
"Facebot": "facebook", "Facebot": "facebook",
"Applebot": "apple", "Applebot": "apple",
"Discourse": "discourse", "Discourse": "discourse",
"ia_archiver": "alexa",
"SemrushBot": "semrush", "SemrushBot": "semrush",
"DotBot": "dotbot", "DotBot": "dotbot",