add wappalyzer, yacy user agents

fix mj12bot user agent parsing
numbers should now work in user agent marks
This commit is contained in:
Azareal 2021-02-17 15:27:56 +10:00
parent 0740b3bf14
commit 1f0713bb27
4 changed files with 107 additions and 101 deletions

View File

@ -580,9 +580,9 @@ var reverseOSMapEnum = map[int]string{
} }
var agentMapEnum = map[string]int{ var agentMapEnum = map[string]int{
"unknown": 0, "unknown": 0,
"firefox": 1, "opera": 1,
"chrome": 2, "chrome": 2,
"opera": 3, "firefox": 3,
"safari": 4, "safari": 4,
"edge": 5, "edge": 5,
"internetexplorer": 6, "internetexplorer": 6,
@ -633,24 +633,25 @@ var agentMapEnum = map[string]int{
"domcop": 51, "domcop": 51,
"netcraft": 52, "netcraft": 52,
"blexbot": 53, "blexbot": 53,
"burf": 54, "wappalyzer": 54,
"aspiegel": 55, "burf": 55,
"mail_ru": 56, "aspiegel": 56,
"ccbot": 57, "mail_ru": 57,
"zgrab": 58, "ccbot": 58,
"cloudsystemnetworks": 59, "yacy": 59,
"maui": 60, "zgrab": 60,
"curl": 61, "cloudsystemnetworks": 61,
"python": 62, "maui": 62,
"go": 63, "curl": 63,
"headlesschrome": 64, "python": 64,
"awesome_bot": 65, "headlesschrome": 65,
"awesome_bot": 66,
} }
var reverseAgentMapEnum = map[int]string{ var reverseAgentMapEnum = map[int]string{
0: "unknown", 0: "unknown",
1: "firefox", 1: "opera",
2: "chrome", 2: "chrome",
3: "opera", 3: "firefox",
4: "safari", 4: "safari",
5: "edge", 5: "edge",
6: "internetexplorer", 6: "internetexplorer",
@ -701,18 +702,19 @@ var reverseAgentMapEnum = map[int]string{
51: "domcop", 51: "domcop",
52: "netcraft", 52: "netcraft",
53: "blexbot", 53: "blexbot",
54: "burf", 54: "wappalyzer",
55: "aspiegel", 55: "burf",
56: "mail_ru", 56: "aspiegel",
57: "ccbot", 57: "mail_ru",
58: "zgrab", 58: "ccbot",
59: "cloudsystemnetworks", 59: "yacy",
60: "maui", 60: "zgrab",
61: "curl", 61: "cloudsystemnetworks",
62: "python", 62: "maui",
63: "go", 63: "curl",
64: "headlesschrome", 64: "python",
65: "awesome_bot", 65: "headlesschrome",
66: "awesome_bot",
} }
var markToAgent = map[string]string{ var markToAgent = map[string]string{
"OPR": "opera", "OPR": "opera",
@ -770,24 +772,25 @@ var markToAgent = map[string]string{
"DomCopBot": "domcop", "DomCopBot": "domcop",
"NetcraftSurveyAgent": "netcraft", "NetcraftSurveyAgent": "netcraft",
"BLEXBot": "blexbot", "BLEXBot": "blexbot",
"Wappalyzer": "wappalyzer",
"Burf": "burf", "Burf": "burf",
"AspiegelBot": "aspiegel", "AspiegelBot": "aspiegel",
"PetalBot": "aspiegel", "PetalBot": "aspiegel",
"RU_Bot": "mail_ru", "RU_Bot": "mail_ru",
"CCBot": "ccbot", "CCBot": "ccbot",
"yacybot": "yacy",
"zgrab": "zgrab", "zgrab": "zgrab",
"Nimbostratus": "cloudsystemnetworks", "Nimbostratus": "cloudsystemnetworks",
"MauiBot": "maui", "MauiBot": "maui",
"curl": "curl", "curl": "curl",
"python": "python", "python": "python",
"Go": "go",
"HeadlessChrome": "headlesschrome", "HeadlessChrome": "headlesschrome",
"awesome_bot": "awesome_bot", "awesome_bot": "awesome_bot",
} }
var markToID = map[string]int{ var markToID = map[string]int{
"OPR": 3, "OPR": 1,
"Chrome": 2, "Chrome": 2,
"Firefox": 1, "Firefox": 3,
"Safari": 4, "Safari": 4,
"MSIE": 6, "MSIE": 6,
"Trident": 7, "Trident": 7,
@ -840,19 +843,20 @@ var markToID = map[string]int{
"DomCopBot": 51, "DomCopBot": 51,
"NetcraftSurveyAgent": 52, "NetcraftSurveyAgent": 52,
"BLEXBot": 53, "BLEXBot": 53,
"Burf": 54, "Wappalyzer": 54,
"AspiegelBot": 55, "Burf": 55,
"PetalBot": 55, "AspiegelBot": 56,
"RU_Bot": 56, "PetalBot": 56,
"CCBot": 57, "RU_Bot": 57,
"zgrab": 58, "CCBot": 58,
"Nimbostratus": 59, "yacybot": 59,
"MauiBot": 60, "zgrab": 60,
"curl": 61, "Nimbostratus": 61,
"python": 62, "MauiBot": 62,
"Go": 63, "curl": 63,
"HeadlessChrome": 64, "python": 64,
"awesome_bot": 65, "HeadlessChrome": 65,
"awesome_bot": 66,
} }
/*var agentRank = map[string]int{ /*var agentRank = map[string]int{
"opera":9, "opera":9,
@ -1140,6 +1144,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like. // Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like.
// TODO: Add a setting to disable this? // TODO: Add a setting to disable this?
// TODO: Use a more efficient detector instead of smashing every possible combination in // TODO: Use a more efficient detector instead of smashing every possible combination in
// TODO: Make this testable
var agent int var agent int
if !c.Config.DisableAnalytics { if !c.Config.DisableAnalytics {
@ -1163,10 +1168,12 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
var items []string var items []string
var os int var os int
for _, it := range uutils.StringToBytes(ua) { for _, it := range uutils.StringToBytes(ua) {
if (it > 64 && it < 91) || (it > 96 && it < 123) || it == '_' { if (it > 64 && it < 91) || (it > 96 && it < 123) || (it > 47 && it < 58) || it == '_' {
// TODO: Store an index and slice that instead? // TODO: Store an index and slice that instead?
buf = append(buf, it) buf = append(buf, it)
} else if it == ' ' || it == '(' || it == ')' || it == '-' || (it > 47 && it < 58) || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' { } else if it == ' ' || it == '(' || it == ')' || it == '-' || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' {
//log.Print("buf: ",string(buf))
//log.Print("it: ",string(it))
if len(buf) != 0 { if len(buf) != 0 {
if len(buf) > 2 { if len(buf) > 2 {
// Use an unsafe zero copy conversion here just to use the switch, it's not safe for this string to escape from here, as it will get mutated, so do a regular string conversion in append // Use an unsafe zero copy conversion here just to use the switch, it's not safe for this string to escape from here, as it will get mutated, so do a regular string conversion in append
@ -1181,12 +1188,14 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
os = 5 os = 5
case "Android": case "Android":
os = 4 os = 4
case "like","compatible","NT","X","KHTML": case "like","compatible","NT","X","com","KHTML":
// Skip these words // Skip these words
default: default:
//log.Print("append buf")
items = append(items, string(buf)) items = append(items, string(buf))
} }
} }
//log.Print("reset buf")
buf = buf[:0] buf = buf[:0]
} }
} else { } else {
@ -1235,11 +1244,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
if strings.Contains(ua,"rv:11") { if strings.Contains(ua,"rv:11") {
agent = 6 agent = 6
} }
case 58: case 60:
w.WriteHeader(200) // 400 w.WriteHeader(200) // 400
w.Write([]byte("")) w.Write([]byte(""))
r.DumpRequest(req,"Blocked Scanner") r.DumpRequest(req,"Blocked Scanner")
co.AgentViewCounter.Bump(58) co.AgentViewCounter.Bump(60)
return return
} }
@ -1252,7 +1261,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
} }
r.DumpRequest(req,"Blank UA: " + pre) r.DumpRequest(req,"Blank UA: " + pre)
} else { } else {
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(ua)) r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(req.UserAgent()))
} }
}// else { }// else {
//co.AgentViewCounter.Bump(agentMapEnum[agent]) //co.AgentViewCounter.Bump(agentMapEnum[agent])
@ -1439,11 +1448,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
return err return err
} }
gzw, ok := w.(c.GzipResponseWriter) w = r.responseWriter(w)
if ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
err = routes.ShowAttachment(w,req,user,extraData) err = routes.ShowAttachment(w,req,user,extraData)
co.RouteViewCounter.Bump3(6, cn) co.RouteViewCounter.Bump3(6, cn)
case "/ws": case "/ws":
@ -2012,11 +2017,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
return err return err
} }
gzw, ok := w.(c.GzipResponseWriter) w = r.responseWriter(w)
if ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
err = panel.Backups(w,req,user,extraData) err = panel.Backups(w,req,user,extraData)
co.RouteViewCounter.Bump3(90, cn) co.RouteViewCounter.Bump3(90, cn)
case "/panel/logs/regs/": case "/panel/logs/regs/":
@ -2912,13 +2913,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
co.RouteViewCounter.Bump3(168, cn) co.RouteViewCounter.Bump3(168, cn)
return c.NotFound(w,req,nil) return c.NotFound(w,req,nil)
} }
/*if bzw, ok := w.(c.BrResponseWriter); ok { w = r.responseWriter(w)
w = bzw.ResponseWriter
w.Header().Del("Content-Encoding")
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
req.URL.Path += extraData req.URL.Path += extraData
// TODO: Find a way to propagate errors up from this? // TODO: Find a way to propagate errors up from this?
r.UploadHandler(w,req) // TODO: Count these views r.UploadHandler(w,req) // TODO: Count these views
@ -2932,13 +2927,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
co.RouteViewCounter.Bump3(170, cn) co.RouteViewCounter.Bump3(170, cn)
return routes.RobotsTxt(w,req) return routes.RobotsTxt(w,req)
case "favicon.ico": case "favicon.ico":
/*if bzw, ok := w.(c.BrResponseWriter); ok { w = r.responseWriter(w)
w = bzw.ResponseWriter
w.Header().Del("Content-Encoding")
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
req.URL.Path = "/s/favicon.ico" req.URL.Path = "/s/favicon.ico"
routes.StaticFile(w,req) routes.StaticFile(w,req)
co.RouteViewCounter.Bump3(173, cn) co.RouteViewCounter.Bump3(173, cn)
@ -2982,3 +2971,14 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
} }
return err return err
} }
// responseWriter returns the writer a route should use when its output must
// bypass the gzip wrapper. If w is a c.GzipResponseWriter, the ResponseWriter
// it wraps is returned with the Content-Encoding header deleted; any other
// writer is handed back untouched.
// NOTE: the equivalent unwrap for c.BrResponseWriter is currently disabled.
func (r *GenRouter) responseWriter(w http.ResponseWriter) http.ResponseWriter {
	gzw, isGzip := w.(c.GzipResponseWriter)
	if !isGzip {
		return w
	}
	var inner http.ResponseWriter = gzw.ResponseWriter
	// Presumably set by the gzip wrapper; it no longer applies once we write to the inner writer.
	inner.Header().Del("Content-Encoding")
	return inner
}

View File

@ -237,10 +237,12 @@
"domcop":"DomCopBot", "domcop":"DomCopBot",
"netcraft":"Netcraft", "netcraft":"Netcraft",
"blexbot":"BLEXBot", "blexbot":"BLEXBot",
"wappalyzer":"Wappalyzer",
"burf":"Burf.co", "burf":"Burf.co",
"aspiegel":"Aspiegel", "aspiegel":"Aspiegel",
"mail_ru":"Mail.ru bot", "mail_ru":"Mail.ru bot",
"ccbot":"CCBot", "ccbot":"CCBot",
"yacy":"YaCy P2P Search Engine",
"zgrab":"Zgrab App Scanner", "zgrab":"Zgrab App Scanner",
"cloudsystemnetworks":"Nimbostratus / Cloud System Networks", "cloudsystemnetworks":"Nimbostratus / Cloud System Networks",
"maui":"MauiBot", "maui":"MauiBot",

View File

@ -230,9 +230,9 @@ func main() {
tmplVars.AllAgentNames = []string{ tmplVars.AllAgentNames = []string{
"unknown", "unknown",
"firefox",
"chrome",
"opera", "opera",
"chrome",
"firefox",
"safari", "safari",
"edge", "edge",
"internetexplorer", "internetexplorer",
@ -285,16 +285,18 @@ func main() {
"domcop", "domcop",
"netcraft", "netcraft",
"blexbot", "blexbot",
"wappalyzer",
"burf", "burf",
"aspiegel", "aspiegel",
"mail_ru", "mail_ru",
"ccbot", "ccbot",
"yacy",
"zgrab", "zgrab",
"cloudsystemnetworks", "cloudsystemnetworks",
"maui", "maui",
"curl", "curl",
"python", "python",
"go", //"go",
"headlesschrome", "headlesschrome",
"awesome_bot", "awesome_bot",
} }
@ -369,17 +371,19 @@ func main() {
a("DomCopBot", "domcop") a("DomCopBot", "domcop")
a("NetcraftSurveyAgent", "netcraft") a("NetcraftSurveyAgent", "netcraft")
a("BLEXBot", "blexbot") a("BLEXBot", "blexbot")
a("Wappalyzer", "wappalyzer")
a("Burf", "burf") a("Burf", "burf")
a("AspiegelBot", "aspiegel") a("AspiegelBot", "aspiegel")
a("PetalBot", "aspiegel") a("PetalBot", "aspiegel")
a("RU_Bot", "mail_ru") // Mail.RU_Bot a("RU_Bot", "mail_ru") // Mail.RU_Bot
a("CCBot", "ccbot") a("CCBot", "ccbot")
a("yacybot", "yacy")
a("zgrab", "zgrab") a("zgrab", "zgrab")
a("Nimbostratus", "cloudsystemnetworks") a("Nimbostratus", "cloudsystemnetworks")
a("MauiBot", "maui") a("MauiBot", "maui")
a("curl", "curl") a("curl", "curl")
a("python", "python") a("python", "python")
a("Go", "go") //a("Go", "go") // yacy has java as part of its UA, try to avoid hitting crawlers written in go
a("HeadlessChrome", "headlesschrome") a("HeadlessChrome", "headlesschrome")
a("awesome_bot", "awesome_bot") a("awesome_bot", "awesome_bot")
// TODO: Detect Adsbot/3.1, it has a similar user agent to Google's Adsbot, but it is different. No Google fragments. // TODO: Detect Adsbot/3.1, it has a similar user agent to Google's Adsbot, but it is different. No Google fragments.
@ -732,6 +736,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like. // Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like.
// TODO: Add a setting to disable this? // TODO: Add a setting to disable this?
// TODO: Use a more efficient detector instead of smashing every possible combination in // TODO: Use a more efficient detector instead of smashing every possible combination in
// TODO: Make this testable
var agent int var agent int
if !c.Config.DisableAnalytics { if !c.Config.DisableAnalytics {
@ -755,10 +760,12 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
var items []string var items []string
var os int var os int
for _, it := range uutils.StringToBytes(ua) { for _, it := range uutils.StringToBytes(ua) {
if (it > 64 && it < 91) || (it > 96 && it < 123) || it == '_' { if (it > 64 && it < 91) || (it > 96 && it < 123) || (it > 47 && it < 58) || it == '_' {
// TODO: Store an index and slice that instead? // TODO: Store an index and slice that instead?
buf = append(buf, it) buf = append(buf, it)
} else if it == ' ' || it == '(' || it == ')' || it == '-' || (it > 47 && it < 58) || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' { } else if it == ' ' || it == '(' || it == ')' || it == '-' || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' {
//log.Print("buf: ",string(buf))
//log.Print("it: ",string(it))
if len(buf) != 0 { if len(buf) != 0 {
if len(buf) > 2 { if len(buf) > 2 {
// Use an unsafe zero copy conversion here just to use the switch, it's not safe for this string to escape from here, as it will get mutated, so do a regular string conversion in append // Use an unsafe zero copy conversion here just to use the switch, it's not safe for this string to escape from here, as it will get mutated, so do a regular string conversion in append
@ -773,12 +780,14 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
os = {{.AllOSMap.iphone}} os = {{.AllOSMap.iphone}}
case "Android": case "Android":
os = {{.AllOSMap.android}} os = {{.AllOSMap.android}}
case "like","compatible","NT","X","KHTML": case "like","compatible","NT","X","com","KHTML":
// Skip these words // Skip these words
default: default:
//log.Print("append buf")
items = append(items, string(buf)) items = append(items, string(buf))
} }
} }
//log.Print("reset buf")
buf = buf[:0] buf = buf[:0]
} }
} else { } else {
@ -844,7 +853,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
} }
r.DumpRequest(req,"Blank UA: " + pre) r.DumpRequest(req,"Blank UA: " + pre)
} else { } else {
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(ua)) r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(req.UserAgent()))
} }
}// else { }// else {
//co.AgentViewCounter.Bump(agentMapEnum[agent]) //co.AgentViewCounter.Bump(agentMapEnum[agent])
@ -1001,13 +1010,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.UploadedFile"}}, cn) co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.UploadedFile"}}, cn)
return c.NotFound(w,req,nil) return c.NotFound(w,req,nil)
} }
/*if bzw, ok := w.(c.BrResponseWriter); ok { w = r.responseWriter(w)
w = bzw.ResponseWriter
w.Header().Del("Content-Encoding")
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
req.URL.Path += extraData req.URL.Path += extraData
// TODO: Find a way to propagate errors up from this? // TODO: Find a way to propagate errors up from this?
r.UploadHandler(w,req) // TODO: Count these views r.UploadHandler(w,req) // TODO: Count these views
@ -1021,13 +1024,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.RobotsTxt"}}, cn) co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.RobotsTxt"}}, cn)
return routes.RobotsTxt(w,req) return routes.RobotsTxt(w,req)
case "favicon.ico": case "favicon.ico":
/*if bzw, ok := w.(c.BrResponseWriter); ok { w = r.responseWriter(w)
w = bzw.ResponseWriter
w.Header().Del("Content-Encoding")
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
req.URL.Path = "/s/favicon.ico" req.URL.Path = "/s/favicon.ico"
routes.StaticFile(w,req) routes.StaticFile(w,req)
co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.Favicon"}}, cn) co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.Favicon"}}, cn)
@ -1071,6 +1068,17 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
} }
return err return err
} }
func (r *GenRouter) responseWriter(w http.ResponseWriter) http.ResponseWriter {
/*if bzw, ok := w.(c.BrResponseWriter); ok {
w = bzw.ResponseWriter
w.Header().Del("Content-Encoding")
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
return w
}
` `
tmpl := template.Must(template.New("router").Parse(fileData)) tmpl := template.Must(template.New("router").Parse(fileData))
var b bytes.Buffer var b bytes.Buffer

View File

@ -60,11 +60,7 @@ func (r *RouteImpl) hasBeforeItem(item string) bool {
} }
func (r *RouteImpl) NoGzip() *RouteImpl { func (r *RouteImpl) NoGzip() *RouteImpl {
return r.LitBeforeMultiline(`gzw, ok := w.(c.GzipResponseWriter) return r.LitBefore("w = r.responseWriter(w)")
if ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}`)
} }
func (r *RouteImpl) NoHeader() *RouteImpl { func (r *RouteImpl) NoHeader() *RouteImpl {