add wappalyzer, yacy user agents

fix mj12bot user agent parsing
numbers should now work in user agent marks
This commit is contained in:
Azareal 2021-02-17 15:27:56 +10:00
parent 0740b3bf14
commit 1f0713bb27
4 changed files with 107 additions and 101 deletions

View File

@ -580,9 +580,9 @@ var reverseOSMapEnum = map[int]string{
}
var agentMapEnum = map[string]int{
"unknown": 0,
"firefox": 1,
"opera": 1,
"chrome": 2,
"opera": 3,
"firefox": 3,
"safari": 4,
"edge": 5,
"internetexplorer": 6,
@ -633,24 +633,25 @@ var agentMapEnum = map[string]int{
"domcop": 51,
"netcraft": 52,
"blexbot": 53,
"burf": 54,
"aspiegel": 55,
"mail_ru": 56,
"ccbot": 57,
"zgrab": 58,
"cloudsystemnetworks": 59,
"maui": 60,
"curl": 61,
"python": 62,
"go": 63,
"headlesschrome": 64,
"awesome_bot": 65,
"wappalyzer": 54,
"burf": 55,
"aspiegel": 56,
"mail_ru": 57,
"ccbot": 58,
"yacy": 59,
"zgrab": 60,
"cloudsystemnetworks": 61,
"maui": 62,
"curl": 63,
"python": 64,
"headlesschrome": 65,
"awesome_bot": 66,
}
var reverseAgentMapEnum = map[int]string{
0: "unknown",
1: "firefox",
1: "opera",
2: "chrome",
3: "opera",
3: "firefox",
4: "safari",
5: "edge",
6: "internetexplorer",
@ -701,18 +702,19 @@ var reverseAgentMapEnum = map[int]string{
51: "domcop",
52: "netcraft",
53: "blexbot",
54: "burf",
55: "aspiegel",
56: "mail_ru",
57: "ccbot",
58: "zgrab",
59: "cloudsystemnetworks",
60: "maui",
61: "curl",
62: "python",
63: "go",
64: "headlesschrome",
65: "awesome_bot",
54: "wappalyzer",
55: "burf",
56: "aspiegel",
57: "mail_ru",
58: "ccbot",
59: "yacy",
60: "zgrab",
61: "cloudsystemnetworks",
62: "maui",
63: "curl",
64: "python",
65: "headlesschrome",
66: "awesome_bot",
}
var markToAgent = map[string]string{
"OPR": "opera",
@ -770,24 +772,25 @@ var markToAgent = map[string]string{
"DomCopBot": "domcop",
"NetcraftSurveyAgent": "netcraft",
"BLEXBot": "blexbot",
"Wappalyzer": "wappalyzer",
"Burf": "burf",
"AspiegelBot": "aspiegel",
"PetalBot": "aspiegel",
"RU_Bot": "mail_ru",
"CCBot": "ccbot",
"yacybot": "yacy",
"zgrab": "zgrab",
"Nimbostratus": "cloudsystemnetworks",
"MauiBot": "maui",
"curl": "curl",
"python": "python",
"Go": "go",
"HeadlessChrome": "headlesschrome",
"awesome_bot": "awesome_bot",
}
var markToID = map[string]int{
"OPR": 3,
"OPR": 1,
"Chrome": 2,
"Firefox": 1,
"Firefox": 3,
"Safari": 4,
"MSIE": 6,
"Trident": 7,
@ -840,19 +843,20 @@ var markToID = map[string]int{
"DomCopBot": 51,
"NetcraftSurveyAgent": 52,
"BLEXBot": 53,
"Burf": 54,
"AspiegelBot": 55,
"PetalBot": 55,
"RU_Bot": 56,
"CCBot": 57,
"zgrab": 58,
"Nimbostratus": 59,
"MauiBot": 60,
"curl": 61,
"python": 62,
"Go": 63,
"HeadlessChrome": 64,
"awesome_bot": 65,
"Wappalyzer": 54,
"Burf": 55,
"AspiegelBot": 56,
"PetalBot": 56,
"RU_Bot": 57,
"CCBot": 58,
"yacybot": 59,
"zgrab": 60,
"Nimbostratus": 61,
"MauiBot": 62,
"curl": 63,
"python": 64,
"HeadlessChrome": 65,
"awesome_bot": 66,
}
/*var agentRank = map[string]int{
"opera":9,
@ -1140,6 +1144,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like.
// TODO: Add a setting to disable this?
// TODO: Use a more efficient detector instead of smashing every possible combination in
// TODO: Make this testable
var agent int
if !c.Config.DisableAnalytics {
@ -1163,10 +1168,12 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
var items []string
var os int
for _, it := range uutils.StringToBytes(ua) {
if (it > 64 && it < 91) || (it > 96 && it < 123) || it == '_' {
if (it > 64 && it < 91) || (it > 96 && it < 123) || (it > 47 && it < 58) || it == '_' {
// TODO: Store an index and slice that instead?
buf = append(buf, it)
} else if it == ' ' || it == '(' || it == ')' || it == '-' || (it > 47 && it < 58) || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' {
} else if it == ' ' || it == '(' || it == ')' || it == '-' || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' {
//log.Print("buf: ",string(buf))
//log.Print("it: ",string(it))
if len(buf) != 0 {
if len(buf) > 2 {
// Use an unsafe zero copy conversion here just to use the switch, it's not safe for this string to escape from here, as it will get mutated, so do a regular string conversion in append
@ -1181,12 +1188,14 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
os = 5
case "Android":
os = 4
case "like","compatible","NT","X","KHTML":
case "like","compatible","NT","X","com","KHTML":
// Skip these words
default:
//log.Print("append buf")
items = append(items, string(buf))
}
}
//log.Print("reset buf")
buf = buf[:0]
}
} else {
@ -1235,11 +1244,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
if strings.Contains(ua,"rv:11") {
agent = 6
}
case 58:
case 60:
w.WriteHeader(200) // 400
w.Write([]byte(""))
r.DumpRequest(req,"Blocked Scanner")
co.AgentViewCounter.Bump(58)
co.AgentViewCounter.Bump(60)
return
}
@ -1252,7 +1261,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
}
r.DumpRequest(req,"Blank UA: " + pre)
} else {
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(ua))
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(req.UserAgent()))
}
}// else {
//co.AgentViewCounter.Bump(agentMapEnum[agent])
@ -1439,11 +1448,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
return err
}
gzw, ok := w.(c.GzipResponseWriter)
if ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
w = r.responseWriter(w)
err = routes.ShowAttachment(w,req,user,extraData)
co.RouteViewCounter.Bump3(6, cn)
case "/ws":
@ -2012,11 +2017,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
return err
}
gzw, ok := w.(c.GzipResponseWriter)
if ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
w = r.responseWriter(w)
err = panel.Backups(w,req,user,extraData)
co.RouteViewCounter.Bump3(90, cn)
case "/panel/logs/regs/":
@ -2912,13 +2913,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
co.RouteViewCounter.Bump3(168, cn)
return c.NotFound(w,req,nil)
}
/*if bzw, ok := w.(c.BrResponseWriter); ok {
w = bzw.ResponseWriter
w.Header().Del("Content-Encoding")
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
w = r.responseWriter(w)
req.URL.Path += extraData
// TODO: Find a way to propagate errors up from this?
r.UploadHandler(w,req) // TODO: Count these views
@ -2932,13 +2927,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
co.RouteViewCounter.Bump3(170, cn)
return routes.RobotsTxt(w,req)
case "favicon.ico":
/*if bzw, ok := w.(c.BrResponseWriter); ok {
w = bzw.ResponseWriter
w.Header().Del("Content-Encoding")
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
w = r.responseWriter(w)
req.URL.Path = "/s/favicon.ico"
routes.StaticFile(w,req)
co.RouteViewCounter.Bump3(173, cn)
@ -2982,3 +2971,14 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
}
return err
}
// responseWriter undoes gzip wrapping on w.
//
// If w is a c.GzipResponseWriter, the wrapped ResponseWriter is returned
// after its Content-Encoding header has been deleted. Any other writer is
// returned unchanged.
//
// An equivalent branch for c.BrResponseWriter was commented out in the
// original implementation and is not performed here either.
func (r *GenRouter) responseWriter(w http.ResponseWriter) http.ResponseWriter {
	gzw, isGzip := w.(c.GzipResponseWriter)
	if !isGzip {
		// Not gzip-wrapped: nothing to strip.
		return w
	}
	var plain http.ResponseWriter = gzw.ResponseWriter
	plain.Header().Del("Content-Encoding")
	return plain
}

View File

@ -237,10 +237,12 @@
"domcop":"DomCopBot",
"netcraft":"Netcraft",
"blexbot":"BLEXBot",
"wappalyzer":"Wappalyzer",
"burf":"Burf.co",
"aspiegel":"Aspiegel",
"mail_ru":"Mail.ru bot",
"ccbot":"CCBot",
"yacy":"YaCy P2P Search Engine",
"zgrab":"Zgrab App Scanner",
"cloudsystemnetworks":"Nimbostratus / Cloud System Networks",
"maui":"MauiBot",

View File

@ -230,9 +230,9 @@ func main() {
tmplVars.AllAgentNames = []string{
"unknown",
"firefox",
"chrome",
"opera",
"chrome",
"firefox",
"safari",
"edge",
"internetexplorer",
@ -285,16 +285,18 @@ func main() {
"domcop",
"netcraft",
"blexbot",
"wappalyzer",
"burf",
"aspiegel",
"mail_ru",
"ccbot",
"yacy",
"zgrab",
"cloudsystemnetworks",
"maui",
"curl",
"python",
"go",
//"go",
"headlesschrome",
"awesome_bot",
}
@ -369,17 +371,19 @@ func main() {
a("DomCopBot", "domcop")
a("NetcraftSurveyAgent", "netcraft")
a("BLEXBot", "blexbot")
a("Wappalyzer", "wappalyzer")
a("Burf", "burf")
a("AspiegelBot", "aspiegel")
a("PetalBot", "aspiegel")
a("RU_Bot", "mail_ru") // Mail.RU_Bot
a("CCBot", "ccbot")
a("yacybot", "yacy")
a("zgrab", "zgrab")
a("Nimbostratus", "cloudsystemnetworks")
a("MauiBot", "maui")
a("curl", "curl")
a("python", "python")
a("Go", "go")
//a("Go", "go") // yacy has java as part of its UA, try to avoid hitting crawlers written in go
a("HeadlessChrome", "headlesschrome")
a("awesome_bot", "awesome_bot")
// TODO: Detect Adsbot/3.1, it has a similar user agent to Google's Adsbot, but it is different. No Google fragments.
@ -732,6 +736,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like.
// TODO: Add a setting to disable this?
// TODO: Use a more efficient detector instead of smashing every possible combination in
// TODO: Make this testable
var agent int
if !c.Config.DisableAnalytics {
@ -755,10 +760,12 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
var items []string
var os int
for _, it := range uutils.StringToBytes(ua) {
if (it > 64 && it < 91) || (it > 96 && it < 123) || it == '_' {
if (it > 64 && it < 91) || (it > 96 && it < 123) || (it > 47 && it < 58) || it == '_' {
// TODO: Store an index and slice that instead?
buf = append(buf, it)
} else if it == ' ' || it == '(' || it == ')' || it == '-' || (it > 47 && it < 58) || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' {
} else if it == ' ' || it == '(' || it == ')' || it == '-' || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' {
//log.Print("buf: ",string(buf))
//log.Print("it: ",string(it))
if len(buf) != 0 {
if len(buf) > 2 {
// Use an unsafe zero copy conversion here just to use the switch, it's not safe for this string to escape from here, as it will get mutated, so do a regular string conversion in append
@ -773,12 +780,14 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
os = {{.AllOSMap.iphone}}
case "Android":
os = {{.AllOSMap.android}}
case "like","compatible","NT","X","KHTML":
case "like","compatible","NT","X","com","KHTML":
// Skip these words
default:
//log.Print("append buf")
items = append(items, string(buf))
}
}
//log.Print("reset buf")
buf = buf[:0]
}
} else {
@ -844,7 +853,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
}
r.DumpRequest(req,"Blank UA: " + pre)
} else {
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(ua))
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(req.UserAgent()))
}
}// else {
//co.AgentViewCounter.Bump(agentMapEnum[agent])
@ -1001,13 +1010,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.UploadedFile"}}, cn)
return c.NotFound(w,req,nil)
}
/*if bzw, ok := w.(c.BrResponseWriter); ok {
w = bzw.ResponseWriter
w.Header().Del("Content-Encoding")
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
w = r.responseWriter(w)
req.URL.Path += extraData
// TODO: Find a way to propagate errors up from this?
r.UploadHandler(w,req) // TODO: Count these views
@ -1021,13 +1024,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.RobotsTxt"}}, cn)
return routes.RobotsTxt(w,req)
case "favicon.ico":
/*if bzw, ok := w.(c.BrResponseWriter); ok {
w = bzw.ResponseWriter
w.Header().Del("Content-Encoding")
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
w = r.responseWriter(w)
req.URL.Path = "/s/favicon.ico"
routes.StaticFile(w,req)
co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.Favicon"}}, cn)
@ -1071,6 +1068,17 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
}
return err
}
func (r *GenRouter) responseWriter(w http.ResponseWriter) http.ResponseWriter {
/*if bzw, ok := w.(c.BrResponseWriter); ok {
w = bzw.ResponseWriter
w.Header().Del("Content-Encoding")
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}
return w
}
`
tmpl := template.Must(template.New("router").Parse(fileData))
var b bytes.Buffer

View File

@ -60,11 +60,7 @@ func (r *RouteImpl) hasBeforeItem(item string) bool {
}
// NoGzip returns the *RouteImpl produced by registering a literal Go snippet
// on r. The snippet rebinds w to the ResponseWriter inside a
// c.GzipResponseWriter and deletes the "Content-Encoding" header.
//
// NOTE(review): two return statements appear back to back in this listing.
// As written only the first (multi-line, inline unwrap) is reachable; the
// second delegates the same unwrap to r.responseWriter. This looks like the
// removed and added lines of a diff shown together — confirm which one is
// meant to remain.
func (r *RouteImpl) NoGzip() *RouteImpl {
return r.LitBeforeMultiline(`gzw, ok := w.(c.GzipResponseWriter)
if ok {
w = gzw.ResponseWriter
w.Header().Del("Content-Encoding")
}`)
// Single-line form: the emitted snippet calls the router's responseWriter helper.
return r.LitBefore("w = r.responseWriter(w)")
}
func (r *RouteImpl) NoHeader() *RouteImpl {