add wappalyzer, yacy user agents
fix mj12bot user agent parsing; numbers should now work in user agent marks
This commit is contained in:
parent
0740b3bf14
commit
1f0713bb27
148
gen_router.go
148
gen_router.go
@ -580,9 +580,9 @@ var reverseOSMapEnum = map[int]string{
|
||||
}
|
||||
var agentMapEnum = map[string]int{
|
||||
"unknown": 0,
|
||||
"firefox": 1,
|
||||
"opera": 1,
|
||||
"chrome": 2,
|
||||
"opera": 3,
|
||||
"firefox": 3,
|
||||
"safari": 4,
|
||||
"edge": 5,
|
||||
"internetexplorer": 6,
|
||||
@ -633,24 +633,25 @@ var agentMapEnum = map[string]int{
|
||||
"domcop": 51,
|
||||
"netcraft": 52,
|
||||
"blexbot": 53,
|
||||
"burf": 54,
|
||||
"aspiegel": 55,
|
||||
"mail_ru": 56,
|
||||
"ccbot": 57,
|
||||
"zgrab": 58,
|
||||
"cloudsystemnetworks": 59,
|
||||
"maui": 60,
|
||||
"curl": 61,
|
||||
"python": 62,
|
||||
"go": 63,
|
||||
"headlesschrome": 64,
|
||||
"awesome_bot": 65,
|
||||
"wappalyzer": 54,
|
||||
"burf": 55,
|
||||
"aspiegel": 56,
|
||||
"mail_ru": 57,
|
||||
"ccbot": 58,
|
||||
"yacy": 59,
|
||||
"zgrab": 60,
|
||||
"cloudsystemnetworks": 61,
|
||||
"maui": 62,
|
||||
"curl": 63,
|
||||
"python": 64,
|
||||
"headlesschrome": 65,
|
||||
"awesome_bot": 66,
|
||||
}
|
||||
var reverseAgentMapEnum = map[int]string{
|
||||
0: "unknown",
|
||||
1: "firefox",
|
||||
1: "opera",
|
||||
2: "chrome",
|
||||
3: "opera",
|
||||
3: "firefox",
|
||||
4: "safari",
|
||||
5: "edge",
|
||||
6: "internetexplorer",
|
||||
@ -701,18 +702,19 @@ var reverseAgentMapEnum = map[int]string{
|
||||
51: "domcop",
|
||||
52: "netcraft",
|
||||
53: "blexbot",
|
||||
54: "burf",
|
||||
55: "aspiegel",
|
||||
56: "mail_ru",
|
||||
57: "ccbot",
|
||||
58: "zgrab",
|
||||
59: "cloudsystemnetworks",
|
||||
60: "maui",
|
||||
61: "curl",
|
||||
62: "python",
|
||||
63: "go",
|
||||
64: "headlesschrome",
|
||||
65: "awesome_bot",
|
||||
54: "wappalyzer",
|
||||
55: "burf",
|
||||
56: "aspiegel",
|
||||
57: "mail_ru",
|
||||
58: "ccbot",
|
||||
59: "yacy",
|
||||
60: "zgrab",
|
||||
61: "cloudsystemnetworks",
|
||||
62: "maui",
|
||||
63: "curl",
|
||||
64: "python",
|
||||
65: "headlesschrome",
|
||||
66: "awesome_bot",
|
||||
}
|
||||
var markToAgent = map[string]string{
|
||||
"OPR": "opera",
|
||||
@ -770,24 +772,25 @@ var markToAgent = map[string]string{
|
||||
"DomCopBot": "domcop",
|
||||
"NetcraftSurveyAgent": "netcraft",
|
||||
"BLEXBot": "blexbot",
|
||||
"Wappalyzer": "wappalyzer",
|
||||
"Burf": "burf",
|
||||
"AspiegelBot": "aspiegel",
|
||||
"PetalBot": "aspiegel",
|
||||
"RU_Bot": "mail_ru",
|
||||
"CCBot": "ccbot",
|
||||
"yacybot": "yacy",
|
||||
"zgrab": "zgrab",
|
||||
"Nimbostratus": "cloudsystemnetworks",
|
||||
"MauiBot": "maui",
|
||||
"curl": "curl",
|
||||
"python": "python",
|
||||
"Go": "go",
|
||||
"HeadlessChrome": "headlesschrome",
|
||||
"awesome_bot": "awesome_bot",
|
||||
}
|
||||
var markToID = map[string]int{
|
||||
"OPR": 3,
|
||||
"OPR": 1,
|
||||
"Chrome": 2,
|
||||
"Firefox": 1,
|
||||
"Firefox": 3,
|
||||
"Safari": 4,
|
||||
"MSIE": 6,
|
||||
"Trident": 7,
|
||||
@ -840,19 +843,20 @@ var markToID = map[string]int{
|
||||
"DomCopBot": 51,
|
||||
"NetcraftSurveyAgent": 52,
|
||||
"BLEXBot": 53,
|
||||
"Burf": 54,
|
||||
"AspiegelBot": 55,
|
||||
"PetalBot": 55,
|
||||
"RU_Bot": 56,
|
||||
"CCBot": 57,
|
||||
"zgrab": 58,
|
||||
"Nimbostratus": 59,
|
||||
"MauiBot": 60,
|
||||
"curl": 61,
|
||||
"python": 62,
|
||||
"Go": 63,
|
||||
"HeadlessChrome": 64,
|
||||
"awesome_bot": 65,
|
||||
"Wappalyzer": 54,
|
||||
"Burf": 55,
|
||||
"AspiegelBot": 56,
|
||||
"PetalBot": 56,
|
||||
"RU_Bot": 57,
|
||||
"CCBot": 58,
|
||||
"yacybot": 59,
|
||||
"zgrab": 60,
|
||||
"Nimbostratus": 61,
|
||||
"MauiBot": 62,
|
||||
"curl": 63,
|
||||
"python": 64,
|
||||
"HeadlessChrome": 65,
|
||||
"awesome_bot": 66,
|
||||
}
|
||||
/*var agentRank = map[string]int{
|
||||
"opera":9,
|
||||
@ -1140,6 +1144,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
// Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like.
|
||||
// TODO: Add a setting to disable this?
|
||||
// TODO: Use a more efficient detector instead of smashing every possible combination in
|
||||
// TODO: Make this testable
|
||||
var agent int
|
||||
if !c.Config.DisableAnalytics {
|
||||
|
||||
@ -1163,10 +1168,12 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
var items []string
|
||||
var os int
|
||||
for _, it := range uutils.StringToBytes(ua) {
|
||||
if (it > 64 && it < 91) || (it > 96 && it < 123) || it == '_' {
|
||||
if (it > 64 && it < 91) || (it > 96 && it < 123) || (it > 47 && it < 58) || it == '_' {
|
||||
// TODO: Store an index and slice that instead?
|
||||
buf = append(buf, it)
|
||||
} else if it == ' ' || it == '(' || it == ')' || it == '-' || (it > 47 && it < 58) || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' {
|
||||
} else if it == ' ' || it == '(' || it == ')' || it == '-' || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' {
|
||||
//log.Print("buf: ",string(buf))
|
||||
//log.Print("it: ",string(it))
|
||||
if len(buf) != 0 {
|
||||
if len(buf) > 2 {
|
||||
// Use an unsafe zero copy conversion here just to use the switch, it's not safe for this string to escape from here, as it will get mutated, so do a regular string conversion in append
|
||||
@ -1181,12 +1188,14 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
os = 5
|
||||
case "Android":
|
||||
os = 4
|
||||
case "like","compatible","NT","X","KHTML":
|
||||
case "like","compatible","NT","X","com","KHTML":
|
||||
// Skip these words
|
||||
default:
|
||||
//log.Print("append buf")
|
||||
items = append(items, string(buf))
|
||||
}
|
||||
}
|
||||
//log.Print("reset buf")
|
||||
buf = buf[:0]
|
||||
}
|
||||
} else {
|
||||
@ -1235,11 +1244,11 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
if strings.Contains(ua,"rv:11") {
|
||||
agent = 6
|
||||
}
|
||||
case 58:
|
||||
case 60:
|
||||
w.WriteHeader(200) // 400
|
||||
w.Write([]byte(""))
|
||||
r.DumpRequest(req,"Blocked Scanner")
|
||||
co.AgentViewCounter.Bump(58)
|
||||
co.AgentViewCounter.Bump(60)
|
||||
return
|
||||
}
|
||||
|
||||
@ -1252,7 +1261,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
}
|
||||
r.DumpRequest(req,"Blank UA: " + pre)
|
||||
} else {
|
||||
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(ua))
|
||||
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(req.UserAgent()))
|
||||
}
|
||||
}// else {
|
||||
//co.AgentViewCounter.Bump(agentMapEnum[agent])
|
||||
@ -1439,11 +1448,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
|
||||
return err
|
||||
}
|
||||
|
||||
gzw, ok := w.(c.GzipResponseWriter)
|
||||
if ok {
|
||||
w = gzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
}
|
||||
w = r.responseWriter(w)
|
||||
err = routes.ShowAttachment(w,req,user,extraData)
|
||||
co.RouteViewCounter.Bump3(6, cn)
|
||||
case "/ws":
|
||||
@ -2012,11 +2017,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
|
||||
return err
|
||||
}
|
||||
|
||||
gzw, ok := w.(c.GzipResponseWriter)
|
||||
if ok {
|
||||
w = gzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
}
|
||||
w = r.responseWriter(w)
|
||||
err = panel.Backups(w,req,user,extraData)
|
||||
co.RouteViewCounter.Bump3(90, cn)
|
||||
case "/panel/logs/regs/":
|
||||
@ -2912,13 +2913,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
|
||||
co.RouteViewCounter.Bump3(168, cn)
|
||||
return c.NotFound(w,req,nil)
|
||||
}
|
||||
/*if bzw, ok := w.(c.BrResponseWriter); ok {
|
||||
w = bzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
|
||||
w = gzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
}
|
||||
w = r.responseWriter(w)
|
||||
req.URL.Path += extraData
|
||||
// TODO: Find a way to propagate errors up from this?
|
||||
r.UploadHandler(w,req) // TODO: Count these views
|
||||
@ -2932,13 +2927,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
|
||||
co.RouteViewCounter.Bump3(170, cn)
|
||||
return routes.RobotsTxt(w,req)
|
||||
case "favicon.ico":
|
||||
/*if bzw, ok := w.(c.BrResponseWriter); ok {
|
||||
w = bzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
|
||||
w = gzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
}
|
||||
w = r.responseWriter(w)
|
||||
req.URL.Path = "/s/favicon.ico"
|
||||
routes.StaticFile(w,req)
|
||||
co.RouteViewCounter.Bump3(173, cn)
|
||||
@ -2982,3 +2971,14 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *GenRouter) responseWriter(w http.ResponseWriter) http.ResponseWriter {
|
||||
/*if bzw, ok := w.(c.BrResponseWriter); ok {
|
||||
w = bzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
|
||||
w = gzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
}
|
||||
return w
|
||||
}
|
||||
|
@ -237,10 +237,12 @@
|
||||
"domcop":"DomCopBot",
|
||||
"netcraft":"Netcraft",
|
||||
"blexbot":"BLEXBot",
|
||||
"wappalyzer":"Wappalyzer",
|
||||
"burf":"Burf.co",
|
||||
"aspiegel":"Aspiegel",
|
||||
"mail_ru":"Mail.ru bot",
|
||||
"ccbot":"CCBot",
|
||||
"yacy":"YaCy P2P Search Engine",
|
||||
"zgrab":"Zgrab App Scanner",
|
||||
"cloudsystemnetworks":"Nimbostratus / Cloud System Networks",
|
||||
"maui":"MauiBot",
|
||||
|
@ -230,9 +230,9 @@ func main() {
|
||||
|
||||
tmplVars.AllAgentNames = []string{
|
||||
"unknown",
|
||||
"firefox",
|
||||
"chrome",
|
||||
"opera",
|
||||
"chrome",
|
||||
"firefox",
|
||||
"safari",
|
||||
"edge",
|
||||
"internetexplorer",
|
||||
@ -285,16 +285,18 @@ func main() {
|
||||
"domcop",
|
||||
"netcraft",
|
||||
"blexbot",
|
||||
"wappalyzer",
|
||||
"burf",
|
||||
"aspiegel",
|
||||
"mail_ru",
|
||||
"ccbot",
|
||||
"yacy",
|
||||
"zgrab",
|
||||
"cloudsystemnetworks",
|
||||
"maui",
|
||||
"curl",
|
||||
"python",
|
||||
"go",
|
||||
//"go",
|
||||
"headlesschrome",
|
||||
"awesome_bot",
|
||||
}
|
||||
@ -369,17 +371,19 @@ func main() {
|
||||
a("DomCopBot", "domcop")
|
||||
a("NetcraftSurveyAgent", "netcraft")
|
||||
a("BLEXBot", "blexbot")
|
||||
a("Wappalyzer", "wappalyzer")
|
||||
a("Burf", "burf")
|
||||
a("AspiegelBot", "aspiegel")
|
||||
a("PetalBot", "aspiegel")
|
||||
a("RU_Bot", "mail_ru") // Mail.RU_Bot
|
||||
a("CCBot", "ccbot")
|
||||
a("yacybot", "yacy")
|
||||
a("zgrab", "zgrab")
|
||||
a("Nimbostratus", "cloudsystemnetworks")
|
||||
a("MauiBot", "maui")
|
||||
a("curl", "curl")
|
||||
a("python", "python")
|
||||
a("Go", "go")
|
||||
//a("Go", "go") // yacy has java as part of its UA, try to avoid hitting crawlers written in go
|
||||
a("HeadlessChrome", "headlesschrome")
|
||||
a("awesome_bot", "awesome_bot")
|
||||
// TODO: Detect Adsbot/3.1, it has a similar user agent to Google's Adsbot, but it is different. No Google fragments.
|
||||
@ -732,6 +736,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
// Track the user agents. Unfortunately, everyone pretends to be Mozilla, so this'll be a little less efficient than I would like.
|
||||
// TODO: Add a setting to disable this?
|
||||
// TODO: Use a more efficient detector instead of smashing every possible combination in
|
||||
// TODO: Make this testable
|
||||
var agent int
|
||||
if !c.Config.DisableAnalytics {
|
||||
|
||||
@ -755,10 +760,12 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
var items []string
|
||||
var os int
|
||||
for _, it := range uutils.StringToBytes(ua) {
|
||||
if (it > 64 && it < 91) || (it > 96 && it < 123) || it == '_' {
|
||||
if (it > 64 && it < 91) || (it > 96 && it < 123) || (it > 47 && it < 58) || it == '_' {
|
||||
// TODO: Store an index and slice that instead?
|
||||
buf = append(buf, it)
|
||||
} else if it == ' ' || it == '(' || it == ')' || it == '-' || (it > 47 && it < 58) || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' {
|
||||
} else if it == ' ' || it == '(' || it == ')' || it == '-' || it == ';' || it == ':' || it == '.' || it == '+' || it == '~' || it == '@' /*|| (it == ':' && bytes.Equal(buf,[]byte("http")))*/ || it == ',' || it == '/' {
|
||||
//log.Print("buf: ",string(buf))
|
||||
//log.Print("it: ",string(it))
|
||||
if len(buf) != 0 {
|
||||
if len(buf) > 2 {
|
||||
// Use an unsafe zero copy conversion here just to use the switch, it's not safe for this string to escape from here, as it will get mutated, so do a regular string conversion in append
|
||||
@ -773,12 +780,14 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
os = {{.AllOSMap.iphone}}
|
||||
case "Android":
|
||||
os = {{.AllOSMap.android}}
|
||||
case "like","compatible","NT","X","KHTML":
|
||||
case "like","compatible","NT","X","com","KHTML":
|
||||
// Skip these words
|
||||
default:
|
||||
//log.Print("append buf")
|
||||
items = append(items, string(buf))
|
||||
}
|
||||
}
|
||||
//log.Print("reset buf")
|
||||
buf = buf[:0]
|
||||
}
|
||||
} else {
|
||||
@ -844,7 +853,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
}
|
||||
r.DumpRequest(req,"Blank UA: " + pre)
|
||||
} else {
|
||||
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(ua))
|
||||
r.requestLogger.Print("unknown ua: ", c.SanitiseSingleLine(req.UserAgent()))
|
||||
}
|
||||
}// else {
|
||||
//co.AgentViewCounter.Bump(agentMapEnum[agent])
|
||||
@ -1001,13 +1010,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
|
||||
co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.UploadedFile"}}, cn)
|
||||
return c.NotFound(w,req,nil)
|
||||
}
|
||||
/*if bzw, ok := w.(c.BrResponseWriter); ok {
|
||||
w = bzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
|
||||
w = gzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
}
|
||||
w = r.responseWriter(w)
|
||||
req.URL.Path += extraData
|
||||
// TODO: Find a way to propagate errors up from this?
|
||||
r.UploadHandler(w,req) // TODO: Count these views
|
||||
@ -1021,13 +1024,7 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
|
||||
co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.RobotsTxt"}}, cn)
|
||||
return routes.RobotsTxt(w,req)
|
||||
case "favicon.ico":
|
||||
/*if bzw, ok := w.(c.BrResponseWriter); ok {
|
||||
w = bzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
|
||||
w = gzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
}
|
||||
w = r.responseWriter(w)
|
||||
req.URL.Path = "/s/favicon.ico"
|
||||
routes.StaticFile(w,req)
|
||||
co.RouteViewCounter.Bump3({{index .AllRouteMap "routes.Favicon"}}, cn)
|
||||
@ -1071,6 +1068,17 @@ func (r *GenRouter) routeSwitch(w http.ResponseWriter, req *http.Request, user *
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *GenRouter) responseWriter(w http.ResponseWriter) http.ResponseWriter {
|
||||
/*if bzw, ok := w.(c.BrResponseWriter); ok {
|
||||
w = bzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
} else */if gzw, ok := w.(c.GzipResponseWriter); ok {
|
||||
w = gzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
}
|
||||
return w
|
||||
}
|
||||
`
|
||||
tmpl := template.Must(template.New("router").Parse(fileData))
|
||||
var b bytes.Buffer
|
||||
|
@ -60,11 +60,7 @@ func (r *RouteImpl) hasBeforeItem(item string) bool {
|
||||
}
|
||||
|
||||
func (r *RouteImpl) NoGzip() *RouteImpl {
|
||||
return r.LitBeforeMultiline(`gzw, ok := w.(c.GzipResponseWriter)
|
||||
if ok {
|
||||
w = gzw.ResponseWriter
|
||||
w.Header().Del("Content-Encoding")
|
||||
}`)
|
||||
return r.LitBefore("w = r.responseWriter(w)")
|
||||
}
|
||||
|
||||
func (r *RouteImpl) NoHeader() *RouteImpl {
|
||||
|
Loading…
Reference in New Issue
Block a user