Try to filter out bot referrers more effectively.

The earlier megaindex update didn't save properly, so its agent entries are included in this commit.
This commit is contained in:
Azareal 2020-04-09 19:30:51 +10:00
parent 64edf18e73
commit 7d30a66d26
2 changed files with 71 additions and 56 deletions

View File

@ -616,18 +616,19 @@ var agentMapEnum = map[string]int{
"dotbot": 42, "dotbot": 42,
"ahrefs": 43, "ahrefs": 43,
"proximic": 44, "proximic": 44,
"majestic": 45, "megaindex": 45,
"netcraft": 46, "majestic": 46,
"blexbot": 47, "netcraft": 47,
"burf": 48, "blexbot": 48,
"aspiegel": 49, "burf": 49,
"mail_ru": 50, "aspiegel": 50,
"ccbot": 51, "mail_ru": 51,
"zgrab": 52, "ccbot": 52,
"cloudsystemnetworks": 53, "zgrab": 53,
"curl": 54, "cloudsystemnetworks": 54,
"python": 55, "curl": 55,
"go": 56, "python": 56,
"go": 57,
} }
var reverseAgentMapEnum = map[int]string{ var reverseAgentMapEnum = map[int]string{
0: "unknown", 0: "unknown",
@ -675,18 +676,19 @@ var reverseAgentMapEnum = map[int]string{
42: "dotbot", 42: "dotbot",
43: "ahrefs", 43: "ahrefs",
44: "proximic", 44: "proximic",
45: "majestic", 45: "megaindex",
46: "netcraft", 46: "majestic",
47: "blexbot", 47: "netcraft",
48: "burf", 48: "blexbot",
49: "aspiegel", 49: "burf",
50: "mail_ru", 50: "aspiegel",
51: "ccbot", 51: "mail_ru",
52: "zgrab", 52: "ccbot",
53: "cloudsystemnetworks", 53: "zgrab",
54: "curl", 54: "cloudsystemnetworks",
55: "python", 55: "curl",
56: "go", 56: "python",
57: "go",
} }
var markToAgent = map[string]string{ var markToAgent = map[string]string{
"OPR": "opera", "OPR": "opera",
@ -734,6 +736,7 @@ var markToAgent = map[string]string{
"DotBot": "dotbot", "DotBot": "dotbot",
"AhrefsBot": "ahrefs", "AhrefsBot": "ahrefs",
"proximic": "proximic", "proximic": "proximic",
"MegaIndex": "megaindex",
"MJ12bot": "majestic", "MJ12bot": "majestic",
"mj12bot": "majestic", "mj12bot": "majestic",
"NetcraftSurveyAgent": "netcraft", "NetcraftSurveyAgent": "netcraft",
@ -794,19 +797,20 @@ var markToID = map[string]int{
"DotBot": 42, "DotBot": 42,
"AhrefsBot": 43, "AhrefsBot": 43,
"proximic": 44, "proximic": 44,
"MJ12bot": 45, "MegaIndex": 45,
"mj12bot": 45, "MJ12bot": 46,
"NetcraftSurveyAgent": 46, "mj12bot": 46,
"BLEXBot": 47, "NetcraftSurveyAgent": 47,
"Burf": 48, "BLEXBot": 48,
"AspiegelBot": 49, "Burf": 49,
"RU_Bot": 50, "AspiegelBot": 50,
"CCBot": 51, "RU_Bot": 51,
"zgrab": 52, "CCBot": 52,
"Nimbostratus": 53, "zgrab": 53,
"curl": 54, "Nimbostratus": 54,
"python": 55, "curl": 55,
"Go": 56, "python": 56,
"Go": 57,
} }
/*var agentRank = map[string]int{ /*var agentRank = map[string]int{
"opera":9, "opera":9,
@ -1178,7 +1182,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
if strings.Contains(ua,"rv:11") { if strings.Contains(ua,"rv:11") {
agent = 6 agent = 6
} }
case 52: case 53:
r.SuspiciousRequest(req,"Vulnerability Scanner") r.SuspiciousRequest(req,"Vulnerability Scanner")
} }
@ -1225,15 +1229,19 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
} }
if !c.Config.RefNoTrack { if !c.Config.RefNoTrack {
ref := req.Header.Get("Referer") // Check the 'referrer' header too? :P ae := req.Header.Get("Accept-Encoding")
if ref != "" { likelyBot := ae == "gzip" || ae == ""
// ? Optimise this a little? if !likelyBot {
ref = strings.TrimPrefix(strings.TrimPrefix(ref,"http://"),"https://") ref := req.Header.Get("Referer") // Check the 'referrer' header too? :P
ref = strings.Split(ref,"/")[0] if ref != "" {
portless := strings.Split(ref,":")[0] // ? Optimise this a little?
// TODO: Handle c.Site.Host in uppercase too? ref = strings.TrimPrefix(strings.TrimPrefix(ref,"http://"),"https://")
if portless != "localhost" && portless != "127.0.0.1" && portless != c.Site.Host { ref = strings.Split(ref,"/")[0]
co.ReferrerTracker.Bump(ref) portless := strings.Split(ref,":")[0]
// TODO: Handle c.Site.Host in uppercase too?
if portless != "localhost" && portless != "127.0.0.1" && portless != c.Site.Host {
co.ReferrerTracker.Bump(ref)
}
} }
} }
} }

View File

@ -276,6 +276,7 @@ func main() {
"dotbot", "dotbot",
"ahrefs", "ahrefs",
"proximic", "proximic",
"megaindex",
"majestic", "majestic",
"netcraft", "netcraft",
"blexbot", "blexbot",
@ -343,6 +344,7 @@ func main() {
"DotBot", "DotBot",
"AhrefsBot", "AhrefsBot",
"proximic", "proximic",
"MegaIndex",
"MJ12bot", "MJ12bot",
"mj12bot", "mj12bot",
"NetcraftSurveyAgent", "NetcraftSurveyAgent",
@ -406,6 +408,7 @@ func main() {
"DotBot": "dotbot", "DotBot": "dotbot",
"AhrefsBot": "ahrefs", "AhrefsBot": "ahrefs",
"proximic": "proximic", "proximic": "proximic",
"MegaIndex": "megaindex",
"MJ12bot": "majestic", "MJ12bot": "majestic",
"mj12bot": "majestic", "mj12bot": "majestic",
"NetcraftSurveyAgent": "netcraft", "NetcraftSurveyAgent": "netcraft",
@ -898,15 +901,19 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
} }
if !c.Config.RefNoTrack { if !c.Config.RefNoTrack {
ref := req.Header.Get("Referer") // Check the 'referrer' header too? :P ae := req.Header.Get("Accept-Encoding")
if ref != "" { likelyBot := ae == "gzip" || ae == ""
// ? Optimise this a little? if !likelyBot {
ref = strings.TrimPrefix(strings.TrimPrefix(ref,"http://"),"https://") ref := req.Header.Get("Referer") // Check the 'referrer' header too? :P
ref = strings.Split(ref,"/")[0] if ref != "" {
portless := strings.Split(ref,":")[0] // ? Optimise this a little?
// TODO: Handle c.Site.Host in uppercase too? ref = strings.TrimPrefix(strings.TrimPrefix(ref,"http://"),"https://")
if portless != "localhost" && portless != "127.0.0.1" && portless != c.Site.Host { ref = strings.Split(ref,"/")[0]
co.ReferrerTracker.Bump(ref) portless := strings.Split(ref,":")[0]
// TODO: Handle c.Site.Host in uppercase too?
if portless != "localhost" && portless != "127.0.0.1" && portless != c.Site.Host {
co.ReferrerTracker.Bump(ref)
}
} }
} }
} }