From ade8f82af7e14766164c8c05db8647c43c5ff4d9 Mon Sep 17 00:00:00 2001 From: Azareal Date: Sun, 22 Mar 2020 14:37:45 +1000 Subject: [PATCH] add python and go user agents. add new bot class for python, go and curl to reduce resource usage. don't push canonicals for loggedin users try to avoid pushing google site verify when it isn't necessary give ahrefs the semrush treatment --- common/common.go | 3 ++- common/routes_common.go | 24 +++++++++++++----------- gen_router.go | 21 +++++++++++++++++---- langs/english.json | 2 ++ router_gen/main.go | 23 +++++++++++++++++------ routes/common.go | 12 ++++++++++-- routes/topic.go | 2 +- templates/forum.html | 2 +- templates/topic_alt_inner.html | 2 +- templates/topic_inner.html | 2 +- templates/topics.html | 2 +- 11 files changed, 66 insertions(+), 29 deletions(-) diff --git a/common/common.go b/common/common.go index 213991bf..4590723c 100644 --- a/common/common.go +++ b/common/common.go @@ -56,7 +56,8 @@ var ErrNoRows = sql.ErrNoRows // TODO: Make this more customisable var SpammyDomainBits = []string{"porn", "sex", "lesbian", "acup", "nude", "milf", "tits", "vape", "busty", "kink", "lingerie", "strapon", "problog", "fet", "xblog", "blogin", "blognetwork", "relayblog"} -var Chrome, Firefox, Semrush, Ahrefs int // ! Temporary Hack for http push and stopping semrush from wasting resources +var Chrome, Firefox int // ! Temporary Hack for http push +var SimpleBots []int // ! Temporary hack to stop semrush, ahrefs, python bots and other from wasting resources type StringList []string diff --git a/common/routes_common.go b/common/routes_common.go index cf76e75b..c89d7fe6 100644 --- a/common/routes_common.go +++ b/common/routes_common.go @@ -207,9 +207,9 @@ func userCheck(w http.ResponseWriter, r *http.Request, user *User) (header *Head // TODO: Add the ability for admins to restrict certain themes to certain groups? // ! Be careful about firing errors off here as CustomError uses this -func userCheck2(w http.ResponseWriter, r *http.Request, user *User, nano int64) (header *Header, rerr RouteError) { +func userCheck2(w http.ResponseWriter, r *http.Request, user *User, nano int64) (h *Header, rerr RouteError) { theme := GetThemeByReq(r) - header = &Header{ + h = &Header{ Site: Site, Settings: SettingBox.Load().(SettingMap), Themes: Themes, @@ -222,32 +222,34 @@ func userCheck2(w http.ResponseWriter, r *http.Request, user *User, nano int64) StartedAt: nano, } // TODO: Optimise this by avoiding accessing a map string index - header.GoogSiteVerify = header.Settings["google_site_verify"].(string) + if !user.Loggedin { + h.GoogSiteVerify = h.Settings["google_site_verify"].(string) + } if user.IsBanned { - header.AddNotice("account_banned") + h.AddNotice("account_banned") } if user.Loggedin && !user.Active { - header.AddNotice("account_inactive") + h.AddNotice("account_inactive") } // An optimisation so we don't populate StartedAt for users who shouldn't see the stat anyway // ? - Should we only show this in debug mode? It might be useful for detecting issues in production, if we show it there as-well //if user.IsAdmin { - //header.StartedAt = time.Now() + //h.StartedAt = time.Now() //} - //PrepResources(user,header,theme) - return header, nil + //PrepResources(user,h,theme) + return h, nil } -func PrepResources(user *User, h *Header, theme *Theme) { +func PrepResources(u *User, h *Header, theme *Theme) { h.AddSheet(theme.Name + "/main.css") if len(theme.Resources) > 0 { rlist := theme.Resources for _, res := range rlist { - if res.Loggedin && !user.Loggedin { + if res.Loggedin && !u.Loggedin { continue } if res.Location == "global" || res.Location == "frontend" { @@ -284,7 +286,7 @@ func PrepResources(user *User, h *Header, theme *Theme) { addPreScript("paginator") addPreScript("alert") addPreScript("notice") - if user.Loggedin { + if u.Loggedin { addPreScript("topic_c_edit_post") addPreScript("topic_c_attach_item") addPreScript("topic_c_poll_input") diff --git a/gen_router.go b/gen_router.go index 52962968..50f0d141 100644 --- a/gen_router.go +++ b/gen_router.go @@ -621,6 +621,8 @@ var agentMapEnum = map[string]int{ "mail_ru": 47, "zgrab": 48, "curl": 49, + "python": 50, + "go": 51, } var reverseAgentMapEnum = map[int]string{ 0: "unknown", @@ -673,6 +675,8 @@ var reverseAgentMapEnum = map[int]string{ 47: "mail_ru", 48: "zgrab", 49: "curl", + 50: "python", + 51: "go", } var markToAgent = map[string]string{ "OPR": "opera", @@ -725,6 +729,8 @@ var markToAgent = map[string]string{ "RU_Bot": "mail_ru", "zgrab": "zgrab", "curl": "curl", + "python": "python", + "Go": "go", } var markToID = map[string]int{ "OPR": 3, @@ -777,6 +783,8 @@ var markToID = map[string]int{ "RU_Bot": 47, "zgrab": 48, "curl": 49, + "python": 50, + "Go": 51, } /*var agentRank = map[string]int{ "opera":9, @@ -795,8 +803,13 @@ func init() { co.SetReverseOSMapEnum(reverseOSMapEnum) c.Chrome = agentMapEnum["chrome"] c.Firefox = agentMapEnum["firefox"] - c.Semrush = agentMapEnum["semrush"] - c.Ahrefs = agentMapEnum["ahrefs"] + c.SimpleBots = []int{ + agentMapEnum["semrush"], + agentMapEnum["ahrefs"], + agentMapEnum["python"], + agentMapEnum["go"], + agentMapEnum["curl"], + } } type WriterIntercept struct { @@ -851,7 +864,7 @@ func NewGenRouter(uploads http.Handler) (*GenRouter, error) { }, nil } -func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *http.Request, user *c.User) { +func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *http.Request, u *c.User) { if err.Handled() { return } @@ -859,7 +872,7 @@ func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *ht c.InternalErrorJSQ(err, w, req, err.JSON()) return } - c.LocalErrorJSQ(err.Error(), w, req, user, err.JSON()) + c.LocalErrorJSQ(err.Error(), w, req, u, err.JSON()) } func (r *GenRouter) Handle(_ string, _ http.Handler) { diff --git a/langs/english.json b/langs/english.json index b002d17c..3bb970e2 100644 --- a/langs/english.json +++ b/langs/english.json @@ -233,6 +233,8 @@ "mail_ru":"Mail.ru bot", "zgrab":"Zgrab App Scanner", "curl":"curl", + "python":"Python Bot", + "go":"Go Bot", "suspicious":"Suspicious", "unknown":"Unknown", "blank":"Blank", diff --git a/router_gen/main.go b/router_gen/main.go index 89554f6d..96c0e297 100644 --- a/router_gen/main.go +++ b/router_gen/main.go @@ -281,6 +281,8 @@ func main() { "mail_ru", "zgrab", "curl", + "python", + "go", } tmplVars.AllAgentMap = make(map[string]int) @@ -341,6 +343,8 @@ func main() { "RU_Bot", // Mail.RU_Bot "zgrab", "curl", + "python", + "Go", } tmplVars.AllAgentMarks = map[string]string{ @@ -366,12 +370,12 @@ func main() { "360Spider": "haosou", "bingbot": "bing", "BingPreview": "bing", - "msnbot":"bing", + "msnbot": "bing", "Slurp": "slurp", "Exabot": "exabot", "MojeekBot": "mojeek", "Cliqzbot": "cliqz", - "netEstate":"datenbank", + "netEstate": "datenbank", "SeznamBot": "seznambot", "CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots? "archive": "archive_org", //archive.org_bot @@ -396,6 +400,8 @@ func main() { "RU_Bot": "mail_ru", // Mail.RU_Bot "zgrab": "zgrab", "curl": "curl", + "python": "python", + "Go": "go", } tmplVars.AllAgentMarkIDs = make(map[string]int) @@ -475,8 +481,13 @@ func init() { co.SetReverseOSMapEnum(reverseOSMapEnum) c.Chrome = agentMapEnum["chrome"] c.Firefox = agentMapEnum["firefox"] - c.Semrush = agentMapEnum["semrush"] - c.Ahrefs = agentMapEnum["ahrefs"] + c.SimpleBots = []int{ + agentMapEnum["semrush"], + agentMapEnum["ahrefs"], + agentMapEnum["python"], + agentMapEnum["go"], + agentMapEnum["curl"], + } } type WriterIntercept struct { @@ -531,7 +542,7 @@ func NewGenRouter(uploads http.Handler) (*GenRouter, error) { }, nil } -func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *http.Request, user *c.User) { +func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *http.Request, u *c.User) { if err.Handled() { return } @@ -539,7 +550,7 @@ func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *ht c.InternalErrorJSQ(err, w, req, err.JSON()) return } - c.LocalErrorJSQ(err.Error(), w, req, user, err.JSON()) + c.LocalErrorJSQ(err.Error(), w, req, u, err.JSON()) } func (r *GenRouter) Handle(_ string, _ http.Handler) { diff --git a/routes/common.go b/routes/common.go index e4884d01..bed1264c 100644 --- a/routes/common.go +++ b/routes/common.go @@ -119,7 +119,13 @@ func FootHeaders(w http.ResponseWriter, h *c.Header) { func renderTemplate3(tmplName, hookName string, w http.ResponseWriter, r *http.Request, h *c.Header, pi interface{}) error { s := h.Stylesheets h.Stylesheets = nil - simpleBot := h.CurrentUser.LastAgent == c.Semrush || h.CurrentUser.LastAgent == c.Ahrefs + noDescSimpleBot := h.CurrentUser.LastAgent == c.SimpleBots[0] || h.CurrentUser.LastAgent == c.SimpleBots[1] + var simpleBot bool + for _, agent := range c.SimpleBots { + if h.CurrentUser.LastAgent == agent { + simpleBot = true + } + } inner := r.FormValue("i") == "1" if !inner && !simpleBot { c.PrepResources(h.CurrentUser, h, h.Theme) @@ -134,7 +140,7 @@ func renderTemplate3(tmplName, hookName string, w http.ResponseWriter, r *http.R h.CurrentUser.LastAgent = 0 } - if h.CurrentUser.Loggedin || inner || simpleBot { + if h.CurrentUser.Loggedin || inner || noDescSimpleBot { h.MetaDesc = "" h.OGDesc = "" } else if h.MetaDesc != "" && h.OGDesc == "" { @@ -143,6 +149,8 @@ func renderTemplate3(tmplName, hookName string, w http.ResponseWriter, r *http.R if !simpleBot { FootHeaders(w, h) + } else { + h.GoogSiteVerify = "" } if h.Zone != "error" { since := time.Duration(uutils.Nanotime() - h.StartedAt) diff --git a/routes/topic.go b/routes/topic.go index 72aeaee2..ba25cccc 100644 --- a/routes/topic.go +++ b/routes/topic.go @@ -75,7 +75,7 @@ func ViewTopic(w http.ResponseWriter, r *http.Request, user *c.User, header *c.H } topic.ContentLines = strings.Count(topic.Content, "\n") - if !user.Loggedin && user.LastAgent != c.Semrush { + if !user.Loggedin && user.LastAgent != c.SimpleBots[0] && user.LastAgent != c.SimpleBots[1] { if len(topic.Content) > 200 { header.OGDesc = topic.Content[:197] + "..." } else { diff --git a/templates/forum.html b/templates/forum.html index d1eb3467..8455ce0c 100644 --- a/templates/forum.html +++ b/templates/forum.html @@ -4,7 +4,7 @@ {{if gt .Page 1}}
{{end}} {{if ne .LastPage .Page}}
{{end}} - +{{if not .CurrentUser.Loggedin}}{{end}}
diff --git a/templates/topic_alt_inner.html b/templates/topic_alt_inner.html index 30b00fb5..83027a39 100644 --- a/templates/topic_alt_inner.html +++ b/templates/topic_alt_inner.html @@ -2,7 +2,7 @@ {{if gt .Page 1}}{{end}} {{if ne .LastPage .Page}}{{end}} - +{{if not .CurrentUser.Loggedin}}{{end}}
diff --git a/templates/topic_inner.html b/templates/topic_inner.html index 4ab5855d..a9983338 100644 --- a/templates/topic_inner.html +++ b/templates/topic_inner.html @@ -7,7 +7,7 @@ {{end}} - +{{if not .CurrentUser.Loggedin}}{{end}}
diff --git a/templates/topics.html b/templates/topics.html index cf8f0f92..69baaadd 100644 --- a/templates/topics.html +++ b/templates/topics.html @@ -1,6 +1,6 @@ {{template "header.html" . }}
- +{{if not .CurrentUser.Loggedin}}{{end}}

{{.Title}}