add python and go user agents.

add new bot class for python, go and curl to reduce resource usage.
don't push canonicals for logged-in users
try to avoid pushing google site verify when it isn't necessary
give ahrefs the semrush treatment (skip generating topic descriptions for it)
This commit is contained in:
Azareal 2020-03-22 14:37:45 +10:00
parent f86346ba22
commit ade8f82af7
11 changed files with 66 additions and 29 deletions

View File

@ -56,7 +56,8 @@ var ErrNoRows = sql.ErrNoRows
// TODO: Make this more customisable
var SpammyDomainBits = []string{"porn", "sex", "lesbian", "acup", "nude", "milf", "tits", "vape", "busty", "kink", "lingerie", "strapon", "problog", "fet", "xblog", "blogin", "blognetwork", "relayblog"}
var Chrome, Firefox, Semrush, Ahrefs int // ! Temporary Hack for http push and stopping semrush from wasting resources
var Chrome, Firefox int // ! Temporary Hack for http push
var SimpleBots []int // ! Temporary hack to stop semrush, ahrefs, python bots and others from wasting resources
type StringList []string

View File

@ -207,9 +207,9 @@ func userCheck(w http.ResponseWriter, r *http.Request, user *User) (header *Head
// TODO: Add the ability for admins to restrict certain themes to certain groups?
// ! Be careful about firing errors off here as CustomError uses this
func userCheck2(w http.ResponseWriter, r *http.Request, user *User, nano int64) (header *Header, rerr RouteError) {
func userCheck2(w http.ResponseWriter, r *http.Request, user *User, nano int64) (h *Header, rerr RouteError) {
theme := GetThemeByReq(r)
header = &Header{
h = &Header{
Site: Site,
Settings: SettingBox.Load().(SettingMap),
Themes: Themes,
@ -222,32 +222,34 @@ func userCheck2(w http.ResponseWriter, r *http.Request, user *User, nano int64)
StartedAt: nano,
}
// TODO: Optimise this by avoiding accessing a map string index
header.GoogSiteVerify = header.Settings["google_site_verify"].(string)
if !user.Loggedin {
h.GoogSiteVerify = h.Settings["google_site_verify"].(string)
}
if user.IsBanned {
header.AddNotice("account_banned")
h.AddNotice("account_banned")
}
if user.Loggedin && !user.Active {
header.AddNotice("account_inactive")
h.AddNotice("account_inactive")
}
// An optimisation so we don't populate StartedAt for users who shouldn't see the stat anyway
// ? - Should we only show this in debug mode? It might be useful for detecting issues in production if we show it there as well
//if user.IsAdmin {
//header.StartedAt = time.Now()
//h.StartedAt = time.Now()
//}
//PrepResources(user,header,theme)
return header, nil
//PrepResources(user,h,theme)
return h, nil
}
func PrepResources(user *User, h *Header, theme *Theme) {
func PrepResources(u *User, h *Header, theme *Theme) {
h.AddSheet(theme.Name + "/main.css")
if len(theme.Resources) > 0 {
rlist := theme.Resources
for _, res := range rlist {
if res.Loggedin && !user.Loggedin {
if res.Loggedin && !u.Loggedin {
continue
}
if res.Location == "global" || res.Location == "frontend" {
@ -284,7 +286,7 @@ func PrepResources(user *User, h *Header, theme *Theme) {
addPreScript("paginator")
addPreScript("alert")
addPreScript("notice")
if user.Loggedin {
if u.Loggedin {
addPreScript("topic_c_edit_post")
addPreScript("topic_c_attach_item")
addPreScript("topic_c_poll_input")

View File

@ -621,6 +621,8 @@ var agentMapEnum = map[string]int{
"mail_ru": 47,
"zgrab": 48,
"curl": 49,
"python": 50,
"go": 51,
}
var reverseAgentMapEnum = map[int]string{
0: "unknown",
@ -673,6 +675,8 @@ var reverseAgentMapEnum = map[int]string{
47: "mail_ru",
48: "zgrab",
49: "curl",
50: "python",
51: "go",
}
var markToAgent = map[string]string{
"OPR": "opera",
@ -725,6 +729,8 @@ var markToAgent = map[string]string{
"RU_Bot": "mail_ru",
"zgrab": "zgrab",
"curl": "curl",
"python": "python",
"Go": "go",
}
var markToID = map[string]int{
"OPR": 3,
@ -777,6 +783,8 @@ var markToID = map[string]int{
"RU_Bot": 47,
"zgrab": 48,
"curl": 49,
"python": 50,
"Go": 51,
}
/*var agentRank = map[string]int{
"opera":9,
@ -795,8 +803,13 @@ func init() {
co.SetReverseOSMapEnum(reverseOSMapEnum)
c.Chrome = agentMapEnum["chrome"]
c.Firefox = agentMapEnum["firefox"]
c.Semrush = agentMapEnum["semrush"]
c.Ahrefs = agentMapEnum["ahrefs"]
c.SimpleBots = []int{
agentMapEnum["semrush"],
agentMapEnum["ahrefs"],
agentMapEnum["python"],
agentMapEnum["go"],
agentMapEnum["curl"],
}
}
type WriterIntercept struct {
@ -851,7 +864,7 @@ func NewGenRouter(uploads http.Handler) (*GenRouter, error) {
}, nil
}
func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *http.Request, user *c.User) {
func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *http.Request, u *c.User) {
if err.Handled() {
return
}
@ -859,7 +872,7 @@ func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *ht
c.InternalErrorJSQ(err, w, req, err.JSON())
return
}
c.LocalErrorJSQ(err.Error(), w, req, user, err.JSON())
c.LocalErrorJSQ(err.Error(), w, req, u, err.JSON())
}
func (r *GenRouter) Handle(_ string, _ http.Handler) {

View File

@ -233,6 +233,8 @@
"mail_ru":"Mail.ru bot",
"zgrab":"Zgrab App Scanner",
"curl":"curl",
"python":"Python Bot",
"go":"Go Bot",
"suspicious":"Suspicious",
"unknown":"Unknown",
"blank":"Blank",

View File

@ -281,6 +281,8 @@ func main() {
"mail_ru",
"zgrab",
"curl",
"python",
"go",
}
tmplVars.AllAgentMap = make(map[string]int)
@ -341,6 +343,8 @@ func main() {
"RU_Bot", // Mail.RU_Bot
"zgrab",
"curl",
"python",
"Go",
}
tmplVars.AllAgentMarks = map[string]string{
@ -366,12 +370,12 @@ func main() {
"360Spider": "haosou",
"bingbot": "bing",
"BingPreview": "bing",
"msnbot":"bing",
"msnbot": "bing",
"Slurp": "slurp",
"Exabot": "exabot",
"MojeekBot": "mojeek",
"Cliqzbot": "cliqz",
"netEstate":"datenbank",
"netEstate": "datenbank",
"SeznamBot": "seznambot",
"CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots?
"archive": "archive_org", //archive.org_bot
@ -396,6 +400,8 @@ func main() {
"RU_Bot": "mail_ru", // Mail.RU_Bot
"zgrab": "zgrab",
"curl": "curl",
"python": "python",
"Go": "go",
}
tmplVars.AllAgentMarkIDs = make(map[string]int)
@ -475,8 +481,13 @@ func init() {
co.SetReverseOSMapEnum(reverseOSMapEnum)
c.Chrome = agentMapEnum["chrome"]
c.Firefox = agentMapEnum["firefox"]
c.Semrush = agentMapEnum["semrush"]
c.Ahrefs = agentMapEnum["ahrefs"]
c.SimpleBots = []int{
agentMapEnum["semrush"],
agentMapEnum["ahrefs"],
agentMapEnum["python"],
agentMapEnum["go"],
agentMapEnum["curl"],
}
}
type WriterIntercept struct {
@ -531,7 +542,7 @@ func NewGenRouter(uploads http.Handler) (*GenRouter, error) {
}, nil
}
func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *http.Request, user *c.User) {
func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *http.Request, u *c.User) {
if err.Handled() {
return
}
@ -539,7 +550,7 @@ func (r *GenRouter) handleError(err c.RouteError, w http.ResponseWriter, req *ht
c.InternalErrorJSQ(err, w, req, err.JSON())
return
}
c.LocalErrorJSQ(err.Error(), w, req, user, err.JSON())
c.LocalErrorJSQ(err.Error(), w, req, u, err.JSON())
}
func (r *GenRouter) Handle(_ string, _ http.Handler) {

View File

@ -119,7 +119,13 @@ func FootHeaders(w http.ResponseWriter, h *c.Header) {
func renderTemplate3(tmplName, hookName string, w http.ResponseWriter, r *http.Request, h *c.Header, pi interface{}) error {
s := h.Stylesheets
h.Stylesheets = nil
simpleBot := h.CurrentUser.LastAgent == c.Semrush || h.CurrentUser.LastAgent == c.Ahrefs
noDescSimpleBot := h.CurrentUser.LastAgent == c.SimpleBots[0] || h.CurrentUser.LastAgent == c.SimpleBots[1]
var simpleBot bool
for _, agent := range c.SimpleBots {
if h.CurrentUser.LastAgent == agent {
simpleBot = true
}
}
inner := r.FormValue("i") == "1"
if !inner && !simpleBot {
c.PrepResources(h.CurrentUser, h, h.Theme)
@ -134,7 +140,7 @@ func renderTemplate3(tmplName, hookName string, w http.ResponseWriter, r *http.R
h.CurrentUser.LastAgent = 0
}
if h.CurrentUser.Loggedin || inner || simpleBot {
if h.CurrentUser.Loggedin || inner || noDescSimpleBot {
h.MetaDesc = ""
h.OGDesc = ""
} else if h.MetaDesc != "" && h.OGDesc == "" {
@ -143,6 +149,8 @@ func renderTemplate3(tmplName, hookName string, w http.ResponseWriter, r *http.R
if !simpleBot {
FootHeaders(w, h)
} else {
h.GoogSiteVerify = ""
}
if h.Zone != "error" {
since := time.Duration(uutils.Nanotime() - h.StartedAt)

View File

@ -75,7 +75,7 @@ func ViewTopic(w http.ResponseWriter, r *http.Request, user *c.User, header *c.H
}
topic.ContentLines = strings.Count(topic.Content, "\n")
if !user.Loggedin && user.LastAgent != c.Semrush {
if !user.Loggedin && user.LastAgent != c.SimpleBots[0] && user.LastAgent != c.SimpleBots[1] {
if len(topic.Content) > 200 {
header.OGDesc = topic.Content[:197] + "..."
} else {

View File

@ -4,7 +4,7 @@
{{if gt .Page 1}}<div id="prevFloat" class="prev_button"><a class="prev_link" aria-label="{{lang "paginator.prev_page_aria"}}" rel="prev" href="{{.Forum.Link}}?page={{subtract .Page 1}}">{{lang "paginator.less_than"}}</a></div>{{end}}
{{if ne .LastPage .Page}}<div id="nextFloat" class="next_button"><a class="next_link" aria-label="{{lang "paginator.next_page_aria"}}" rel="next" href="{{.Forum.Link}}?page={{add .Page 1}}">{{lang "paginator.greater_than"}}</a></div>{{end}}
<link rel="canonical" href="//{{.Site.URL}}{{.Forum.Link}}{{if gt .Page 1}}?page={{.Page}}{{end}}"/>
{{if not .CurrentUser.Loggedin}}<link rel="canonical" href="//{{.Site.URL}}{{.Forum.Link}}{{if gt .Page 1}}?page={{.Page}}{{end}}"/>{{end}}
<div id="forum_head_block" class="rowblock rowhead topic_list_title_block{{if .CurrentUser.Loggedin}} has_opt{{end}}">
<div class="rowitem forum_title">

View File

@ -2,7 +2,7 @@
{{if gt .Page 1}}<link rel="prev"href="{{.Topic.Link}}?page={{subtract .Page 1}}"/>{{end}}
{{if ne .LastPage .Page}}<link rel="prerender next"href="{{.Topic.Link}}?page={{add .Page 1}}"/>{{end}}
<link rel="canonical" href="//{{.Site.URL}}{{.Topic.Link}}{{if gt .Page 1}}?page={{.Page}}{{end}}"/>
{{if not .CurrentUser.Loggedin}}<link rel="canonical" href="//{{.Site.URL}}{{.Topic.Link}}{{if gt .Page 1}}?page={{.Page}}{{end}}"/>{{end}}
<div {{scope "topic_title_block"}} class="rowblock rowhead topic_block" aria-label="{{lang "topic.topic_info_aria"}}">
<div class="rowitem topic_item{{if .Topic.Sticky}} topic_sticky_head{{else if .Topic.IsClosed}} topic_closed_head{{end}}">

View File

@ -7,7 +7,7 @@
<div id="nextFloat" class="next_button">
<a class="next_link" aria-label="{{lang "paginator.next_page_aria"}}" rel="next"href="{{.Topic.Link}}?page={{add .Page 1}}">{{lang "paginator.greater_than"}}</a>
</div>{{end}}
<link rel="canonical" href="//{{.Site.URL}}{{.Topic.Link}}{{if gt .Page 1}}?page={{.Page}}{{end}}"/>
{{if not .CurrentUser.Loggedin}}<link rel="canonical" href="//{{.Site.URL}}{{.Topic.Link}}{{if gt .Page 1}}?page={{.Page}}{{end}}"/>{{end}}
<div {{scope "topic_title_block"}} class="rowblock rowhead topic_block" aria-label="{{lang "topic.topic_info_aria"}}">
<div class="rowitem topic_item{{if .Topic.Sticky}} topic_sticky_head{{else if .Topic.IsClosed}} topic_closed_head{{end}}">

View File

@ -1,6 +1,6 @@
{{template "header.html" . }}
<main id="topicsItemList" itemscope itemtype="http://schema.org/ItemList">
<link rel="canonical"href="//{{.Site.URL}}/topics/{{if eq .Sort.SortBy "mostviewed"}}most-viewed/{{end}}{{if gt .Page 1}}?page={{.Page}}{{end}}"/>
{{if not .CurrentUser.Loggedin}}<link rel="canonical"href="//{{.Site.URL}}/topics/{{if eq .Sort.SortBy "mostviewed"}}most-viewed/{{end}}{{if gt .Page 1}}?page={{.Page}}{{end}}"/>{{end}}
<div class="rowblock rowhead topic_list_title_block{{if .CurrentUser.Loggedin}} has_opt{{end}}">
<div class="rowitem topic_list_title"><h1 itemprop="name">{{.Title}}</h1></div>