Support IPv6 hosts in the URL Parser.

Support the m.youtube.com and gaming.youtube.com variants of YouTube in the Media Parser.
Reduce repetition in the URL utility functions slightly.

Add 30 new parser test cases.
This commit is contained in:
Azareal 2019-05-24 09:39:24 +10:00
parent 515e607587
commit bc20c495c3
2 changed files with 50 additions and 10 deletions

View File

@ -744,7 +744,8 @@ func validateURLString(data string) bool {
// ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s.
for ; len(data) > i; i++ { for ; len(data) > i; i++ {
if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { char := data[i]
if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [
return false return false
} }
} }
@ -770,7 +771,8 @@ func validatedURLBytes(data []byte) (url []byte) {
// ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s.
for ; datalen > i; i++ { for ; datalen > i; i++ {
if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { char := data[i]
if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [
return InvalidURL return InvalidURL
} }
} }
@ -797,7 +799,8 @@ func PartialURLString(data string) (url []byte) {
// ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s.
for ; end >= i; i++ { for ; end >= i; i++ {
if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { char := data[i]
if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [
end = i end = i
} }
} }
@ -807,6 +810,7 @@ func PartialURLString(data string) (url []byte) {
} }
// TODO: Write a test for this // TODO: Write a test for this
// TODO: Handle the host bits differently from the paths...
func PartialURLStringLen(data string) (int, bool) { func PartialURLStringLen(data string) (int, bool) {
i := 0 i := 0
if len(data) >= 6 { if len(data) >= 6 {
@ -831,11 +835,12 @@ func PartialURLStringLen(data string) (int, bool) {
f := i f := i
//fmt.Println("f:",f) //fmt.Println("f:",f)
for ; len(data) > i; i++ { for ; len(data) > i; i++ {
if data[i] < 33 { // space and invisibles char := data[i]
if char < 33 { // space and invisibles
//fmt.Println("e2:",i) //fmt.Println("e2:",i)
return i, i != f return i, i != f
} else if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { } else if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [
//log.Print("Bad Character: ", data[i]) //log.Print("Bad Character: ", char)
//fmt.Println("e3") //fmt.Println("e3")
return i, false return i, false
} }
@ -850,6 +855,7 @@ func PartialURLStringLen(data string) (int, bool) {
} }
// TODO: Write a test for this // TODO: Write a test for this
// TODO: Get this to support IPv6 hosts. This isn't currently done because this function is used in the bbcode plugin, where it would mistake the [ of a tag for the start of an IPv6 host
func PartialURLStringLen2(data string) int { func PartialURLStringLen2(data string) int {
i := 0 i := 0
if len(data) >= 6 { if len(data) >= 6 {
@ -867,8 +873,9 @@ func PartialURLStringLen2(data string) int {
// ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s.
for ; len(data) > i; i++ { for ; len(data) > i; i++ {
if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) { char := data[i]
//log.Print("Bad Character: ", data[i]) if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && !(char > 44 && char < 58) && !(char > 64 && char < 91) && !(char > 96 && char < 123) { // 90 is Z, 91 is [
//log.Print("Bad Character: ", char)
return i return i
} }
} }
@ -938,12 +945,12 @@ func parseMediaString(data string) (media MediaEmbed, ok bool) {
// ? - I don't think this hostname will hit every YT domain // ? - I don't think this hostname will hit every YT domain
// TODO: Make this a more customisable handler rather than hard-coding it in here // TODO: Make this a more customisable handler rather than hard-coding it in here
if hostname == "www.youtube.com" && path == "/watch" { if strings.HasSuffix(hostname,".youtube.com") && path == "/watch" {
video, ok := query["v"] video, ok := query["v"]
if ok && len(video) >= 1 && video[0] != "" { if ok && len(video) >= 1 && video[0] != "" {
media.Type = "raw" media.Type = "raw"
// TODO: Filter the URL to make sure no nasties end up in there // TODO: Filter the URL to make sure no nasties end up in there
media.Body = "<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/" + video[0] + "' frameborder='0' allowfullscreen></iframe>" media.Body = "<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/" + video[0] + "' frameborder=0 allowfullscreen></iframe>"
return media, true return media, true
} }
} }

View File

@ -190,6 +190,34 @@ func TestParser(t *testing.T) {
msgList.Add("//"+c.Site.URL+"\n", "<a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a><br>") msgList.Add("//"+c.Site.URL+"\n", "<a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a><br>")
msgList.Add("//"+c.Site.URL+"\n//"+c.Site.URL, "<a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a><br><a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a>") msgList.Add("//"+c.Site.URL+"\n//"+c.Site.URL, "<a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a><br><a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a>")
var local = func(url string) {
msgList.Add("//"+url, "<a href='//"+url+"'>//"+url+"</a>")
msgList.Add("//"+url+"\n", "<a href='//"+url+"'>//"+url+"</a><br>")
msgList.Add("//"+url+"\n//"+url, "<a href='//"+url+"'>//"+url+"</a><br><a href='//"+url+"'>//"+url+"</a>")
}
local("localhost")
local("127.0.0.1")
local("[::1]")
msgList.Add("https://www.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
//msgList.Add("https://www.youtube.com/watch?v=;","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/;' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://www.youtube.com/watch?v=d;","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/d' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://www.youtube.com/watch?v=d;d","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/d' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://www.youtube.com/watch?v=alert()","<red>[Invalid URL]</red>()")
msgList.Add("https://www.youtube.com/watch?v=js:alert()","<red>[Invalid URL]</red>()")
msgList.Add("https://www.youtube.com/watch?v='+><script>alert(\"\")</script><+'","<red>[Invalid URL]</red>'+><script>alert(\"\")</script><+'")
msgList.Add("https://www.youtube.com/watch?v='+onready='alert(\"\")'+'","<red>[Invalid URL]</red>'+onready='alert(\"\")'+'")
msgList.Add(" https://www.youtube.com/watch?v=lalalalala"," <iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://www.youtube.com/watch?v=lalalalala tt","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe> tt")
msgList.Add("https://www.youtube.com/watch?v=lalalalala&d=haha","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://gaming.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://gaming.youtube.com/watch?v=lalalalala&d=haha","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://m.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://m.youtube.com/watch?v=lalalalala&d=haha","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("http://www.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("//www.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
//msgList.Add("www.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("#tid-1", "<a href='/topic/1'>#tid-1</a>") msgList.Add("#tid-1", "<a href='/topic/1'>#tid-1</a>")
msgList.Add("##tid-1", "##tid-1") msgList.Add("##tid-1", "##tid-1")
msgList.Add("# #tid-1", "# #tid-1") msgList.Add("# #tid-1", "# #tid-1")
@ -201,9 +229,13 @@ func TestParser(t *testing.T) {
msgList.Add("https://"+url+"/#tid-1", "<a href='https://"+url+"/#tid-1'>https://"+url+"/#tid-1</a>") msgList.Add("https://"+url+"/#tid-1", "<a href='https://"+url+"/#tid-1'>https://"+url+"/#tid-1</a>")
msgList.Add("https://"+url+"/?hi=2", "<a href='https://"+url+"/?hi=2'>https://"+url+"/?hi=2</a>") msgList.Add("https://"+url+"/?hi=2", "<a href='https://"+url+"/?hi=2'>https://"+url+"/?hi=2</a>")
msgList.Add("#fid-1", "<a href='/forum/1'>#fid-1</a>") msgList.Add("#fid-1", "<a href='/forum/1'>#fid-1</a>")
msgList.Add(" #fid-1", " <a href='/forum/1'>#fid-1</a>")
msgList.Add("#fid-0", "<red>[Invalid Forum]</red>") msgList.Add("#fid-0", "<red>[Invalid Forum]</red>")
msgList.Add(" #fid-0", " <red>[Invalid Forum]</red>")
msgList.Add("#", "#") msgList.Add("#", "#")
msgList.Add("# ", "# ") msgList.Add("# ", "# ")
msgList.Add(" @", " @")
msgList.Add(" #", " #")
msgList.Add("#@", "#@") msgList.Add("#@", "#@")
msgList.Add("#@ ", "#@ ") msgList.Add("#@ ", "#@ ")
msgList.Add("#@1", "#@1") msgList.Add("#@1", "#@1")
@ -224,6 +256,7 @@ func TestParser(t *testing.T) {
msgList.Add("@2 ", "<red>[Invalid Profile]</red> ") msgList.Add("@2 ", "<red>[Invalid Profile]</red> ")
msgList.Add("@2 @2", "<red>[Invalid Profile]</red> <red>[Invalid Profile]</red>") msgList.Add("@2 @2", "<red>[Invalid Profile]</red> <red>[Invalid Profile]</red>")
msgList.Add("@1", "<a href='/user/admin.1' class='mention'>@Admin</a>") msgList.Add("@1", "<a href='/user/admin.1' class='mention'>@Admin</a>")
msgList.Add(" @1", " <a href='/user/admin.1' class='mention'>@Admin</a>")
msgList.Add("@1t", "<a href='/user/admin.1' class='mention'>@Admin</a>t") msgList.Add("@1t", "<a href='/user/admin.1' class='mention'>@Admin</a>t")
msgList.Add("@1 ", "<a href='/user/admin.1' class='mention'>@Admin</a> ") msgList.Add("@1 ", "<a href='/user/admin.1' class='mention'>@Admin</a> ")
msgList.Add("@1 @1", "<a href='/user/admin.1' class='mention'>@Admin</a> <a href='/user/admin.1' class='mention'>@Admin</a>") msgList.Add("@1 @1", "<a href='/user/admin.1' class='mention'>@Admin</a> <a href='/user/admin.1' class='mention'>@Admin</a>")