Support IPv6 hosts in the URL Parser.

Support m and gaming variants of YouTube in the Media Parser.
Reduce repetition in the URL utility functions slightly.

Added 30 new parser test cases.
This commit is contained in:
Azareal 2019-05-24 09:39:24 +10:00
parent 515e607587
commit bc20c495c3
2 changed files with 50 additions and 10 deletions

View File

@ -744,7 +744,8 @@ func validateURLString(data string) bool {
// ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s.
for ; len(data) > i; i++ {
if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) {
char := data[i]
if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [
return false
}
}
@ -770,7 +771,8 @@ func validatedURLBytes(data []byte) (url []byte) {
// ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s.
for ; datalen > i; i++ {
if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) {
char := data[i]
if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [
return InvalidURL
}
}
@ -797,7 +799,8 @@ func PartialURLString(data string) (url []byte) {
// ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s.
for ; end >= i; i++ {
if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) {
char := data[i]
if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [
end = i
}
}
@ -807,6 +810,7 @@ func PartialURLString(data string) (url []byte) {
}
// TODO: Write a test for this
// TODO: Handle the host bits differently from the paths...
func PartialURLStringLen(data string) (int, bool) {
i := 0
if len(data) >= 6 {
@ -831,11 +835,12 @@ func PartialURLStringLen(data string) (int, bool) {
f := i
//fmt.Println("f:",f)
for ; len(data) > i; i++ {
if data[i] < 33 { // space and invisibles
char := data[i]
if char < 33 { // space and invisibles
//fmt.Println("e2:",i)
return i, i != f
} else if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) {
//log.Print("Bad Character: ", data[i])
} else if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && char != ']' && !(char > 44 && char < 58) && !(char > 64 && char < 92) && !(char > 96 && char < 123) { // 90 is Z, 91 is [
//log.Print("Bad Character: ", char)
//fmt.Println("e3")
return i, false
}
@ -850,6 +855,7 @@ func PartialURLStringLen(data string) (int, bool) {
}
// TODO: Write a test for this
// TODO: Get this to support IPv6 hosts, this isn't currently done as this is used in the bbcode plugin where it thinks the [ is a IPv6 host
func PartialURLStringLen2(data string) int {
i := 0
if len(data) >= 6 {
@ -867,8 +873,9 @@ func PartialURLStringLen2(data string) int {
// ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s.
for ; len(data) > i; i++ {
if data[i] != '\\' && data[i] != '_' && data[i] != ':' && data[i] != '?' && data[i] != '&' && data[i] != '=' && data[i] != ';' && data[i] != '@' && data[i] != '#' && !(data[i] > 44 && data[i] < 58) && !(data[i] > 64 && data[i] < 91) && !(data[i] > 96 && data[i] < 123) {
//log.Print("Bad Character: ", data[i])
char := data[i]
if char != '\\' && char != '_' && char != ':' && char != '?' && char != '&' && char != '=' && char != ';' && char != '@' && char != '#' && !(char > 44 && char < 58) && !(char > 64 && char < 91) && !(char > 96 && char < 123) { // 90 is Z, 91 is [
//log.Print("Bad Character: ", char)
return i
}
}
@ -938,12 +945,12 @@ func parseMediaString(data string) (media MediaEmbed, ok bool) {
// ? - I don't think this hostname will hit every YT domain
// TODO: Make this a more customisable handler rather than hard-coding it in here
if hostname == "www.youtube.com" && path == "/watch" {
if strings.HasSuffix(hostname,".youtube.com") && path == "/watch" {
video, ok := query["v"]
if ok && len(video) >= 1 && video[0] != "" {
media.Type = "raw"
// TODO: Filter the URL to make sure no nasties end up in there
media.Body = "<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/" + video[0] + "' frameborder='0' allowfullscreen></iframe>"
media.Body = "<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/" + video[0] + "' frameborder=0 allowfullscreen></iframe>"
return media, true
}
}

View File

@ -190,6 +190,34 @@ func TestParser(t *testing.T) {
msgList.Add("//"+c.Site.URL+"\n", "<a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a><br>")
msgList.Add("//"+c.Site.URL+"\n//"+c.Site.URL, "<a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a><br><a href='//"+c.Site.URL+"'>//"+c.Site.URL+"</a>")
var local = func(url string) {
msgList.Add("//"+url, "<a href='//"+url+"'>//"+url+"</a>")
msgList.Add("//"+url+"\n", "<a href='//"+url+"'>//"+url+"</a><br>")
msgList.Add("//"+url+"\n//"+url, "<a href='//"+url+"'>//"+url+"</a><br><a href='//"+url+"'>//"+url+"</a>")
}
local("localhost")
local("127.0.0.1")
local("[::1]")
msgList.Add("https://www.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
//msgList.Add("https://www.youtube.com/watch?v=;","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/;' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://www.youtube.com/watch?v=d;","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/d' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://www.youtube.com/watch?v=d;d","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/d' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://www.youtube.com/watch?v=alert()","<red>[Invalid URL]</red>()")
msgList.Add("https://www.youtube.com/watch?v=js:alert()","<red>[Invalid URL]</red>()")
msgList.Add("https://www.youtube.com/watch?v='+><script>alert(\"\")</script><+'","<red>[Invalid URL]</red>'+><script>alert(\"\")</script><+'")
msgList.Add("https://www.youtube.com/watch?v='+onready='alert(\"\")'+'","<red>[Invalid URL]</red>'+onready='alert(\"\")'+'")
msgList.Add(" https://www.youtube.com/watch?v=lalalalala"," <iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://www.youtube.com/watch?v=lalalalala tt","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe> tt")
msgList.Add("https://www.youtube.com/watch?v=lalalalala&d=haha","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://gaming.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://gaming.youtube.com/watch?v=lalalalala&d=haha","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://m.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("https://m.youtube.com/watch?v=lalalalala&d=haha","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("http://www.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("//www.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
//msgList.Add("www.youtube.com/watch?v=lalalalala","<iframe class='postIframe' src='https://www.youtube-nocookie.com/embed/lalalalala' frameborder=0 allowfullscreen></iframe>")
msgList.Add("#tid-1", "<a href='/topic/1'>#tid-1</a>")
msgList.Add("##tid-1", "##tid-1")
msgList.Add("# #tid-1", "# #tid-1")
@ -201,9 +229,13 @@ func TestParser(t *testing.T) {
msgList.Add("https://"+url+"/#tid-1", "<a href='https://"+url+"/#tid-1'>https://"+url+"/#tid-1</a>")
msgList.Add("https://"+url+"/?hi=2", "<a href='https://"+url+"/?hi=2'>https://"+url+"/?hi=2</a>")
msgList.Add("#fid-1", "<a href='/forum/1'>#fid-1</a>")
msgList.Add(" #fid-1", " <a href='/forum/1'>#fid-1</a>")
msgList.Add("#fid-0", "<red>[Invalid Forum]</red>")
msgList.Add(" #fid-0", " <red>[Invalid Forum]</red>")
msgList.Add("#", "#")
msgList.Add("# ", "# ")
msgList.Add(" @", " @")
msgList.Add(" #", " #")
msgList.Add("#@", "#@")
msgList.Add("#@ ", "#@ ")
msgList.Add("#@1", "#@1")
@ -224,6 +256,7 @@ func TestParser(t *testing.T) {
msgList.Add("@2 ", "<red>[Invalid Profile]</red> ")
msgList.Add("@2 @2", "<red>[Invalid Profile]</red> <red>[Invalid Profile]</red>")
msgList.Add("@1", "<a href='/user/admin.1' class='mention'>@Admin</a>")
msgList.Add(" @1", " <a href='/user/admin.1' class='mention'>@Admin</a>")
msgList.Add("@1t", "<a href='/user/admin.1' class='mention'>@Admin</a>t")
msgList.Add("@1 ", "<a href='/user/admin.1' class='mention'>@Admin</a> ")
msgList.Add("@1 @1", "<a href='/user/admin.1' class='mention'>@Admin</a> <a href='/user/admin.1' class='mention'>@Admin</a>")