From c4f2f0df90a78d6fbbbe0e00d422b58df5795d55 Mon Sep 17 00:00:00 2001 From: Azareal Date: Thu, 25 Jun 2020 17:46:09 +1000 Subject: [PATCH] support youtu.be urls add noscript tags for video embeds support youtube timestamps validate youtube video parameters add more parser tests --- common/parser.go | 99 ++++++++++++++++++++++++++++++++++++++++++++++-- parser_test.go | 38 ++++++++++++------- 2 files changed, 120 insertions(+), 17 deletions(-) diff --git a/common/parser.go b/common/parser.go index 37827e45..67ea8e77 100644 --- a/common/parser.go +++ b/common/parser.go @@ -1037,6 +1037,7 @@ func parseMediaString(data string, settings *ParseSettings) (media MediaEmbed, o media.Trusted = samesite path := uurl.EscapedPath() + //fmt.Println("path:", path) pathFrags := strings.Split(path, "/") if len(pathFrags) >= 2 { if samesite && pathFrags[1] == "attachs" && (scheme == "http:" || scheme == "https:") { @@ -1067,22 +1068,90 @@ func parseMediaString(data string, settings *ParseSettings) (media MediaEmbed, o } } + //fmt.Printf("settings.NoEmbed: %+v\n", settings.NoEmbed) + //settings.NoEmbed = false if !settings.NoEmbed { // ? - I don't think this hostname will hit every YT domain // TODO: Make this a more customisable handler rather than hard-coding it in here + ytInvalid := func(v string) bool { + for _, ch := range v { + if !((ch > 47 && ch < 58) || (ch > 64 && ch < 91) || (ch > 96 && ch < 123) || ch == '-' || ch == '_') { + var sport string + if port != "443" && port != "80" && port != "" { + sport = ":" + port + } + var q string + if len(uurl.RawQuery) > 0 { + q = "?" 
// parseDuration parses a YouTube-style timestamp such as "1h2m3s", "90s" or
// "90" and returns its seconds, minutes and hours components.
//
// Digits accumulate until a unit letter ('h', 'm' or 's') assigns them to the
// matching component; when a unit is repeated, the last occurrence wins.
// Digits left at the end of the string without a unit are read as seconds,
// matching the bare-seconds form ("t=90") that video links commonly use.
// Characters other than digits and the three unit letters are skipped.
//
// A component whose digits are missing, or whose value does not fit in an
// int, is reported as 0 instead of a saturated value, so callers can safely
// combine the components arithmetically.
//
// When there are no hours or minutes and fewer than two seconds, the seconds
// are reported as 0 so that an accidental "t=1s" is not treated as a
// deliberate timestamp.
func parseDuration(dur string) (s, m, h int) {
	var ibuf []byte

	// take converts the buffered digits into a number and resets the buffer
	// ready for the next component.
	take := func() int {
		n, err := strconv.Atoi(string(ibuf))
		ibuf = ibuf[:0]
		if err != nil {
			// Empty buffer or out-of-range value. Atoi returns the nearest
			// representable int on overflow, which must not leak to callers.
			return 0
		}
		return n
	}

	for _, ch := range dur {
		switch {
		case ch >= '0' && ch <= '9':
			ibuf = append(ibuf, byte(ch))
		case ch == 'h':
			h = take()
		case ch == 'm':
			m = take()
		case ch == 's':
			s = take()
		}
	}

	// Unit-less trailing digits are seconds.
	if len(ibuf) > 0 {
		s = take()
	}

	// Stop accidental uses of timestamps
	if h == 0 && m == 0 && s < 2 {
		s = 0
	}
	return s, m, h
}
l.Add("https://gaming.youtube.com/watch?v=lalalalala", "") - l.Add("https://gaming.youtube.com/watch?v=lalalalala&d=haha", "") - l.Add("https://m.youtube.com/watch?v=lalalalala", "") - l.Add("https://m.youtube.com/watch?v=lalalalala&d=haha", "") - l.Add("http://www.youtube.com/watch?v=lalalalala", "") - l.Add("//www.youtube.com/watch?v=lalalalala", "") - //l.Add("www.youtube.com/watch?v=lalalalala","") + l.Add(" https://www.youtube.com/watch?v=lalalalala", " ") + l.Add("https://www.youtube.com/watch?v=lalalalala tt", " tt") + l.Add("https://www.youtube.com/watch?v=lalalalala&d=haha", "") + l.Add("https://gaming.youtube.com/watch?v=lalalalala", "") + l.Add("https://gaming.youtube.com/watch?v=lalalalala&d=haha", "") + l.Add("https://youtu.be/lalalalala", "") + l.Add("https://m.youtube.com/watch?v=lalalalala", "") + l.Add("https://m.youtube.com/watch?v=lalalalala&d=haha", "") + l.Add("http://www.youtube.com/watch?v=lalalalala", "") + l.Add("//www.youtube.com/watch?v=lalalalala", "") + //l.Add("www.youtube.com/watch?v=lalalalala","") + l.Add("https://www.nicovideo.jp/watch/sm111111", "") + //l.Add("www.nicovideo.jp/watch/sm111111", "") + //l.Add("www.nicovideo.jp/watch/smlalalalala", "www.nicovideo.jp/watch/smlalalalala") + l.Add("https://www.nicovideo.jp/watch/smlalalalala", "www.nicovideo.jp/watch/smlalalalala") + l.Add("//www.youtube.com/watch?v=lalalalala&t=30s", "") l.Add("#tid-1", "#tid-1") l.Add("##tid-1", "##tid-1") + l.Add("#@tid-1", "#@tid-1") l.Add("# #tid-1", "# #tid-1") l.Add("@ #tid-1", "[Invalid Profile]#tid-1") l.Add("@#tid-1", "[Invalid Profile]tid-1")