Rewrote the reverse HTML parser and also fixed the bug with Trumbowyg's span tags sometimes becoming visible. Fixes #9

This commit is contained in:
Azareal 2018-06-26 00:28:04 +10:00
parent 0306c8bf44
commit 1f28ecb804
1 changed files with 132 additions and 49 deletions

View File

@ -163,6 +163,13 @@ func shortcodeToUnicode(msg string) string {
return msg
}
type TagToAction struct {
Suffix string
Do func(*TagToAction, bool, int, []rune) (int, string) // func(tagToAction,open,i,runes) (newI, output)
Depth int // For use by Do
PartialMode bool
}
// TODO: Write tests for this
// TODO: Preparse Markdown and normalize it into HTML?
func PreparseMessage(msg string) string {
@ -175,12 +182,9 @@ func PreparseMessage(msg string) string {
msg = SanitiseBody(msg)
msg = strings.Replace(msg, " ", "", -1)
//fmt.Println("before msg: ", msg)
var runes = []rune(msg)
msg = ""
var inBold = false
var inItalic = false
var inStrike = false
var inUnderline = false
var stepForward = func(i int, step int, runes []rune) int {
i += step
if i < len(runes) {
@ -188,65 +192,144 @@ func PreparseMessage(msg string) string {
}
return i - step
}
// TODO: We can maybe reduce the size of this by using an offset?
// TODO: Move some of these closures out of this function to make things a little more efficient
var allowedTags = [][]string{
'e': []string{"m"},
's': []string{"", "trong", "pan"},
'd': []string{"el"},
'u': []string{""},
'b': []string{""},
}
var buildLitMatch = func(tag string) func(*TagToAction, bool, int, []rune) (int, string) {
return func(action *TagToAction, open bool, _ int, _ []rune) (int, string) {
if open {
action.Depth++
return -1, "<" + tag + ">"
}
if action.Depth <= 0 {
return -1, ""
}
action.Depth--
return -1, "</" + tag + ">"
}
}
var tagToAction = [][]*TagToAction{
'e': []*TagToAction{&TagToAction{"m", buildLitMatch("em"), 0, false}},
's': []*TagToAction{
&TagToAction{"", buildLitMatch("del"), 0, false},
&TagToAction{"trong", buildLitMatch("strong"), 0, false},
// Hides the span tags Trumbowyg loves blasting out randomly
&TagToAction{"pan", func(act *TagToAction, open bool, i int, runes []rune) (int, string) {
if open {
act.Depth++
//fmt.Println("skipping attributes")
for ; i < len(runes); i++ {
if runes[i] == '&' && peekMatch(i, "gt;", runes) {
//fmt.Println("found tag exit")
return i + 3, " "
}
}
return -1, " "
}
if act.Depth <= 0 {
return -1, " "
}
act.Depth--
return -1, " "
}, 0, true},
},
'd': []*TagToAction{&TagToAction{"el", buildLitMatch("del"), 0, false}},
'u': []*TagToAction{&TagToAction{"", buildLitMatch("u"), 0, false}},
'b': []*TagToAction{&TagToAction{"", buildLitMatch("b"), 0, false}},
}
// TODO: Implement a less literal parser
for i := 0; i < len(runes); i++ {
char := runes[i]
if char == '&' && peek(i, 1, runes) == 'l' && peek(i, 2, runes) == 't' && peek(i, 3, runes) == ';' {
if char == '&' && peekMatch(i, "lt;", runes) {
//fmt.Println("found less than")
i = stepForward(i, 4, runes)
char := runes[i]
//fmt.Println("char: ", char)
//fmt.Println("string(char): ", string(char))
if int(char) >= len(allowedTags) {
//fmt.Println("sentinel char out of bounds")
i -= 4
continue
}
var closeTag bool
if char == '/' {
//fmt.Println("found close tag")
i = stepForward(i, 1, runes)
char := runes[i]
if inItalic && char == 'e' && peekMatch(i, "m&gt;", runes) {
i += 5
inItalic = false
msg += "</em>"
} else if inBold && char == 's' && peekMatch(i, "trong&gt;", runes) {
i += 9
inBold = false
msg += "</strong>"
} else if inStrike && char == 'd' && peekMatch(i, "el&gt;", runes) {
i += 6
inStrike = false
msg += "</del>"
} else if inUnderline && char == 'u' && peekMatch(i, "&gt;", runes) {
i += 4
inUnderline = false
msg += "</u>"
char = runes[i]
closeTag = true
}
tags := allowedTags[char]
if len(tags) == 0 {
//fmt.Println("couldn't find char in allowedTags")
if closeTag {
i -= 5
} else {
i -= 4
}
} else if !inItalic && char == 'e' && peekMatch(i, "m&gt;", runes) {
i += 5
inItalic = true
msg += "<em>"
} else if !inBold && char == 's' && peekMatch(i, "trong&gt;", runes) {
i += 9
inBold = true
msg += "<strong>"
} else if !inStrike && char == 'd' && peekMatch(i, "el&gt;", runes) {
i += 6
inStrike = true
msg += "<del>"
} else if !inUnderline && char == 'u' && peekMatch(i, "&gt;", runes) {
i += 4
inUnderline = true
msg += "<u>"
continue
}
// TODO: Scan through tags and make sure the suffix is present to reduce the number of false positives which hit the loop below
//fmt.Printf("tags: %+v\n", tags)
var newI = -1
var out string
toActionList := tagToAction[char]
//fmt.Println("toActionList: ", toActionList)
for _, toAction := range toActionList {
//fmt.Printf("toAction: %+v\n", toAction)
// TODO: Optimise this, maybe with goto or a function call to avoid scanning the text twice?
if (toAction.PartialMode && !closeTag && peekMatch(i, toAction.Suffix, runes)) || peekMatch(i, toAction.Suffix+"&gt;", runes) {
//fmt.Println("peekMatched")
newI, out = toAction.Do(toAction, !closeTag, i, runes)
//fmt.Println("newI: ", newI)
//fmt.Println("i: ", i)
if newI != -1 {
i = newI
} else {
i += len(toAction.Suffix + "&gt;")
}
//fmt.Println("i: ", i)
//fmt.Println("out: ", out)
break
}
}
if out == "" {
//fmt.Println("no out")
if closeTag {
i -= 5
} else {
i -= 4
}
} else if out != " " {
msg += out
}
} else {
msg += string(char)
}
}
if inItalic {
msg += "</em>"
}
if inBold {
msg += "</strong>"
}
if inStrike {
msg += "</del>"
}
if inUnderline {
msg += "</u>"
for _, actionList := range tagToAction {
for _, toAction := range actionList {
if toAction.Depth > 1 {
for ; toAction.Depth > 0; toAction.Depth-- {
_, out := toAction.Do(toAction, false, len(runes), runes)
if out != "" {
msg += out
}
}
}
}
}
//fmt.Println("msg: ", msg)
return shortcodeToUnicode(msg)
}