Optimistically expect simple strings for json (#6)

Performance update to appendJSONString: it now checks whether the input is a simple string, i.e. one that contains no JSON delimiters, control characters, or non-ASCII bytes. If the string is simple, the operation is only three appends: double-quote, string, double-quote. If a non-simple character is encountered, all of the previous characters are appended and the operation falls back to the original method for the remaining characters.

Before:
BenchmarkLogEmpty-8 100000000 17.1 ns/op
BenchmarkDisabled-8 500000000 4.12 ns/op
BenchmarkInfo-8 20000000 101 ns/op
BenchmarkContextFields-8 20000000 105 ns/op
BenchmarkLogFields-8 5000000 281 ns/op

After:
BenchmarkLogEmpty-8 100000000 16.7 ns/op
BenchmarkDisabled-8 500000000 3.79 ns/op
BenchmarkInfo-8 30000000 44.8 ns/op
BenchmarkContextFields-8 30000000 67.5 ns/op
BenchmarkLogFields-8 10000000 197 ns/op
2017-06-23 20:28:33 -07:00 · 2017-06-23 20:28:33 -07:00 · f8aa7a1962
commit f8aa7a1962
parent 2e3da1d5b5
2 changed files with 62 additions and 30 deletions
--- a/json.go
+++ b/json.go
@ -4,41 +4,71 @@ import "unicode/utf8"

 const hex = "0123456789abcdef"

+// appendJSONString encodes the input string to json and appends
+// the encoded string to the input byte slice.
+//
+// The operation loops through each byte in the string looking
+// for characters that need json or utf8 encoding. If the string
+// does not need encoding, then the string is appended in its
+// entirety to the byte slice.
+// If we encounter a byte that does need encoding, switch up
+// the operation and perform a byte-by-byte read-encode-append.
 func appendJSONString(dst []byte, s string) []byte {
+	// Start with a double quote.
 	dst = append(dst, '"')
-	for i := 0; i < len(s); {
-		if b := s[i]; b < utf8.RuneSelf {
-			switch b {
-			case '"', '\\':
-				dst = append(dst, '\\', b)
-			case '\b':
-				dst = append(dst, '\\', 'b')
-			case '\f':
-				dst = append(dst, '\\', 'f')
-			case '\n':
-				dst = append(dst, '\\', 'n')
-			case '\r':
-				dst = append(dst, '\\', 'r')
-			case '\t':
-				dst = append(dst, '\\', 't')
-			default:
-				if b >= 0x20 {
-					dst = append(dst, b)
-				} else {
-					dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF])
+	// Loop through each character in the string.
+	for i := 0; i < len(s); i++ {
+		// Check if the character needs encoding. Control characters, backslashes,
+		// and the double quote need json encoding. Bytes above the ascii
+		// boundary need utf8 decoding.
+		if s[i] < ' ' || s[i] == '\\' || s[i] == '"' || s[i] > 126 {
+			// We encountered a character that needs to be encoded. Let's
+			// append the previous simple characters to the byte slice
+			// and switch our operation to read and encode the remainder
+			// characters byte-by-byte.
+			dst = append(dst, s[:i]...)
+			for i < len(s) {
+				if b := s[i]; b < utf8.RuneSelf {
+					switch b {
+					case '"', '\\':
+						dst = append(dst, '\\', b)
+					case '\b':
+						dst = append(dst, '\\', 'b')
+					case '\f':
+						dst = append(dst, '\\', 'f')
+					case '\n':
+						dst = append(dst, '\\', 'n')
+					case '\r':
+						dst = append(dst, '\\', 'r')
+					case '\t':
+						dst = append(dst, '\\', 't')
+					default:
+						if b >= 0x20 {
+							dst = append(dst, b)
+						} else {
+							dst = append(dst, '\\', 'u', '0', '0',
+								hex[b>>4], hex[b&0xF])
+						}
+					}
+					i++
+					continue
 				}
+				r, size := utf8.DecodeRuneInString(s[i:])
+				if r == utf8.RuneError && size == 1 {
+					dst = append(dst, `\ufffd`...)
+					i++
+					continue
+				}
+				dst = append(dst, s[i:i+size]...)
+				i += size
 			}
-			i++
-			continue
+			// End with a double quote
+			return append(dst, '"')
 		}
-		r, size := utf8.DecodeRuneInString(s[i:])
-		if r == utf8.RuneError && size == 1 {
-			dst = append(dst, `\ufffd`...)
-			i++
-			continue
-		}
-		dst = append(dst, s[i:i+size]...)
-		i += size
 	}
+	// The string has no need for encoding and therefore is directly
+	// appended to the byte slice.
+	dst = append(dst, s...)
+	// End with a double quote
 	return append(dst, '"')
 }
--- a/json_test.go
+++ b/json_test.go
@ -40,6 +40,8 @@ func TestAppendJSONString(t *testing.T) {
 		{"\x1d", `"\u001d"`},
 		{"\x1e", `"\u001e"`},
 		{"\x1f", `"\u001f"`},
+		{"ascii", `"ascii"`},
+		{"emoji \u2764\ufe0f!", `"emoji ❤️!"`},
 	}

 	for _, tt := range encodeStringTests {