Optimize JSON string encoding using a lookup table
benchstat old new

name                              old time/op  new time/op  delta
AppendString/MultiBytesFirst-8    77.9ns ± 5%  70.2ns ± 1%   -9.88%  (p=0.008 n=5+5)
AppendString/MultiBytesMiddle-8   64.2ns ± 1%  56.3ns ± 5%  -12.19%  (p=0.008 n=5+5)
AppendString/MultiBytesLast-8     51.2ns ± 2%  45.2ns ± 4%  -11.65%  (p=0.008 n=5+5)
AppendString/NoEncoding-8         36.2ns ± 4%  34.0ns ± 6%     ~     (p=0.087 n=5+5)
AppendString/EncodingFirst-8      67.7ns ± 2%  59.4ns ± 2%  -12.26%  (p=0.008 n=5+5)
AppendString/EncodingMiddle-8     56.5ns ± 2%  50.6ns ± 5%  -10.54%  (p=0.008 n=5+5)
AppendString/EncodingLast-8       41.3ns ± 1%  39.6ns ± 5%   -4.11%  (p=0.024 n=5+5)
AppendBytes/MultiBytesLast-8      53.5ns ± 6%  45.6ns ± 4%  -14.79%  (p=0.008 n=5+5)
AppendBytes/NoEncoding-8          36.3ns ± 3%  28.6ns ± 3%  -21.10%  (p=0.008 n=5+5)
AppendBytes/EncodingFirst-8       67.3ns ± 4%  62.1ns ± 4%   -7.75%  (p=0.008 n=5+5)
AppendBytes/EncodingMiddle-8      59.2ns ± 7%  51.0ns ± 6%  -13.85%  (p=0.008 n=5+5)
AppendBytes/EncodingLast-8        43.7ns ± 6%  34.4ns ± 2%  -21.32%  (p=0.008 n=5+5)
AppendBytes/MultiBytesFirst-8     77.7ns ± 2%  71.2ns ± 3%   -8.37%  (p=0.008 n=5+5)
AppendBytes/MultiBytesMiddle-8    63.6ns ± 3%  57.8ns ± 5%   -9.12%  (p=0.008 n=5+5)
This commit is contained in:
parent
5250a1ba2d
commit
4ea03de40d
|
@ -0,0 +1,85 @@
|
||||||
|
package json
|
||||||
|
|
||||||
|
import "unicode/utf8"
|
||||||
|
|
||||||
|
// AppendBytes is a mirror of appendString with []byte arg
|
||||||
|
func AppendBytes(dst, s []byte) []byte {
|
||||||
|
dst = append(dst, '"')
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
if !noEscapeTable[s[i]] {
|
||||||
|
dst = appendBytesComplex(dst, s, i)
|
||||||
|
return append(dst, '"')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dst = append(dst, s...)
|
||||||
|
return append(dst, '"')
|
||||||
|
}
|
||||||
|
|
||||||
|
// AppendHex encodes the input bytes to a hex string and appends
|
||||||
|
// the encoded string to the input byte slice.
|
||||||
|
//
|
||||||
|
// The operation loops though each byte and encodes it as hex using
|
||||||
|
// the hex lookup table.
|
||||||
|
func AppendHex(dst, s []byte) []byte {
|
||||||
|
dst = append(dst, '"')
|
||||||
|
for _, v := range s {
|
||||||
|
dst = append(dst, hex[v>>4], hex[v&0x0f])
|
||||||
|
}
|
||||||
|
return append(dst, '"')
|
||||||
|
}
|
||||||
|
|
||||||
|
// appendBytesComplex is a mirror of the appendStringComplex
|
||||||
|
// with []byte arg
|
||||||
|
func appendBytesComplex(dst, s []byte, i int) []byte {
|
||||||
|
start := 0
|
||||||
|
for i < len(s) {
|
||||||
|
b := s[i]
|
||||||
|
if b >= utf8.RuneSelf {
|
||||||
|
r, size := utf8.DecodeRune(s[i:])
|
||||||
|
if r == utf8.RuneError && size == 1 {
|
||||||
|
if start < i {
|
||||||
|
dst = append(dst, s[start:i]...)
|
||||||
|
}
|
||||||
|
dst = append(dst, `\ufffd`...)
|
||||||
|
i += size
|
||||||
|
start = i
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
i += size
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if noEscapeTable[b] {
|
||||||
|
i++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// We encountered a character that needs to be encoded.
|
||||||
|
// Let's append the previous simple characters to the byte slice
|
||||||
|
// and switch our operation to read and encode the remainder
|
||||||
|
// characters byte-by-byte.
|
||||||
|
if start < i {
|
||||||
|
dst = append(dst, s[start:i]...)
|
||||||
|
}
|
||||||
|
switch b {
|
||||||
|
case '"', '\\':
|
||||||
|
dst = append(dst, '\\', b)
|
||||||
|
case '\b':
|
||||||
|
dst = append(dst, '\\', 'b')
|
||||||
|
case '\f':
|
||||||
|
dst = append(dst, '\\', 'f')
|
||||||
|
case '\n':
|
||||||
|
dst = append(dst, '\\', 'n')
|
||||||
|
case '\r':
|
||||||
|
dst = append(dst, '\\', 'r')
|
||||||
|
case '\t':
|
||||||
|
dst = append(dst, '\\', 't')
|
||||||
|
default:
|
||||||
|
dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF])
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
start = i
|
||||||
|
}
|
||||||
|
if start < len(s) {
|
||||||
|
dst = append(dst, s[start:]...)
|
||||||
|
}
|
||||||
|
return dst
|
||||||
|
}
|
|
@ -0,0 +1,82 @@
|
||||||
|
package json
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"unicode"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestAppendBytes(t *testing.T) {
|
||||||
|
for _, tt := range encodeStringTests {
|
||||||
|
b := AppendBytes([]byte{}, []byte(tt.in))
|
||||||
|
if got, want := string(b), tt.out; got != want {
|
||||||
|
t.Errorf("appendBytes(%q) = %#q, want %#q", tt.in, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAppendHex(t *testing.T) {
|
||||||
|
for _, tt := range encodeHexTests {
|
||||||
|
b := AppendHex([]byte{}, []byte{tt.in})
|
||||||
|
if got, want := string(b), tt.out; got != want {
|
||||||
|
t.Errorf("appendHex(%x) = %s, want %s", tt.in, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStringBytes(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
// Test that encodeState.stringBytes and encodeState.string use the same encoding.
|
||||||
|
var r []rune
|
||||||
|
for i := '\u0000'; i <= unicode.MaxRune; i++ {
|
||||||
|
r = append(r, i)
|
||||||
|
}
|
||||||
|
s := string(r) + "\xff\xff\xffhello" // some invalid UTF-8 too
|
||||||
|
|
||||||
|
enc := string(AppendString([]byte{}, s))
|
||||||
|
encBytes := string(AppendBytes([]byte{}, []byte(s)))
|
||||||
|
|
||||||
|
if enc != encBytes {
|
||||||
|
i := 0
|
||||||
|
for i < len(enc) && i < len(encBytes) && enc[i] == encBytes[i] {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
enc = enc[i:]
|
||||||
|
encBytes = encBytes[i:]
|
||||||
|
i = 0
|
||||||
|
for i < len(enc) && i < len(encBytes) && enc[len(enc)-i-1] == encBytes[len(encBytes)-i-1] {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
enc = enc[:len(enc)-i]
|
||||||
|
encBytes = encBytes[:len(encBytes)-i]
|
||||||
|
|
||||||
|
if len(enc) > 20 {
|
||||||
|
enc = enc[:20] + "..."
|
||||||
|
}
|
||||||
|
if len(encBytes) > 20 {
|
||||||
|
encBytes = encBytes[:20] + "..."
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Errorf("encodings differ at %#q vs %#q", enc, encBytes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkAppendBytes(b *testing.B) {
|
||||||
|
tests := map[string]string{
|
||||||
|
"NoEncoding": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||||
|
"EncodingFirst": `"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||||
|
"EncodingMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa"aaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||||
|
"EncodingLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`,
|
||||||
|
"MultiBytesFirst": `❤️aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||||
|
"MultiBytesMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa❤️aaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||||
|
"MultiBytesLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa❤️`,
|
||||||
|
}
|
||||||
|
for name, str := range tests {
|
||||||
|
byt := []byte(str)
|
||||||
|
b.Run(name, func(b *testing.B) {
|
||||||
|
buf := make([]byte, 0, 100)
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = AppendBytes(buf, byt)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
|
@ -4,6 +4,14 @@ import "unicode/utf8"
|
||||||
|
|
||||||
const hex = "0123456789abcdef"
|
const hex = "0123456789abcdef"
|
||||||
|
|
||||||
|
// noEscapeTable tells, for every possible byte value, whether that byte can
// be copied into a JSON string verbatim. Only printable ASCII (0x20 through
// 0x7e) other than '\\' and '"' is marked true; control characters, 0x7f and
// every byte >= 0x80 stay false.
//
// The table is built by a variable initializer rather than inside init():
// init functions run only after all package-level variables have been
// initialized, so a package-level initializer that encodes a string would
// otherwise observe an all-false table. With an initializer expression, Go's
// dependency-ordered initialization populates the table before any such use.
var noEscapeTable = newNoEscapeTable()

// newNoEscapeTable builds the lookup table stored in noEscapeTable.
func newNoEscapeTable() [256]bool {
	var table [256]bool
	for c := 0x20; c <= 0x7e; c++ {
		table[c] = c != '\\' && c != '"'
	}
	return table
}
|
||||||
|
|
||||||
// AppendStrings encodes the input strings to json and
|
// AppendStrings encodes the input strings to json and
|
||||||
// appends the encoded string list to the input byte slice.
|
// appends the encoded string list to the input byte slice.
|
||||||
func AppendStrings(dst []byte, vals []string) []byte {
|
func AppendStrings(dst []byte, vals []string) []byte {
|
||||||
|
@ -38,7 +46,7 @@ func AppendString(dst []byte, s string) []byte {
|
||||||
// Check if the character needs encoding. Control characters, slashes,
|
// Check if the character needs encoding. Control characters, slashes,
|
||||||
// and the double quote need json encoding. Bytes above the ascii
|
// and the double quote need json encoding. Bytes above the ascii
|
||||||
// boundary needs utf8 encoding.
|
// boundary needs utf8 encoding.
|
||||||
if s[i] < 0x20 || s[i] > 0x7e || s[i] == '\\' || s[i] == '"' {
|
if !noEscapeTable[s[i]] {
|
||||||
// We encountered a character that needs to be encoded. Switch
|
// We encountered a character that needs to be encoded. Switch
|
||||||
// to complex version of the algorithm.
|
// to complex version of the algorithm.
|
||||||
dst = appendStringComplex(dst, s, i)
|
dst = appendStringComplex(dst, s, i)
|
||||||
|
@ -76,89 +84,7 @@ func appendStringComplex(dst []byte, s string, i int) []byte {
|
||||||
i += size
|
i += size
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if b >= 0x20 && b <= 0x7e && b != '\\' && b != '"' {
|
if noEscapeTable[b] {
|
||||||
i++
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// We encountered a character that needs to be encoded.
|
|
||||||
// Let's append the previous simple characters to the byte slice
|
|
||||||
// and switch our operation to read and encode the remainder
|
|
||||||
// characters byte-by-byte.
|
|
||||||
if start < i {
|
|
||||||
dst = append(dst, s[start:i]...)
|
|
||||||
}
|
|
||||||
switch b {
|
|
||||||
case '"', '\\':
|
|
||||||
dst = append(dst, '\\', b)
|
|
||||||
case '\b':
|
|
||||||
dst = append(dst, '\\', 'b')
|
|
||||||
case '\f':
|
|
||||||
dst = append(dst, '\\', 'f')
|
|
||||||
case '\n':
|
|
||||||
dst = append(dst, '\\', 'n')
|
|
||||||
case '\r':
|
|
||||||
dst = append(dst, '\\', 'r')
|
|
||||||
case '\t':
|
|
||||||
dst = append(dst, '\\', 't')
|
|
||||||
default:
|
|
||||||
dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF])
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
start = i
|
|
||||||
}
|
|
||||||
if start < len(s) {
|
|
||||||
dst = append(dst, s[start:]...)
|
|
||||||
}
|
|
||||||
return dst
|
|
||||||
}
|
|
||||||
|
|
||||||
// AppendBytes is a mirror of appendString with []byte arg
|
|
||||||
func AppendBytes(dst, s []byte) []byte {
|
|
||||||
dst = append(dst, '"')
|
|
||||||
for i := 0; i < len(s); i++ {
|
|
||||||
if s[i] < 0x20 || s[i] > 0x7e || s[i] == '\\' || s[i] == '"' {
|
|
||||||
dst = appendBytesComplex(dst, s, i)
|
|
||||||
return append(dst, '"')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
dst = append(dst, s...)
|
|
||||||
return append(dst, '"')
|
|
||||||
}
|
|
||||||
|
|
||||||
// AppendHex encodes the input bytes to a hex string and appends
|
|
||||||
// the encoded string to the input byte slice.
|
|
||||||
//
|
|
||||||
// The operation loops though each byte and encodes it as hex using
|
|
||||||
// the hex lookup table.
|
|
||||||
func AppendHex(dst, s []byte) []byte {
|
|
||||||
dst = append(dst, '"')
|
|
||||||
for _, v := range s {
|
|
||||||
dst = append(dst, hex[v>>4], hex[v&0x0f])
|
|
||||||
}
|
|
||||||
return append(dst, '"')
|
|
||||||
}
|
|
||||||
|
|
||||||
// appendBytesComplex is a mirror of the appendStringComplex
|
|
||||||
// with []byte arg
|
|
||||||
func appendBytesComplex(dst, s []byte, i int) []byte {
|
|
||||||
start := 0
|
|
||||||
for i < len(s) {
|
|
||||||
b := s[i]
|
|
||||||
if b >= utf8.RuneSelf {
|
|
||||||
r, size := utf8.DecodeRune(s[i:])
|
|
||||||
if r == utf8.RuneError && size == 1 {
|
|
||||||
if start < i {
|
|
||||||
dst = append(dst, s[start:i]...)
|
|
||||||
}
|
|
||||||
dst = append(dst, `\ufffd`...)
|
|
||||||
i += size
|
|
||||||
start = i
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
i += size
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if b >= 0x20 && b <= 0x7e && b != '\\' && b != '"' {
|
|
||||||
i++
|
i++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,6 @@ package json
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
"unicode"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var encodeStringTests = []struct {
|
var encodeStringTests = []struct {
|
||||||
|
@ -73,61 +72,6 @@ func TestAppendString(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAppendBytes(t *testing.T) {
|
|
||||||
for _, tt := range encodeStringTests {
|
|
||||||
b := AppendBytes([]byte{}, []byte(tt.in))
|
|
||||||
if got, want := string(b), tt.out; got != want {
|
|
||||||
t.Errorf("appendBytes(%q) = %#q, want %#q", tt.in, got, want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAppendHex(t *testing.T) {
|
|
||||||
for _, tt := range encodeHexTests {
|
|
||||||
b := AppendHex([]byte{}, []byte{tt.in})
|
|
||||||
if got, want := string(b), tt.out; got != want {
|
|
||||||
t.Errorf("appendHex(%x) = %s, want %s", tt.in, got, want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestStringBytes(t *testing.T) {
|
|
||||||
t.Parallel()
|
|
||||||
// Test that encodeState.stringBytes and encodeState.string use the same encoding.
|
|
||||||
var r []rune
|
|
||||||
for i := '\u0000'; i <= unicode.MaxRune; i++ {
|
|
||||||
r = append(r, i)
|
|
||||||
}
|
|
||||||
s := string(r) + "\xff\xff\xffhello" // some invalid UTF-8 too
|
|
||||||
|
|
||||||
enc := string(AppendString([]byte{}, s))
|
|
||||||
encBytes := string(AppendBytes([]byte{}, []byte(s)))
|
|
||||||
|
|
||||||
if enc != encBytes {
|
|
||||||
i := 0
|
|
||||||
for i < len(enc) && i < len(encBytes) && enc[i] == encBytes[i] {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
enc = enc[i:]
|
|
||||||
encBytes = encBytes[i:]
|
|
||||||
i = 0
|
|
||||||
for i < len(enc) && i < len(encBytes) && enc[len(enc)-i-1] == encBytes[len(encBytes)-i-1] {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
enc = enc[:len(enc)-i]
|
|
||||||
encBytes = encBytes[:len(encBytes)-i]
|
|
||||||
|
|
||||||
if len(enc) > 20 {
|
|
||||||
enc = enc[:20] + "..."
|
|
||||||
}
|
|
||||||
if len(encBytes) > 20 {
|
|
||||||
encBytes = encBytes[:20] + "..."
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Errorf("encodings differ at %#q vs %#q", enc, encBytes)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkAppendString(b *testing.B) {
|
func BenchmarkAppendString(b *testing.B) {
|
||||||
tests := map[string]string{
|
tests := map[string]string{
|
||||||
"NoEncoding": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
"NoEncoding": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
||||||
|
@ -147,24 +91,3 @@ func BenchmarkAppendString(b *testing.B) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkAppendBytes(b *testing.B) {
|
|
||||||
tests := map[string]string{
|
|
||||||
"NoEncoding": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
|
||||||
"EncodingFirst": `"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
|
||||||
"EncodingMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa"aaaaaaaaaaaaaaaaaaaaaaaa`,
|
|
||||||
"EncodingLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`,
|
|
||||||
"MultiBytesFirst": `❤️aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`,
|
|
||||||
"MultiBytesMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa❤️aaaaaaaaaaaaaaaaaaaaaaaa`,
|
|
||||||
"MultiBytesLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa❤️`,
|
|
||||||
}
|
|
||||||
for name, str := range tests {
|
|
||||||
byt := []byte(str)
|
|
||||||
b.Run(name, func(b *testing.B) {
|
|
||||||
buf := make([]byte, 0, 100)
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = AppendBytes(buf, byt)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue