zlog/internal/json/string.go
Olivier Poitrey 4ea03de40d Optimize JSON string encoding using a lookup table
benchstat old new
name                             old time/op    new time/op    delta
AppendString/MultiBytesFirst-8     77.9ns ± 5%    70.2ns ± 1%   -9.88%  (p=0.008 n=5+5)
AppendString/MultiBytesMiddle-8    64.2ns ± 1%    56.3ns ± 5%  -12.19%  (p=0.008 n=5+5)
AppendString/MultiBytesLast-8      51.2ns ± 2%    45.2ns ± 4%  -11.65%  (p=0.008 n=5+5)
AppendString/NoEncoding-8          36.2ns ± 4%    34.0ns ± 6%     ~     (p=0.087 n=5+5)
AppendString/EncodingFirst-8       67.7ns ± 2%    59.4ns ± 2%  -12.26%  (p=0.008 n=5+5)
AppendString/EncodingMiddle-8      56.5ns ± 2%    50.6ns ± 5%  -10.54%  (p=0.008 n=5+5)
AppendString/EncodingLast-8        41.3ns ± 1%    39.6ns ± 5%   -4.11%  (p=0.024 n=5+5)
AppendBytes/MultiBytesLast-8       53.5ns ± 6%    45.6ns ± 4%  -14.79%  (p=0.008 n=5+5)
AppendBytes/NoEncoding-8           36.3ns ± 3%    28.6ns ± 3%  -21.10%  (p=0.008 n=5+5)
AppendBytes/EncodingFirst-8        67.3ns ± 4%    62.1ns ± 4%   -7.75%  (p=0.008 n=5+5)
AppendBytes/EncodingMiddle-8       59.2ns ± 7%    51.0ns ± 6%  -13.85%  (p=0.008 n=5+5)
AppendBytes/EncodingLast-8         43.7ns ± 6%    34.4ns ± 2%  -21.32%  (p=0.008 n=5+5)
AppendBytes/MultiBytesFirst-8      77.7ns ± 2%    71.2ns ± 3%   -8.37%  (p=0.008 n=5+5)
AppendBytes/MultiBytesMiddle-8     63.6ns ± 3%    57.8ns ± 5%   -9.12%  (p=0.008 n=5+5)
2018-03-23 04:24:50 -07:00

122 lines
3.3 KiB
Go

package json
import "unicode/utf8"
// hex lists the lowercase hexadecimal digits, indexed by nibble value.
const hex = "0123456789abcdef"

// noEscapeTable is indexed by byte value and is true for every byte that can
// be copied into a JSON string literal unchanged. Only the ASCII bytes from
// space (0x20) through tilde (0x7e), minus the double quote and the
// backslash, are marked true. Control characters, DEL (0x7f) and all bytes
// >= 0x80 keep the zero value false.
var noEscapeTable = [256]bool{}

// init fills noEscapeTable once at package initialization.
func init() {
	for c := ' '; c <= '~'; c++ {
		switch c {
		case '"', '\\':
			// These two always need a backslash escape; leave them false.
		default:
			noEscapeTable[c] = true
		}
	}
}
// AppendStrings appends vals to dst as a JSON array of strings and returns
// the extended slice.
//
// Each element is encoded with AppendString and consecutive elements are
// separated by a comma. A nil or empty vals produces "[]".
func AppendStrings(dst []byte, vals []string) []byte {
	dst = append(dst, '[')
	for idx, v := range vals {
		// Every element except the first is preceded by a separator.
		if idx > 0 {
			dst = append(dst, ',')
		}
		dst = AppendString(dst, v)
	}
	return append(dst, ']')
}
// AppendString appends s to dst as a double-quoted JSON string and returns
// the extended slice.
//
// The string is first scanned byte by byte against noEscapeTable. If every
// byte may be emitted verbatim, the string is appended in its entirety with
// a single append. As soon as a byte is found that is not in the table (a
// control character, a double quote, a backslash, DEL or any byte outside
// the ASCII range), the rest of the work is handed to appendStringComplex,
// starting at the offending index.
func AppendString(dst []byte, s string) []byte {
	// Opening quote.
	dst = append(dst, '"')

	// Advance past the leading run of bytes that need no treatment.
	pos := 0
	for pos < len(s) && noEscapeTable[s[pos]] {
		pos++
	}

	if pos == len(s) {
		// Nothing needs encoding: copy the whole string at once.
		dst = append(dst, s...)
	} else {
		// s[pos] needs a closer look: switch to the slow path.
		dst = appendStringComplex(dst, s, pos)
	}

	// Closing quote.
	return append(dst, '"')
}
// appendStringComplex is the slow path of AppendString. It appends the
// contents of s (without surrounding quotes) to dst, escaping what must be
// escaped, and returns the extended slice.
//
// i is the index of the first byte to inspect; the bytes before it are
// copied to dst without being examined. From i onwards:
//   - bytes listed in noEscapeTable and valid multi-byte UTF-8 sequences are
//     kept as they are and copied in bulk;
//   - an invalid UTF-8 byte is replaced by the six characters \ufffd;
//   - the double quote, the backslash and \b, \f, \n, \r, \t get their
//     two-character escape;
//   - any other byte is written as \u00XX with lowercase hex digits.
func appendStringComplex(dst []byte, s string, i int) []byte {
	// pending is the start of the run of bytes that have been accepted but
	// not yet copied to dst.
	pending := 0
	for i < len(s) {
		c := s[i]

		if c >= utf8.RuneSelf {
			r, width := utf8.DecodeRuneInString(s[i:])
			if r != utf8.RuneError || width != 1 {
				// Well-formed sequence: leave it in the pending run.
				i += width
				continue
			}
			// Invalid byte: flush the pending run (possibly empty) and
			// write the replacement character escape in its place.
			dst = append(dst, s[pending:i]...)
			dst = append(dst, `\ufffd`...)
			i += width
			pending = i
			continue
		}

		if noEscapeTable[c] {
			i++
			continue
		}

		// c must be escaped. Flush the pending run (possibly empty) first.
		dst = append(dst, s[pending:i]...)

		// short is the character following the backslash in a
		// two-character escape, or 0 when no short form applies.
		var short byte
		switch c {
		case '"', '\\':
			short = c
		case '\b':
			short = 'b'
		case '\f':
			short = 'f'
		case '\n':
			short = 'n'
		case '\r':
			short = 'r'
		case '\t':
			short = 't'
		}
		if short != 0 {
			dst = append(dst, '\\', short)
		} else {
			dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF])
		}

		i++
		pending = i
	}

	// Copy whatever is left of the last pending run.
	return append(dst, s[pending:]...)
}