diff --git a/README.md b/README.md index 5e3fab1..a8e5cd2 100644 --- a/README.md +++ b/README.md @@ -255,11 +255,11 @@ Some settings can be changed and will by applied to all loggers: All operations are allocation free (those numbers *include* JSON encoding): ``` -BenchmarkLogEmpty-8 100000000 19.1 ns/op 0 B/op 0 allocs/op -BenchmarkDisabled-8 500000000 4.07 ns/op 0 B/op 0 allocs/op -BenchmarkInfo-8 30000000 46.3 ns/op 0 B/op 0 allocs/op -BenchmarkContextFields-8 30000000 47.1 ns/op 0 B/op 0 allocs/op -BenchmarkLogFields-8 10000000 186 ns/op 0 B/op 0 allocs/op +BenchmarkLogEmpty-8 100000000 19.1 ns/op 0 B/op 0 allocs/op +BenchmarkDisabled-8 500000000 4.07 ns/op 0 B/op 0 allocs/op +BenchmarkInfo-8 30000000 42.5 ns/op 0 B/op 0 allocs/op +BenchmarkContextFields-8 30000000 44.9 ns/op 0 B/op 0 allocs/op +BenchmarkLogFields-8 10000000 184 ns/op 0 B/op 0 allocs/op ``` Using Uber's zap [comparison benchmark](https://github.com/uber-go/zap#performance): diff --git a/json.go b/json.go index 8ff43a4..b439d18 100644 --- a/json.go +++ b/json.go @@ -21,48 +21,10 @@ func appendJSONString(dst []byte, s string) []byte { // Check if the character needs encoding. Control characters, slashes, // and the double quote need json encoding. Bytes above the ascii // boundary needs utf8 encoding. - if s[i] < ' ' || s[i] == '\\' || s[i] == '"' || s[i] > 126 { - // We encountered a character that needs to be encoded. Let's - // append the previous simple characters to the byte slice - // and switch our operation to read and encode the remainder - // characters byte-by-byte. - dst = append(dst, s[:i]...) 
- for i < len(s) { - if b := s[i]; b < utf8.RuneSelf { - switch b { - case '"', '\\': - dst = append(dst, '\\', b) - case '\b': - dst = append(dst, '\\', 'b') - case '\f': - dst = append(dst, '\\', 'f') - case '\n': - dst = append(dst, '\\', 'n') - case '\r': - dst = append(dst, '\\', 'r') - case '\t': - dst = append(dst, '\\', 't') - default: - if b >= 0x20 { - dst = append(dst, b) - } else { - dst = append(dst, '\\', 'u', '0', '0', - hex[b>>4], hex[b&0xF]) - } - } - i++ - continue - } - r, size := utf8.DecodeRuneInString(s[i:]) - if r == utf8.RuneError && size == 1 { - dst = append(dst, `\ufffd`...) - i++ - continue - } - dst = append(dst, s[i:i+size]...) - i += size - } - // End with a double quote + if s[i] < 0x20 || s[i] > 0x7e || s[i] == '\\' || s[i] == '"' { + // We encountered a character that needs to be encoded. Switch + // to complex version of the algorithm. + dst = appendJSONStringComplex(dst, s, i) return append(dst, '"') } } @@ -72,3 +34,63 @@ func appendJSONString(dst []byte, s string) []byte { // End with a double quote return append(dst, '"') } + +// appendJSONStringComplex is used by appendJSONString to take over an in +// progress JSON string encoding that encountered a character that needs +// to be encoded. +func appendJSONStringComplex(dst []byte, s string, i int) []byte { + start := 0 + for i < len(s) { + b := s[i] + if b >= utf8.RuneSelf { + r, size := utf8.DecodeRuneInString(s[i:]) + if r == utf8.RuneError && size == 1 { + // In case of error, first append previous simple characters to + // the byte slice if any and append a replacement character code + // in place of the invalid sequence. + if start < i { + dst = append(dst, s[start:i]...) + } + dst = append(dst, `\ufffd`...) + i += size + start = i + continue + } + i += size + continue + } + if b >= 0x20 && b <= 0x7e && b != '\\' && b != '"' { + i++ + continue + } + // We encountered a character that needs to be encoded. 
+ // Let's append the previous simple characters to the byte slice + // and switch our operation to read and encode the remainder + // characters byte-by-byte. + if start < i { + dst = append(dst, s[start:i]...) + } + switch b { + case '"', '\\': + dst = append(dst, '\\', b) + case '\b': + dst = append(dst, '\\', 'b') + case '\f': + dst = append(dst, '\\', 'f') + case '\n': + dst = append(dst, '\\', 'n') + case '\r': + dst = append(dst, '\\', 'r') + case '\t': + dst = append(dst, '\\', 't') + default: + dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF]) + } + i++ + start = i + } + if start < len(s) { + dst = append(dst, s[start:]...) + } + return dst +} diff --git a/json_test.go b/json_test.go index caf698b..2c45ca5 100644 --- a/json_test.go +++ b/json_test.go @@ -43,6 +43,8 @@ func TestAppendJSONString(t *testing.T) { {"\x1d", `"\u001d"`}, {"\x1e", `"\u001e"`}, {"\x1f", `"\u001f"`}, + {"✭", `"✭"`}, + {"foo\xc2\x7fbar", `"foo\ufffd\u007fbar"`}, // invalid sequence {"ascii", `"ascii"`}, {"\"a", `"\"a"`}, {"\x1fa", `"\u001fa"`}, @@ -61,13 +63,13 @@ func TestAppendJSONString(t *testing.T) { func BenchmarkAppendJSONString(b *testing.B) { tests := map[string]string{ - "NoEncoding": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`, - "EncodingFirst": `"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`, - "EncodingMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa"aaaaaaaaaaaaaaaaaaaaaaaa`, - "EncodingLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`, - "RuneFirst": `❤️aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`, - "RuneMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa❤️aaaaaaaaaaaaaaaaaaaaaaaa`, - "RuneLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa❤️`, + "NoEncoding": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`, + "EncodingFirst": `"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`, + "EncodingMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa"aaaaaaaaaaaaaaaaaaaaaaaa`, + "EncodingLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`, + "MultiBytesFirst": 
`❤️aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`, + "MultiBytesMiddle": `aaaaaaaaaaaaaaaaaaaaaaaaa❤️aaaaaaaaaaaaaaaaaaaaaaaa`, + "MultiBytesLast": `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa❤️`, } for name, str := range tests { b.Run(name, func(b *testing.B) {