From b43957b06122d83db4e48001416e553dd7d51986 Mon Sep 17 00:00:00 2001 From: Bakkeby Date: Thu, 7 Mar 2024 23:23:56 +0100 Subject: [PATCH] Optimise utf8decode() --- st.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/st.c b/st.c index 46eecbd..30f8fc8 100644 --- a/st.c +++ b/st.c @@ -217,9 +217,9 @@ static void selscroll(int, int); static void selsnap(int *, int *, int); static size_t utf8decode(const char *, Rune *, size_t); -static Rune utf8decodebyte(char, size_t *); -static char utf8encodebyte(Rune, size_t); -static size_t utf8validate(Rune *, size_t); +static inline Rune utf8decodebyte(char, size_t *); +static inline char utf8encodebyte(Rune, size_t); +static inline size_t utf8validate(Rune *, size_t); static char *base64dec(const char *); static char base64dec_getc(const char **); @@ -297,21 +297,24 @@ xstrdup(const char *s) size_t utf8decode(const char *c, Rune *u, size_t clen) { - size_t i, j, len, type; + size_t i, len; Rune udecoded; *u = UTF_INVALID; if (!clen) return 0; udecoded = utf8decodebyte(c[0], &len); - if (!BETWEEN(len, 1, UTF_SIZ)) + if (!BETWEEN(len, 2, UTF_SIZ)) { + *u = (len == 1) ? udecoded : UTF_INVALID; return 1; - for (i = 1, j = 1; i < clen && j < len; ++i, ++j) { - udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type); - if (type != 0) - return j; } - if (j < len) + clen = MIN(clen, len); + for (i = 1; i < clen; ++i) { + if ((c[i] & 0xC0) != 0x80) + return i; + udecoded = (udecoded << 6) | (c[i] & 0x3F); + } + if (i < len) return 0; *u = (!BETWEEN(udecoded, utfmin[len], utfmax[len]) || BETWEEN(udecoded, 0xD800, 0xDFFF)) ? UTF_INVALID : udecoded;