Optimise utf8decode()

This commit is contained in:
Bakkeby 2024-03-07 23:23:56 +01:00
parent d78d582eaa
commit b43957b061
1 changed file with 13 additions and 10 deletions

23
st.c
View File

@ -217,9 +217,9 @@ static void selscroll(int, int);
static void selsnap(int *, int *, int); static void selsnap(int *, int *, int);
static size_t utf8decode(const char *, Rune *, size_t); static size_t utf8decode(const char *, Rune *, size_t);
static Rune utf8decodebyte(char, size_t *); static inline Rune utf8decodebyte(char, size_t *);
static char utf8encodebyte(Rune, size_t); static inline char utf8encodebyte(Rune, size_t);
static size_t utf8validate(Rune *, size_t); static inline size_t utf8validate(Rune *, size_t);
static char *base64dec(const char *); static char *base64dec(const char *);
static char base64dec_getc(const char **); static char base64dec_getc(const char **);
@ -297,21 +297,24 @@ xstrdup(const char *s)
size_t size_t
utf8decode(const char *c, Rune *u, size_t clen) utf8decode(const char *c, Rune *u, size_t clen)
{ {
size_t i, j, len, type; size_t i, len;
Rune udecoded; Rune udecoded;
*u = UTF_INVALID; *u = UTF_INVALID;
if (!clen) if (!clen)
return 0; return 0;
udecoded = utf8decodebyte(c[0], &len); udecoded = utf8decodebyte(c[0], &len);
if (!BETWEEN(len, 1, UTF_SIZ)) if (!BETWEEN(len, 2, UTF_SIZ)) {
*u = (len == 1) ? udecoded : UTF_INVALID;
return 1; return 1;
for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type);
if (type != 0)
return j;
} }
if (j < len) clen = MIN(clen, len);
for (i = 1; i < clen; ++i) {
if ((c[i] & 0xC0) != 0x80)
return i;
udecoded = (udecoded << 6) | (c[i] & 0x3F);
}
if (i < len)
return 0; return 0;
*u = (!BETWEEN(udecoded, utfmin[len], utfmax[len]) || BETWEEN(udecoded, 0xD800, 0xDFFF)) *u = (!BETWEEN(udecoded, utfmin[len], utfmax[len]) || BETWEEN(udecoded, 0xD800, 0xDFFF))
? UTF_INVALID : udecoded; ? UTF_INVALID : udecoded;