Support UTF-8 characters as word delimiters
To make the utf8strchr function more generally useful, it could return the index of the matching UTF-8 character together with a Rune instead of a char *. Since utf8strchr is currently only used by ISDELIM, I didn't bother to increase the complexity.
This commit is contained in:
parent
c03548750b
commit
caa97cc781
18
st.c
18
st.c
|
@ -71,7 +71,7 @@ char *argv0;
|
||||||
#define ISCONTROLC0(c) (BETWEEN(c, 0, 0x1f) || (c) == '\177')
|
#define ISCONTROLC0(c) (BETWEEN(c, 0, 0x1f) || (c) == '\177')
|
||||||
#define ISCONTROLC1(c) (BETWEEN(c, 0x80, 0x9f))
|
#define ISCONTROLC1(c) (BETWEEN(c, 0x80, 0x9f))
|
||||||
#define ISCONTROL(c) (ISCONTROLC0(c) || ISCONTROLC1(c))
|
#define ISCONTROL(c) (ISCONTROLC0(c) || ISCONTROLC1(c))
|
||||||
#define ISDELIM(u) (BETWEEN(u, 0, 127) && strchr(worddelimiters, u) != NULL)
|
#define ISDELIM(u) (utf8strchr(worddelimiters, u) != NULL)
|
||||||
#define LIMIT(x, a, b) (x) = (x) < (a) ? (a) : (x) > (b) ? (b) : (x)
|
#define LIMIT(x, a, b) (x) = (x) < (a) ? (a) : (x) > (b) ? (b) : (x)
|
||||||
#define ATTRCMP(a, b) ((a).mode != (b).mode || (a).fg != (b).fg || (a).bg != (b).bg)
|
#define ATTRCMP(a, b) ((a).mode != (b).mode || (a).fg != (b).fg || (a).bg != (b).bg)
|
||||||
#define IS_SET(flag) ((term.mode & (flag)) != 0)
|
#define IS_SET(flag) ((term.mode & (flag)) != 0)
|
||||||
|
@ -473,6 +473,7 @@ static size_t utf8decode(char *, Rune *, size_t);
|
||||||
static Rune utf8decodebyte(char, size_t *);
|
static Rune utf8decodebyte(char, size_t *);
|
||||||
static size_t utf8encode(Rune, char *);
|
static size_t utf8encode(Rune, char *);
|
||||||
static char utf8encodebyte(Rune, size_t);
|
static char utf8encodebyte(Rune, size_t);
|
||||||
|
static char *utf8strchr(char *s, Rune u);
|
||||||
static size_t utf8validate(Rune *, size_t);
|
static size_t utf8validate(Rune *, size_t);
|
||||||
|
|
||||||
static ssize_t xwrite(int, const char *, size_t);
|
static ssize_t xwrite(int, const char *, size_t);
|
||||||
|
@ -640,6 +641,21 @@ utf8encodebyte(Rune u, size_t i) {
|
||||||
return utfbyte[i] | (u & ~utfmask[i]);
|
return utfbyte[i] | (u & ~utfmask[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
utf8strchr(char *s, Rune u) {
|
||||||
|
Rune r;
|
||||||
|
size_t i, j, len;
|
||||||
|
|
||||||
|
len = strlen(s);
|
||||||
|
for(i = 0, j = 0; i < len; i += j) {
|
||||||
|
if(!(j = utf8decode(&s[i], &r, len - i)))
|
||||||
|
break;
|
||||||
|
if(r == u)
|
||||||
|
return &(s[i]);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
utf8validate(Rune *u, size_t i) {
|
utf8validate(Rune *u, size_t i) {
|
||||||
if(!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
|
if(!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
|
||||||
|
|
Loading…
Reference in New Issue