Compare commits

...

3 Commits

Author SHA1 Message Date
Hiltjo Posthuma 475d8093cb drw.c: use the same pattern as ellipsis_width to check for infinite recursion 2024-07-14 11:43:01 +02:00
NRK 59936c7d97 render invalid utf8 sequences as U+FFFD
previously drw_text would do the width calculations as if
invalid utf8 sequences were replaced with U+FFFD but would pass
the invalid utf8 sequence to xft to render where xft would just
cut it off at the first invalid byte.

this change makes invalid utf8 render as U+FFFD and avoids
sending invalid sequences to xft. the following can be used to
check the behavior before and after the patch:

	$ printf "0\xef1234567\ntest" | dmenu

Ref: https://lists.suckless.org/dev/2407/35646.html
2024-07-14 11:42:58 +02:00
NRK 51e32d49b5 overhaul utf8decode()
this changes the utf8decode function to:

* report when an error occurs
* report how many bytes to advance on error

these will be useful in the next commit to render invalid utf8
sequences.

the new implementation is also shorter and more direct.
2024-07-14 11:42:55 +02:00
1 changed files with 44 additions and 49 deletions

93
drw.c
View File

@ -9,54 +9,40 @@
#include "util.h" #include "util.h"
#define UTF_INVALID 0xFFFD #define UTF_INVALID 0xFFFD
#define UTF_SIZ 4
static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0}; static int
static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8}; utf8decode(const char *s_in, long *u, int *err)
static const long utfmin[UTF_SIZ + 1] = { 0, 0, 0x80, 0x800, 0x10000};
static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
static long
utf8decodebyte(const char c, size_t *i)
{ {
for (*i = 0; *i < (UTF_SIZ + 1); ++(*i)) static const unsigned char lens[] = {
if (((unsigned char)c & utfmask[*i]) == utfbyte[*i]) /* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
return (unsigned char)c & ~utfmask[*i]; /* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0, /* invalid */
return 0; /* 110XX */ 2, 2, 2, 2,
} /* 1110X */ 3, 3,
/* 11110 */ 4,
static size_t /* 11111 */ 0, /* invalid */
utf8validate(long *u, size_t i) };
{ static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF)) static const unsigned int overlong[] = { 0x0, 0x80, 0x0800, 0x10000 };
*u = UTF_INVALID;
for (i = 1; *u > utfmax[i]; ++i)
;
return i;
}
static size_t
utf8decode(const char *c, long *u, size_t clen)
{
size_t i, j, len, type;
long udecoded;
const unsigned char *s = (const unsigned char *)s_in;
int len = lens[*s >> 3];
*u = UTF_INVALID; *u = UTF_INVALID;
if (!clen) *err = 1;
return 0; if (len == 0)
udecoded = utf8decodebyte(c[0], &len);
if (!BETWEEN(len, 1, UTF_SIZ))
return 1; return 1;
for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type);
if (type)
return j;
}
if (j < len)
return 0;
*u = udecoded;
utf8validate(u, len);
long cp = s[0] & leading_mask[len - 1];
for (int i = 1; i < len; ++i) {
if (s[i] == '\0' || (s[i] & 0xC0) != 0x80)
return i;
cp = (cp << 6) | (s[i] & 0x3F);
}
/* out of range, surrogate, overlong encoding */
if (cp > 0x10FFFF || (cp >> 11) == 0x1B || cp < overlong[len - 1])
return len;
*err = 0;
*u = cp;
return len; return len;
} }
@ -242,7 +228,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1; unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1;
XftDraw *d = NULL; XftDraw *d = NULL;
Fnt *usedfont, *curfont, *nextfont; Fnt *usedfont, *curfont, *nextfont;
int utf8strlen, utf8charlen, render = x || y || w || h; int utf8strlen, utf8charlen, utf8err, render = x || y || w || h;
long utf8codepoint = 0; long utf8codepoint = 0;
const char *utf8str; const char *utf8str;
FcCharSet *fccharset; FcCharSet *fccharset;
@ -251,7 +237,8 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
XftResult result; XftResult result;
int charexists = 0, overflow = 0; int charexists = 0, overflow = 0;
/* keep track of a couple codepoints for which we have no match. */ /* keep track of a couple codepoints for which we have no match. */
static unsigned int nomatches[128], ellipsis_width; static unsigned int nomatches[128], ellipsis_width, invalid_width;
static const char invalid[] = "<EFBFBD>";
if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts) if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts)
return 0; return 0;
@ -271,12 +258,14 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
usedfont = drw->fonts; usedfont = drw->fonts;
if (!ellipsis_width && render) if (!ellipsis_width && render)
ellipsis_width = drw_fontset_getwidth(drw, "..."); ellipsis_width = drw_fontset_getwidth(drw, "...");
if (!invalid_width && render)
invalid_width = drw_fontset_getwidth(drw, invalid);
while (1) { while (1) {
ew = ellipsis_len = utf8strlen = 0; ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0;
utf8str = text; utf8str = text;
nextfont = NULL; nextfont = NULL;
while (*text) { while (*text) {
utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ); utf8charlen = utf8decode(text, &utf8codepoint, &utf8err);
for (curfont = drw->fonts; curfont; curfont = curfont->next) { for (curfont = drw->fonts; curfont; curfont = curfont->next) {
charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint); charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint);
if (charexists) { if (charexists) {
@ -298,9 +287,9 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
else else
utf8strlen = ellipsis_len; utf8strlen = ellipsis_len;
} else if (curfont == usedfont) { } else if (curfont == usedfont) {
utf8strlen += utf8charlen;
text += utf8charlen; text += utf8charlen;
ew += tmpw; utf8strlen += utf8err ? 0 : utf8charlen;
ew += utf8err ? 0 : tmpw;
} else { } else {
nextfont = curfont; nextfont = curfont;
} }
@ -308,7 +297,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
} }
} }
if (overflow || !charexists || nextfont) if (overflow || !charexists || nextfont || utf8err)
break; break;
else else
charexists = 0; charexists = 0;
@ -323,6 +312,12 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
x += ew; x += ew;
w -= ew; w -= ew;
} }
if (utf8err && (!render || invalid_width < w)) {
if (render)
drw_text(drw, x, y, w, h, 0, invalid, invert);
x += invalid_width;
w -= invalid_width;
}
if (render && overflow) if (render && overflow)
drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert); drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert);