From 64333e3ef89047d1c09cdc5053af647dbd8344da Mon Sep 17 00:00:00 2001 From: nicm Date: Sat, 14 Nov 2015 10:56:31 +0000 Subject: [PATCH 1/2] Be more strict about invalid UTF-8. --- input.c | 13 +++++++----- key-string.c | 6 ++++-- screen-write.c | 56 ++++++++++++++++++++++++++++---------------------- tmux.h | 2 +- tty-keys.c | 11 +++++++--- utf8.c | 42 ++++++++++++++++++++----------------- 6 files changed, 76 insertions(+), 54 deletions(-) diff --git a/input.c b/input.c index babdecdb..cb92e52f 100644 --- a/input.c +++ b/input.c @@ -446,11 +446,11 @@ const struct input_transition input_state_ground_table[] = { { 0x1c, 0x1f, input_c0_dispatch, NULL }, { 0x20, 0x7e, input_print, NULL }, { 0x7f, 0x7f, NULL, NULL }, - { 0x80, 0xc1, input_print, NULL }, + { 0x80, 0xc1, NULL, NULL }, { 0xc2, 0xdf, input_utf8_open, &input_state_utf8_one }, { 0xe0, 0xef, input_utf8_open, &input_state_utf8_two }, { 0xf0, 0xf4, input_utf8_open, &input_state_utf8_three }, - { 0xf5, 0xff, input_print, NULL }, + { 0xf5, 0xff, NULL, NULL }, { -1, -1, NULL, NULL } }; @@ -1923,7 +1923,8 @@ input_utf8_open(struct input_ctx *ictx) { struct utf8_data *ud = &ictx->utf8data; - utf8_open(ud, ictx->ch); + if (!utf8_open(ud, ictx->ch)) + log_fatalx("UTF-8 open invalid %#hhx", ictx->ch); log_debug("%s %hhu", __func__, ud->size); @@ -1936,7 +1937,8 @@ input_utf8_add(struct input_ctx *ictx) { struct utf8_data *ud = &ictx->utf8data; - utf8_append(ud, ictx->ch); + if (utf8_append(ud, ictx->ch) != 1) + log_fatalx("UTF-8 add invalid %#hhx", ictx->ch); log_debug("%s", __func__); @@ -1949,7 +1951,8 @@ input_utf8_close(struct input_ctx *ictx) { struct utf8_data *ud = &ictx->utf8data; - utf8_append(ud, ictx->ch); + if (utf8_append(ud, ictx->ch) != 0) + log_fatalx("UTF-8 close invalid %#hhx", ictx->ch); log_debug("%s %hhu '%*s' (width %hhu)", __func__, ud->size, (int)ud->size, ud->data, ud->width); diff --git a/key-string.c b/key-string.c index c2230218..81d014ac 100644 --- a/key-string.c +++ b/key-string.c @@ -144,7 +144,7 @@ key_string_lookup_string(const char *string) static const char *other = "!#()+,-.0123456789:;<=>?'\r\t"; key_code key; u_short u; - int size; + int size, more; key_code modifiers; struct utf8_data ud; u_int i; @@ -177,7 +177,9 @@ key_string_lookup_string(const char *string) if (strlen(string) != ud.size) return (KEYC_NONE); for (i = 1; i < ud.size; i++) - utf8_append(&ud, (u_char)string[i]); + more = utf8_append(&ud, (u_char)string[i]); + if (more != 0) + return (KEYC_NONE); key = utf8_combine(&ud); return (key | modifiers); } diff --git a/screen-write.c b/screen-write.c index 14b8a41a..9e1ef822 100644 --- a/screen-write.c +++ b/screen-write.c @@ -115,6 +115,7 @@ screen_write_strlen(const char *fmt, ...) struct utf8_data ud; u_char *ptr; size_t left, size = 0; + int more; va_start(ap, fmt); xvasprintf(&msg, fmt, ap); @@ -128,11 +129,12 @@ screen_write_strlen(const char *fmt, ...) left = strlen(ptr); if (left < (size_t)ud.size - 1) break; - while (utf8_append(&ud, *ptr)) + while ((more = utf8_append(&ud, *ptr)) == 1) ptr++; ptr++; - size += ud.width; + if (more == 0) + size += ud.width; } else { if (*ptr > 0x1f && *ptr < 0x7f) size++; @@ -176,6 +178,7 @@ screen_write_vnputs(struct screen_write_ctx *ctx, ssize_t maxlen, struct utf8_data ud; u_char *ptr; size_t left, size = 0; + int more; xvasprintf(&msg, fmt, ap); @@ -187,22 +190,24 @@ screen_write_vnputs(struct screen_write_ctx *ctx, ssize_t maxlen, left = strlen(ptr); if (left < (size_t)ud.size - 1) break; - while (utf8_append(&ud, *ptr)) + while ((more = utf8_append(&ud, *ptr)) == 1) ptr++; ptr++; - if (maxlen > 0 && - size + ud.width > (size_t) maxlen) { - while (size < (size_t) maxlen) { - screen_write_putc(ctx, gc, ' '); - size++; + if (more == 0) { + if (maxlen > 0 && + size + ud.width > (size_t) maxlen) { + while (size < (size_t) maxlen) { + screen_write_putc(ctx, gc, ' '); + size++; + } + break; } - break; - } - size += ud.width; + size += ud.width; - utf8_copy(&gc->data, &ud); - screen_write_cell(ctx, gc); + utf8_copy(&gc->data, &ud); + screen_write_cell(ctx, gc); + } } else { if (maxlen > 0 && size + 1 > (size_t) maxlen) break; @@ -231,6 +236,7 @@ screen_write_cnputs(struct screen_write_ctx *ctx, ssize_t maxlen, char *msg; u_char *ptr, *last; size_t left, size = 0; + int more; va_start(ap, fmt); xvasprintf(&msg, fmt, ap); @@ -260,22 +266,24 @@ screen_write_cnputs(struct screen_write_ctx *ctx, ssize_t maxlen, left = strlen(ptr); if (left < (size_t)ud.size - 1) break; - while (utf8_append(&ud, *ptr)) + while ((more = utf8_append(&ud, *ptr)) == 1) ptr++; ptr++; - if (maxlen > 0 && - size + ud.width > (size_t) maxlen) { - while (size < (size_t) maxlen) { - screen_write_putc(ctx, gc, ' '); - size++; + if (more == 0) { + if (maxlen > 0 && + size + ud.width > (size_t) maxlen) { + while (size < (size_t) maxlen) { + screen_write_putc(ctx, gc, ' '); + size++; + } + break; } - break; - } - size += ud.width; + size += ud.width; - utf8_copy(&lgc.data, &ud); - screen_write_cell(ctx, &lgc); + utf8_copy(&lgc.data, &ud); + screen_write_cell(ctx, &lgc); + } } else { if (maxlen > 0 && size + 1 > (size_t) maxlen) break; diff --git a/tmux.h b/tmux.h index e69b74b1..2bc1f0a4 100644 --- a/tmux.h +++ b/tmux.h @@ -624,7 +624,7 @@ struct utf8_data { u_char have; u_char size; - u_char width; + u_char width; /* 0xff if invalid */ } __packed; /* Grid attributes. */ diff --git a/tty-keys.c b/tty-keys.c index 6a64ef15..4bdc061a 100644 --- a/tty-keys.c +++ b/tty-keys.c @@ -472,7 +472,7 @@ tty_keys_next(struct tty *tty) const char *buf; size_t len, size; cc_t bspace; - int delay, expired = 0; + int delay, expired = 0, more; key_code key; struct utf8_data ud; u_int i; @@ -547,7 +547,9 @@ first_key: goto partial_key; } for (i = 1; i < size; i++) - utf8_append(&ud, (u_char)buf[i]); + more = utf8_append(&ud, (u_char)buf[i]); + if (more != 0) + goto discard_key; key = utf8_combine(&ud); log_debug("UTF-8 key %.*s %#llx", (int)size, buf, key); goto complete_key; @@ -653,6 +655,7 @@ tty_keys_mouse(struct tty *tty, const char *buf, size_t len, size_t *size) struct utf8_data ud; u_int i, value, x, y, b, sgr_b; u_char sgr_type, c; + int more; /* * Standard mouse sequences are \033[M followed by three characters @@ -699,7 +702,9 @@ tty_keys_mouse(struct tty *tty, const char *buf, size_t len, size_t *size) (*size)++; if (len <= *size) return (1); - utf8_append(&ud, buf[*size]); + more = utf8_append(&ud, buf[*size]); + if (more != 0) + return (-1); value = utf8_combine(&ud); } else value = (u_char)buf[*size]; diff --git a/utf8.c b/utf8.c index ecc5e718..a36bccc3 100644 --- a/utf8.c +++ b/utf8.c @@ -403,22 +403,26 @@ utf8_open(struct utf8_data *ud, u_char ch) /* * Append character to UTF-8, closing if finished. * - * Returns 1 if more UTF-8 data to come, 0 if finished. + * Returns 1 if more UTF-8 data to come, 0 if finished and valid, -1 if + * finished and invalid. */ int utf8_append(struct utf8_data *ud, u_char ch) { - /* XXX this should do validity checks too! */ - if (ud->have >= ud->size) fatalx("UTF-8 character overflow"); if (ud->size > sizeof ud->data) fatalx("UTF-8 character size too large"); + if (ud->have != 0 && (ch & 0xc0) != 0x80) + ud->width = 0xff; + ud->data[ud->have++] = ch; if (ud->have != ud->size) return (1); + if (ud->width == 0xff) + return (-1); ud->width = utf8_width(utf8_combine(ud)); return (0); } @@ -556,15 +560,15 @@ utf8_strvis(char *dst, const char *src, size_t len, int flag) while (src < end) { if (utf8_open(&ud, *src)) { more = 1; - while (++src < end && more) + while (++src < end && more == 1) more = utf8_append(&ud, *src); - if (!more) { + if (more == 0) { /* UTF-8 character finished. */ for (i = 0; i < ud.size; i++) *dst++ = ud.data[i]; continue; } else if (ud.have > 0) { - /* Not a complete UTF-8 character. */ + /* Not a complete, valid UTF-8 character. */ src -= ud.have; } } @@ -600,9 +604,9 @@ utf8_sanitize(const char *src) dst = xreallocarray(dst, n + 1, sizeof *dst); if (utf8_open(&ud, *src)) { more = 1; - while (*++src != '\0' && more) + while (*++src != '\0' && more == 1) more = utf8_append(&ud, *src); - if (!more) { + if (more != 1) { dst = xreallocarray(dst, n + ud.width, sizeof *dst); for (i = 0; i < ud.width; i++) @@ -612,10 +616,8 @@ utf8_sanitize(const char *src) src -= ud.have; } if (*src > 0x1f && *src < 0x7f) - dst[n] = *src; + dst[n++] = *src; src++; - - n++; } dst = xreallocarray(dst, n + 1, sizeof *dst); @@ -641,18 +643,19 @@ utf8_fromcstr(const char *src) dst = xreallocarray(dst, n + 1, sizeof *dst); if (utf8_open(&dst[n], *src)) { more = 1; - while (*++src != '\0' && more) + while (*++src != '\0' && more == 1) more = utf8_append(&dst[n], *src); - if (!more) { + if (more != 1) { n++; continue; } src -= dst[n].have; } - utf8_set(&dst[n], *src); + if (*src > 0x1f && *src < 0x7f) { + utf8_set(&dst[n], *src); + n++; + } src++; - - n++; } dst = xreallocarray(dst, n + 1, sizeof *dst); @@ -693,15 +696,16 @@ utf8_cstrwidth(const char *s) while (*s != '\0') { if (utf8_open(&tmp, *s)) { more = 1; - while (*++s != '\0' && more) + while (*++s != '\0' && more == 1) more = utf8_append(&tmp, *s); - if (!more) { + if (more != 1) { width += tmp.width; continue; } s -= tmp.have; } - width++; + if (*s > 0x1f && *s < 0x7f) + width++; s++; } return (width); From dab63b029e94dcabe335abf7f89c66c28486a542 Mon Sep 17 00:00:00 2001 From: nicm Date: Sat, 14 Nov 2015 10:57:59 +0000 Subject: [PATCH 2/2] Couple of assignments to remove compiler warnings. --- key-string.c | 1 + tty-keys.c | 1 + 2 files changed, 2 insertions(+) diff --git a/key-string.c b/key-string.c index 81d014ac..ad7cbf50 100644 --- a/key-string.c +++ b/key-string.c @@ -176,6 +176,7 @@ key_string_lookup_string(const char *string) if (utf8_open(&ud, (u_char)*string)) { if (strlen(string) != ud.size) return (KEYC_NONE); + more = 1; for (i = 1; i < ud.size; i++) more = utf8_append(&ud, (u_char)string[i]); if (more != 0) diff --git a/tty-keys.c b/tty-keys.c index 4bdc061a..cc6b934a 100644 --- a/tty-keys.c +++ b/tty-keys.c @@ -546,6 +546,7 @@ first_key: goto discard_key; goto partial_key; } + more = 1; for (i = 1; i < size; i++) more = utf8_append(&ud, (u_char)buf[i]); if (more != 0)