Better UTF-8 support, including combined characters. Unicode data is now stored
as UTF-8 in a separate array; the code does a lookup into this every time it
gets to a UTF-8 cell. Zero-width characters are just appended onto the UTF-8
data for the previous cell. This also means that almost no extra bytes are
wasted on non-Unicode data (yay).

Still some oddities, such as copy mode skipping over wide characters in a
strange way, and the code could do with some tidying.
This commit is contained in:
Nicholas Marriott
2009-03-28 20:17:29 +00:00
parent 34dd72f008
commit cf7b384c43
12 changed files with 364 additions and 226 deletions

View File

@ -1,4 +1,4 @@
/* $Id: cmd-find-window.c,v 1.4 2009-03-28 16:30:05 nicm Exp $ */
/* $Id: cmd-find-window.c,v 1.5 2009-03-28 20:17:29 nicm Exp $ */
/*
* Copyright (c) 2009 Nicholas Marriott <nicm@users.sourceforge.net>
@ -163,25 +163,29 @@ cmd_find_window_callback(void *data, int idx)
char *
cmd_find_window_search(struct window_pane *wp, const char *searchstr)
{
char *buf, *s;
size_t off;
uint64_t text;
u_int i, j, k;
u_char data[4];
const struct grid_cell *gc;
const struct grid_utf8 *gu;
char *buf, *s;
size_t off;
u_int i, j, k;
buf = xmalloc(1);
for (j = 0; j < screen_size_y(&wp->base); j++) {
off = 0;
for (i = 0; i < screen_size_x(&wp->base); i++) {
text = grid_view_peek_text(wp->base.grid, i, j);
utf8_split(text, data);
buf = xrealloc(buf, 1, off + 4);
for (k = 0; k < sizeof data; k++) {
if (data[k] == 0xff)
break;
buf[off++] = data[k];
gc = grid_view_peek_cell(wp->base.grid, i, j);
if (gc->flags & GRID_FLAG_UTF8) {
gu = grid_view_peek_utf8(wp->base.grid, i, j);
buf = xrealloc(buf, 1, off + 8);
for (k = 0; k < 8; k++) {
if (gu->data[k] == 0xff)
break;
buf[off++] = gu->data[k];
}
} else {
buf = xrealloc(buf, 1, off + 1);
buf[off++] = gc->data;
}
}
while (off > 0 && buf[off - 1] == ' ')