mirror of
https://github.com/ascii-boxes/boxes.git
synced 2024-12-12 09:51:10 +01:00
Add advance_next32() function to 'unicode' module to encapsulate the escape handling logic #1
This commit is contained in:
parent
1c4914bb01
commit
4c656727ec
75
src/tools.c
75
src/tools.c
@ -343,7 +343,7 @@ void btrim(char *text, size_t *len)
|
||||
|
||||
void btrim32(uint32_t *text, size_t *len)
|
||||
/*
|
||||
* Remove trailing whitespace from line (unicode version).
|
||||
* Remove trailing whitespace from line (unicode and escape sequence enabled version).
|
||||
*
|
||||
* text string to trim
|
||||
* len pointer to the length of the string in characters
|
||||
@ -353,18 +353,34 @@ void btrim32(uint32_t *text, size_t *len)
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
*/
|
||||
{
|
||||
int idx = (int) (*len - 1);
|
||||
if (text == NULL || len == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (; idx >= 0; --idx) {
|
||||
ucs4_t c = text[idx];
|
||||
if (uc_is_c_whitespace(c) || uc_is_property_white_space(c) || uc_is_property_bidi_whitespace(c)) {
|
||||
set_char_at(text, idx, char_nul);
|
||||
} else {
|
||||
break;
|
||||
const uint32_t *rest = text;
|
||||
int last_char_pos = -1;
|
||||
size_t step_invis;
|
||||
|
||||
for (ucs4_t c = text[0]; c != char_nul; c = rest[0]) {
|
||||
if (c != char_esc) {
|
||||
if (!uc_is_c_whitespace(c) && !uc_is_property_white_space(c) && !uc_is_property_bidi_whitespace(c)) {
|
||||
last_char_pos = (int) (rest - text);
|
||||
}
|
||||
}
|
||||
rest = advance_next32(rest, &step_invis);
|
||||
}
|
||||
|
||||
/* If the last character is followed by an escape sequence, keep it (but only one). */
|
||||
if (last_char_pos >= 0) {
|
||||
rest = text + last_char_pos + 1;
|
||||
if (rest[0] == char_esc) {
|
||||
advance_next32(rest, &step_invis);
|
||||
last_char_pos += step_invis;
|
||||
}
|
||||
}
|
||||
|
||||
*len = idx + 1;
|
||||
set_char_at(text, (size_t) (last_char_pos + 1), char_nul);
|
||||
*len = (size_t) (last_char_pos + 1);
|
||||
}
|
||||
|
||||
|
||||
@ -546,7 +562,6 @@ void print_input_lines(const char *heading)
|
||||
static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii, size_t **posmap)
|
||||
{
|
||||
size_t invis = 0; /* counts invisible column positions */
|
||||
int ansipos = 0; /* progression of ansi sequence */
|
||||
*num_esc = 0; /* counts the number of escape sequences found */
|
||||
|
||||
if (is_empty(s)) {
|
||||
@ -562,42 +577,25 @@ static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **a
|
||||
(*ascii) = (char *) calloc(buflen, sizeof(char)); /* maybe a little too much, but certainly enough */
|
||||
char *p = *ascii;
|
||||
|
||||
ucs4_t c;
|
||||
size_t mb_idx = 0;
|
||||
size_t step_invis;
|
||||
const uint32_t *rest = s;
|
||||
while ((rest = u32_next(&c, rest))) {
|
||||
|
||||
for (ucs4_t c = s[0]; c != char_nul; c = rest[0]) {
|
||||
if (map_idx >= map_size - 4) {
|
||||
map_size = map_size * 2 + 1;
|
||||
map = (size_t *) realloc(map, map_size * sizeof(size_t));
|
||||
}
|
||||
|
||||
if (ansipos == 0 && c == char_esc) {
|
||||
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
|
||||
ansipos++;
|
||||
invis++;
|
||||
if (c == char_esc) {
|
||||
(*num_esc)++;
|
||||
} else if (ansipos == 1 && c == '[') {
|
||||
/* Found '[' char after ESC. A CSI sequence has started. */
|
||||
ansipos++;
|
||||
invis++;
|
||||
} else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
|
||||
/* Found a byte designating the end of a two-byte escape sequence */
|
||||
invis++;
|
||||
ansipos = 0;
|
||||
} else if (ansipos == 2) {
|
||||
/* Inside CSI sequence - Keep counting bytes as invisible */
|
||||
invis++;
|
||||
|
||||
/* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
|
||||
if (c >= 0x40 && c <= 0x7e) {
|
||||
ansipos = 0;
|
||||
}
|
||||
|
||||
} else if (is_ascii_printable(c)) {
|
||||
}
|
||||
else if (is_ascii_printable(c)) {
|
||||
*p = c & 0xff;
|
||||
map[map_idx++] = mb_idx;
|
||||
++p;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
int cols = uc_width(c, encoding);
|
||||
if (cols > 0) {
|
||||
memset(p, (int) 'x', cols);
|
||||
@ -607,8 +605,13 @@ static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **a
|
||||
p += cols;
|
||||
}
|
||||
}
|
||||
++mb_idx;
|
||||
|
||||
rest = advance_next32(rest, &step_invis);
|
||||
|
||||
mb_idx += BMAX((size_t) 1, step_invis);
|
||||
invis += step_invis;
|
||||
}
|
||||
|
||||
*p = '\0';
|
||||
(*posmap) = map;
|
||||
return invis;
|
||||
|
@ -100,6 +100,48 @@ uint32_t *new_empty_string32()
|
||||
|
||||
|
||||
|
||||
uint32_t *advance_next32(const uint32_t *s, size_t *invis)
|
||||
{
|
||||
if (is_empty(s)) {
|
||||
return (uint32_t *) s;
|
||||
}
|
||||
|
||||
int ansipos = 0;
|
||||
(*invis) = 0;
|
||||
ucs4_t c;
|
||||
const uint32_t *rest = s;
|
||||
while ((rest = u32_next(&c, rest))) {
|
||||
if (ansipos == 0 && c == char_esc) {
|
||||
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
|
||||
(*invis)++;
|
||||
ansipos++;
|
||||
} else if (ansipos == 1 && c == '[') {
|
||||
/* Found '[' char after ESC. A CSI sequence has started. */
|
||||
(*invis)++;
|
||||
ansipos++;
|
||||
} else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
|
||||
/* Found a byte designating the end of a two-byte escape sequence */
|
||||
(*invis)++;
|
||||
ansipos = 0;
|
||||
break;
|
||||
} else if (ansipos == 2) {
|
||||
/* Inside CSI sequence - Keep counting chars as invisible */
|
||||
(*invis)++;
|
||||
|
||||
/* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
|
||||
if (c >= 0x40 && c <= 0x7e) {
|
||||
ansipos = 0;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (uint32_t *) rest;
|
||||
}
|
||||
|
||||
|
||||
|
||||
uint32_t *advance32(uint32_t *s, const size_t offset)
|
||||
{
|
||||
if (is_empty(s)) {
|
||||
@ -109,50 +151,28 @@ uint32_t *advance32(uint32_t *s, const size_t offset)
|
||||
return s;
|
||||
}
|
||||
|
||||
ucs4_t c; /* the current character we're looking at */
|
||||
const uint32_t *cStr = s; /* pointer to c in s */
|
||||
size_t idx = 0; /* the count of visible characters */
|
||||
const uint32_t *last_esc; /* pointer to the start of the last escape sequence encountered */
|
||||
const uint32_t *rest = s; /* pointer to the next character coming up, needed only for u32_next() api */
|
||||
int visible = 1; /* flag indicating whether the previous char was a visible char */
|
||||
int ansipos = 0; /* progression of ansi sequence */
|
||||
size_t count = 0; /* the count of visible characters */
|
||||
int visible = 1; /* flag indicating whether the previous char was a visible char */
|
||||
const uint32_t *last_esc = NULL; /* pointer to the start of the last escape sequence encountered */
|
||||
const uint32_t *rest = s; /* pointer to the next character coming up */
|
||||
size_t step_invis = 0; /* unused, but required for advance_next32() call */
|
||||
|
||||
while ((rest = u32_next(&c, rest))) {
|
||||
if (ansipos == 0 && c == char_esc) {
|
||||
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
|
||||
last_esc = cStr;
|
||||
for (ucs4_t c = s[0]; c != char_nul; c = rest[0]) {
|
||||
if (c == char_esc) {
|
||||
last_esc = rest;
|
||||
visible = 0;
|
||||
ansipos++;
|
||||
} else if (ansipos == 1 && c == '[') {
|
||||
/* Found '[' char after ESC. A CSI sequence has started. */
|
||||
ansipos++;
|
||||
visible = 0;
|
||||
} else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
|
||||
/* Found a char designating the end of a two-byte escape sequence */
|
||||
visible = 0;
|
||||
ansipos = 0;
|
||||
} else if (ansipos == 2) {
|
||||
/* Inside CSI sequence - Keep counting chars as invisible */
|
||||
visible = 0;
|
||||
|
||||
/* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
|
||||
if (c >= 0x40 && c <= 0x7e) {
|
||||
ansipos = 0;
|
||||
}
|
||||
} else {
|
||||
/* a visible char */
|
||||
if (idx == offset) {
|
||||
if (!visible) {
|
||||
if (count++ == offset) {
|
||||
if (!visible && last_esc != NULL) {
|
||||
return (uint32_t *) last_esc;
|
||||
}
|
||||
return (uint32_t *) cStr;
|
||||
break;
|
||||
}
|
||||
++idx;
|
||||
visible = 1;
|
||||
}
|
||||
cStr = rest;
|
||||
rest = advance_next32(rest, &step_invis);
|
||||
}
|
||||
return new_empty_string32(); /* offset too large, not enough characters in string */
|
||||
return (uint32_t *) rest; /* may point to zero terminator when offset too large */
|
||||
}
|
||||
|
||||
|
||||
|
@ -50,10 +50,28 @@ int is_ascii_printable(const ucs4_t c);
|
||||
/** Return a freshly allocated empty UTF-32 string. */
|
||||
uint32_t *new_empty_string32();
|
||||
|
||||
/**
|
||||
* Return the next position in <s> in accordance with escape sequences. The result can be the next normal character,
|
||||
* or again an escape sequence, if it directly follows the first.
|
||||
*
|
||||
* @param <s> The pointer to the start position. Is assumed to point either at the ESC at the start of an escape
|
||||
* sequence, or to be positioned outside an escape sequence.
|
||||
* @param <invis> Will contain the number of invisible characters skipped in order to get to the new position.
|
||||
* This will be 0 unless <s> pointed to an ESC char, in which case it contains the length in characters of that
|
||||
* escape sequence.
|
||||
* @return The next position. Once the end of the string was reached, this points to the terminating zero character.
|
||||
*/
|
||||
uint32_t *advance_next32(const uint32_t *s, size_t *invis);
|
||||
|
||||
/**
|
||||
* Determine a new position in the given string s with the given offset of visible characters.
|
||||
* If the character right in front of the target character is invisible, then the pointer is moved to the start of
|
||||
* that invisible sequence. The purpose is to catch any escape sequences which would for example color the character.
|
||||
*
|
||||
* @param <s> The pointer to the start position. Is assumed to point either at the ESC at the start of an escape
|
||||
* sequence, or to be positioned outside an escape sequence.
|
||||
* @param <offset> the number of visible character positions to advance the pointer
|
||||
* @return a pointer to the new position in s, which may point to the zero terminator if the offset was too large
|
||||
*/
|
||||
uint32_t *advance32(uint32_t *s, const size_t offset);
|
||||
|
||||
|
@ -10,7 +10,7 @@ Original Designer: (public domain)
|
||||
Creation Date: March 18, 1999 (Thursday, 15:25h)
|
||||
Current Revision: 1.0 as of March 18, 1999 (Thursday, 15:25h)
|
||||
Indentation Mode: box (indent box)
|
||||
Replacement Rules: 1. (glob) "\*/" WITH "*\/"
|
||||
Replacement Rules: 1. (glob) "\*/" WITH "*\\/"
|
||||
Reversion Rules: 1. (glob) "\*\\/" TO "*/"
|
||||
Minimum Box Dimensions: 5 x 3 (width x height)
|
||||
Default Padding: left 1, right 1
|
||||
|
@ -6,7 +6,7 @@
|
||||
:OUTPUT-FILTER
|
||||
:EXPECTED
|
||||
/********************************************/
|
||||
/* [38;5;214m[0m[38;5;214mT[0m[38;5;208mh[0m[38;5;203me[0m[38;5;203mr[0m[38;5;198me[0m[38;5;199m [0m[38;5;163mi[0m[38;5;164ms[0m[38;5;129m [0m[38;5;129mn[0m[38;5;93mo[0m[38;5;63m [0m[38;5;63mc[0m[38;5;33mo[0m[38;5;39mm[0m[38;5;38mp[0m[38;5;44ml[0m[38;5;49me[0m[38;5;49mt[0m[38;5;48me[0m[38;5;83m [0m[38;5;83mt[0m[38;5;118mh[0m[38;5;154me[0m[38;5;148mo[0m[38;5;184mr[0m[38;5;214my[0m[38;5;214m [0m[38;5;208mo[0m[38;5;203mf[0m[38;5;203m [0m[38;5;198ma[0m[38;5;199mn[0m[38;5;163my[0m[38;5;164mt[0m[38;5;129mh[0m[38;5;129mi[0m[38;5;93mn[0m[38;5;63mg[0m[38;5;63m.[0m[38;5;33m[0m */
|
||||
/* [38;5;214m[0m[38;5;214mT[0m[38;5;208mh[0m[38;5;203me[0m[38;5;203mr[0m[38;5;198me[0m[38;5;199m [0m[38;5;163mi[0m[38;5;164ms[0m[38;5;129m [0m[38;5;129mn[0m[38;5;93mo[0m[38;5;63m [0m[38;5;63mc[0m[38;5;33mo[0m[38;5;39mm[0m[38;5;38mp[0m[38;5;44ml[0m[38;5;49me[0m[38;5;49mt[0m[38;5;48me[0m[38;5;83m [0m[38;5;83mt[0m[38;5;118mh[0m[38;5;154me[0m[38;5;148mo[0m[38;5;184mr[0m[38;5;214my[0m[38;5;214m [0m[38;5;208mo[0m[38;5;203mf[0m[38;5;203m [0m[38;5;198ma[0m[38;5;199mn[0m[38;5;163my[0m[38;5;164mt[0m[38;5;129mh[0m[38;5;129mi[0m[38;5;93mn[0m[38;5;63mg[0m[38;5;63m.[0m */
|
||||
/* [34;2;3m Robert Anton Wilson[0m */
|
||||
/********************************************/
|
||||
:EOF
|
||||
|
@ -5,7 +5,7 @@
|
||||
:OUTPUT-FILTER
|
||||
:EXPECTED
|
||||
/********************************************/
|
||||
/* [38;5;214mT[0m[38;5;208mh[0m[38;5;203me[0m[38;5;203mr[0m[38;5;198me[0m[38;5;199m [0m[38;5;163mi[0m[38;5;164ms[0m[38;5;129m [0m[38;5;129mn[0m[38;5;93mo[0m[38;5;63m [0m[38;5;63mc[0m[38;5;33mo[0m[38;5;39mm[0m[38;5;38mp[0m[38;5;44ml[0m[38;5;49me[0m[38;5;49mt[0m[38;5;48me[0m[38;5;83m [0m[38;5;83mt[0m[38;5;118mh[0m[38;5;154me[0m[38;5;148mo[0m[38;5;184mr[0m[38;5;214my[0m[38;5;214m [0m[38;5;208mo[0m[38;5;203mf[0m[38;5;203m [0m[38;5;198ma[0m[38;5;199mn[0m[38;5;163my[0m[38;5;164mt[0m[38;5;129mh[0m[38;5;129mi[0m[38;5;93mn[0m[38;5;63mg[0m[38;5;63m.[0m[38;5;33m[0m */
|
||||
/* [38;5;214mT[0m[38;5;208mh[0m[38;5;203me[0m[38;5;203mr[0m[38;5;198me[0m[38;5;199m [0m[38;5;163mi[0m[38;5;164ms[0m[38;5;129m [0m[38;5;129mn[0m[38;5;93mo[0m[38;5;63m [0m[38;5;63mc[0m[38;5;33mo[0m[38;5;39mm[0m[38;5;38mp[0m[38;5;44ml[0m[38;5;49me[0m[38;5;49mt[0m[38;5;48me[0m[38;5;83m [0m[38;5;83mt[0m[38;5;118mh[0m[38;5;154me[0m[38;5;148mo[0m[38;5;184mr[0m[38;5;214my[0m[38;5;214m [0m[38;5;208mo[0m[38;5;203mf[0m[38;5;203m [0m[38;5;198ma[0m[38;5;199mn[0m[38;5;163my[0m[38;5;164mt[0m[38;5;129mh[0m[38;5;129mi[0m[38;5;93mn[0m[38;5;63mg[0m[38;5;63m.[0m */
|
||||
/* [34;2;3m Robert Anton Wilson[0m */
|
||||
/********************************************/
|
||||
:EOF
|
||||
|
Loading…
Reference in New Issue
Block a user