mirror of
https://github.com/ascii-boxes/boxes.git
synced 2024-12-12 18:01:14 +01:00
Add advance_next32() function to 'unicode' module to encapsulate the escape handling logic #1
This commit is contained in:
parent
1c4914bb01
commit
4c656727ec
73
src/tools.c
73
src/tools.c
@ -343,7 +343,7 @@ void btrim(char *text, size_t *len)
|
|||||||
|
|
||||||
void btrim32(uint32_t *text, size_t *len)
|
void btrim32(uint32_t *text, size_t *len)
|
||||||
/*
|
/*
|
||||||
* Remove trailing whitespace from line (unicode version).
|
* Remove trailing whitespace from line (unicode and escape sequence enabled version).
|
||||||
*
|
*
|
||||||
* text string to trim
|
* text string to trim
|
||||||
* len pointer to the length of the string in characters
|
* len pointer to the length of the string in characters
|
||||||
@ -353,18 +353,34 @@ void btrim32(uint32_t *text, size_t *len)
|
|||||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||||
*/
|
*/
|
||||||
{
|
{
|
||||||
int idx = (int) (*len - 1);
|
if (text == NULL || len == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
for (; idx >= 0; --idx) {
|
const uint32_t *rest = text;
|
||||||
ucs4_t c = text[idx];
|
int last_char_pos = -1;
|
||||||
if (uc_is_c_whitespace(c) || uc_is_property_white_space(c) || uc_is_property_bidi_whitespace(c)) {
|
size_t step_invis;
|
||||||
set_char_at(text, idx, char_nul);
|
|
||||||
} else {
|
for (ucs4_t c = text[0]; c != char_nul; c = rest[0]) {
|
||||||
break;
|
if (c != char_esc) {
|
||||||
|
if (!uc_is_c_whitespace(c) && !uc_is_property_white_space(c) && !uc_is_property_bidi_whitespace(c)) {
|
||||||
|
last_char_pos = (int) (rest - text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rest = advance_next32(rest, &step_invis);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the last character is followed by an escape sequence, keep it (but only one). */
|
||||||
|
if (last_char_pos >= 0) {
|
||||||
|
rest = text + last_char_pos + 1;
|
||||||
|
if (rest[0] == char_esc) {
|
||||||
|
advance_next32(rest, &step_invis);
|
||||||
|
last_char_pos += step_invis;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*len = idx + 1;
|
set_char_at(text, (size_t) (last_char_pos + 1), char_nul);
|
||||||
|
*len = (size_t) (last_char_pos + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -546,7 +562,6 @@ void print_input_lines(const char *heading)
|
|||||||
static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii, size_t **posmap)
|
static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii, size_t **posmap)
|
||||||
{
|
{
|
||||||
size_t invis = 0; /* counts invisible column positions */
|
size_t invis = 0; /* counts invisible column positions */
|
||||||
int ansipos = 0; /* progression of ansi sequence */
|
|
||||||
*num_esc = 0; /* counts the number of escape sequences found */
|
*num_esc = 0; /* counts the number of escape sequences found */
|
||||||
|
|
||||||
if (is_empty(s)) {
|
if (is_empty(s)) {
|
||||||
@ -562,42 +577,25 @@ static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **a
|
|||||||
(*ascii) = (char *) calloc(buflen, sizeof(char)); /* maybe a little too much, but certainly enough */
|
(*ascii) = (char *) calloc(buflen, sizeof(char)); /* maybe a little too much, but certainly enough */
|
||||||
char *p = *ascii;
|
char *p = *ascii;
|
||||||
|
|
||||||
ucs4_t c;
|
|
||||||
size_t mb_idx = 0;
|
size_t mb_idx = 0;
|
||||||
|
size_t step_invis;
|
||||||
const uint32_t *rest = s;
|
const uint32_t *rest = s;
|
||||||
while ((rest = u32_next(&c, rest))) {
|
|
||||||
|
for (ucs4_t c = s[0]; c != char_nul; c = rest[0]) {
|
||||||
if (map_idx >= map_size - 4) {
|
if (map_idx >= map_size - 4) {
|
||||||
map_size = map_size * 2 + 1;
|
map_size = map_size * 2 + 1;
|
||||||
map = (size_t *) realloc(map, map_size * sizeof(size_t));
|
map = (size_t *) realloc(map, map_size * sizeof(size_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ansipos == 0 && c == char_esc) {
|
if (c == char_esc) {
|
||||||
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
|
|
||||||
ansipos++;
|
|
||||||
invis++;
|
|
||||||
(*num_esc)++;
|
(*num_esc)++;
|
||||||
} else if (ansipos == 1 && c == '[') {
|
|
||||||
/* Found '[' char after ESC. A CSI sequence has started. */
|
|
||||||
ansipos++;
|
|
||||||
invis++;
|
|
||||||
} else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
|
|
||||||
/* Found a byte designating the end of a two-byte escape sequence */
|
|
||||||
invis++;
|
|
||||||
ansipos = 0;
|
|
||||||
} else if (ansipos == 2) {
|
|
||||||
/* Inside CSI sequence - Keep counting bytes as invisible */
|
|
||||||
invis++;
|
|
||||||
|
|
||||||
/* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
|
|
||||||
if (c >= 0x40 && c <= 0x7e) {
|
|
||||||
ansipos = 0;
|
|
||||||
}
|
}
|
||||||
|
else if (is_ascii_printable(c)) {
|
||||||
} else if (is_ascii_printable(c)) {
|
|
||||||
*p = c & 0xff;
|
*p = c & 0xff;
|
||||||
map[map_idx++] = mb_idx;
|
map[map_idx++] = mb_idx;
|
||||||
++p;
|
++p;
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
int cols = uc_width(c, encoding);
|
int cols = uc_width(c, encoding);
|
||||||
if (cols > 0) {
|
if (cols > 0) {
|
||||||
memset(p, (int) 'x', cols);
|
memset(p, (int) 'x', cols);
|
||||||
@ -607,8 +605,13 @@ static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **a
|
|||||||
p += cols;
|
p += cols;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
++mb_idx;
|
|
||||||
|
rest = advance_next32(rest, &step_invis);
|
||||||
|
|
||||||
|
mb_idx += BMAX((size_t) 1, step_invis);
|
||||||
|
invis += step_invis;
|
||||||
}
|
}
|
||||||
|
|
||||||
*p = '\0';
|
*p = '\0';
|
||||||
(*posmap) = map;
|
(*posmap) = map;
|
||||||
return invis;
|
return invis;
|
||||||
|
@ -100,6 +100,48 @@ uint32_t *new_empty_string32()
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t *advance_next32(const uint32_t *s, size_t *invis)
|
||||||
|
{
|
||||||
|
if (is_empty(s)) {
|
||||||
|
return (uint32_t *) s;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ansipos = 0;
|
||||||
|
(*invis) = 0;
|
||||||
|
ucs4_t c;
|
||||||
|
const uint32_t *rest = s;
|
||||||
|
while ((rest = u32_next(&c, rest))) {
|
||||||
|
if (ansipos == 0 && c == char_esc) {
|
||||||
|
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
|
||||||
|
(*invis)++;
|
||||||
|
ansipos++;
|
||||||
|
} else if (ansipos == 1 && c == '[') {
|
||||||
|
/* Found '[' char after ESC. A CSI sequence has started. */
|
||||||
|
(*invis)++;
|
||||||
|
ansipos++;
|
||||||
|
} else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
|
||||||
|
/* Found a byte designating the end of a two-byte escape sequence */
|
||||||
|
(*invis)++;
|
||||||
|
ansipos = 0;
|
||||||
|
break;
|
||||||
|
} else if (ansipos == 2) {
|
||||||
|
/* Inside CSI sequence - Keep counting chars as invisible */
|
||||||
|
(*invis)++;
|
||||||
|
|
||||||
|
/* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
|
||||||
|
if (c >= 0x40 && c <= 0x7e) {
|
||||||
|
ansipos = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (uint32_t *) rest;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
uint32_t *advance32(uint32_t *s, const size_t offset)
|
uint32_t *advance32(uint32_t *s, const size_t offset)
|
||||||
{
|
{
|
||||||
if (is_empty(s)) {
|
if (is_empty(s)) {
|
||||||
@ -109,50 +151,28 @@ uint32_t *advance32(uint32_t *s, const size_t offset)
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
ucs4_t c; /* the current character we're looking at */
|
size_t count = 0; /* the count of visible characters */
|
||||||
const uint32_t *cStr = s; /* pointer to c in s */
|
|
||||||
size_t idx = 0; /* the count of visible characters */
|
|
||||||
const uint32_t *last_esc; /* pointer to the start of the last escape sequence encountered */
|
|
||||||
const uint32_t *rest = s; /* pointer to the next character coming up, needed only for u32_next() api */
|
|
||||||
int visible = 1; /* flag indicating whether the previous char was a visible char */
|
int visible = 1; /* flag indicating whether the previous char was a visible char */
|
||||||
int ansipos = 0; /* progression of ansi sequence */
|
const uint32_t *last_esc = NULL; /* pointer to the start of the last escape sequence encountered */
|
||||||
|
const uint32_t *rest = s; /* pointer to the next character coming up */
|
||||||
|
size_t step_invis = 0; /* unused, but required for advance_next32() call */
|
||||||
|
|
||||||
while ((rest = u32_next(&c, rest))) {
|
for (ucs4_t c = s[0]; c != char_nul; c = rest[0]) {
|
||||||
if (ansipos == 0 && c == char_esc) {
|
if (c == char_esc) {
|
||||||
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
|
last_esc = rest;
|
||||||
last_esc = cStr;
|
|
||||||
visible = 0;
|
visible = 0;
|
||||||
ansipos++;
|
|
||||||
} else if (ansipos == 1 && c == '[') {
|
|
||||||
/* Found '[' char after ESC. A CSI sequence has started. */
|
|
||||||
ansipos++;
|
|
||||||
visible = 0;
|
|
||||||
} else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
|
|
||||||
/* Found a char designating the end of a two-byte escape sequence */
|
|
||||||
visible = 0;
|
|
||||||
ansipos = 0;
|
|
||||||
} else if (ansipos == 2) {
|
|
||||||
/* Inside CSI sequence - Keep counting chars as invisible */
|
|
||||||
visible = 0;
|
|
||||||
|
|
||||||
/* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
|
|
||||||
if (c >= 0x40 && c <= 0x7e) {
|
|
||||||
ansipos = 0;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
/* a visible char */
|
if (count++ == offset) {
|
||||||
if (idx == offset) {
|
if (!visible && last_esc != NULL) {
|
||||||
if (!visible) {
|
|
||||||
return (uint32_t *) last_esc;
|
return (uint32_t *) last_esc;
|
||||||
}
|
}
|
||||||
return (uint32_t *) cStr;
|
break;
|
||||||
}
|
}
|
||||||
++idx;
|
|
||||||
visible = 1;
|
visible = 1;
|
||||||
}
|
}
|
||||||
cStr = rest;
|
rest = advance_next32(rest, &step_invis);
|
||||||
}
|
}
|
||||||
return new_empty_string32(); /* offset too large, not enough characters in string */
|
return (uint32_t *) rest; /* may point to zero terminator when offset too large */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -50,10 +50,28 @@ int is_ascii_printable(const ucs4_t c);
|
|||||||
/** Return a freshly allocated empty UTF-32 string. */
|
/** Return a freshly allocated empty UTF-32 string. */
|
||||||
uint32_t *new_empty_string32();
|
uint32_t *new_empty_string32();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the next position in <s> in accordance with escape sequences. The result can be the next normal character,
|
||||||
|
* or again an escape sequence, if it directly follows the first.
|
||||||
|
*
|
||||||
|
* @param <s> The pointer to the start position. Is assumed to point either at the ESC at the start of an escape
|
||||||
|
* sequence, or to be positioned outside an escape sequence.
|
||||||
|
* @param <invis> Will contain the number of invisible characters skipped in order to get to the new position.
|
||||||
|
* This will be 0 unless <s> pointed to an ESC char, in which case it contains the length in characters of that
|
||||||
|
* escape sequence.
|
||||||
|
* @return The next position; <s> itself if <s> is empty; 0 if the string ends before an escape sequence starting at <s> is complete
|
||||||
|
*/
|
||||||
|
uint32_t *advance_next32(const uint32_t *s, size_t *invis);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determine a new position in the given string s with the given offset of visible characters.
|
* Determine a new position in the given string s with the given offset of visible characters.
|
||||||
* If the character right in front of the target character is invisible, then the pointer is moved to the start of
|
* If the character right in front of the target character is invisible, then the pointer is moved to the start of
|
||||||
* that invisible sequence. The purpose is to catch any escape sequences which would for example color the character.
|
* that invisible sequence. The purpose is to catch any escape sequences which would for example color the character.
|
||||||
|
*
|
||||||
|
* @param <s> The pointer to the start position. Is assumed to point either at the ESC at the start of an escape
|
||||||
|
* sequence, or to be positioned outside an escape sequence.
|
||||||
|
* @param <offset> the number of visible character positions to advance the pointer
|
||||||
|
* @return a pointer to the new position in s; if s has fewer than <offset> visible characters, this may point to the zero terminator of s
|
||||||
*/
|
*/
|
||||||
uint32_t *advance32(uint32_t *s, const size_t offset);
|
uint32_t *advance32(uint32_t *s, const size_t offset);
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@ Original Designer: (public domain)
|
|||||||
Creation Date: March 18, 1999 (Thursday, 15:25h)
|
Creation Date: March 18, 1999 (Thursday, 15:25h)
|
||||||
Current Revision: 1.0 as of March 18, 1999 (Thursday, 15:25h)
|
Current Revision: 1.0 as of March 18, 1999 (Thursday, 15:25h)
|
||||||
Indentation Mode: box (indent box)
|
Indentation Mode: box (indent box)
|
||||||
Replacement Rules: 1. (glob) "\*/" WITH "*\/"
|
Replacement Rules: 1. (glob) "\*/" WITH "*\\/"
|
||||||
Reversion Rules: 1. (glob) "\*\\/" TO "*/"
|
Reversion Rules: 1. (glob) "\*\\/" TO "*/"
|
||||||
Minimum Box Dimensions: 5 x 3 (width x height)
|
Minimum Box Dimensions: 5 x 3 (width x height)
|
||||||
Default Padding: left 1, right 1
|
Default Padding: left 1, right 1
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
:OUTPUT-FILTER
|
:OUTPUT-FILTER
|
||||||
:EXPECTED
|
:EXPECTED
|
||||||
/********************************************/
|
/********************************************/
|
||||||
/* [38;5;214m[0m[38;5;214mT[0m[38;5;208mh[0m[38;5;203me[0m[38;5;203mr[0m[38;5;198me[0m[38;5;199m [0m[38;5;163mi[0m[38;5;164ms[0m[38;5;129m [0m[38;5;129mn[0m[38;5;93mo[0m[38;5;63m [0m[38;5;63mc[0m[38;5;33mo[0m[38;5;39mm[0m[38;5;38mp[0m[38;5;44ml[0m[38;5;49me[0m[38;5;49mt[0m[38;5;48me[0m[38;5;83m [0m[38;5;83mt[0m[38;5;118mh[0m[38;5;154me[0m[38;5;148mo[0m[38;5;184mr[0m[38;5;214my[0m[38;5;214m [0m[38;5;208mo[0m[38;5;203mf[0m[38;5;203m [0m[38;5;198ma[0m[38;5;199mn[0m[38;5;163my[0m[38;5;164mt[0m[38;5;129mh[0m[38;5;129mi[0m[38;5;93mn[0m[38;5;63mg[0m[38;5;63m.[0m[38;5;33m[0m */
|
/* [38;5;214m[0m[38;5;214mT[0m[38;5;208mh[0m[38;5;203me[0m[38;5;203mr[0m[38;5;198me[0m[38;5;199m [0m[38;5;163mi[0m[38;5;164ms[0m[38;5;129m [0m[38;5;129mn[0m[38;5;93mo[0m[38;5;63m [0m[38;5;63mc[0m[38;5;33mo[0m[38;5;39mm[0m[38;5;38mp[0m[38;5;44ml[0m[38;5;49me[0m[38;5;49mt[0m[38;5;48me[0m[38;5;83m [0m[38;5;83mt[0m[38;5;118mh[0m[38;5;154me[0m[38;5;148mo[0m[38;5;184mr[0m[38;5;214my[0m[38;5;214m [0m[38;5;208mo[0m[38;5;203mf[0m[38;5;203m [0m[38;5;198ma[0m[38;5;199mn[0m[38;5;163my[0m[38;5;164mt[0m[38;5;129mh[0m[38;5;129mi[0m[38;5;93mn[0m[38;5;63mg[0m[38;5;63m.[0m */
|
||||||
/* [34;2;3m Robert Anton Wilson[0m */
|
/* [34;2;3m Robert Anton Wilson[0m */
|
||||||
/********************************************/
|
/********************************************/
|
||||||
:EOF
|
:EOF
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
:OUTPUT-FILTER
|
:OUTPUT-FILTER
|
||||||
:EXPECTED
|
:EXPECTED
|
||||||
/********************************************/
|
/********************************************/
|
||||||
/* [38;5;214mT[0m[38;5;208mh[0m[38;5;203me[0m[38;5;203mr[0m[38;5;198me[0m[38;5;199m [0m[38;5;163mi[0m[38;5;164ms[0m[38;5;129m [0m[38;5;129mn[0m[38;5;93mo[0m[38;5;63m [0m[38;5;63mc[0m[38;5;33mo[0m[38;5;39mm[0m[38;5;38mp[0m[38;5;44ml[0m[38;5;49me[0m[38;5;49mt[0m[38;5;48me[0m[38;5;83m [0m[38;5;83mt[0m[38;5;118mh[0m[38;5;154me[0m[38;5;148mo[0m[38;5;184mr[0m[38;5;214my[0m[38;5;214m [0m[38;5;208mo[0m[38;5;203mf[0m[38;5;203m [0m[38;5;198ma[0m[38;5;199mn[0m[38;5;163my[0m[38;5;164mt[0m[38;5;129mh[0m[38;5;129mi[0m[38;5;93mn[0m[38;5;63mg[0m[38;5;63m.[0m[38;5;33m[0m */
|
/* [38;5;214mT[0m[38;5;208mh[0m[38;5;203me[0m[38;5;203mr[0m[38;5;198me[0m[38;5;199m [0m[38;5;163mi[0m[38;5;164ms[0m[38;5;129m [0m[38;5;129mn[0m[38;5;93mo[0m[38;5;63m [0m[38;5;63mc[0m[38;5;33mo[0m[38;5;39mm[0m[38;5;38mp[0m[38;5;44ml[0m[38;5;49me[0m[38;5;49mt[0m[38;5;48me[0m[38;5;83m [0m[38;5;83mt[0m[38;5;118mh[0m[38;5;154me[0m[38;5;148mo[0m[38;5;184mr[0m[38;5;214my[0m[38;5;214m [0m[38;5;208mo[0m[38;5;203mf[0m[38;5;203m [0m[38;5;198ma[0m[38;5;199mn[0m[38;5;163my[0m[38;5;164mt[0m[38;5;129mh[0m[38;5;129mi[0m[38;5;93mn[0m[38;5;63mg[0m[38;5;63m.[0m */
|
||||||
/* [34;2;3m Robert Anton Wilson[0m */
|
/* [34;2;3m Robert Anton Wilson[0m */
|
||||||
/********************************************/
|
/********************************************/
|
||||||
:EOF
|
:EOF
|
||||||
|
Loading…
Reference in New Issue
Block a user