Fix handling of colored whitespace #1

This commit is contained in:
Thomas Jensen 2021-02-02 22:29:13 +01:00
parent eeeacf2949
commit 91d1971cca
No known key found for this signature in database
GPG Key ID: A4ACEE270D0FB7DB
10 changed files with 344 additions and 4 deletions

View File

@ -1364,7 +1364,7 @@ static size_t count_invisible_chars(const uint32_t *s, const size_t buflen, size
ucs4_t c;
const uint32_t *rest = s;
while ((rest = u32_next(&c, rest))) {
if (ansipos == 0 && c == 0x0000001b) {
if (ansipos == 0 && c == char_esc) {
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
ansipos++;
invis++;
@ -1556,8 +1556,7 @@ static int read_all_input (const int use_stdin)
input.lines[i].len - input.indent + 1);
input.lines[i].len -= input.indent;
u32_move(input.lines[i].mbtext, input.lines[i].mbtext + input.indent,
input.lines[i].num_chars - input.indent + 1);
input.lines[i].mbtext = advance32(input.lines[i].mbtext, input.indent);
input.lines[i].num_chars -= input.indent;
}
}

View File

@ -34,6 +34,7 @@
#include "shape.h"
#include "boxes.h"
#include "tools.h"
#include "unicode.h"
#include "generate.h"
@ -993,10 +994,11 @@ int output_box(const sentry_t *thebox)
+ strlen(hfill1)
+ input.lines[ti].num_leading_blanks;
}
uint32_t *mbtext_shifted = advance32(input.lines[ti].mbtext, shift < 0 ? (size_t) (-shift) : 0);
concat_strings(obuf, LINE_MAX_BYTES + 1, 8, restored_indent,
skip_left ? "" : thebox[BLEF].chars[j], hfill1,
ti >= 0 && shift > 0 ? nspaces(shift) : "",
ti >= 0 ? u32_strconv_to_locale(input.lines[ti].mbtext - (shift < 0 ? shift : 0)) : "",
ti >= 0 ? u32_strconv_to_locale(mbtext_shifted) : "",
hfill2, nspaces(input.maxline - input.lines[ti].len - shift),
thebox[BRIG].chars[j]);
}

View File

@ -39,6 +39,7 @@ const ucs4_t char_tab = 0x00000009; /* ucs4_t character '\t' (tab) *
const ucs4_t char_space = 0x00000020; /* ucs4_t character ' ' (space) */
const ucs4_t char_cr = 0x0000000d; /* ucs4_t character '\r' (carriage return) */
const ucs4_t char_newline = 0x0000000a; /* ucs4_t character '\n' (newline) */
const ucs4_t char_esc = 0x0000001b; /* ucs4_t character 0x1b (escape) */
const ucs4_t char_nul = 0x00000000; /* ucs4_t character '\0' (zero) */
@ -93,4 +94,67 @@ int is_ascii_printable(const ucs4_t c)
/*
 * Create an empty UTF-32 string on the heap.
 *
 * The result consists of exactly one zero-initialized uint32_t, which acts as
 * the string terminator. The memory comes from calloc(), so the caller is
 * responsible for releasing it with free(). If calloc() fails, its NULL result
 * is passed through unchanged.
 */
uint32_t *new_empty_string32()
{
    uint32_t *empty = calloc(1, sizeof *empty);
    return empty;
}
/*
 * Skip `offset` visible characters in the UTF-32 string `s` and return the
 * position reached.
 *
 * Characters belonging to an escape sequence (ESC followed by a single char in
 * 0x40..0x5f, or ESC '[' ... terminated by a char in 0x40..0x7e) are treated
 * as invisible and do not count towards `offset`. If the target character is
 * directly preceded by invisible characters, the returned pointer is moved
 * back to the ESC that started the most recent escape sequence, so that e.g. a
 * color sequence applying to the target character is kept.
 *
 * Return value:
 *   - `s` itself when `offset` is 0,
 *   - a pointer into `s` when the string contains more than `offset` visible
 *     characters,
 *   - a string freshly allocated by new_empty_string32() when is_empty(s)
 *     holds, or when the string ends before the target character is reached.
 *
 * Note on ownership: the result is therefore either an alias of `s` or a new
 * heap allocation; the function gives the caller no way to tell the two apart.
 *
 * NOTE(review): when ESC is followed by a char that is neither '[' nor within
 * 0x40..0x5f, that char is counted as visible but `ansipos` stays at 1, so a
 * later '[' would still open a CSI sequence. This is left unchanged here
 * because the caller-side counting code appears to use the same
 * classification - confirm before altering.
 */
uint32_t *advance32(uint32_t *s, const size_t offset)
{
    if (is_empty(s)) {
        return new_empty_string32();
    }
    if (offset == 0) {
        return s;
    }

    ucs4_t c;                          /* the current character we're looking at */
    const uint32_t *cStr = s;          /* pointer to c in s */
    size_t idx = 0;                    /* the count of visible characters */
    const uint32_t *last_esc = NULL;   /* start of the last escape sequence encountered, NULL if none yet */
    const uint32_t *rest = s;          /* pointer to the next character coming up, needed for u32_next() api */
    int visible = 1;                   /* flag indicating whether the previous char was a visible char */
    int ansipos = 0;                   /* progression of ansi sequence: 0 = outside, 1 = after ESC, 2 = in CSI */

    /* the loop ends when u32_next() returns NULL */
    while ((rest = u32_next(&c, rest))) {
        if (ansipos == 0 && c == char_esc) {
            /* Found an ESC char, remember where it is and move 1 forward in the detection of CSI sequences */
            last_esc = cStr;
            visible = 0;
            ansipos++;
        } else if (ansipos == 1 && c == '[') {
            /* Found '[' char after ESC. A CSI sequence has started. */
            ansipos++;
            visible = 0;
        } else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
            /* Found a char designating the end of a two-byte escape sequence */
            visible = 0;
            ansipos = 0;
        } else if (ansipos == 2) {
            /* Inside CSI sequence - keep treating chars as invisible */
            visible = 0;
            /* A char between 0x40 and 0x7e signals the end of a CSI or escape sequence */
            if (c >= 0x40 && c <= 0x7e) {
                ansipos = 0;
            }
        } else {
            /* a visible char */
            if (idx == offset) {
                /*
                 * Target reached. `visible` can only be 0 after an ESC was recorded, so last_esc is
                 * expected to be set here; the NULL check makes that invariant explicit instead of
                 * relying on an uninitialized pointer.
                 */
                if (!visible && last_esc != NULL) {
                    return (uint32_t *) last_esc;
                }
                return (uint32_t *) cStr;
            }
            ++idx;
            visible = 1;
        }
        cStr = rest;
    }

    return new_empty_string32();    /* offset too large, not enough characters in string */
}
/*EOF*/ /* vim: set sw=4: */

View File

@ -31,6 +31,7 @@ extern const ucs4_t char_tab; /* ucs4_t character '\t' (tab) *
extern const ucs4_t char_space; /* ucs4_t character ' ' (space) */
extern const ucs4_t char_cr; /* ucs4_t character '\r' (carriage return) */
extern const ucs4_t char_newline; /* ucs4_t character '\n' (newline) */
extern const ucs4_t char_esc; /* ucs4_t character 0x1b (escape) */
extern const ucs4_t char_nul; /* ucs4_t character '\0' (zero) */
@ -43,6 +44,16 @@ int is_empty(const uint32_t *text);
int is_ascii_printable(const ucs4_t c);
/** Return a freshly allocated empty UTF-32 string. */
uint32_t *new_empty_string32();
/**
 * Determine a new position in the given string s with the given offset of visible characters.
 * If the character right in front of the target character is invisible, then the pointer is moved to the start of
 * the most recent escape sequence. The purpose is to catch any escape sequences which would for example color the
 * character. Characters that are part of an escape sequence are not counted as visible.
 *
 * @param s the UTF-32 string in which to advance
 * @param offset the number of visible characters to skip
 * @return `s` itself if `offset` is 0; otherwise a pointer into `s` at the new position; or a newly allocated
 *      empty string if `s` is empty or has no visible character at position `offset`. Note that the result may
 *      thus be either a pointer into `s` or separately allocated memory.
 */
uint32_t *advance32(uint32_t *s, const size_t offset);
#endif

View File

@ -0,0 +1,11 @@
:ARGS
:INPUT
    There is no complete theory of anything.
     Robert Anton Wilson
:OUTPUT-FILTER
:EXPECTED
/********************************************/
/* There is no complete theory of anything. */
/*  Robert Anton Wilson */
/********************************************/
:EOF

View File

@ -0,0 +1,50 @@
:ARGS
-ac
:INPUT
a
ab
abc
abcd
abcde
abcdef
abcdefg
abcdefgh
常盤
運作能
申見売映
海棋事行変
ä
äb
äbç
äbçd
äbçdé
äbçdéf
äbçdéfg
äbçdéfgh
:OUTPUT-FILTER
:EXPECTED
/**************/
/* a */
/* ab */
/* abc */
/* abcd */
/* abcde */
/* abcdef */
/* abcdefg */
/* abcdefgh */
/* 試 */
/* 常盤 */
/* 運作能 */
/* 申見売映 */
/* 海棋事行変 */
/* ä */
/* äb */
/* äbç */
/* äbçd */
/* äbçdé */
/* äbçdéf */
/* äbçdéfg */
/* äbçdéfgh */
/**************/
:EOF

View File

@ -0,0 +1,50 @@
:ARGS
-al
:INPUT
         a
     ab
     abc
   abcd
   abcde
   abcdef
   abcdefg
   abcdefgh
         試
     常盤
   運作能
   申見売映
   海棋事行変
          ä
     äb
     äbç
   äbçd
   äbçdé
   äbçdéf
   äbçdéfg
   äbçdéfgh
:OUTPUT-FILTER
:EXPECTED
/**************/
/* a */
/* ab */
/* abc */
/* abcd */
/* abcde */
/* abcdef */
/* abcdefg */
/* abcdefgh */
/* 試 */
/* 常盤 */
/* 運作能 */
/* 申見売映 */
/* 海棋事行変 */
/* ä */
/* äb */
/* äbç */
/* äbçd */
/* äbçdé */
/* äbçdéf */
/* äbçdéfg */
/* äbçdéfgh */
/**************/
:EOF

View File

@ -0,0 +1,50 @@
:ARGS
-ac
:INPUT
         a
     ab
     abc
   abcd
   abcde
   abcdef
   abcdefg
   abcdefgh
         試
     常盤
   運作能
   申見売映
   海棋事行変
          ä
     äb
     äbç
   äbçd
   äbçdé
   äbçdéf
   äbçdéfg
   äbçdéfgh
:OUTPUT-FILTER
:EXPECTED
/**************/
/*     a */
/*   ab */
/*   abc */
/* abcd */
/* abcde */
/* abcdef */
/* abcdefg */
/* abcdefgh */
/*     試 */
/*   常盤 */
/* 運作能 */
/* 申見売映 */
/* 海棋事行変 */
/*     ä */
/*   äb */
/*   äbç */
/* äbçd */
/* äbçdé */
/* äbçdéf */
/* äbçdéfg */
/* äbçdéfgh */
/**************/
:EOF

View File

@ -0,0 +1,50 @@
:ARGS
-ar
:INPUT
         a
     ab
     abc
   abcd
   abcde
   abcdef
   abcdefg
   abcdefgh
         試
     常盤
   運作能
   申見売映
   海棋事行変
          ä
     äb
     äbç
   äbçd
   äbçdé
   äbçdéf
   äbçdéfg
   äbçdéfgh
:OUTPUT-FILTER
:EXPECTED
/**************/
/*       a */
/*   ab */
/*   abc */
/* abcd */
/* abcde */
/* abcdef */
/* abcdefg */
/* abcdefgh */
/*       試 */
/*   常盤 */
/* 運作能 */
/* 申見売映 */
/* 海棋事行変 */
/*        ä */
/*   äb */
/*   äbç */
/* äbçd */
/* äbçdé */
/* äbçdéf */
/* äbçdéfg */
/* äbçdéfgh */
/**************/
:EOF

View File

@ -0,0 +1,53 @@
:ARGS
-ahrvbjr -ph3 -sx26
:INPUT
         a
     ab
     abc
   abcd
   abcde
   abcdef
   abcdefg
   abcdefgh
         試
     常盤
   運作能
   申見売映
   海棋事行変
          ä
     äb
     äbç
   äbçd
   äbçdé
   äbçdéf
   äbçdéfg
   äbçdéfgh
:OUTPUT-FILTER
:EXPECTED
/******************/
/* */
/* */
/* */
/*       a */
/*   ab */
/*   abc */
/* abcd */
/* abcde */
/* abcdef */
/* abcdefg */
/* abcdefgh */
/*       試 */
/*   常盤 */
/* 運作能 */
/* 申見売映 */
/* 海棋事行変 */
/*        ä */
/*   äb */
/*   äbç */
/* äbçd */
/* äbçdé */
/* äbçdéf */
/* äbçdéfg */
/* äbçdéfgh */
/******************/
:EOF