Switch input structure entirely to bxstring

This commit is contained in:
Thomas Jensen 2023-05-24 21:00:49 +02:00
parent 45ce5e6762
commit a759026790
No known key found for this signature in database
GPG Key ID: A4ACEE270D0FB7DB
16 changed files with 539 additions and 172 deletions

View File

@ -20,7 +20,7 @@
#ifndef BOXES_H
#define BOXES_H
#define DEBUG 1
/* #define DEBUG 1 */
/* #define REGEXP_DEBUG 1 */
/* #define PARSER_DEBUG 1 */
/* #define LEXER_DEBUG 1 */
@ -146,24 +146,16 @@ extern int color_output_enabled;
typedef struct {
size_t len; /* length of visible text in columns (visible character positions in a text terminal), which is the same as the length of the 'text' field */
char *text; /* ASCII line content, tabs expanded, ansi escapes removed, multi-byte chars replaced with one or more 'x' */
size_t invis; /* number of invisble columns/characters (part of an ansi sequence) */
uint32_t *mbtext; /* multi-byte (original) line content, tabs expanded. We use UTF-32 in order to enable pointer arithmetic. */
size_t num_chars; /* total number of characters in mbtext, visible + invisible */
uint32_t *mbtext_org; /* mbtext as originally allocated, so that we can free it again */
bxstr_t *text; /* text content of the line as a boxes string */
size_t *tabpos; /* tab positions in expanded work strings, or NULL if not needed */
size_t tabpos_len; /* number of tabs in a line */
size_t *posmap; /* for each character in `text`, position of corresponding char in `mbtext`. Needed for box removal. */
} line_t;
typedef struct {
line_t *lines;
size_t num_lines; /* number of entries in input */
size_t maxline; /* length of longest input line */
size_t indent; /* number of leading spaces found */
size_t maxline; /* length in columns of longest input line */
size_t indent; /* common number of leading spaces found in all input lines */
int final_newline; /* true if the last line of input ends with newline */
} input_t;
@ -172,4 +164,4 @@ extern input_t input;
#endif /* BOXES_H */
/*EOF*/ /* vim: set cindent sw=4: */
/* vim: set cindent sw=4: */

View File

@ -181,6 +181,13 @@ bxstr_t *bxs_from_unicode(uint32_t *pInput)
/**
 * Create a new boxes string without any content.
 * This is a thin convenience wrapper which delegates to `bxs_from_ascii("")` and hands its result to the caller
 * unchanged.
 * @return the string created by `bxs_from_ascii("")`
 */
bxstr_t *bxs_new_empty_string(void)
{
    return bxs_from_ascii("");
}
bxstr_t *bxs_strdup(bxstr_t *pString)
{
if (pString == NULL) {
@ -329,6 +336,36 @@ uint32_t *bxs_strchr(bxstr_t *pString, ucs4_t c, int *cursor)
/**
 * Build a new string from `pString` which lacks the first `n` visible characters. `pString` itself is only read.
 * @param pString the string to shorten; may be NULL
 * @param n number of visible characters to drop from the front
 * @return NULL if `pString` is NULL; a new empty string if `n` is at least the number of visible characters;
 *      a duplicate of `pString` if `n` is zero; otherwise a new string created from the remainder
 */
bxstr_t *bxs_cut_front(bxstr_t *pString, size_t n)
{
    bxstr_t *result = NULL;

    if (pString != NULL) {
        if (n >= pString->num_chars_visible) {
            /* nothing visible would remain */
            result = bxs_new_empty_string();
        }
        else if (n == 0) {
            /* nothing to cut, but the caller still gets a string of its own */
            result = bxs_strdup(pString);
        }
        else {
            /* first_char[n] is the offset into memory at which the remainder starts */
            const size_t offset = pString->first_char[n];
            result = bxs_from_unicode(pString->memory + offset);
        }
    }
    return result;
}
/**
 * Look up the position recorded in `first_char` for the visible character with index `n` and return it as a
 * pointer into the string's `memory`. No memory is allocated.
 * @param pString the string to use; may be NULL
 * @param n zero-based index of the visible character
 * @return NULL if `pString` is NULL, else a pointer into `pString->memory`. When `n` is out of range, the entry at
 *      index `num_chars_visible` is used, which points to the NUL terminator.
 */
uint32_t *bxs_first_char_ptr(bxstr_t *pString, size_t n)
{
    if (pString == NULL) {
        return NULL;
    }

    size_t idx = n;
    if (idx >= pString->num_chars_visible) {
        idx = pString->num_chars_visible; /* clamp to the entry for the NUL terminator */
    }
    return pString->memory + pString->first_char[idx];
}
bxstr_t *bxs_trim(bxstr_t *pString)
{
if (pString == NULL) {
@ -338,7 +375,7 @@ bxstr_t *bxs_trim(bxstr_t *pString)
return bxs_strdup(pString);
}
if (pString->indent + pString->trailing == pString->num_chars_visible) {
return bxs_from_ascii("");
return bxs_new_empty_string();
}
uint32_t *e = u32_strdup(pString->memory);
@ -369,6 +406,37 @@ bxstr_t *bxs_rtrim(bxstr_t *pString)
/**
 * Append `n` spaces to the end of the given string, modifying it in place.
 *
 * All four buffers of the string (`memory`, `ascii`, `first_char`, `visible_char`) are enlarged first. Only when
 * every enlargement has succeeded is any content written or any counter changed. If an allocation fails, the
 * string keeps its previous content and counters (some of its buffers may merely have grown), so it remains valid.
 *
 * @param pString the string to modify; NULL is ignored
 * @param n the number of spaces to add; 0 is a no-op
 */
void bxs_append_spaces(bxstr_t *pString, size_t n)
{
    if (pString == NULL || n == 0) {
        return;
    }

    const size_t old_chars = pString->num_chars;
    const size_t old_visible = pString->num_chars_visible;
    const size_t old_columns = pString->num_columns;

    /*
     * Never assign the result of realloc() directly to the only pointer we have: on failure that would lose the
     * original buffer and lead to a NULL dereference below. A successfully grown buffer still holds the old
     * content, so storing it right away keeps the string consistent even if a later allocation fails.
     */
    uint32_t *new_memory = (uint32_t *) realloc(pString->memory, (old_chars + n + 1) * sizeof(uint32_t));
    if (new_memory == NULL) {
        return;
    }
    pString->memory = new_memory;

    char *new_ascii = (char *) realloc(pString->ascii, (old_columns + n + 1) * sizeof(char));
    if (new_ascii == NULL) {
        return;
    }
    pString->ascii = new_ascii;

    size_t *new_first_char = (size_t *) realloc(pString->first_char, (old_visible + n + 1) * sizeof(size_t));
    if (new_first_char == NULL) {
        return;
    }
    pString->first_char = new_first_char;

    size_t *new_visible_char = (size_t *) realloc(pString->visible_char, (old_visible + n + 1) * sizeof(size_t));
    if (new_visible_char == NULL) {
        return;
    }
    pString->visible_char = new_visible_char;

    /* the spaces overwrite the old terminator; a new terminator follows them */
    u32_set(pString->memory + old_chars, char_space, n);
    set_char_at(pString->memory, old_chars + n, char_nul);

    memset(pString->ascii + old_columns, ' ', n);
    pString->ascii[old_columns + n] = '\0';

    /* `i <= n` on purpose: the last iteration writes the index entry of the new terminator */
    for (size_t i = 0; i <= n; i++) {
        pString->first_char[old_visible + i] = old_chars + i;
        pString->visible_char[old_visible + i] = old_chars + i;
    }

    pString->num_chars += n;
    pString->num_chars_visible += n;
    pString->num_columns += n;
    pString->trailing += n;
}
char *bxs_to_output(bxstr_t *pString)
{
if (pString == NULL) {
@ -397,6 +465,22 @@ int bxs_is_empty(bxstr_t *pString)
/**
 * Check whether a string is blank, i.e. whether it is empty according to `bxs_is_empty()` or none of its visible
 * characters is anything other than a tab, a carriage return, or a character for which `uc_is_blank()` is true.
 * @param pString the string to check
 * @return 1 if blank, 0 as soon as a visible character of any other kind is found
 */
int bxs_is_blank(bxstr_t *pString)
{
    if (bxs_is_empty(pString)) {
        return 1;
    }

    size_t pos = 0;
    while (pos < pString->num_chars_visible) {
        /* visible_char[] maps the visible character's index to its offset in memory */
        const ucs4_t ch = pString->memory[pString->visible_char[pos]];
        const int is_whitespace = (ch == char_tab || ch == char_cr || uc_is_blank(ch));
        if (!is_whitespace) {
            return 0;
        }
        ++pos;
    }
    return 1;
}
int bxs_is_visible_char(bxstr_t *pString, size_t idx)
{
int result = 0;

View File

@ -83,6 +83,13 @@ bxstr_t *bxs_from_ascii(char *pAscii);
bxstr_t *bxs_from_unicode(uint32_t *pInput);
/**
* Return a freshly allocated empty string.
* @return a new empty string
*/
bxstr_t *bxs_new_empty_string();
/**
* Create an exact copy of a string.
* @param pString the string to copy
@ -134,6 +141,26 @@ bxstr_t *bxs_concat(size_t count, ...);
uint32_t *bxs_strchr(bxstr_t *pString, ucs4_t c, int *cursor);
/**
* Remove the first `n` visible characters from the given string. Invisible characters are additionally removed where
* they are associated with the removed visible characters.
* @param pString the string to shorten
* @param n the number of visible characters to cut from the front of the string. If this is greater than or equal to
* the total number of visible characters in the string, an empty string is produced
* @return a new, shortened string, or NULL only if `pString` was NULL
*/
bxstr_t *bxs_cut_front(bxstr_t *pString, size_t n);
/**
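* Return a pointer into the given string's `memory` at the position recorded in `first_char` for the visible
* character with index `n`.
* @param pString the string to use
* @param n the index of the visible character (zero-based); if it is out of range, the returned pointer points to
* the string's NUL terminator
* @return a pointer into existing memory, or NULL if `pString` was NULL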
*/
uint32_t *bxs_first_char_ptr(bxstr_t *pString, size_t n);
/**
* Create a new string from which all leading and trailing whitespace have been removed.
* @param pString the string to trim, which will not be modified
@ -150,6 +177,14 @@ bxstr_t *bxs_trim(bxstr_t *pString);
bxstr_t *bxs_rtrim(bxstr_t *pString);
/**
* Append `n` spaces to the end of the given string. The given string is *modified* accordingly.
* @param pString the string to modify
* @param n the number of spaces to add
*/
void bxs_append_spaces(bxstr_t *pString, size_t n);
/**
* Convert the string into boxes' output encoding for proper printing on stdout.
* @param pString the string to convert
@ -159,13 +194,22 @@ char *bxs_to_output(bxstr_t *pString);
/**
* Determine if the given string is empty.
* Determine if the given string is empty, which means it does not contain any characters at all (neither visible nor
* invisible).
* @param pString the string to check (may be NULL, which counts as empty)
* @return 1 for empty, 0 for not empty
*/
int bxs_is_empty(bxstr_t *pString);
/**
* Determine if the line is either empty or its visible characters are all whitespace.
* @param pString the string to check (may be NULL, which counts as blank)
* @return 1 for blank, 0 for not blank
*/
int bxs_is_blank(bxstr_t *pString);
/**
* Determine if the character at position `idx` in the given `pString` is a visible character. If `idx` is out of
* bounds, this function will return 0.

View File

@ -789,14 +789,16 @@ static int justify_line(line_t *line, int skew)
}
#if defined(DEBUG) || 0
fprintf (stderr, "justify_line(%c): Input: real: (%02d) \"%s\", text: (%02d) \"%s\", invisible=%d, skew=%d",
opt.justify ? opt.justify : '0', (int) line->num_chars, u32_strconv_to_output(line->mbtext),
(int) line->len, line->text, (int) line->invis, skew);
char *outtext = bxs_to_output(line->text);
fprintf(stderr, "justify_line(%c): Input: real: (%02d) \"%s\", text: (%02d) \"%s\", invisible=%d, skew=%d",
opt.justify ? opt.justify : '0', (int) line->text->num_chars, outtext, (int) line->text->num_columns,
line->text->ascii, (int) line->text->num_chars_invisible, skew);
bxs_free(outtext);
#endif
int result = 0;
size_t initial_space_size = strspn(line->text, " \t");
size_t newlen = line->len - initial_space_size;
size_t initial_space_size = line->text->indent;
size_t newlen = line->text->num_columns - initial_space_size;
size_t shift;
switch (opt.justify) {
@ -1043,16 +1045,17 @@ int output_box(const sentry_t *thebox)
if (ti < (long) input.num_lines) { /* box content (lines) */
int shift = justify_line(input.lines + ti, hpr - hpl);
restored_indent = tabbify_indent(ti, indentspc, indentspclen);
uint32_t *mbtext_shifted = advance32(input.lines[ti].mbtext, shift < 0 ? (size_t) (-shift) : 0);
bxstr_t *text_shifted = bxs_cut_front(input.lines[ti].text, shift < 0 ? (size_t) (-shift) : 0);
uint32_t *spc1 = empty_string;
if (ti >= 0 && shift > 0) {
spc1 = u32_nspaces(shift);
}
uint32_t *spc2 = u32_nspaces(input.maxline - input.lines[ti].len - shift);
uint32_t *spc2 = u32_nspaces(input.maxline - input.lines[ti].text->num_columns - shift);
obuf = bxs_concat(8, restored_indent,
skip_left ? empty_string : thebox[BLEF].mbcs[j]->memory, hfill1, spc1,
ti >= 0 ? mbtext_shifted : empty_string, hfill2, spc2,
ti >= 0 ? text_shifted->memory : empty_string, hfill2, spc2,
thebox[BRIG].mbcs[j]->memory);
bxs_free(text_shifted);
if (spc1 != empty_string) {
BFREE(spc1);
}

View File

@ -18,16 +18,17 @@
*/
#include "config.h"
#include <errno.h>
#include <string.h>
#include <unistr.h>
#include <unitypes.h>
#include "boxes.h"
#include "input.h"
#include "regulex.h"
#include "tools.h"
#include "unicode.h"
#include "input.h"
@ -61,8 +62,8 @@ static int has_linebreak(const uint32_t *s, const int len)
*/
static int get_indent(const line_t *lines, const size_t lines_size)
{
int res = LINE_MAX_BYTES; /* result */
int nonblank = 0; /* true if one non-blank line found */
int res = LINE_MAX_BYTES; /* result */
int nonblank = 0; /* true if one non-blank line found */
if (lines == NULL) {
fprintf(stderr, "%s: internal error\n", PROJECT);
@ -73,9 +74,9 @@ static int get_indent(const line_t *lines, const size_t lines_size)
}
for (size_t j = 0; j < lines_size; ++j) {
if (lines[j].len > 0) {
if (lines[j].text->num_columns > 0) {
nonblank = 1;
size_t ispc = strspn(lines[j].text, " ");
size_t ispc = lines[j].text->indent;
if ((int) ispc < res) {
res = ispc;
}
@ -83,9 +84,10 @@ static int get_indent(const line_t *lines, const size_t lines_size)
}
if (nonblank) {
return res; /* success */
} else {
return 0; /* success, but only blank lines */
return res; /* success */
}
else {
return 0; /* success, but only blank lines */
}
}
@ -140,28 +142,34 @@ int apply_substitutions(input_t *result, const int mode)
opt.design->current_rule = rules;
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
#ifdef REGEXP_DEBUG
fprintf (stderr, "regex_replace(0x%p, \"%s\", \"%s\", %d, \'%c\') == ",
rules[j].prog, rules[j].repstr, u32_strconv_to_output(result->lines[k].mbtext),
(int) result->lines[k].num_chars, rules[j].mode);
char *outtext = bxs_to_output(result->lines[k].text);
char *outrepstr = bxs_to_output(rules[j].repstr);
fprintf(stderr, "regex_replace(0x%p, \"%s\", \"%s\", %d, \'%c\') == ", rules[j].prog, outrepstr,
outtext, (int) result->lines[k].text->num_chars, rules[j].mode);
BFREE(outtext);
BFREE(outrepstr);
#endif
uint32_t *newtext = u32_regex_replace(rules[j].prog, rules[j].repstr->memory,
result->lines[k].mbtext, result->lines[k].num_chars, rules[j].mode == 'g');
uint32_t *newtext = u32_regex_replace(rules[j].prog, rules[j].repstr->memory, result->lines[k].text->memory,
result->lines[k].text->num_chars, rules[j].mode == 'g');
#ifdef REGEXP_DEBUG
fprintf (stderr, "\"%s\"\n", newtext ? u32_strconv_to_output(newtext) : "NULL");
char *outnewtext = newtext ? u32_strconv_to_output(newtext) : strdup("NULL");
fprintf(stderr, "\"%s\"\n", outnewtext);
BFREE(outnewtext);
#endif
if (newtext == NULL) {
return 1;
}
BFREE(result->lines[k].mbtext_org); /* original address allocated for mbtext */
result->lines[k].mbtext = newtext;
result->lines[k].mbtext_org = newtext;
bxs_free(result->lines[k].text);
result->lines[k].text = bxs_from_unicode(newtext);
analyze_line_ascii(result, result->lines + k);
analyze_line_ascii(result, result->lines + k); /* update maxline value */
#ifdef REGEXP_DEBUG
fprintf (stderr, "result->lines[%d] == {%d, \"%s\"}\n", (int) k,
(int) result->lines[k].num_chars, u32_strconv_to_output(result->lines[k].mbtext));
char *outtext2 = bxs_to_output(result->lines[k].text);
fprintf(stderr, "result->lines[%d] == {%d, \"%s\"}\n", (int) k, (int) result->lines[k].text->num_chars,
outtext2);
BFREE(outtext2);
#endif
}
opt.design->current_rule = NULL;
@ -176,7 +184,8 @@ int apply_substitutions(input_t *result, const int mode)
rc = get_indent(result->lines, result->num_lines);
if (rc >= 0) {
result->indent = (size_t) rc;
} else {
}
else {
return 4;
}
}
@ -209,20 +218,19 @@ static void trim_trailing_ws_carefully(uint32_t *mbtemp, size_t *len_chars)
input_t *read_all_input()
{
char buf[LINE_MAX_BYTES + 3]; /* static input buffer incl. newline + zero terminator */
size_t input_size = 0; /* number of elements allocated */
char buf[LINE_MAX_BYTES + 3]; /* static input buffer incl. newline + zero terminator */
size_t input_size = 0; /* number of elements allocated */
input_t *result = (input_t *) calloc(1, sizeof(input_t));
result->indent = LINE_MAX_BYTES;
while (fgets(buf, LINE_MAX_BYTES + 2, opt.infile))
{
while (fgets(buf, LINE_MAX_BYTES + 2, opt.infile)) {
if (result->num_lines % 100 == 0) {
input_size += 100;
line_t *tmp = (line_t *) realloc(result->lines, input_size * sizeof(line_t));
if (tmp == NULL) {
perror(PROJECT);
BFREE (result->lines);
BFREE(result->lines);
return NULL;
}
result->lines = tmp;
@ -240,30 +248,27 @@ input_t *read_all_input()
*/
if (len_chars > 0) {
uint32_t *temp = NULL;
len_chars = expand_tabs_into(mbtemp, opt.tabstop, &temp,
&(result->lines[result->num_lines].tabpos),
&(result->lines[result->num_lines].tabpos_len));
len_chars = expand_tabs_into(mbtemp, opt.tabstop, &temp, &(result->lines[result->num_lines].tabpos),
&(result->lines[result->num_lines].tabpos_len));
if (len_chars == 0) {
perror(PROJECT);
BFREE (result->lines);
BFREE(result->lines);
return NULL;
}
result->lines[result->num_lines].mbtext = temp;
BFREE(mbtemp);
temp = NULL;
result->lines[result->num_lines].text = bxs_from_unicode(temp);
BFREE(temp);
}
else {
result->lines[result->num_lines].mbtext = mbtemp;
result->lines[result->num_lines].text = bxs_new_empty_string();
}
result->lines[result->num_lines].mbtext_org = result->lines[result->num_lines].mbtext;
result->lines[result->num_lines].num_chars = len_chars;
BFREE(mbtemp);
++result->num_lines;
}
if (ferror(stdin)) {
perror(PROJECT);
BFREE (result->lines);
BFREE(result->lines);
return NULL;
}
return result;
@ -296,7 +301,8 @@ int analyze_input(input_t *result)
int rc = get_indent(result->lines, result->num_lines);
if (rc >= 0) {
result->indent = (size_t) rc;
} else {
}
else {
return 1;
}
@ -306,21 +312,23 @@ int analyze_input(input_t *result)
*/
if (opt.design->indentmode != 't' && opt.r == 0) {
for (size_t i = 0; i < result->num_lines; ++i) {
#ifdef DEBUG
fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i,
u32_strconv_to_output(result->lines[i].mbtext), (int) result->lines[i].num_chars);
#endif
if (result->lines[i].num_chars >= result->indent) {
memmove(result->lines[i].text, result->lines[i].text + result->indent,
result->lines[i].len - result->indent + 1);
result->lines[i].len -= result->indent;
result->lines[i].mbtext = advance32(result->lines[i].mbtext, result->indent);
result->lines[i].num_chars -= result->indent;
if (result->lines[i].text->num_columns >= result->indent) {
/*
* We should really remove *columns* rather than *characters*, but since the removed characters are
* spaces (indentation), and there are no double-wide spaces in Unicode, both actions are equivalent.
*/
bxstr_t *unindented = bxs_cut_front(result->lines[i].text, result->indent);
bxs_free(result->lines[i].text);
result->lines[i].text = unindented;
}
#ifdef DEBUG
fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i,
u32_strconv_to_output(result->lines[i].mbtext), (int) result->lines[i].num_chars);
char *outtext = bxs_to_output(result->lines[i].text);
fprintf(stderr, "%2d: text = \"%s\" (%d chars, %d visible, %d invisible, %d columns)\n"
" ascii = \"%s\"\n", (int) i, outtext,
(int) result->lines[i].text->num_chars, (int) result->lines[i].text->num_chars_visible,
(int) result->lines[i].text->num_chars_invisible, (int) result->lines[i].text->num_columns,
result->lines[i].text->ascii);
BFREE(outtext);
#endif
}
result->maxline -= result->indent;
@ -336,10 +344,11 @@ int analyze_input(input_t *result)
}
#ifdef DEBUG
fprintf (stderr, "Effective encoding: %s\n", encoding);
fprintf(stderr, "Effective encoding: %s\n", encoding);
print_input_lines(NULL);
#endif
return 0;
}
/*EOF*/ /* vim: set sw=4: */
/* vim: set sw=4: */

View File

@ -53,11 +53,10 @@ static int best_match(const line_t *line,
size_t k; /* line counter within shape */
int w; /* shape counter */
sentry_t *cs; /* current shape */
char *s; /* duplicate of current shape part */
char *p; /* position found by strstr */
uint32_t *s; /* duplicate of current shape part */
uint32_t *p; /* position found by u32_strstr() */
size_t cq; /* current quality */
char *q; /* space check rover */
line_t chkline; /* for calls to empty_line() */
uint32_t *q; /* space check rover */
size_t quality;
*ws = *we = *es = *ee = NULL;
@ -87,13 +86,11 @@ static int best_match(const line_t *line,
cs = opt.design->shape + west_side[--w];
}
chkline.text = cs->chars[k];
chkline.len = cs->width;
if (empty_line(&chkline) && !(quality == 0 && j == numw - 1)) {
if (bxs_is_blank(cs->mbcs[k]) && !(quality == 0 && j == numw - 1)) {
continue;
}
s = (char *) strdup(cs->chars[k]);
s = u32_strdup(cs->mbcs[k]->memory);
if (s == NULL) {
perror(PROJECT);
return -1;
@ -101,10 +98,10 @@ static int best_match(const line_t *line,
cq = cs->width;
do {
p = strstr(line->text, s);
p = u32_strstr(line->text->memory, s);
if (p) {
q = p - 1;
while (q >= line->text) {
while (q >= line->text->memory) {
if (*q-- != ' ') {
p = NULL;
break;
@ -115,10 +112,10 @@ static int best_match(const line_t *line,
}
}
if (!p && cq) {
if (*s == ' ') {
memmove(s, s + 1, cq--);
} else if (s[cq - 1] == ' ') {
s[--cq] = '\0';
if (*s == char_space) {
u32_move(s, s + 1, cq--);
} else if (s[cq - 1] == char_space) {
s[--cq] = char_nul;
} else {
cq = 0;
break;
@ -162,13 +159,11 @@ static int best_match(const line_t *line,
BFREE(mbcs_temp);
#endif
chkline.text = cs->chars[k];
chkline.len = cs->width;
if (empty_line(&chkline)) {
if (bxs_is_blank(cs->mbcs[k])) {
continue;
}
s = (char *) strdup(cs->chars[k]);
s = u32_strdup(cs->mbcs[k]->memory);
if (s == NULL) {
perror(PROJECT);
return -1;
@ -176,7 +171,7 @@ static int best_match(const line_t *line,
cq = cs->width;
do {
p = my_strnrstr(line->text, s, cq, 0);
p = u32_strnrstr(line->text->memory, s, cq, 0);
if (p) {
q = p + cq;
while (*q) {
@ -191,7 +186,7 @@ static int best_match(const line_t *line,
}
if (!p && cq) {
if (*s == ' ') {
memmove(s, s + 1, cq--);
u32_move(s, s + 1, cq--);
} else if (s[cq - 1] == ' ') {
s[--cq] = '\0';
} else {
@ -558,7 +553,7 @@ static design_t *detect_design()
break;
}
for (j = 0; j < d->shape[scnt].height; ++j) {
shpln.text = d->shape[scnt].chars[j]; // TODO HERE
shpln.text = d->shape[scnt].chars[j]; // TODO
shpln.len = d->shape[scnt].width;
if (empty_line(&shpln)) {
continue;
@ -803,11 +798,7 @@ static void add_spaces_to_line(line_t* line, const size_t n)
if (n == 0) {
return;
}
line->mbtext_org = (uint32_t *) realloc(line->mbtext_org, (line->num_chars + n + 1) * sizeof(uint32_t));
line->mbtext = line->mbtext_org;
u32_set(line->mbtext + line->num_chars, char_space, n);
set_char_at(line->mbtext, line->num_chars + n, char_nul);
line->num_chars += n;
bxs_append_spaces(line->text, n);
analyze_line_ascii(&input, line);
}
@ -860,7 +851,7 @@ int remove_box()
*/
const size_t normalized_len = input.maxline + opt.design->shape[NE].width;
for (j = 0; j < input.num_lines; ++j) {
add_spaces_to_line(input.lines + j, normalized_len - input.lines[j].len);
add_spaces_to_line(input.lines + j, normalized_len - input.lines[j].text->num_columns);
}
#ifdef DEBUG
fprintf(stderr, "Normalized all lines to %d columns (maxline + east width).\n", (int) input.maxline);
@ -930,7 +921,7 @@ int remove_box()
fprintf(stderr, "\033[00;33;01mline %2d: no side match\033[00m\n", (int) j);
#endif
}
else {
else { // TODO HERE
#ifdef DEBUG
fprintf(stderr, "\033[00;33;01mline %2d: west: %d (\'%c\') to %d (\'%c\') [len %d]; "
"east: %d (\'%c\') to %d (\'%c\') [len %d]\033[00m\n", (int) j,
@ -1070,15 +1061,16 @@ void output_input(const int trim_only)
if (input.lines[j].text == NULL) {
continue;
}
btrim(input.lines[j].text, &(input.lines[j].len));
btrim32(input.lines[j].mbtext, &(input.lines[j].num_chars));
bxstr_t *temp = bxs_rtrim(input.lines[j].text);
bxs_free(input.lines[j].text);
input.lines[j].text = temp;
if (trim_only) {
continue;
}
char *indentspc = NULL;
if (opt.tabexp == 'u') {
indent = strspn(input.lines[j].text, " ");
indent = input.lines[j].text->indent;
ntabs = indent / opt.tabstop;
nspcs = indent % opt.tabstop;
indentspc = (char *) malloc(ntabs + nspcs + 1);
@ -1101,9 +1093,11 @@ void output_input(const int trim_only)
indent = 0;
}
fprintf(opt.outfile, "%s%s%s", indentspc, u32_strconv_to_output(advance32(input.lines[j].mbtext, indent)),
char *outtext = u32_strconv_to_output(bxs_first_char_ptr(input.lines[j].text, indent));
fprintf(opt.outfile, "%s%s%s", indentspc, outtext,
(input.final_newline || j < input.num_lines - 1 ? opt.eol : ""));
BFREE (indentspc);
BFREE(outtext);
BFREE(indentspc);
}
}

View File

@ -273,54 +273,17 @@ char *repeat(char *s, size_t count)
int empty_line(const line_t *line)
/*
* Return true if line is empty.
*
* Empty lines either consist entirely of whitespace or don't exist.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
{
char *p;
size_t j;
if (!line) {
return 1;
}
if (line->text == NULL || line->len <= 0) {
return 1;
}
for (p = line->text, j = 0; *p && j < line->len; ++j, ++p) {
if (*p != ' ' && *p != '\t' && *p != '\r') {
return 0;
}
}
return 1;
return bxs_is_blank(line->text);
}
size_t expand_tabs_into(const uint32_t *input_buffer, const int tabstop, uint32_t **text, size_t **tabpos,
size_t *tabpos_len)
/*
* Expand tab chars in input_buffer and store result in text.
*
* input_buffer Line of text with tab chars
* tabstop tab stop distance
* text address of the pointer that will take the result
* tabpos array of ints giving the positions of the first
* space of an expanded tab in the text result buffer
* tabpos_len number of tabs recorded in tabpos
*
* Memory will be allocated for text and tabpos.
* Should only be called for lines of length > 0;
*
* RETURNS: Success: Length of the result line in characters (> 0)
* Error: 0 (e.g. out of memory)
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
{
static uint32_t temp[LINE_MAX_BYTES + 100]; /* work string */
size_t io; /* character position in work string */
@ -574,9 +537,11 @@ void print_input_lines(const char *heading)
fprintf(stderr, "Input Lines%s:\n", heading != NULL ? heading : "");
fprintf(stderr, " [num_chars] \"real text\" [num_cols] \"ascii_text\"\n");
for (size_t i = 0; i < input.num_lines; ++i) {
char *outtext = bxs_to_output(input.lines[i].text);
fprintf(stderr, "%4d [%02d] \"%s\" [%02d] \"%s\"", (int) i,
(int) input.lines[i].num_chars, u32_strconv_to_output(input.lines[i].mbtext),
(int) input.lines[i].len, input.lines[i].text);
(int) input.lines[i].text->num_chars, outtext,
(int) input.lines[i].text->num_columns, input.lines[i].text->ascii);
bxs_free(outtext);
fprintf(stderr, "\tTabs: [");
if (input.lines[i].tabpos != NULL) {
for (size_t j = 0; j < input.lines[i].tabpos_len; ++j) {
@ -587,13 +552,27 @@ void print_input_lines(const char *heading)
}
}
fprintf(stderr, "] (%d)", (int) input.lines[i].tabpos_len);
fprintf(stderr, "\tinvisible=%d\n", (int) input.lines[i].invis);
fprintf(stderr, "\tinvisible=%d\n", (int) input.lines[i].text->num_chars_invisible);
fprintf(stderr, " posmap=");
if (input.lines[i].posmap != NULL) {
fprintf(stderr, " visible_char=");
if (input.lines[i].text->visible_char != NULL) {
fprintf(stderr, "[");
for (size_t j = 0; j < input.lines[i].len; j++) {
fprintf(stderr, "%d%s", (int) input.lines[i].posmap[j], j == (input.lines[i].len - 1) ? "" : ", ");
for (size_t j = 0; j < input.lines[i].text->num_chars_visible; j++) {
fprintf(stderr, "%d%s", (int) input.lines[i].text->visible_char[j],
j == (input.lines[i].text->num_chars_visible - 1) ? "" : ", ");
}
fprintf(stderr, "]\n");
}
else {
fprintf(stderr, "null\n");
}
fprintf(stderr, " first_char=");
if (input.lines[i].text->first_char != NULL) {
fprintf(stderr, "[");
for (size_t j = 0; j < input.lines[i].text->num_chars_visible; j++) {
fprintf(stderr, "%d%s", (int) input.lines[i].text->first_char[j],
j == (input.lines[i].text->num_chars_visible - 1) ? "" : ", ");
}
fprintf(stderr, "]\n");
}
@ -720,21 +699,8 @@ int is_csi_reset(const uint32_t *csi)
void analyze_line_ascii(input_t *input_ptr, line_t *line)
{
size_t num_esc = 0;
char *ascii;
size_t *map;
size_t invis = count_invisible_chars(line->mbtext, &num_esc, &ascii, &(map));
line->invis = invis;
/* u32_strwidth() does not count control characters, i.e. ESC characters, for which we must correct */
line->len = u32_strwidth(line->mbtext, encoding) - invis + num_esc;
line->num_chars = u32_strlen(line->mbtext);
BFREE(line->text);
line->text = ascii;
BFREE(line->posmap);
line->posmap = map;
if (line->len > input_ptr->maxline) {
input_ptr->maxline = line->len;
if (line->text->num_columns > input_ptr->maxline) {
input_ptr->maxline = line->text->num_columns;
}
}

View File

@ -52,9 +52,29 @@ typedef void (*bx_fprintf_t)(FILE *stream, const char *format, ...);
*/
extern bx_fprintf_t bx_fprintf;
/*
* Return true (1) if line is empty.
* Empty lines either consist entirely of whitespace or don't exist.
* @param line the line to check
* @return 1 if line is empty, 0 if it isn't
*/
int empty_line(const line_t *line);
/**
* Expand tab chars in `input_buffer` and store result in `text`.
* Memory will be allocated for text and tabpos.
* Should only be called for lines of length > 0;
*
* @param input_buffer Line of text with tab chars
* @param tabstop tab stop distance as per command line options
* @param text address of the pointer that will take the result of this function
* @param tabpos array of ints giving the positions of the first space of an expanded tab in the text result buffer
* @param tabpos_len number of tabs recorded in tabpos
* @return Success: Length of the result line in characters (> 0);
* Error: 0 (e.g. out of memory)
*/
size_t expand_tabs_into(const uint32_t *input_buffer, const int tabstop, uint32_t **text,
size_t **tabpos, size_t *tabpos_len);

View File

@ -334,4 +334,41 @@ uint32_t *u32_nspaces(const size_t n)
}
/**
 * Search `s1` backwards for the first `s2_len` characters of `s2`.
 *
 * The search starts at the last occurrence of `s2[0]` in `s1` and moves towards the start of `s1` one character at
 * a time. The first `skip` matches encountered on the way are ignored.
 *
 * @param s1 string to search
 * @param s2 string to search for in `s1`
 * @param s2_len number of characters of `s2` that are compared
 * @param skip number of matches to ignore before returning one; negative values are treated as 0
 * @return `s1` if `s2` is empty according to `is_empty()`; otherwise a pointer to the match inside `s1`, or NULL
 *      if `s1` is empty or no (further) match exists
 */
uint32_t *u32_strnrstr(const uint32_t *s1, const uint32_t *s2, const size_t s2_len, int skip)
{
    if (is_empty(s2)) {
        return (uint32_t *) s1;
    }
    if (is_empty(s1)) {
        return NULL;
    }
    if (skip < 0) {
        skip = 0;
    }

    uint32_t *p = u32_strrchr(s1, s2[0]);
    if (p == NULL) {
        return NULL;
    }

    for (;;) {
        if (u32_strncmp(p, s2, s2_len) == 0) {
            if (skip == 0) {
                return p;
            }
            --skip;
        }
        /*
         * Stop *at* the first character. Decrementing once more would form a pointer before the start of `s1`,
         * which is undefined behavior even if that pointer is only used in a comparison.
         */
        if (p == s1) {
            break;
        }
        --p;
    }
    return NULL;
}
/* vim: set cindent sw=4: */

View File

@ -227,6 +227,17 @@ char *to_utf8(uint32_t *src);
uint32_t *u32_nspaces(const size_t n);
/**
* Return pointer to last occurrence of string `s2` in string `s1`.
* @param s1 string to search
* @param s2 string to search for in `s1`
* @param s2_len length in characters of `s2`
* @param skip number of finds to ignore before returning anything
* @return pointer to last occurrence of string `s2` in string `s1`; NULL if not found or error
*/
uint32_t *u32_strnrstr(const uint32_t *s1, const uint32_t *s2, const size_t s2_len, int skip);
#endif
/*EOF*/ /* vim: set cindent sw=4: */
/* vim: set cindent sw=4: */

View File

@ -72,5 +72,5 @@ cmdline_test.o: cmdline_test.c cmdline_test.h boxes.h cmdline.h global_mock.h t
tools_test.o: tools_test.c tools_test.h tools.h unicode.h config.h | check_dir
regulex_test.o: regulex_test.c regulex_test.h boxes.h global_mock.h regulex.h config.h | check_dir
main.o: main.c bxstring_test.h cmdline_test.h global_mock.h tools_test.h regulex_test.h unicode_test.h config.h | check_dir
unicode_test.o: unicode_test.c unicode_test.h boxes.h unicode.h config.h | check_dir
unicode_test.o: unicode_test.c unicode_test.h boxes.h tools.h unicode.h config.h | check_dir
utest_tools.o: utest_tools.c utest_tools.h config.h | check_dir

View File

@ -353,6 +353,32 @@ void test_ansi_unicode_null(void **state)
/* Verify that bxs_new_empty_string() yields a string with no content and all counters at zero. */
void test_bxs_new_empty_string(void **state)
{
    UNUSED(state);

    /* only one element of each index array is compared */
    int first_char_expected[] = {0};
    int visible_char_expected[] = {0};

    bxstr_t *empty = bxs_new_empty_string();

    assert_non_null(empty);
    assert_non_null(empty->memory);
    assert_int_equal(1, is_char_at(empty->memory, 0, char_nul));
    assert_string_equal("", empty->ascii);
    assert_int_equal(0, (int) empty->indent);
    assert_int_equal(0, (int) empty->num_columns);
    assert_int_equal(0, (int) empty->num_chars);
    assert_int_equal(0, (int) empty->num_chars_visible);
    assert_int_equal(0, (int) empty->num_chars_invisible);
    assert_int_equal(0, (int) empty->trailing);
    assert_array_equal(first_char_expected, empty->first_char, 1);
    assert_array_equal(visible_char_expected, empty->visible_char, 1);

    bxs_free(empty);
}
void test_bxs_strdup(void **state)
{
UNUSED(state);
@ -386,6 +412,81 @@ void test_bxs_strdup(void **state)
/*
 * Verify bxs_cut_front() for a NULL argument, for cutting three visible characters off a colored string, and for
 * cutting more visible characters than the string has.
 */
void test_bxs_cut_front(void **state)
{
    UNUSED(state);

    /* NULL in, NULL out */
    bxstr_t *cut = bxs_cut_front(NULL, 1);
    assert_null(cut);

    uint32_t *raw = u32_strconv_from_arg(" x\x1b[38;5;203mx\x1b[0m\x1b[38;5;198mf\x1b[0moo", "ASCII");
    assert_non_null(raw);
    bxstr_t *original = bxs_from_unicode(raw);

    /* cut the first three visible characters */
    int first_char_expected[] = {0, 16, 17, 18};
    int visible_char_expected[] = {11, 16, 17, 18};
    cut = bxs_cut_front(original, 3);

    assert_non_null(cut);
    assert_non_null(cut->memory);
    assert_string_equal("foo", cut->ascii);
    assert_int_equal(0, (int) cut->indent);
    assert_int_equal(3, (int) cut->num_columns);
    assert_int_equal(18, (int) cut->num_chars);
    assert_int_equal(3, (int) cut->num_chars_visible);
    assert_int_equal(15, (int) cut->num_chars_invisible);
    assert_int_equal(0, (int) cut->trailing);
    assert_array_equal(first_char_expected, cut->first_char, 4);
    assert_array_equal(visible_char_expected, cut->visible_char, 4);

    bxs_free(cut);

    /* cut far more than there is */
    cut = bxs_cut_front(original, 1000);

    assert_non_null(cut);
    assert_non_null(cut->memory);
    assert_string_equal("", cut->ascii);
    assert_int_equal(0, (int) cut->indent);
    assert_int_equal(0, (int) cut->num_columns);
    assert_int_equal(0, (int) cut->num_chars);
    assert_int_equal(0, (int) cut->num_chars_visible);
    assert_int_equal(0, (int) cut->num_chars_invisible);
    assert_int_equal(0, (int) cut->trailing);

    bxs_free(cut);
    BFREE(raw);
    bxs_free(original);
}
/* Verify that cutting zero visible characters yields a string with the same content and metadata as the input. */
void test_bxs_cut_front_zero(void **state)
{
    UNUSED(state);

    uint32_t *ustr32 = u32_strconv_from_arg(" x\x1b[38;5;203mx\x1b[0m\x1b[38;5;198mf\x1b[0moo", "ASCII");
    assert_non_null(ustr32);
    bxstr_t *input = bxs_from_unicode(ustr32);

    bxstr_t *actual = bxs_cut_front(input, 0);

    assert_non_null(actual);
    assert_non_null(actual->memory);
    assert_string_equal(" xxfoo", actual->ascii);
    assert_int_equal(1, (int) actual->indent);
    assert_int_equal(6, (int) actual->num_columns);
    assert_int_equal(36, (int) actual->num_chars);
    assert_int_equal(6, (int) actual->num_chars_visible);
    assert_int_equal(30, (int) actual->num_chars_invisible);
    assert_int_equal(0, (int) actual->trailing);

    int expected_firstchar_idx[] = {0, 1, 2, 18, 34, 35, 36};
    assert_array_equal(expected_firstchar_idx, actual->first_char, 7);
    int expected_vischar_idx[] = {0, 1, 13, 29, 34, 35, 36};
    assert_array_equal(expected_vischar_idx, actual->visible_char, 7);

    bxs_free(actual);
    BFREE(ustr32);
    bxs_free(input);
}
void test_bxs_trimdup_null(void **state)
{
UNUSED(state);
@ -876,6 +977,37 @@ void test_bxs_rtrim_empty(void **state)
/*
 * bxs_append_spaces(): appending to NULL, appending zero spaces, and appending
 * three spaces to a string that contains ANSI escape sequences.
 *
 * Only the three-space call is expected to change the string: the final
 * expectations show exactly 3 trailing blanks and 6 columns in total.
 */
void test_bxs_append_spaces(void **state)
{
    UNUSED(state);

    /* No assertion follows this call; presumably it only has to be survivable. */
    bxs_append_spaces(NULL, 2);

    uint32_t *ustr32 = u32_strconv_from_arg("X\x1b[38;5;203mY\x1b[0mZ", "UTF-8");
    assert_non_null(ustr32);
    bxstr_t *bxstr = bxs_from_unicode(ustr32);

    bxs_append_spaces(bxstr, 0);
    bxs_append_spaces(bxstr, 3);

    assert_non_null(bxstr->memory);
    /*
     * "XYZ" followed by the 3 appended blanks. The expected text must be 6 characters
     * long to agree with num_columns, trailing, and the 7-entry index arrays below.
     */
    assert_string_equal("XYZ   ", bxstr->ascii);
    assert_int_equal(0, (int) bxstr->indent);
    assert_int_equal(6, (int) bxstr->num_columns);
    assert_int_equal(21, (int) bxstr->num_chars);
    assert_int_equal(6, (int) bxstr->num_chars_visible);
    assert_int_equal(15, (int) bxstr->num_chars_invisible);
    assert_int_equal(3, (int) bxstr->trailing);

    /* One entry per visible character plus one for the terminator position (21). */
    int expected_firstchar_idx[] = {0, 1, 17, 18, 19, 20, 21};
    assert_array_equal(expected_firstchar_idx, bxstr->first_char, 7);
    int expected_vischar_idx[] = {0, 12, 17, 18, 19, 20, 21};
    assert_array_equal(expected_vischar_idx, bxstr->visible_char, 7);

    BFREE(ustr32);
    bxs_free(bxstr);
}
void test_bxs_to_output(void **state)
{
UNUSED(state);
@ -905,6 +1037,40 @@ void test_bxs_is_empty_null(void **state)
/*
 * bxs_is_blank(): NULL and a fresh empty string are expected to be blank, as are
 * inputs made up only of spaces and/or ANSI escape sequences. A string with a
 * visible character is expected not to be blank.
 */
void test_bxs_is_blank(void **state)
{
    UNUSED(state);

    /* Inputs that go through the unicode conversion, with the expected verdict. */
    static const struct {
        const char *raw;
        int blank;
    } cases[] = {
        { " \x1b[38;5;203m \x1b[0m \x1b[38;5;203m\x1b[0m", 1 },
        { "\x1b[38;5;203m\x1b[0m", 1 },
        { "x", 0 },
    };

    assert_int_equal(1, bxs_is_blank(NULL));

    bxstr_t *empty = bxs_new_empty_string();
    assert_int_equal(1, bxs_is_blank(empty));
    bxs_free(empty);

    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        uint32_t *converted = u32_strconv_from_arg(cases[i].raw, "ASCII");
        assert_non_null(converted);
        bxstr_t *subject = bxs_from_unicode(converted);
        assert_int_equal(cases[i].blank, bxs_is_blank(subject));
        BFREE(converted);
        bxs_free(subject);
    }
}
void test_bxs_is_visible_char(void **state)
{
UNUSED(state);

View File

@ -36,8 +36,15 @@ void test_ansi_unicode_tabs(void **state);
void test_ansi_unicode_broken_escapes(void **state);
void test_ansi_unicode_null(void **state);
void test_bxs_new_empty_string(void **state);
void test_bxs_is_blank(void **state);          /* bxs_is_blank(): NULL, empty, blank/escape-only, and visible input */
void test_bxs_strdup(void **state);
void test_bxs_cut_front(void **state);         /* bxs_cut_front(): counts of 3 and 1000 on a 6-column string */
void test_bxs_cut_front_zero(void **state);    /* bxs_cut_front(): count of 0 */
void test_bxs_trimdup_null(void **state);
void test_bxs_trimdup_invalid_startidx(void **state);
void test_bxs_trimdup_invalid_endidx(void **state);
@ -63,6 +70,8 @@ void test_bxs_trim_none(void **state);
void test_bxs_rtrim(void **state);
void test_bxs_rtrim_empty(void **state);
void test_bxs_append_spaces(void **state);     /* bxs_append_spaces(): NULL target, zero spaces, three spaces */
void test_bxs_to_output(void **state);
void test_bxs_is_empty_null(void **state);

View File

@ -115,7 +115,8 @@ int main(void)
cmocka_unit_test(test_is_allowed_in_sample),
cmocka_unit_test(test_is_allowed_in_shape),
cmocka_unit_test(test_is_allowed_in_filename),
cmocka_unit_test(test_is_allowed_in_kv_string)
cmocka_unit_test(test_is_allowed_in_kv_string),
cmocka_unit_test(test_u32_strnrstr)
};
const struct CMUnitTest bxstring_tests[] = {
@ -132,7 +133,11 @@ int main(void)
cmocka_unit_test_setup(test_ansi_unicode_tabs, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_broken_escapes, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_null, beforeTest),
cmocka_unit_test_setup(test_bxs_new_empty_string, beforeTest),
cmocka_unit_test_setup(test_bxs_is_blank, beforeTest),
cmocka_unit_test_setup(test_bxs_strdup, beforeTest),
cmocka_unit_test_setup(test_bxs_cut_front, beforeTest),
cmocka_unit_test_setup(test_bxs_cut_front_zero, beforeTest),
cmocka_unit_test_setup(test_bxs_trimdup_null, beforeTest),
cmocka_unit_test_setup(test_bxs_trimdup_invalid_startidx, beforeTest),
cmocka_unit_test_setup(test_bxs_trimdup_invalid_endidx, beforeTest),
@ -153,6 +158,7 @@ int main(void)
cmocka_unit_test_setup(test_bxs_trim_none, beforeTest),
cmocka_unit_test_setup(test_bxs_rtrim, beforeTest),
cmocka_unit_test_setup(test_bxs_rtrim_empty, beforeTest),
cmocka_unit_test_setup(test_bxs_append_spaces, beforeTest),
cmocka_unit_test_setup(test_bxs_to_output, beforeTest),
cmocka_unit_test_setup(test_bxs_is_empty_null, beforeTest),
cmocka_unit_test_setup(test_bxs_is_visible_char, beforeTest),

View File

@ -28,6 +28,7 @@
#include <string.h>
#include "boxes.h"
#include "tools.h"
#include "unicode.h"
#include "unicode_test.h"
@ -158,4 +159,28 @@ void test_is_allowed_in_kv_string(void **state)
}
/*
 * u32_strnrstr(): "found" occurs at offsets 6 and 12 of the haystack. The haystack
 * also ends in the partial match "fou".
 *
 * Expectations encoded below:
 *   - NULL haystack            -> NULL
 *   - NULL needle of length 0  -> the haystack itself
 *   - last argument 1          -> the occurrence at offset 6
 *   - last argument -1         -> the occurrence at offset 12
 */
void test_u32_strnrstr(void **state)
{
    UNUSED(state);

    uint32_t *text = u32_strconv_from_arg("a foo found found bar fou", "ASCII");
    assert_non_null(text);
    uint32_t *pattern = u32_strconv_from_arg("found", "ASCII");
    assert_non_null(pattern);
    const size_t pattern_len = u32_strlen(pattern);

    /* Degenerate arguments */
    assert_null(u32_strnrstr(NULL, pattern, pattern_len, 0));
    assert_ptr_equal(text, u32_strnrstr(text, NULL, 0, 0));

    /* Regular lookups */
    assert_ptr_equal(text + 6, u32_strnrstr(text, pattern, pattern_len, 1));
    assert_ptr_equal(text + 12, u32_strnrstr(text, pattern, pattern_len, -1));  /* -1 will be "fixed" to 0 */

    BFREE(pattern);
    BFREE(text);
}
/* vim: set cindent sw=4: */

View File

@ -27,6 +27,7 @@ void test_is_allowed_in_sample(void **state);
void test_is_allowed_in_shape(void **state);
void test_is_allowed_in_filename(void **state);
void test_is_allowed_in_kv_string(void **state);
void test_u32_strnrstr(void **state);          /* u32_strnrstr(): NULL arguments and lookups in a haystack with two matches */
#endif