Add new "bxstring" module which is meant to evolve into boxes' string abstraction

It handles embedded escape sequences, mbcs encodings, and characters wider than 1 column.
This commit is contained in:
Thomas Jensen 2022-09-17 16:02:42 +02:00
parent 77d02890a7
commit b0d3384bd4
No known key found for this signature in database
GPG Key ID: A4ACEE270D0FB7DB
16 changed files with 796 additions and 69 deletions

View File

@ -183,6 +183,7 @@ win32.utest: $(OUT_DIR)
$(MAKE) -C utest BOXES_PLATFORM=win32 C_INCLUDE_PATH=../$(PCRE2_DIR)/src:../$(WIN_CMOCKA_DIR)/include \ $(MAKE) -C utest BOXES_PLATFORM=win32 C_INCLUDE_PATH=../$(PCRE2_DIR)/src:../$(WIN_CMOCKA_DIR)/include \
LDFLAGS_ADDTL="-L../$(PCRE2_DIR)/.libs -L../$(WIN_CMOCKA_DIR)/lib" utest LDFLAGS_ADDTL="-L../$(PCRE2_DIR)/.libs -L../$(WIN_CMOCKA_DIR)/lib" utest
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Cleanup # Cleanup
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

View File

@ -23,11 +23,11 @@ GEN_HDR = parser.h boxes.h lex.yy.h
GEN_SRC = parser.c lex.yy.c GEN_SRC = parser.c lex.yy.c
GEN_FILES = $(GEN_SRC) $(GEN_HDR) GEN_FILES = $(GEN_SRC) $(GEN_HDR)
ORIG_HDRCL = boxes.in.h config.h ORIG_HDRCL = boxes.in.h config.h
ORIG_HDR = $(ORIG_HDRCL) cmdline.h discovery.h generate.h input.h list.h parsecode.h parsing.h query.h regulex.h \ ORIG_HDR = $(ORIG_HDRCL) bxstring.h cmdline.h discovery.h generate.h input.h list.h parsecode.h parsing.h query.h \
remove.h shape.h tools.h unicode.h regulex.h remove.h shape.h tools.h unicode.h
ORIG_GEN = lexer.l parser.y ORIG_GEN = lexer.l parser.y
ORIG_NORM = boxes.c cmdline.c discovery.c generate.c input.c list.c parsecode.c parsing.c query.c regulex.c remove.c \ ORIG_NORM = boxes.c bxstring.c cmdline.c discovery.c generate.c input.c list.c parsecode.c parsing.c query.c \
shape.c tools.c unicode.c regulex.c remove.c shape.c tools.c unicode.c
ORIG_SRC = $(ORIG_GEN) $(ORIG_NORM) ORIG_SRC = $(ORIG_GEN) $(ORIG_NORM)
ORIG_FILES = $(ORIG_SRC) $(ORIG_HDR) ORIG_FILES = $(ORIG_SRC) $(ORIG_HDR)
@ -108,6 +108,7 @@ lex.yy.c lex.yy.h: lexer.l | check_dir
$(LEX) --header-file=lex.yy.h $< $(LEX) --header-file=lex.yy.h $<
boxes.o: boxes.c boxes.h cmdline.h discovery.h generate.h input.h list.h remove.h tools.h unicode.h config.h | check_dir boxes.o: boxes.c boxes.h cmdline.h discovery.h generate.h input.h list.h remove.h tools.h unicode.h config.h | check_dir
bxstring.o: bxstring.c bxstring.h tools.h unicode.h config.h | check_dir
cmdline.o: cmdline.c cmdline.h boxes.h tools.h config.h | check_dir cmdline.o: cmdline.c cmdline.h boxes.h tools.h config.h | check_dir
discovery.o: discovery.c discovery.h boxes.h tools.h config.h | check_dir discovery.o: discovery.c discovery.h boxes.h tools.h config.h | check_dir
generate.o: generate.c generate.h boxes.h shape.h tools.h unicode.h config.h | check_dir generate.o: generate.c generate.h boxes.h shape.h tools.h unicode.h config.h | check_dir

184
src/bxstring.c Normal file
View File

@ -0,0 +1,184 @@
/*
* boxes - Command line filter to draw/remove ASCII boxes around text
* Copyright (c) 1999-2021 Thomas Jensen and the boxes contributors
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
* License, version 3, as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
* You should have received a copy of the GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
/*
* The boxes-internal representation of strings.
*/
#include "config.h"
#include <string.h>
#include <unictype.h>
#include <unistr.h>
#include <uniwidth.h>
#include "bxstring.h"
#include "tools.h"
#include "unicode.h"
/**
 * Construct a `bxstr_t` from a plain ASCII string.
 *
 * The input is converted to UTF-32 for `memory` and duplicated for `ascii`. Because the input is expected to
 * contain neither tabs nor escape sequences, every character occupies one column and the two index maps are
 * identity mappings. Both maps get one extra entry at the end which points to the NUL terminator.
 *
 * @param pAscii the ASCII string; must not be NULL and must not contain tabs
 * @return a newly allocated `bxstr_t` whose members are heap-allocated, or NULL if `pAscii` is NULL, contains
 *      a tab, cannot be converted to UTF-32, or if memory allocation fails
 */
bxstr_t *bxs_from_ascii(char *pAscii)
{
    if (pAscii == NULL) {
        bx_fprintf(stderr, "%s: internal error: from_ascii() called with NULL\n", PROJECT);
        return NULL;
    }

    if (strchr(pAscii, '\t') != NULL) {
        bx_fprintf(stderr, "%s: internal error: from_ascii() called with tabs: \"%s\"\n", PROJECT, pAscii);
        return NULL;
    }

    bxstr_t *result = (bxstr_t *) calloc(1, sizeof(bxstr_t));
    if (result == NULL) {
        return NULL;
    }
    result->memory = u32_strconv_from_arg(pAscii, "ASCII");
    if (result->memory == NULL) {
        BFREE(result);
        return NULL;
    }
    result->ascii = strdup(pAscii);

    /* Only the count is needed here. The ASCII copy and the position map handed back by
     * count_invisible_chars() are scratch data and must both be released. */
    size_t num_esc = 0;
    char *ascii_copy = NULL;
    size_t *map = NULL;
    result->num_chars_invisible = count_invisible_chars(result->memory, &num_esc, &ascii_copy, &map);
    BFREE(ascii_copy);
    BFREE(map);

    result->num_chars = strlen(pAscii);
    result->num_columns = result->num_chars;
    result->num_chars_visible = result->num_chars - result->num_chars_invisible;

    result->indent = strspn(pAscii, " ");
    result->trailing = my_strrspn(pAscii, " ");

    /* one extra slot so that the maps also cover the terminator */
    result->first_char = malloc((result->num_chars_visible + 1) * sizeof(size_t));
    result->visible_char = malloc((result->num_chars_visible + 1) * sizeof(size_t));
    if (result->ascii == NULL || result->first_char == NULL || result->visible_char == NULL) {
        /* out of memory: release whatever was allocated so far */
        BFREE(result->memory);
        BFREE(result->ascii);
        BFREE(result->first_char);
        BFREE(result->visible_char);
        BFREE(result);
        return NULL;
    }

    for (size_t i = 0; i <= result->num_chars_visible; i++) {
        result->first_char[i] = i;
        result->visible_char[i] = i;
    }

    return result;
}
/**
 * Construct a `bxstr_t` from a NUL-terminated UTF-32 string.
 *
 * Walks the input once. Escape sequences are counted as invisible characters. For every other character, the
 * ASCII rendering, the column count, the indentation, and the two index maps are updated. Both maps receive a
 * final entry which points to the NUL terminator.
 *
 * @param pInput the UTF-32 string, which may contain ANSI escape sequences, but no tabs
 * @return a newly allocated `bxstr_t`, or NULL if `pInput` is NULL, a tab is encountered, or memory
 *      allocation fails
 */
bxstr_t *bxs_from_unicode(uint32_t *pInput)
{
    if (pInput == NULL) {
        bx_fprintf(stderr, "%s: internal error: from_unicode() called with NULL\n", PROJECT);
        return NULL;
    }

    bxstr_t *result = (bxstr_t *) calloc(1, sizeof(bxstr_t));
    if (result == NULL) {
        return NULL;
    }
    result->memory = u32_strdup(pInput);
    result->num_chars = u32_strlen(pInput);
    size_t ascii_len = ((size_t) u32_strwidth(pInput, encoding)) + 1;   /* often generous, but always enough */
    result->ascii = (char *) calloc(ascii_len, sizeof(char));
    size_t map_size = 5;
    result->first_char = (size_t *) calloc(map_size, sizeof(size_t));
    result->visible_char = (size_t *) calloc(map_size, sizeof(size_t));
    if (result->memory == NULL || result->ascii == NULL
            || result->first_char == NULL || result->visible_char == NULL)
    {
        bxs_free(result);   /* out of memory; bxs_free() copes with members that are still NULL */
        return NULL;
    }

    char *ascii_ptr = result->ascii;
    const uint32_t *rest = pInput;
    size_t step_invis = 0;
    int indent_active = 1;       /* 1 as long as only blanks and escape sequences have been seen */
    size_t blank_streak = 0;     /* number of blank characters since the last non-blank character */
    int first_candidate = -1;    /* start index of a pending non-reset escape sequence, or -1 */
    size_t idx = 0;              /* index of the current character in the input */

    for (ucs4_t c = pInput[0]; c != char_nul; c = rest[0]) {
        if (result->num_chars_visible >= map_size - 2) {
            /* Grow both maps. The result is checked before the old pointer is replaced, so that a failed
             * realloc() neither leaks the old array nor leads to a NULL dereference. */
            map_size = map_size * 2 + 1;
            size_t *grown_first = (size_t *) realloc(result->first_char, map_size * sizeof(size_t));
            if (grown_first == NULL) {
                bxs_free(result);
                return NULL;
            }
            result->first_char = grown_first;
            size_t *grown_visible = (size_t *) realloc(result->visible_char, map_size * sizeof(size_t));
            if (grown_visible == NULL) {
                bxs_free(result);
                return NULL;
            }
            result->visible_char = grown_visible;
        }

        if (c == char_tab) {
            bx_fprintf(stderr, "%s: internal error: tab encountered in from_unicode()\n", PROJECT);
            bxs_free(result);
            return NULL;
        }
        else if (c == char_esc) {
            /* A reset sequence is not attributed to the following visible character; any other escape
             * sequence becomes the "first character" of the next visible character. */
            if (is_csi_reset(rest)) {
                first_candidate = -1;
            }
            else {
                first_candidate = idx;
            }
        }
        else {
            int cols = 1;
            if (is_ascii_printable(c)) {
                *ascii_ptr = c & 0xff;
                ++ascii_ptr;
            }
            else {
                /* one placeholder per screen column: ' ' for blanks, 'x' for everything else */
                cols = BMAX(0, uc_width(c, encoding));
                if (cols > 0) {
                    memset(ascii_ptr, (int) (uc_is_blank(c) ? ' ' : 'x'), cols);
                    ascii_ptr += cols;
                }
            }
            if (uc_is_blank(c)) {
                if (indent_active) {
                    result->indent += cols;
                }
                blank_streak++;
            }
            result->num_columns += BMAX(0, cols);
            result->visible_char[result->num_chars_visible] = idx;
            result->first_char[result->num_chars_visible] = first_candidate < 0 ? idx : (size_t) first_candidate;
            first_candidate = -1;
        }

        if (!uc_is_blank(c) && c != char_esc) {
            indent_active = 0;
            blank_streak = 0;
        }

        /* step_invis == 0 is treated as "one visible character consumed"; any other value is added to the
         * invisible count and to the index. */
        rest = advance_next32(rest, &step_invis);
        if (step_invis == 0) {
            result->num_chars_visible++;
            idx++;
        }
        else {
            result->num_chars_invisible += step_invis;
            idx += step_invis;
        }
    }

    *ascii_ptr = '\0';
    result->visible_char[result->num_chars_visible] = idx;    // both point to the terminator
    result->first_char[result->num_chars_visible] = idx;
    result->trailing = blank_streak;
    return result;
}
/**
 * Release a `bxstr_t` together with all memory areas it points to.
 * Passing NULL is allowed and does nothing.
 * @param pString the `bxstr_t` to free, may be NULL
 */
void bxs_free(bxstr_t *pString)
{
    if (pString == NULL) {
        return;
    }

    /* members first, the container last */
    BFREE(pString->memory);
    BFREE(pString->ascii);
    BFREE(pString->first_char);
    BFREE(pString->visible_char);
    BFREE(pString);
}
/* vim: set cindent sw=4: */

70
src/bxstring.h Normal file
View File

@ -0,0 +1,70 @@
/*
* boxes - Command line filter to draw/remove ASCII boxes around text
* Copyright (c) 1999-2021 Thomas Jensen and the boxes contributors
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
* License, version 3, as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
* You should have received a copy of the GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
/*
* The boxes-internal representation of strings.
*/
#ifndef BXSTRING_H
#define BXSTRING_H
#include <unitypes.h>
/*
 * The boxes-internal string type. Instances are created by bxs_from_ascii() or bxs_from_unicode() and released
 * with bxs_free(). Members marked "Immutable" are set once by the constructor.
 */
typedef struct {
    /* Immutable. The string as a NUL-terminated UTF-32 sequence. The constructors store their own copy here,
     * and bxs_free() releases it. */
    uint32_t *memory;

    /* Immutable. ASCII rendering of the string, NUL terminated: escape sequences are left out, and each
     * non-ASCII character is replaced by one placeholder per screen column (' ' for blanks, 'x' otherwise).
     * NOTE(review): this comment used to say "tabs expanded", but both constructors reject input which
     * contains tabs - confirm the intended contract. CHECK remove this eventually */
    char *ascii;

    /* Immutable. Number of spaces at the beginning of the original string. bxs_from_unicode() adds up the
     * screen columns of the leading blank characters. */
    size_t indent;

    /* Immutable. Total number of screen columns required to display the string (important for double-wide
     * characters such as Chinese) */
    size_t num_columns;

    /* Immutable. Number of characters in the original string (visible + invisible) */
    size_t num_chars;

    /* Immutable. Number of visible characters in the original string */
    size_t num_chars_visible;

    /* Immutable. Number of invisible characters in the original string */
    size_t num_chars_invisible;

    /* Immutable. Number of trailing spaces in the original string. bxs_from_unicode() counts the blank
     * characters which follow the last non-blank character. */
    size_t trailing;

    /* Immutable. Array of index values into `memory` of the first actual character (possibly invisible) of
     * each visible character. bxs_from_unicode() stores one extra entry at the end which points to the NUL
     * terminator. */
    size_t *first_char;

    /* Immutable. Array of index values into `memory` of the visible characters themselves.
     * bxs_from_unicode() stores one extra entry at the end which points to the NUL terminator. */
    size_t *visible_char;

    /* Number of visible characters to cut from the beginning of the string. Must be smaller than or equal to
     * `indent`. Can be negative, in which case spaces will be prepended. */
    int offset_start;

    /* Number of visible characters to cut from the end of the string. Must be smaller than or equal to
     * `trailing`. Can be negative, in which case spaces will be appended. */
    int offset_end;
} bxstr_t;
/**
* Construct a `bxstr_t` from an ASCII string.
* @param pAscii the ASCII string, which does not contain tabs or ANSI escapes
* @return a pointer to a new `bxstr_t` for which memory has been allocated,
* or NULL if `pAscii` was NULL, contained tabs, or could not be converted
*/
bxstr_t *bxs_from_ascii(char *pAscii);
/**
* Construct a `bxstr_t` from a Unicode string.
* @param pInput the NUL-terminated UTF-32 string, which may include ANSI escapes, but NOT tabs
* @return a pointer to a new `bxstr_t` for which memory has been allocated,
* or NULL if `pInput` was NULL or an error occurred (such as tabs encountered)
*/
bxstr_t *bxs_from_unicode(uint32_t *pInput);
/**
* Free the memory allocated by the given `bxstr_t`.
* @param pString the `bxstr_t` to free
*/
void bxs_free(bxstr_t *pString);
/* TODO */
#endif
/*EOF*/ /* vim: set cindent sw=4: */

View File

@ -18,13 +18,13 @@
*/ */
#include "config.h" #include "config.h"
#include <errno.h> #include <errno.h>
#include <stdio.h>
#include <stdarg.h> #include <stdarg.h>
#include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <strings.h> #include <strings.h>
#include <unictype.h> #include <unictype.h>
#include <unistr.h> #include <unistr.h>
#include <unitypes.h> #include <unitypes.h>
@ -32,8 +32,8 @@
#include "boxes.h" #include "boxes.h"
#include "shape.h" #include "shape.h"
#include "unicode.h"
#include "tools.h" #include "tools.h"
#include "unicode.h"
@ -55,15 +55,20 @@ int strisyes(const char *s)
if (!strncasecmp("on", s, 3)) { if (!strncasecmp("on", s, 3)) {
return 1; return 1;
} else if (!strncasecmp("yes", s, 4)) { }
else if (!strncasecmp("yes", s, 4)) {
return 1; return 1;
} else if (!strncasecmp("true", s, 5)) { }
else if (!strncasecmp("true", s, 5)) {
return 1; return 1;
} else if (!strncmp("1", s, 2)) { }
else if (!strncmp("1", s, 2)) {
return 1; return 1;
} else if (!strncasecmp("t", s, 2)) { }
else if (!strncasecmp("t", s, 2)) {
return 1; return 1;
} else { }
else {
return 0; return 0;
} }
} }
@ -88,15 +93,20 @@ int strisno(const char *s)
if (!strncasecmp("off", s, 4)) { if (!strncasecmp("off", s, 4)) {
return 1; return 1;
} else if (!strncasecmp("no", s, 3)) { }
else if (!strncasecmp("no", s, 3)) {
return 1; return 1;
} else if (!strncasecmp("false", s, 6)) { }
else if (!strncasecmp("false", s, 6)) {
return 1; return 1;
} else if (!strncmp("0", s, 2)) { }
else if (!strncmp("0", s, 2)) {
return 1; return 1;
} else if (!strncasecmp("f", s, 2)) { }
else if (!strncasecmp("f", s, 2)) {
return 1; return 1;
} else { }
else {
return 0; return 0;
} }
} }
@ -121,7 +131,7 @@ void concat_strings(char *dst, int max_len, int count, ...)
va_list va; va_list va;
const char *src; const char *src;
va_start (va, count); va_start(va, count);
/* /*
* Sanity check. * Sanity check.
@ -139,12 +149,11 @@ void concat_strings(char *dst, int max_len, int count, ...)
* Loop over all input strings. * Loop over all input strings.
*/ */
while (count-- > 0 && max_len > 1) { while (count-- > 0 && max_len > 1) {
/* /*
* Grab an input string pointer. If it's NULL, skip it (eg. treat * Grab an input string pointer. If it's NULL, skip it (eg. treat
* it as empty. * it as empty.
*/ */
src = va_arg (va, const char *); src = va_arg(va, const char *);
if (src == NULL) { if (src == NULL) {
continue; continue;
@ -159,7 +168,7 @@ void concat_strings(char *dst, int max_len, int count, ...)
} }
} }
va_end (va); va_end(va);
/* /*
* Terminate the string with an ASCII NUL. * Terminate the string with an ASCII NUL.
@ -179,27 +188,27 @@ char *concat_strings_alloc(size_t count, ...)
const char *src; const char *src;
va_list va; va_list va;
va_start (va, count); va_start(va, count);
for (size_t i = 0; i < count; i++) { for (size_t i = 0; i < count; i++) {
src = va_arg (va, const char *); src = va_arg(va, const char *);
if (src != NULL) { if (src != NULL) {
total_len += strlen(src); total_len += strlen(src);
} }
} }
va_end (va); va_end(va);
char *result = malloc(total_len + 1); char *result = malloc(total_len + 1);
char *p = result; char *p = result;
va_start (va, count); va_start(va, count);
for (size_t i = 0; i < count; i++) { for (size_t i = 0; i < count; i++) {
src = va_arg (va, const char *); src = va_arg(va, const char *);
if (src != NULL && src[0] != '\0') { if (src != NULL && src[0] != '\0') {
strcpy(p, src); strcpy(p, src);
p += strlen(src); p += strlen(src);
} }
} }
va_end (va); va_end(va);
*p = '\0'; *p = '\0';
return result; return result;
@ -236,8 +245,8 @@ int empty_line(const line_t *line)
size_t expand_tabs_into(const uint32_t *input_buffer, const int tabstop, uint32_t **text, size_t expand_tabs_into(const uint32_t *input_buffer, const int tabstop, uint32_t **text, size_t **tabpos,
size_t **tabpos, size_t *tabpos_len) size_t *tabpos_len)
/* /*
* Expand tab chars in input_buffer and store result in text. * Expand tab chars in input_buffer and store result in text.
* *
@ -257,9 +266,9 @@ size_t expand_tabs_into(const uint32_t *input_buffer, const int tabstop, uint32_
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/ */
{ {
static uint32_t temp[LINE_MAX_BYTES + 100]; /* work string */ static uint32_t temp[LINE_MAX_BYTES + 100]; /* work string */
size_t io; /* character position in work string */ size_t io; /* character position in work string */
size_t tabnum = 0; /* index of the current tab */ size_t tabnum = 0; /* index of the current tab */
*text = NULL; *text = NULL;
*tabpos = NULL; *tabpos = NULL;
@ -281,7 +290,7 @@ size_t expand_tabs_into(const uint32_t *input_buffer, const int tabstop, uint32_
if (*tabpos_len > 0) { if (*tabpos_len > 0) {
*tabpos = (size_t *) calloc((*tabpos_len) + 1, sizeof(size_t)); *tabpos = (size_t *) calloc((*tabpos_len) + 1, sizeof(size_t));
if (*tabpos == NULL) { if (*tabpos == NULL) {
return 0; /* out of memory */ return 0; /* out of memory */
} }
} }
@ -322,9 +331,7 @@ void btrim(char *text, size_t *len)
{ {
long idx = (long) (*len) - 1; long idx = (long) (*len) - 1;
while (idx >= 0 && (text[idx] == '\n' || text[idx] == '\r' while (idx >= 0 && (text[idx] == '\n' || text[idx] == '\r' || text[idx] == '\t' || text[idx] == ' ')) {
|| text[idx] == '\t' || text[idx] == ' ')) /**/
{
text[idx--] = '\0'; text[idx--] = '\0';
} }
@ -415,7 +422,8 @@ char *my_strnrstr(const char *s1, const char *s2, const size_t s2_len, int skip)
if (comp == 0) { if (comp == 0) {
if (skip--) { if (skip--) {
--p; --p;
} else { }
else {
return p; return p;
} }
} }
@ -429,6 +437,26 @@ char *my_strnrstr(const char *s1, const char *s2, const size_t s2_len, int skip)
/*
 * Calculate the length (in bytes) of the segment at the end of `s` which consists entirely of bytes from
 * `accept`. This is like `strspn()`, but from the end of the string.
 *
 *      s       the string to search, may be NULL
 *      accept  the acceptable characters that form the trailing segment, may be NULL
 *
 * RETURNS:  the number of trailing bytes of `s` which all occur in `accept`;
 *           0 if either argument is NULL or empty
 */
size_t my_strrspn(const char *s, const char *accept)
{
    if (s == NULL || *s == '\0' || accept == NULL || *accept == '\0') {
        return 0;
    }

    /* The length is determined once, and all indexing uses size_t, so that strings longer than INT_MAX are
     * handled correctly. */
    const size_t len = strlen(s);
    size_t count = 0;
    while (count < len && strchr(accept, s[len - 1 - count]) != NULL) {
        count++;
    }
    return count;
}
char *tabbify_indent(const size_t lineno, char *indentspc, const size_t indentspc_len) char *tabbify_indent(const size_t lineno, char *indentspc, const size_t indentspc_len)
/* /*
* Checks if tab expansion mode is "keep", and if so, calculates a new * Checks if tab expansion mode is "keep", and if so, calculates a new
@ -470,11 +498,9 @@ char *tabbify_indent(const size_t lineno, char *indentspc, const size_t indentsp
result[indentspc_len] = '\0'; result[indentspc_len] = '\0';
result_len = indentspc_len; result_len = indentspc_len;
for (i = 0; i < input.lines[lineno].tabpos_len for (i = 0; i < input.lines[lineno].tabpos_len && input.lines[lineno].tabpos[i] < indentspc_len; ++i) {
&& input.lines[lineno].tabpos[i] < indentspc_len; ++i) /**/
{
size_t tpos = input.lines[lineno].tabpos[i]; size_t tpos = input.lines[lineno].tabpos[i];
size_t nspc = opt.tabstop - (tpos % opt.tabstop); /* no of spcs covered by tab */ size_t nspc = opt.tabstop - (tpos % opt.tabstop); /* no of spcs covered by tab */
if (tpos + nspc > input.indent) { if (tpos + nspc > input.indent) {
break; break;
} }
@ -527,7 +553,8 @@ void print_input_lines(const char *heading)
fprintf(stderr, "%d%s", (int) input.lines[i].posmap[j], j == (input.lines[i].len - 1) ? "" : ", "); fprintf(stderr, "%d%s", (int) input.lines[i].posmap[j], j == (input.lines[i].len - 1) ? "" : ", ");
} }
fprintf(stderr, "]\n"); fprintf(stderr, "]\n");
} else { }
else {
fprintf(stderr, "null\n"); fprintf(stderr, "null\n");
} }
} }
@ -551,10 +578,10 @@ void print_input_lines(const char *heading)
* @param <posmap> pointer to the position map, which maps each position in <ascii> to a position in <s> * @param <posmap> pointer to the position map, which maps each position in <ascii> to a position in <s>
* @returns the number of invisible characters in <s> * @returns the number of invisible characters in <s>
*/ */
static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii, size_t **posmap) size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii, size_t **posmap)
{ {
size_t invis = 0; /* counts invisible column positions */ size_t invis = 0; /* counts invisible column positions */
*num_esc = 0; /* counts the number of escape sequences found */ *num_esc = 0; /* counts the number of escape sequences found */
if (is_empty(s)) { if (is_empty(s)) {
(*ascii) = (char *) strdup(""); (*ascii) = (char *) strdup("");
@ -565,8 +592,8 @@ static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **a
size_t buflen = (size_t) u32_strwidth(s, encoding) + 1; size_t buflen = (size_t) u32_strwidth(s, encoding) + 1;
size_t map_size = BMAX((size_t) 5, buflen); size_t map_size = BMAX((size_t) 5, buflen);
size_t map_idx = 0; size_t map_idx = 0;
size_t *map = (size_t *) calloc(map_size, sizeof(size_t)); /* might not be enough if many double-wide chars */ size_t *map = (size_t *) calloc(map_size, sizeof(size_t)); /* might not be enough if many double-wide chars */
(*ascii) = (char *) calloc(buflen, sizeof(char)); /* maybe a little too much, but certainly enough */ (*ascii) = (char *) calloc(buflen, sizeof(char)); /* maybe a little too much, but certainly enough */
char *p = *ascii; char *p = *ascii;
size_t mb_idx = 0; size_t mb_idx = 0;
@ -611,6 +638,41 @@ static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **a
/*
 * Check whether the characters at `csi` form an escape sequence that is regarded as a "reset".
 *
 * The sequence is accepted when it starts with ESC, followed by '[' or '(', followed by either
 *   - a character in the range 0x40..0x7e right away, or
 *   - a '0' which is directly followed by a character in the range 0x40..0x7e.
 *
 *      csi  pointer into a zero-terminated UTF-32 string
 *
 * RETURNS:  1 if the sequence is accepted, 0 otherwise (including when the string ends early)
 */
int is_csi_reset(const uint32_t *csi)
{
    ucs4_t current = '\0';
    const uint32_t *cursor = csi;

    for (size_t pos = 0; (cursor = u32_next(&current, cursor)) != NULL; pos++) {
        const int is_final = (current >= 0x40 && current <= 0x7e);

        if (pos == 0) {
            if (current != char_esc) {
                return 0;
            }
        }
        else if (pos == 1) {
            if (current != '[' && current != '(') {
                return 0;
            }
        }
        else if (pos == 2) {
            /* a '0' parameter lets us look at one more character; anything else decides right here */
            if (current != '0') {
                return is_final ? 1 : 0;
            }
        }
        else {
            return is_final ? 1 : 0;
        }
    }

    return 0;
}
void analyze_line_ascii(input_t *input_ptr, line_t *line) void analyze_line_ascii(input_t *input_ptr, line_t *line)
{ {
size_t num_esc = 0; size_t num_esc = 0;
@ -742,4 +804,4 @@ void bx_fprintf(FILE *stream, const char *format, ...)
} }
/*EOF*/ /* vim: set sw=4: */ /* vim: set sw=4: */

View File

@ -33,10 +33,10 @@
}) })
#define BFREE(p) { /* free memory and clear pointer */ \ #define BFREE(p) { /* free memory and clear pointer */ \
if (p) { \ if (p) { \
free((void *) p); \ free((void *) (p)); \
(p) = NULL; \ (p) = NULL; \
} \ } \
} }
@ -49,8 +49,16 @@ void btrim(char *text, size_t *len);
void btrim32(uint32_t *text, size_t *len); void btrim32(uint32_t *text, size_t *len);
char *my_strnrstr(const char *s1, const char *s2, const size_t s2_len, char *my_strnrstr(const char *s1, const char *s2, const size_t s2_len, int skip);
int skip);
/**
* Calculates the length (in bytes) of the segment at the end of `s` which consists entirely of bytes in `accept`.
* This is like `strspn()`, but from the end of the string.
* @param <s> the string to search
* @param <accept> acceptable characters that form the trailing segment
* @return the number of bytes found as described above
*/
size_t my_strrspn(const char *s, const char *accept);
int strisyes(const char *s); int strisyes(const char *s);
@ -75,6 +83,16 @@ void print_input_lines(const char *heading);
void analyze_line_ascii(input_t *input_ptr, line_t *line); void analyze_line_ascii(input_t *input_ptr, line_t *line);
size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii, size_t **posmap);
/**
* Determine whether the given sequence of characters is a CSI (also called "escape sequence") that resets all
* modifications, typically `ESC[0m`.
* @param csi a pointer into a zero-terminated UTF-32 string
* @returns 1 if true, 0 if false
*/
int is_csi_reset(const uint32_t *csi);
int array_contains(char **array, const size_t array_len, const char *s); int array_contains(char **array, const size_t array_len, const char *s);
/** /**

View File

@ -28,7 +28,7 @@
* However, people might not conform to this, so we use ISO_8859-15 as a reasonable superset. */ * However, people might not conform to this, so we use ISO_8859-15 as a reasonable superset. */
extern const char *config_encoding; extern const char *config_encoding;
/** the character encoding of input (and output) text */ /* effective character encoding of input and output text */
extern const char *encoding; extern const char *encoding;
/** ucs4_t character '\t' (tab) */ /** ucs4_t character '\t' (tab) */

View File

@ -19,7 +19,7 @@ SRC_DIR = ../src
UTEST_DIR = ../utest UTEST_DIR = ../utest
VPATH = $(SRC_DIR):$(SRC_DIR)/misc:$(UTEST_DIR) VPATH = $(SRC_DIR):$(SRC_DIR)/misc:$(UTEST_DIR)
UTEST_NORM = global_mock.c cmdline_test.c tools_test.c regulex_test.o main.o UTEST_NORM = global_mock.c bxstring_test.o cmdline_test.c tools_test.c regulex_test.o main.o utest_tools.o
MOCKS = bx_fprintf MOCKS = bx_fprintf
.PHONY: check_dir flags_unix flags_win32 flags_ utest .PHONY: check_dir flags_unix flags_win32 flags_ utest
@ -66,8 +66,10 @@ unittest.exe: $(UTEST_OBJ) | check_dir
-lkernel32 -l:libunistring.a -l:libpcre2-32.a -l:libiconv.a -l:libcmocka.dll.a -lkernel32 -l:libunistring.a -l:libpcre2-32.a -l:libiconv.a -l:libcmocka.dll.a
global_mock.o: global_mock.c global_mock.h boxes.h unicode.h config.h | check_dir global_mock.o: global_mock.c global_mock.h boxes.h unicode.h config.h | check_dir
cmdline_test.o: cmdline_test.c cmdline_test.h global_mock.h boxes.h cmdline.h config.h | check_dir bxstring_test.o: bxstring_test.c bxstring_test.h global_mock.h bxstring.h config.h | check_dir
tools_test.o: tools_test.c tools_test.h tools.h config.h | check_dir cmdline_test.o: cmdline_test.c cmdline_test.h global_mock.h boxes.h cmdline.h config.h | check_dir
regulex_test.o: regulex_test.c regulex_test.h global_mock.h regulex.h config.h | check_dir tools_test.o: tools_test.c tools_test.h tools.h config.h | check_dir
main.o: main.c global_mock.h tools_test.h regulex_test.h config.h | check_dir regulex_test.o: regulex_test.c regulex_test.h global_mock.h regulex.h config.h | check_dir
main.o: main.c global_mock.h tools_test.h regulex_test.h config.h | check_dir
utest_tools.o: utest_tools.c utest_tools.h config.h | check_dir

161
utest/bxstring_test.c Normal file
View File

@ -0,0 +1,161 @@
/*
* boxes - Command line filter to draw/remove ASCII boxes around text
* Copyright (c) 1999-2021 Thomas Jensen and the boxes contributors
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
* License, version 3, as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
* You should have received a copy of the GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
/*
* Unit tests of the 'bxstring' module
*/
#include "config.h"
#include <setjmp.h>
#include <stdarg.h>
#include <stddef.h>
#include <cmocka.h>
#include "bxstring.h"
#include "bxstring_test.h"
#include "global_mock.h"
#include "tools.h"
#include "unicode.h"
#include "utest_tools.h"
/*
 * A short ASCII string without blanks: all characters are visible, one column each.
 */
void test_ascii_simple(void **state)
{
    (void) state;  /* unused */

    bxstr_t *bxstr = bxs_from_ascii("foo");

    /* the string itself */
    assert_non_null(bxstr);
    assert_non_null(bxstr->memory);
    assert_string_equal("foo", bxstr->ascii);

    /* counters */
    assert_int_equal(0, (int) bxstr->indent);
    assert_int_equal(3, (int) bxstr->num_columns);
    assert_int_equal(3, (int) bxstr->num_chars);
    assert_int_equal(3, (int) bxstr->num_chars_visible);
    assert_int_equal(0, (int) bxstr->num_chars_invisible);
    assert_int_equal(0, (int) bxstr->trailing);

    /* both index maps are identity mappings */
    int identity_map[] = {0, 1, 2};
    assert_array_equal(identity_map, bxstr->first_char, 3);
    assert_array_equal(identity_map, bxstr->visible_char, 3);

    /* offsets start out as zero */
    assert_int_equal(0, bxstr->offset_start);
    assert_int_equal(0, bxstr->offset_end);

    bxs_free(bxstr);
}
/*
 * A colored line with a non-ASCII character: several escape sequences, four leading blanks, one trailing blank.
 *
 * Layout of the input (index: content), which the expected index arrays below are derived from:
 *    0-10: ESC[38;5;203m   11: ' '   12-15: ESC[0m   16: ' '   17: ' '   18-28: ESC[38;5;198m   29: ' '
 *   30: 'x'   31-34: ESC[0m   35: 'x'   36: 'x'   37-47: ESC[38;5;208m   48: a-umlaut   49-52: ESC[0m
 *   53-63: ESC[38;5;203m   64: 'b'   65-68: ESC[0m   69: ' '   70: terminator
 */
void test_ansi_unicode_book(void **state)
{
    (void) state;  /* unused */

    /* Note the TWO blanks after the first reset sequence (indexes 16 and 17). */
    uint32_t *ustr32 = u32_strconv_from_arg(
            "\x1b[38;5;203m \x1b[0m  \x1b[38;5;198m x\x1b[0mxx\x1b[38;5;208m\xc3\xa4\x1b[0m\x1b[38;5;203mb\x1b[0m ",
            "UTF-8");
    assert_non_null(ustr32);
    bxstr_t *actual = bxs_from_unicode(ustr32);

    assert_non_null(actual);
    assert_non_null(actual->memory);
    assert_string_equal("    xxxxb ", actual->ascii);   /* 4 blanks, "xx", 'x' for the a-umlaut... 10 columns */
    assert_int_equal(4, (int) actual->indent);
    assert_int_equal(10, (int) actual->num_columns);
    assert_int_equal(70, (int) actual->num_chars);
    assert_int_equal(10, (int) actual->num_chars_visible);
    assert_int_equal(60, (int) actual->num_chars_invisible);
    assert_int_equal(1, (int) actual->trailing);

    /* 10 visible characters plus the entry for the terminator */
    int expected_firstchar_idx[] = {0, 16, 17, 18, 30, 35, 36, 37, 53, 69, 70};
    assert_array_equal(expected_firstchar_idx, actual->first_char, 11);
    int expected_vischar_idx[] = {11, 16, 17, 29, 30, 35, 36, 48, 64, 69, 70};
    assert_array_equal(expected_vischar_idx, actual->visible_char, 11);

    assert_int_equal(0, actual->offset_start);
    assert_int_equal(0, actual->offset_end);

    BFREE(ustr32);
    bxs_free(actual);
}
/*
 * Different kinds of blanks (em-space, en-space, regular space) all count as indentation.
 */
void test_ansi_unicode_space_kinds(void **state)
{
    (void) state;  /* unused */

    uint32_t *ustr32 = u32_strconv_from_arg(
            "\xe2\x80\x83\xe2\x80\x82 X",   /* em-space, en-space, space, X */
            "UTF-8");
    assert_non_null(ustr32);
    bxstr_t *actual = bxs_from_unicode(ustr32);

    assert_non_null(actual);
    assert_non_null(actual->memory);
    assert_string_equal("   X", actual->ascii);   /* each of the three blanks is rendered as one ' ' */
    assert_int_equal(3, (int) actual->indent);
    assert_int_equal(4, (int) actual->num_columns);
    assert_int_equal(4, (int) actual->num_chars);
    assert_int_equal(4, (int) actual->num_chars_visible);
    assert_int_equal(0, (int) actual->num_chars_invisible);
    assert_int_equal(0, (int) actual->trailing);

    int expected_firstchar_idx[] = {0, 1, 2, 3};
    assert_array_equal(expected_firstchar_idx, actual->first_char, 4);
    int expected_vischar_idx[] = {0, 1, 2, 3};
    assert_array_equal(expected_vischar_idx, actual->visible_char, 4);

    assert_int_equal(0, actual->offset_start);
    assert_int_equal(0, actual->offset_end);

    BFREE(ustr32);
    bxs_free(actual);
}
/*
 * Five double-wide characters: 5 characters, but 10 screen columns.
 */
void test_ansi_unicode_chinese(void **state)
{
    (void) state;  /* unused */

    uint32_t *input32 = u32_strconv_from_arg(
            "\xe5\x85\xac\xe7\x88\xb8\xe8\xa6\x81\xe9\x81\x93\xef\xbc\x81",  /* five wide characters */
            "UTF-8");
    assert_non_null(input32);
    bxstr_t *bxstr = bxs_from_unicode(input32);

    /* the string itself: two 'x' per wide character */
    assert_non_null(bxstr);
    assert_non_null(bxstr->memory);
    assert_string_equal("xxxxxxxxxx", bxstr->ascii);

    /* counters */
    assert_int_equal(0, (int) bxstr->indent);
    assert_int_equal(10, (int) bxstr->num_columns);
    assert_int_equal(5, (int) bxstr->num_chars);
    assert_int_equal(5, (int) bxstr->num_chars_visible);
    assert_int_equal(0, (int) bxstr->num_chars_invisible);
    assert_int_equal(0, (int) bxstr->trailing);

    /* no escape sequences, so both maps are identity mappings */
    int firstchar_expected[] = {0, 1, 2, 3, 4};
    assert_array_equal(firstchar_expected, bxstr->first_char, 5);
    int vischar_expected[] = {0, 1, 2, 3, 4};
    assert_array_equal(vischar_expected, bxstr->visible_char, 5);

    assert_int_equal(0, bxstr->offset_start);
    assert_int_equal(0, bxstr->offset_end);

    BFREE(input32);
    bxs_free(bxstr);
}
// TODO test case for incomplete/broken escape sequences
/* vim: set cindent sw=4: */

32
utest/bxstring_test.h Normal file
View File

@ -0,0 +1,32 @@
/*
* boxes - Command line filter to draw/remove ASCII boxes around text
* Copyright (c) 1999-2021 Thomas Jensen and the boxes contributors
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
* License, version 3, as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
* You should have received a copy of the GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
/*
* Unit tests of the 'bxstring' module
*/
#ifndef BXSTRING_TEST_H
#define BXSTRING_TEST_H

/*
 * Entry points of the individual test cases declared by this header.
 * Every function takes the `void **state` argument; the test runner's main()
 * registers each of them with cmocka_unit_test_setup() using `beforeTest`
 * as the setup function.
 */
void test_ascii_simple(void **state);
void test_ansi_unicode_book(void **state);
void test_ansi_unicode_space_kinds(void **state);
void test_ansi_unicode_chinese(void **state);

#endif
/* vim: set cindent sw=4: */

View File

@ -61,7 +61,7 @@ void collect_reset()
/** /**
* Mock of the `bx_fprintf()` function which records its output instead of printing it. Assumes that no output string * Mock of the `bx_fprintf()` function which records its output instead of printing it. Assumes that no output string
* will be longer than 512 characters. * of our test cases will be longer than 512 characters.
* @param stream `stdout` or `stderr` * @param stream `stdout` or `stderr`
* @param format the format string, followed by the arguments * @param format the format string, followed by the arguments
*/ */

View File

@ -24,6 +24,7 @@
#include <cmocka.h> #include <cmocka.h>
#include "global_mock.h" #include "global_mock.h"
#include "bxstring_test.h"
#include "cmdline_test.h" #include "cmdline_test.h"
#include "tools_test.h" #include "tools_test.h"
#include "regulex_test.h" #include "regulex_test.h"
@ -91,13 +92,24 @@ int main(void)
cmocka_unit_test(test_strisyes_true), cmocka_unit_test(test_strisyes_true),
cmocka_unit_test(test_strisyes_false), cmocka_unit_test(test_strisyes_false),
cmocka_unit_test(test_strisno_true), cmocka_unit_test(test_strisno_true),
cmocka_unit_test(test_strisno_false) cmocka_unit_test(test_strisno_false),
cmocka_unit_test(test_my_strrspn_edge),
cmocka_unit_test(test_my_strrspn),
cmocka_unit_test(test_is_csi_reset)
};
const struct CMUnitTest bxstring_tests[] = {
cmocka_unit_test_setup(test_ascii_simple, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_book, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_space_kinds, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_chinese, beforeTest)
}; };
int num_failed = 0; int num_failed = 0;
num_failed += cmocka_run_group_tests(cmdline_tests, NULL, NULL); num_failed += cmocka_run_group_tests(cmdline_tests, NULL, NULL);
num_failed += cmocka_run_group_tests(regulex_tests, NULL, NULL); num_failed += cmocka_run_group_tests(regulex_tests, NULL, NULL);
num_failed += cmocka_run_group_tests(tools_tests, NULL, NULL); num_failed += cmocka_run_group_tests(tools_tests, NULL, NULL);
num_failed += cmocka_run_group_tests(bxstring_tests, NULL, NULL);
teardown(); teardown();
return num_failed; return num_failed;

View File

@ -18,17 +18,21 @@
*/ */
#include "config.h" #include "config.h"
#include <setjmp.h>
#include <stdarg.h> #include <stdarg.h>
#include <stddef.h> #include <stddef.h>
#include <setjmp.h>
#include <cmocka.h> #include <cmocka.h>
#include "tools.h" #include "tools.h"
#include "tools_test.h" #include "tools_test.h"
#include "unicode.h"
void test_strisyes_true(void **state) void test_strisyes_true(void **state)
{ {
(void) state; /* unused */ (void) state; /* unused */
assert_int_equal(1, strisyes("On")); assert_int_equal(1, strisyes("On"));
assert_int_equal(1, strisyes("on")); assert_int_equal(1, strisyes("on"));
@ -44,7 +48,7 @@ void test_strisyes_true(void **state)
void test_strisyes_false(void **state) void test_strisyes_false(void **state)
{ {
(void) state; /* unused */ (void) state; /* unused */
assert_int_equal(0, strisyes(NULL)); assert_int_equal(0, strisyes(NULL));
assert_int_equal(0, strisyes("")); assert_int_equal(0, strisyes(""));
@ -63,7 +67,7 @@ void test_strisyes_false(void **state)
void test_strisno_true(void **state) void test_strisno_true(void **state)
{ {
(void) state; /* unused */ (void) state; /* unused */
assert_int_equal(1, strisno("off")); assert_int_equal(1, strisno("off"));
assert_int_equal(1, strisno("Off")); assert_int_equal(1, strisno("Off"));
@ -79,7 +83,7 @@ void test_strisno_true(void **state)
void test_strisno_false(void **state) void test_strisno_false(void **state)
{ {
(void) state; /* unused */ (void) state; /* unused */
assert_int_equal(0, strisno(NULL)); assert_int_equal(0, strisno(NULL));
assert_int_equal(0, strisno("")); assert_int_equal(0, strisno(""));
@ -96,4 +100,49 @@ void test_strisno_false(void **state)
} }
/*EOF*/ /* vim: set cindent sw=4: */ void test_my_strrspn_edge(void **state)
{
/*
 * Edge cases of my_strrspn(): whenever either argument is NULL or the empty
 * string, the expected result is 0.
 */
(void) state; /* unused */
assert_int_equal(0, (int) my_strrspn(NULL, "abc"));    /* NULL first argument */
assert_int_equal(0, (int) my_strrspn("", "abc"));      /* empty first argument */
assert_int_equal(0, (int) my_strrspn("abc", NULL));    /* NULL second argument */
assert_int_equal(0, (int) my_strrspn("abc", ""));      /* empty second argument */
assert_int_equal(0, (int) my_strrspn(NULL, NULL));     /* both arguments NULL */
}
void test_my_strrspn(void **state)
{
(void) state; /* unused */
assert_int_equal(2, (int) my_strrspn("foo", "o"));
assert_int_equal(0, (int) my_strrspn("foo", "ABC"));
assert_int_equal(3, (int) my_strrspn("foo", "foobar"));
assert_int_equal(1, (int) my_strrspn("foo ", " "));
assert_int_equal(1, (int) my_strrspn("a", "a"));
assert_int_equal(0, (int) my_strrspn("a", "A"));
assert_int_equal(2, (int) my_strrspn("axxaa", "a"));
}
/*
 * Checks is_csi_reset() against strings converted from ASCII via
 * u32_strconv_from_arg(). The first group of inputs is expected to be
 * recognized (result 1), the second group is expected to be rejected (result 0).
 *
 * NOTE(review): the strings returned by u32_strconv_from_arg() are passed
 * straight into is_csi_reset() and never released here - presumably they are
 * heap-allocated, in which case every assertion leaks one string. Confirm and
 * free them if so.
 */
void test_is_csi_reset(void **state)
{
(void) state; /* unused */
/* expected to be recognized as a reset sequence */
assert_int_equal(1, is_csi_reset(u32_strconv_from_arg("\x1b[0m", "ASCII")));
assert_int_equal(1, is_csi_reset(u32_strconv_from_arg("\x1b[m", "ASCII")));          /* parameter omitted */
assert_int_equal(1, is_csi_reset(u32_strconv_from_arg("\x1b(0m", "ASCII")));         /* '(' instead of '[' */
assert_int_equal(1, is_csi_reset(u32_strconv_from_arg("\x1b(m", "ASCII")));
assert_int_equal(1, is_csi_reset(u32_strconv_from_arg("\x1b[0m foo", "ASCII")));     /* text after the sequence */
/* expected to be rejected */
assert_int_equal(0, is_csi_reset(u32_strconv_from_arg("", "ASCII")));                /* empty string */
assert_int_equal(0, is_csi_reset(u32_strconv_from_arg("normal", "ASCII")));          /* no escape character at all */
assert_int_equal(0, is_csi_reset(u32_strconv_from_arg("\x1b[", "ASCII")));           /* sequence cut short */
assert_int_equal(0, is_csi_reset(u32_strconv_from_arg("not yet \x1b[0m", "ASCII"))); /* sequence not at the start */
assert_int_equal(0, is_csi_reset(u32_strconv_from_arg("\x1b[38;5;203m", "ASCII")));  /* a different sequence ending in 'm' */
assert_int_equal(0, is_csi_reset(u32_strconv_from_arg("\x1b_BROKEN", "ASCII")));
}
/* vim: set cindent sw=4: */

View File

@ -27,6 +27,11 @@ void test_strisyes_false(void **state);
void test_strisno_true(void **state); void test_strisno_true(void **state);
void test_strisno_false(void **state); void test_strisno_false(void **state);
void test_my_strrspn_edge(void **state);
void test_my_strrspn(void **state);
void test_is_csi_reset(void **state);
#endif #endif

86
utest/utest_tools.c Normal file
View File

@ -0,0 +1,86 @@
/*
* boxes - Command line filter to draw/remove ASCII boxes around text
* Copyright (c) 1999-2021 Thomas Jensen and the boxes contributors
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
* License, version 3, as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
* You should have received a copy of the GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
/*
* Helper functions for all the unit tests.
*/
#include "config.h"
#include <setjmp.h>
#include <stdarg.h>
#include <stddef.h>
#include <cmocka.h>
#include <stdio.h>
#include "utest_tools.h"
/**
 * Assert that the two given arrays have the same contents.
 *
 * The first `p_len_expected` elements are compared one by one; each element of
 * `p_actual` is cast to `int` before the comparison. If either array is NULL,
 * the assertion only holds when both arrays are NULL and `p_len_expected` is 0.
 *
 * @param p_expected the expected array, may be NULL
 * @param p_actual the actual array, may be NULL
 * @param p_len_expected number of elements in `p_expected`
 */
void assert_array_equal(int p_expected[], size_t *p_actual, size_t p_len_expected)
{
    if (p_expected == NULL || p_actual == NULL) {
        /* A missing array can only be equal to another missing array of length zero. */
        assert_null(p_expected);
        assert_null(p_actual);
        assert_int_equal(0, p_len_expected);
        /*
         * Return explicitly so that the element loop below can never index into a
         * NULL array, regardless of how a failed assertion above is handled.
         */
        return;
    }

    for (size_t i = 0; i < p_len_expected; i++) {
        assert_int_equal(p_expected[i], (int) p_actual[i]);
    }
}
/**
 * Print the contents of the given `int` array to stdout for debugging purposes.
 * The elements are written as a comma-separated list in square brackets,
 * followed by a newline. A NULL array is printed as the word `NULL`.
 *
 * @param p_array the array to print, may be NULL
 * @param p_len the number of elements in the array
 */
void print_array_i(int p_array[], size_t p_len)
{
    if (p_array == NULL) {
        printf("NULL\n");
        return;
    }

    printf("[");
    size_t remaining = p_len;
    for (const int *elem = p_array; remaining > 0; elem++, remaining--) {
        /* every element except the last one is followed by a separator */
        const char *separator = (remaining > 1) ? ", " : "";
        printf("%d%s", *elem, separator);
    }
    printf("]\n");
}
/**
 * Print the contents of the given `size_t` array to stdout for debugging
 * purposes. Each element is cast to `int` before it is printed. The elements are
 * written as a comma-separated list in square brackets, followed by a newline.
 * A NULL array is printed as the word `NULL`.
 *
 * @param p_array the array to print, may be NULL
 * @param p_len the number of elements in the array
 */
void print_array_s(size_t p_array[], size_t p_len)
{
    if (p_array == NULL) {
        printf("NULL\n");
        return;
    }

    printf("[");
    size_t remaining = p_len;
    for (const size_t *elem = p_array; remaining > 0; elem++, remaining--) {
        /* every element except the last one is followed by a separator */
        const char *separator = (remaining > 1) ? ", " : "";
        printf("%d%s", (int) *elem, separator);
    }
    printf("]\n");
}
/* vim: set cindent sw=4: */

44
utest/utest_tools.h Normal file
View File

@ -0,0 +1,44 @@
/*
* boxes - Command line filter to draw/remove ASCII boxes around text
* Copyright (c) 1999-2021 Thomas Jensen and the boxes contributors
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
* License, version 3, as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
* You should have received a copy of the GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
/*
* Helper functions for all the unit tests.
*/
#ifndef UTEST_TOOLS_H
#define UTEST_TOOLS_H

#include <stddef.h>   /* size_t - makes this header usable without prior includes */


/**
 * Assert that the two given arrays have the same contents. The elements of
 * `p_actual` are cast to `int` for the comparison.
 * @param p_expected the expected array
 * @param p_actual the actual array
 * @param p_len_expected number of elements in `p_expected`
 */
void assert_array_equal(int p_expected[], size_t *p_actual, size_t p_len_expected);


/**
 * Print the contents of the given `int` array for debugging purposes.
 * @param p_array the array to print
 * @param p_len the number of elements in the array
 */
void print_array_i(int p_array[], size_t p_len);


/**
 * Print the contents of the given `size_t` array for debugging purposes.
 * The elements are cast to `int` for printing.
 * @param p_array the array to print
 * @param p_len the number of elements in the array
 */
void print_array_s(size_t p_array[], size_t p_len);


#endif
/*EOF*/ /* vim: set cindent sw=4: */