From 21a691fbd1cf4a475857f85a80040e9077bd041f Mon Sep 17 00:00:00 2001 From: Thomas Jensen Date: Wed, 19 Apr 2023 21:06:00 +0200 Subject: [PATCH] Use UTF-8 data read from config file #72 - shapes get additional MBCS values - metadata is already converted for real to use bxstr_t* --- src/boxes.c | 25 +++++++--- src/boxes.in.h | 22 ++++---- src/cmdline.c | 7 +-- src/discovery.c | 96 ++++++++++++++++++++++------------- src/discovery.h | 4 +- src/input.c | 6 +-- src/list.c | 60 +++++++++++++--------- src/parsing.c | 44 ++++++++-------- src/parsing.h | 3 +- src/regulex.c | 88 +++++++++++++++++++++++--------- src/regulex.h | 57 ++++++++++++++++++--- src/shape.c | 25 ++++++---- src/shape.h | 15 +++--- src/tools.c | 78 ++++++++++++++++++++++++++++- src/tools.h | 59 ++++++++++++++++++++++ utest/cmdline_test.c | 68 ++++++++++++------------- utest/main.c | 56 +++++++++++++++++++-- utest/regulex_test.c | 13 +++-- utest/tools_test.c | 116 +++++++++++++++++++++++++++++++++++++++++++ utest/tools_test.h | 5 ++ 20 files changed, 653 insertions(+), 194 deletions(-) diff --git a/src/boxes.c b/src/boxes.c index c3bfbb5..71cd61f 100644 --- a/src/boxes.c +++ b/src/boxes.c @@ -84,23 +84,29 @@ static int build_design(design_t **adesigns, const char *cld) dp->name = ""; dp->aliases = (char **) calloc(1, sizeof(char *)); - dp->created = "now"; + dp->created = bxs_from_ascii("now"); dp->revision = "1.0"; - dp->sample = "n/a"; + dp->sample = bxs_from_ascii("n/a"); dp->indentmode = DEF_INDENTMODE; dp->padding[BLEF] = 1; - dp->defined_in = "(command line)"; + dp->defined_in = bxs_from_ascii("(command line)"); dp->tags = (char **) calloc(2, sizeof(char *)); dp->tags[0] = "transient"; + uint32_t *cld_u32 = u32_strconv_from_arg(cld, "UTF-8"); /* CHECK wrong on Windows (UTF-16) or different IME */ + bxstr_t *cldW = bxs_from_unicode(cld_u32); + BFREE(cld_u32); + dp->shape[W].height = 1; - dp->shape[W].width = strlen(cld); + dp->shape[W].width = cldW->num_columns; dp->shape[W].elastic = 1; - rc = genshape(dp->shape[W].width, dp->shape[W].height, &(dp->shape[W].chars)); + rc = genshape(dp->shape[W].width, dp->shape[W].height, &(dp->shape[W].chars), &(dp->shape[W].mbcs)); if (rc) { return rc; } + bxs_free(dp->shape[W].mbcs[0]); + dp->shape[W].mbcs[0] = cldW; strcpy(dp->shape[W].chars[0], cld); for (i = 0; i < NUM_SHAPES; ++i) { @@ -139,7 +145,7 @@ static int build_design(design_t **adesigns, const char *cld) return 1; /* never happens ;-) */ } - rc = genshape(c->width, c->height, &(c->chars)); + rc = genshape(c->width, c->height, &(c->chars), &(c->mbcs)); if (rc) { return rc; } @@ -185,7 +191,7 @@ static void handle_command_line(int argc, char *argv[]) */ static void handle_config_parsing() { - char *config_file = discover_config_file(0); + bxstr_t *config_file = discover_config_file(0); if (config_file == NULL) { exit(EXIT_FAILURE); } @@ -397,10 +403,13 @@ int main(int argc, char *argv[]) fprintf (stderr, "BOXES STARTING ...\n"); #endif + /* Temporarily set the system encoding, for proper output of --help text etc. */ + setlocale(LC_ALL, ""); /* switch from default "C" encoding to system encoding */ + encoding = locale_charset(); + handle_command_line(argc, argv); /* Store system character encoding */ - setlocale(LC_ALL, ""); /* switch from default "C" encoding to system encoding */ encoding = check_encoding(opt.encoding, locale_charset()); #ifdef DEBUG fprintf (stderr, "Character Encoding = %s\n", encoding); diff --git a/src/boxes.in.h b/src/boxes.in.h index 4c267d4..72705a6 100644 --- a/src/boxes.in.h +++ b/src/boxes.in.h @@ -26,9 +26,10 @@ /* #define LEXER_DEBUG 1 */ /* #define DISCOVERY_DEBUG 1 */ - #include #include + +#include "bxstring.h" #include "regulex.h" #include "shape.h" @@ -56,6 +57,9 @@ #define LINE_MAX_BYTES 16382 #endif +/* Macro to declare a function parameter as intentionally unused in order to avoid compiler warnings */ +#define UNUSED(variable) ((void)(variable)) + #define BTOP 0 /* for use with sides */ #define BRIG 1 @@ -64,8 +68,8 @@ typedef struct { - char *search; - char *repstr; + bxstr_t *search; + bxstr_t *repstr; pcre2_code *prog; /* compiled search pattern */ int line; /* line of definition in config file */ char mode; /* 'g' or 'o' */ @@ -75,12 +79,12 @@ typedef struct { typedef struct { char *name; /* primary name of the box design */ char **aliases; /* zero-terminated array of alias names of the design */ - char *author; /* creator of the configuration file entry */ - char *designer; /* creator of the original ASCII artwork */ - char *created; /* date created, free format */ + bxstr_t *author; /* creator of the configuration file entry */ + bxstr_t *designer; /* creator of the original ASCII artwork */ + bxstr_t *created; /* date created, free format */ char *revision; /* revision number of design */ - char *revdate; /* date of current revision */ - char *sample; /* the complete sample block in one string */ + bxstr_t *revdate; /* date of current revision */ + bxstr_t *sample; /* the complete sample block in one string */ char indentmode; /* 'b', 't', or 'n' */ sentry_t shape[NUM_SHAPES]; size_t maxshapeheight; /* height of highest shape in design */ @@ -88,7 +92,7 @@ typedef struct { size_t minheight; int padding[NUM_SIDES]; char **tags; - char *defined_in; /* path to config file where this was defined */ + bxstr_t *defined_in; /* path to config file where this was defined */ reprule_t *current_rule; reprule_t *reprules; /* applied when drawing a box */ diff --git a/src/cmdline.c b/src/cmdline.c index 32caf2b..695b2e3 100644 --- a/src/cmdline.c +++ b/src/cmdline.c @@ -76,7 +76,7 @@ static void usage_short(FILE *st) */ void usage_long(FILE *st) { - char *config_file = discover_config_file(0); + bxstr_t *config_file = discover_config_file(0); fprintf(st, "%s - draws any kind of box around your text (or removes it)\n", PROJECT); fprintf(st, " Website: https://boxes.thomasjensen.com/\n"); @@ -86,7 +86,8 @@ void usage_long(FILE *st) fprintf(st, " -d name box design [default: first one in file]\n"); fprintf(st, " -e eol Override line break type (experimental) [default: %s]\n", strcmp(EOL_DEFAULT, "\r\n") == 0 ? "CRLF" : "LF"); - fprintf(st, " -f file configuration file [default: %s]\n", config_file != NULL ? config_file : "none"); + fprintf(st, " -f file configuration file [default: %s]\n", + config_file != NULL ? bxs_to_output(config_file) : "none"); fprintf(st, " -h print usage information\n"); fprintf(st, " -i mode indentation mode [default: box]\n"); fprintf(st, " -k bool leading/trailing blank line retention on removal\n"); @@ -100,7 +101,7 @@ void usage_long(FILE *st) fprintf(st, " -t str tab stop distance and expansion [default: %de]\n", DEF_TABSTOP); fprintf(st, " -v print version information\n"); - BFREE(config_file); + bxs_free(config_file); } diff --git a/src/discovery.c b/src/discovery.c index 6e8c93b..29cccab 100644 --- a/src/discovery.c +++ b/src/discovery.c @@ -18,6 +18,7 @@ */ #include "config.h" + #include #include #include @@ -32,46 +33,48 @@ #include "boxes.h" #include "tools.h" +#include "unicode.h" #include "discovery.h" -static int can_read_file(const char *filename) +static int can_read_file(char *filename_utf8) { #ifdef DISCOVERY_DEBUG - fprintf(stderr, "can_read_file(%s) - enter\n", filename); + fprintf(stderr, "can_read_file(%s) - enter\n", filename_utf8); #endif struct stat statbuf; int result = 1; - if (filename == NULL || filename[0] == '\0') { + if (filename_utf8 == NULL || filename_utf8[0] == '\0') { #ifdef DISCOVERY_DEBUG fprintf(stderr, "%s: can_read_file(): argument was NULL\n", PROJECT); #endif result = 0; } else { - FILE *f = fopen(filename, "r"); + FILE *f = bx_fopen(filename_utf8, "r"); if (f == NULL) { #ifdef DISCOVERY_DEBUG fprintf(stderr, "%s: can_read_file(): File \"%s\" could not be opened for reading - %s\n", - PROJECT, filename, strerror(errno)); + PROJECT, filename_utf8, strerror(errno)); #endif result = 0; } else { fclose(f); - if (stat(filename, &statbuf) != 0) { + if (stat(filename_utf8, &statbuf) != 0) { #ifdef DISCOVERY_DEBUG fprintf(stderr, "%s: can_read_file(): File \"%s\" not statable - %s\n", - PROJECT, filename, strerror(errno)); + PROJECT, filename_utf8, strerror(errno)); #endif result = 0; } else if (S_ISDIR(statbuf.st_mode)) { #ifdef DISCOVERY_DEBUG - fprintf(stderr, "%s: can_read_file(): File \"%s\" is in fact a directory\n", PROJECT, filename); + fprintf(stderr, "%s: can_read_file(): File \"%s\" is in fact a directory\n", PROJECT, + filename_utf8); #endif result = 0; } @@ -85,25 +88,25 @@ static int can_read_file(const char *filename) -static int can_read_dir(const char *dirname) +static int can_read_dir(const char *dirname_utf8) { #ifdef DISCOVERY_DEBUG - fprintf(stderr, "can_read_dir(%s) - enter\n", dirname); + fprintf(stderr, "can_read_dir(%s) - enter\n", dirname_utf8); #endif int result = 1; - if (dirname == NULL || dirname[0] == '\0') { + if (dirname_utf8 == NULL || dirname_utf8[0] == '\0') { #ifdef DISCOVERY_DEBUG fprintf(stderr, "%s: can_read_dir(): argument was NULL\n", PROJECT); #endif result = 0; } else { - DIR *dir = opendir(dirname); + DIR *dir = opendir(dirname_utf8); if (dir == NULL) { #ifdef DISCOVERY_DEBUG fprintf(stderr, "%s: can_read_dir(): Directory \"%s\" could not be opened for reading - %s\n", - PROJECT, dirname, strerror(errno)); + PROJECT, dirname_utf8, strerror(errno)); #endif result = 0; } @@ -119,19 +122,19 @@ static int can_read_dir(const char *dirname) -static char *combine(const char *dirname, const char *filename) +static char *combine(const char *dirname_utf8, const char *filename_utf8) { - const size_t dirname_len = strlen(dirname); + const size_t dirname_len = strlen(dirname_utf8); - if (dirname[dirname_len - 1] == '/') { - return concat_strings_alloc(2, dirname, filename); + if (dirname_utf8[dirname_len - 1] == '/') { + return concat_strings_alloc(2, dirname_utf8, filename_utf8); } - return concat_strings_alloc(3, dirname, "/", filename); + return concat_strings_alloc(3, dirname_utf8, "/", filename_utf8); } -static char *locate_config_in_dir(const char *dirname) +static char *locate_config_in_dir(const char *dirname_utf8) { #ifdef __MINGW32__ static const char *filenames[] = {"boxes.cfg", "box-designs.cfg", "boxes-config"}; @@ -139,7 +142,7 @@ static char *locate_config_in_dir(const char *dirname) static const char *filenames[] = {".boxes", "box-designs", "boxes-config", "boxes"}; #endif for (size_t i = 0; i < (sizeof(filenames) / sizeof(const char *)); i++) { - char *f = combine(dirname, filenames[i]); + char *f = combine(dirname_utf8, filenames[i]); if (can_read_file(f)) { return f; } @@ -150,20 +153,20 @@ static char *locate_config_in_dir(const char *dirname) -static char *locate_config_file_or_dir(const char *path, const char *ext_msg) +static char *locate_config_file_or_dir(char *path_utf8, const char *ext_msg) { char *result = NULL; - if (can_read_file(path)) { - result = strdup(path); + if (can_read_file(path_utf8)) { + result = strdup(path_utf8); } - else if (can_read_dir(path)) { - result = locate_config_in_dir(path); + else if (can_read_dir(path_utf8)) { + result = locate_config_in_dir(path_utf8); if (result == NULL) { - fprintf(stderr, "%s: Couldn\'t find config file in directory \'%s\'%s\n", PROJECT, path, ext_msg); + fprintf(stderr, "%s: Couldn\'t find config file in directory \'%s\'%s\n", PROJECT, path_utf8, ext_msg); } } else { - fprintf(stderr, "%s: Couldn\'t find config file at \'%s\'%s\n", PROJECT, path, ext_msg); + fprintf(stderr, "%s: Couldn\'t find config file at \'%s\'%s\n", PROJECT, path_utf8, ext_msg); } return result; } @@ -186,6 +189,10 @@ static char *from_env_var(const char *env_var, const char *postfix) static char *locate_config_common(int *error_printed) { + #ifdef DISCOVERY_DEBUG + fprintf(stderr, "locate_config_common() - enter\n"); + #endif + char *result = NULL; if (opt.f) { result = locate_config_file_or_dir(opt.f, ""); @@ -199,6 +206,10 @@ static char *locate_config_common(int *error_printed) *error_printed = 1; } } + + #ifdef DISCOVERY_DEBUG + fprintf(stderr, "locate_config_common() - exit -> [%s]\n", result); + #endif return result; } @@ -242,15 +253,31 @@ static char *exe_to_cfg() #endif -char *discover_config_file(const int global_only) + +static bxstr_t *utf8_to_bxs(char *utf8) +{ + bxstr_t *result = NULL; + if (utf8 != NULL) { + uint32_t *utf32 = u32_strconv_from_arg(utf8, "UTF-8"); + result = bxs_from_unicode(utf32); + BFREE(utf32); + } + return result; +} + + + +bxstr_t *discover_config_file(const int global_only) { #ifdef DISCOVERY_DEBUG fprintf(stderr, "discover_config_file(%s) - enter\n", global_only ? "true" : "false"); #endif int error_printed = 0; - char *result = NULL; + bxstr_t *result = NULL; if (!global_only) { - result = locate_config_common(&error_printed); + char *common_config = locate_config_common(&error_printed); + result = utf8_to_bxs(common_config); + BFREE(common_config); } if (result == NULL && !error_printed) { @@ -279,19 +306,20 @@ char *discover_config_file(const int global_only) #ifdef __MINGW32__ char *exepath = exe_to_cfg(); if (can_read_file(exepath)) { - result = exepath; + result = utf8_to_bxs(exepath); } else { fprintf(stderr, "%s: Couldn\'t find config file at \'%s\'\n", PROJECT, exepath); error_printed = 1; } + BFREE(exepath); #else if (can_read_file(GLOBALCONF)) { - result = strdup(GLOBALCONF); + result = utf8_to_bxs(GLOBALCONF); } #endif } else if (can_read_dir(dir)) { - result = locate_config_in_dir(dir); + result = utf8_to_bxs(locate_config_in_dir(dir)); } if (result != NULL) { break; @@ -303,7 +331,7 @@ char *discover_config_file(const int global_only) fprintf(stderr, "%s: Can't find config file.\n", PROJECT); } #ifdef DISCOVERY_DEBUG - fprintf(stderr, "discover_config_file() - exit -> [%s]\n", result); + fprintf(stderr, "discover_config_file() - exit -> [%s]\n", bxs_to_output(result)); #endif return result; } diff --git a/src/discovery.h b/src/discovery.h index ede8110..64e2545 100644 --- a/src/discovery.h +++ b/src/discovery.h @@ -20,8 +20,10 @@ #ifndef BOXES_DISCOVERY_H #define BOXES_DISCOVERY_H +#include "bxstring.h" -char *discover_config_file(const int global_only); + +bxstr_t *discover_config_file(const int global_only); #endif /* BOXES_DISCOVERY_H */ diff --git a/src/input.c b/src/input.c index 8130369..c26adff 100644 --- a/src/input.c +++ b/src/input.c @@ -123,7 +123,7 @@ int apply_substitutions(input_t *result, const int mode) errno = 0; opt.design->current_rule = rules; for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) { - rules[j].prog = compile_pattern(rules[j].search); + rules[j].prog = u32_compile_pattern(rules[j].search->memory); if (rules[j].prog == NULL) { return 5; } @@ -144,8 +144,8 @@ int apply_substitutions(input_t *result, const int mode) rules[j].prog, rules[j].repstr, u32_strconv_to_output(result->lines[k].mbtext), (int) result->lines[k].num_chars, rules[j].mode); #endif - uint32_t *newtext = regex_replace(rules[j].prog, rules[j].repstr, - result->lines[k].mbtext, result->lines[k].num_chars, rules[j].mode == 'g'); + uint32_t *newtext = u32_regex_replace(rules[j].prog, rules[j].repstr->memory, + result->lines[k].mbtext, result->lines[k].num_chars, rules[j].mode == 'g'); #ifdef REGEXP_DEBUG fprintf (stderr, "\"%s\"\n", newtext ? u32_strconv_to_output(newtext) : "NULL"); #endif diff --git a/src/list.c b/src/list.c index d8024ba..0ed00f9 100644 --- a/src/list.c +++ b/src/list.c @@ -18,15 +18,18 @@ */ #include "config.h" + #include #include #include #include "boxes.h" +#include "bxstring.h" #include "parsing.h" #include "query.h" #include "tools.h" #include "list.h" +#include "unicode.h" @@ -163,17 +166,23 @@ static void print_tags(tagstats_t *tagstats, size_t num_tags) -static char *escape(const char *org, const int pLength) +static bxstr_t *escape(const bxstr_t *org, const int pLength) { - char *result = (char *) calloc(1, 2 * strlen(org) + 1); + const ucs4_t char_backslash = to_utf32('\\'); + const ucs4_t char_quote = to_utf32('"'); + + uint32_t *temp = (uint32_t *) calloc(2 * org->num_chars + 1, sizeof(uint32_t)); int orgIdx, resultIdx; for (orgIdx = 0, resultIdx = 0; orgIdx < pLength; ++orgIdx, ++resultIdx) { - if (org[orgIdx] == '\\' || org[orgIdx] == '"') { - result[resultIdx++] = '\\'; + if (is_char_at(org->memory, orgIdx, char_backslash) || is_char_at(org->memory, orgIdx, char_quote)) { + set_char_at(temp, resultIdx++, char_backslash); } - result[resultIdx] = org[orgIdx]; + set_char_at(temp, resultIdx, org->memory[orgIdx]); } - result[resultIdx] = '\0'; + set_char_at(temp, resultIdx, char_nul); + + bxstr_t *result = bxs_from_unicode(temp); + BFREE(temp); return result; } @@ -199,16 +208,19 @@ static void print_design_details(design_t *d) } fprintf(opt.outfile, "%s", opt.eol); - fprintf(opt.outfile, "Author: %s%s", d->author ? d->author : "(unknown author)", opt.eol); - fprintf(opt.outfile, "Original Designer: %s%s", d->designer ? d->designer : "(unknown artist)", opt.eol); - fprintf(opt.outfile, "Creation Date: %s%s", d->created ? d->created : "(unknown)", opt.eol); + fprintf(opt.outfile, "Author: %s%s", + d->author ? bxs_to_output(d->author) : "(unknown author)", opt.eol); + fprintf(opt.outfile, "Original Designer: %s%s", + d->designer ? bxs_to_output(d->designer) : "(unknown artist)", opt.eol); + fprintf(opt.outfile, "Creation Date: %s%s", + d->created ? bxs_to_output(d->created) : "(unknown)", opt.eol); fprintf(opt.outfile, "Current Revision: %s%s%s%s", d->revision ? d->revision : "", d->revision && d->revdate ? " as of " : "", - d->revdate ? d->revdate : (d->revision ? "" : "(unknown)"), opt.eol); + d->revdate ? bxs_to_output(d->revdate) : (d->revision ? "" : "(unknown)"), opt.eol); - fprintf(opt.outfile, "Configuration File: %s%s", d->defined_in, opt.eol); + fprintf(opt.outfile, "Configuration File: %s%s", bxs_to_output(d->defined_in), opt.eol); fprintf(opt.outfile, "Indentation Mode: "); switch (d->indentmode) { @@ -228,7 +240,7 @@ static void print_design_details(design_t *d) for (int i = 0; i < (int) d->anz_reprules; ++i) { fprintf(opt.outfile, "%d. (%s) \"%s\" WITH \"%s\"%s", i + 1, d->reprules[i].mode == 'g' ? "glob" : "once", - d->reprules[i].search, d->reprules[i].repstr, opt.eol); + bxs_to_output(d->reprules[i].search), bxs_to_output(d->reprules[i].repstr), opt.eol); if (i < (int) d->anz_reprules - 1) { fprintf(opt.outfile, " "); } @@ -242,7 +254,7 @@ static void print_design_details(design_t *d) for (int i = 0; i < (int) d->anz_revrules; ++i) { fprintf(opt.outfile, "%d. (%s) \"%s\" TO \"%s\"%s", i + 1, d->revrules[i].mode == 'g' ? "glob" : "once", - d->revrules[i].search, d->revrules[i].repstr, opt.eol); + bxs_to_output(d->revrules[i].search), bxs_to_output(d->revrules[i].repstr), opt.eol); if (i < (int) d->anz_revrules - 1) { fprintf(opt.outfile, " "); } @@ -315,7 +327,7 @@ static void print_design_details(design_t *d) * Display all shapes */ if (query_is_undoc()) { - fprintf(opt.outfile, "Sample:%s%s%s", opt.eol, d->sample, opt.eol); + fprintf(opt.outfile, "Sample:%s%s%s", opt.eol, bxs_to_output(d->sample), opt.eol); } else { int first_shape = 1; @@ -324,11 +336,11 @@ static void print_design_details(design_t *d) continue; } for (size_t w = 0; w < d->shape[i].height; ++w) { - char *escaped_line = escape(d->shape[i].chars[w], d->shape[i].width); + bxstr_t *escaped_line = escape(d->shape[i].mbcs[w], d->shape[i].width); fprintf(opt.outfile, "%-24s%3s%c \"%s\"%c%s", (first_shape == 1 && w == 0 ? "Defined Shapes:" : ""), (w == 0 ? shape_name[i] : ""), (w == 0 ? ':' : ' '), - escaped_line, + bxs_to_output(escaped_line), (w < d->shape[i].height - 1 ? ',' : ' '), opt.eol ); @@ -358,24 +370,24 @@ int list_designs() for (int i = 0; i < num_designs; ++i) { char *all_names = names(list[i]); - if (list[i]->author && list[i]->designer && strcmp(list[i]->author, list[i]->designer) != 0) { + if (list[i]->author && list[i]->designer && bxs_strcmp(list[i]->author, list[i]->designer) != 0) { fprintf(opt.outfile, "%s%s%s, coded by %s:%s%s%s%s%s", all_names, opt.eol, - list[i]->designer, list[i]->author, opt.eol, opt.eol, - list[i]->sample, opt.eol, opt.eol); + bxs_to_output(list[i]->designer), bxs_to_output(list[i]->author), opt.eol, opt.eol, + bxs_to_output(list[i]->sample), opt.eol, opt.eol); } else if (list[i]->designer) { fprintf(opt.outfile, "%s%s%s:%s%s%s%s%s", all_names, opt.eol, - list[i]->designer, opt.eol, opt.eol, - list[i]->sample, opt.eol, opt.eol); + bxs_to_output(list[i]->designer), opt.eol, opt.eol, + bxs_to_output(list[i]->sample), opt.eol, opt.eol); } else if (list[i]->author) { fprintf(opt.outfile, "%s%sunknown artist, coded by %s:%s%s%s%s%s", all_names, opt.eol, - list[i]->author, opt.eol, opt.eol, - list[i]->sample, opt.eol, opt.eol); + bxs_to_output(list[i]->author), opt.eol, opt.eol, + bxs_to_output(list[i]->sample), opt.eol, opt.eol); } else { fprintf(opt.outfile, "%s:%s%s%s%s%s", all_names, opt.eol, opt.eol, - list[i]->sample, opt.eol, opt.eol); + bxs_to_output(list[i]->sample), opt.eol, opt.eol); } BFREE(all_names); diff --git a/src/parsing.c b/src/parsing.c index 144efdc..b82b71b 100644 --- a/src/parsing.c +++ b/src/parsing.c @@ -26,11 +26,14 @@ #include #include "boxes.h" +#include "bxstring.h" #include "parsing.h" -#include "parser.h" -#include "lex.yy.h" #include "tools.h" +#include "parser.h" + +#include "lex.yy.h" + /** file handle of the config file currently being parsed */ static FILE *current_config_handle = NULL; @@ -39,10 +42,10 @@ static FILE *current_config_handle = NULL; static pass_to_bison *current_bison_args = NULL; /** the name of the initially specified config file */ -static const char *first_config_file = NULL; +static bxstr_t *first_config_file = NULL; /** all parent configs encountered across all parsed config files */ -static char **parent_configs = NULL; +static bxstr_t **parent_configs = NULL; /** total number of parent configs (the size of the `parent_configs` array) */ static size_t num_parent_configs = 0; @@ -58,9 +61,10 @@ static size_t num_parent_configs = 0; */ static int open_yy_config_file(pass_to_bison *bison_args) { - current_config_handle = fopen(bison_args->config_file, "r"); + current_config_handle = bx_fopens(bison_args->config_file, "r"); if (current_config_handle == NULL) { - fprintf(stderr, "%s: Couldn't open config file '%s' for input\n", PROJECT, bison_args->config_file); + fprintf(stderr, "%s: Couldn't open config file '%s' for input\n", PROJECT, + bxs_to_output(bison_args->config_file)); return 1; } yyset_in(current_config_handle, bison_args->lexer_state); @@ -83,15 +87,15 @@ void print_design_list_header() } fprintf(opt.outfile, "%s%s", opt.eol, opt.eol); fprintf(opt.outfile, "Configuration Files:%s", opt.eol); - fprintf(opt.outfile, " - %s%s", first_config_file, opt.eol); + fprintf(opt.outfile, " - %s%s", bxs_to_output(first_config_file), opt.eol); for (size_t i = 0; i < num_parent_configs; i++) { - fprintf(opt.outfile, " - %s (parent)%s", parent_configs[i], opt.eol); + fprintf(opt.outfile, " - %s (parent)%s", bxs_to_output(parent_configs[i]), opt.eol); } } else { - fprintf(opt.outfile, " in \"%s\":%s", first_config_file, opt.eol); + fprintf(opt.outfile, " in \"%s\":%s", bxs_to_output(first_config_file), opt.eol); fprintf(opt.outfile, "-----------------------%s", num_designs == 1 ? "" : "-"); - for (int i = strlen(first_config_file) + strlen(buf); i > 0; --i) { + for (int i = first_config_file->num_columns + strlen(buf); i > 0; --i) { fprintf(opt.outfile, "-"); } } @@ -113,7 +117,7 @@ int yyerror(pass_to_bison *bison_args, const char *fmt, ...) va_start (ap, fmt); pass_to_bison *bargs = bison_args ? bison_args : current_bison_args; - fprintf(stderr, "%s: %s: line %d: ", PROJECT, bargs->config_file, yyget_lineno(bargs->lexer_state)); + fprintf(stderr, "%s: %s: line %d: ", PROJECT, bxs_to_output(bargs->config_file), yyget_lineno(bargs->lexer_state)); vfprintf(stderr, fmt, ap); fputc('\n', stderr); @@ -124,13 +128,13 @@ int yyerror(pass_to_bison *bison_args, const char *fmt, ...) -static pass_to_bison new_bison_args(const char *config_file) +static pass_to_bison new_bison_args(bxstr_t *config_file) { pass_to_bison bison_args; bison_args.designs = NULL; bison_args.num_designs = 0; bison_args.design_idx = 0; - bison_args.config_file = (char *) config_file; + bison_args.config_file = config_file; bison_args.num_mandatory = 0; bison_args.time_for_se_check = 0; bison_args.num_shapespec = 0; @@ -155,10 +159,10 @@ static pass_to_flex new_flex_extra_data() -static pass_to_bison parse_config_file(const char *config_file, design_t *child_configs, size_t num_child_configs) +static pass_to_bison parse_config_file(bxstr_t *config_file, design_t *child_configs, size_t num_child_configs) { #ifdef DEBUG - fprintf (stderr, "Parsing Config File %s ...\n", config_file); + fprintf (stderr, "Parsing Config File %s ...\n", bxs_to_output(config_file)); #endif pass_to_bison bison_args = new_bison_args(config_file); @@ -213,10 +217,10 @@ static int record_parent_config_files(pass_to_bison *bison_args) { if (bison_args->num_parent_configs > 0) { for (int parent_idx = bison_args->num_parent_configs - 1; parent_idx >= 0; parent_idx--) { - char *parent_file = bison_args->parent_configs[parent_idx]; - int is_new = !array_contains(parent_configs, num_parent_configs, parent_file); + bxstr_t *parent_file = bison_args->parent_configs[parent_idx]; + int is_new = !array_contains_bxs(parent_configs, num_parent_configs, parent_file); if (is_new) { - parent_configs = (char **) realloc(parent_configs, (num_parent_configs + 1) * sizeof(char *)); + parent_configs = (bxstr_t **) realloc(parent_configs, (num_parent_configs + 1) * sizeof(bxstr_t *)); if (parent_configs == NULL) { return 1; } @@ -258,7 +262,7 @@ static int copy_designs(pass_to_bison *bison_args, design_t **r_result, size_t * -design_t *parse_config_files(const char *p_first_config_file, size_t *r_num_designs) +design_t *parse_config_files(bxstr_t *p_first_config_file, size_t *r_num_designs) { size_t parents_parsed = -1; /** how many parent config files have already been parsed */ @@ -266,7 +270,7 @@ design_t *parse_config_files(const char *p_first_config_file, size_t *r_num_desi *r_num_designs = 0; first_config_file = p_first_config_file; - const char *config_file = p_first_config_file; + bxstr_t *config_file = p_first_config_file; do { pass_to_bison bison_args = parse_config_file(config_file, result, *r_num_designs); ++parents_parsed; diff --git a/src/parsing.h b/src/parsing.h index bf49c2d..481f3e6 100644 --- a/src/parsing.h +++ b/src/parsing.h @@ -21,6 +21,7 @@ #define PARSING_H 1 #include "parser.h" +#include "bxstring.h" /** @@ -45,7 +46,7 @@ int yyerror(pass_to_bison *bison_args, const char *fmt, ...); * Will be set to 0 on error * @return the consolidated list of designs parsed, or `NULL` on error (then an error message was alread printed) */ -design_t *parse_config_files(const char *first_config_file, size_t *r_num_designs); +design_t *parse_config_files(bxstr_t *first_config_file, size_t *r_num_designs); #endif diff --git a/src/regulex.c b/src/regulex.c index 48fc824..8c61486 100644 --- a/src/regulex.c +++ b/src/regulex.c @@ -18,29 +18,40 @@ */ #include "config.h" + #include #include #include #include #include "boxes.h" +#include "regulex.h" #include "tools.h" #include "unicode.h" -#include "regulex.h" pcre2_code *compile_pattern(char *pattern) { - int errornumber; - PCRE2_SIZE erroroffset; - PCRE2_SPTR pattern32 = u32_strconv_from_arg(pattern, config_encoding); - if (pattern32 == NULL) { + uint32_t *ustr = u32_strconv_from_arg(pattern, CONFIG_FILE_ENCODING); + if (ustr == NULL) { + bx_fprintf(stderr, "Failed to convert pattern string to UTF-32 - \"%s\"\n", pattern); return NULL; } + pcre2_code *result = u32_compile_pattern(ustr); + BFREE(ustr); + return result; +} + + + +pcre2_code *u32_compile_pattern(uint32_t *pattern) +{ + int errornumber; + PCRE2_SIZE erroroffset; pcre2_code *re = pcre2_compile( - pattern32, /* the pattern */ + (PCRE2_SPTR) pattern, /* the pattern */ PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ 0, /* default options */ &errornumber, @@ -51,27 +62,56 @@ pcre2_code *compile_pattern(char *pattern) PCRE2_UCHAR buffer[256]; pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); bx_fprintf(stderr, "Regular expression pattern \"%s\" failed to compile at position %d: %s\n", - pattern, (int) erroroffset, u32_strconv_to_output(buffer)); + u32_strconv_to_output(pattern), (int) erroroffset, u32_strconv_to_output(buffer)); } return re; } +int regex_match(pcre2_code *pattern, char *subject_string) +{ + uint32_t *ustr = u32_strconv_from_arg(subject_string, "ASCII"); + int result = u32_regex_match(pattern, ustr); + BFREE(ustr); + return result; +} + + + +int u32_regex_match(pcre2_code *pattern, uint32_t *subject_string) +{ + pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(pattern, NULL); + int rc = pcre2_match(pattern, (PCRE2_SPTR) subject_string, PCRE2_ZERO_TERMINATED, 0, 0, match_data, NULL); + pcre2_match_data_free(match_data); + return rc < 0 ? 0 : 1; +} + + + uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, const size_t input_len, const int global) { - PCRE2_SPTR replacement = u32_strconv_from_arg(replace, config_encoding); - if (replacement == NULL) { + uint32_t *ustr = u32_strconv_from_arg(replace, CONFIG_FILE_ENCODING); + if (ustr == NULL) { bx_fprintf(stderr, "Failed to convert replacement string to UTF-32 - \"%s\"\n", replace); return NULL; } + uint32_t *result = u32_regex_replace(search, ustr, input, input_len, global); + BFREE(ustr); + return result; +} + + +uint32_t *u32_regex_replace(pcre2_code *search, uint32_t *replace, uint32_t *input, const size_t input_len, + const int global) +{ uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED - | (global ? PCRE2_SUBSTITUTE_GLOBAL : 0); - PCRE2_SIZE outlen = input_len * 2; /* estimated length of output buffer in characters, fine if too small */ + | (global ? PCRE2_SUBSTITUTE_GLOBAL : 0); + PCRE2_SIZE outlen = input_len * 2; /* estimated length of output buffer in characters, fine if too small */ PCRE2_SIZE bufsize = (input_len < 8) ? 16 : outlen; - uint32_t *output = (uint32_t *) malloc(sizeof(uint32_t) * bufsize); /* output buffer */ + uint32_t *output = (uint32_t *) malloc(sizeof(uint32_t) * bufsize); /* output buffer */ int pcre2_rc; int done = 0; @@ -83,17 +123,18 @@ uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, cons PCRE2_SIZE outlen = bufsize; pcre2_rc = pcre2_substitute(search, - (PCRE2_SPTR) input, PCRE2_ZERO_TERMINATED, - 0, /* start offset */ - options, - NULL, /* ptr to a match data block */ - NULL, /* match context */ - replacement, PCRE2_ZERO_TERMINATED, - output, &outlen); + (PCRE2_SPTR) input, PCRE2_ZERO_TERMINATED, + 0, /* start offset */ + options, + NULL, /* ptr to a match data block */ + NULL, /* match context */ + (PCRE2_SPTR) replace, PCRE2_ZERO_TERMINATED, + output, &outlen); if (pcre2_rc != PCRE2_ERROR_NOMEMORY || bufsize >= outlen) { done = 1; - } else { + } + else { #ifdef REGEXP_DEBUG fprintf(stderr, "Reallocating output buffer from %ld to %ld UTF-32 chars\n", bufsize, outlen); #endif @@ -101,13 +142,13 @@ uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, cons output = (uint32_t *) realloc(output, sizeof(uint32_t) * bufsize); } } - BFREE(replacement); if (pcre2_rc < 0) { PCRE2_UCHAR buffer[256]; pcre2_get_error_message(pcre2_rc, buffer, sizeof(buffer)); /* buffer will normally contain "invalid replacement string" */ - bx_fprintf(stderr, "Error substituting \"%s\": %s\n", replace, u32_strconv_to_output(buffer)); + bx_fprintf(stderr, "Error substituting \"%s\": %s\n", u32_strconv_to_output(replace), + u32_strconv_to_output(buffer)); BFREE(output); return NULL; } @@ -116,5 +157,4 @@ uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, cons } - -/*EOF*/ /* vim: set sw=4: */ +/* vim: set sw=4: */ diff --git a/src/regulex.h b/src/regulex.h index d973c59..4e8d449 100644 --- a/src/regulex.h +++ b/src/regulex.h @@ -39,24 +39,67 @@ -/* +/** * Compile the given pattern into a PCRE2 regular expression. + * @param pattern the pattern to compile + * @return the compiled pattern */ pcre2_code *compile_pattern(char *pattern); -/* + +/** + * Compile the given pattern into a PCRE2 regular expression. + * @param pattern the pattern to compile + * @return the compiled pattern + */ +pcre2_code *u32_compile_pattern(uint32_t *pattern); + + +/** + * Determine if the given `subject_string` matches the given `pattern`. + * @param pattern the compiled pattern + * @param subject_string the string to check + * @return flag indicating a match (0 == no match, otherwise: match) + */ +int regex_match(pcre2_code *pattern, char *subject_string); + + +/** + * Determine if the given `subject_string` matches the given `pattern`. + * @param pattern the compiled pattern + * @param subject_string the string to check, in UTF-32 + * @return flag indicating a match (0 == no match, otherwise: match) + */ +int u32_regex_match(pcre2_code *pattern, uint32_t *subject_string); + + +/** * Perform a regex replacement on the given string. * - * @param the compiled pattern to search for - * @param the replacement string - * @param the string to which the replacements shall be applied - * @param the length of in characters, not bytes - * @param flag indicating whether all occurrences shall be replaced (true) or just the first (false) + * @param search the compiled pattern to search for + * @param replace the replacement string + * @param input the string to which the replacements shall be applied + * @param input_len the length of in characters, not bytes + * @param global flag indicating whether all occurrences shall be replaced (true) or just the first (false) * @return a new string which is a copy of output with the replacements applied, or NULL on error */ uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, const size_t input_len, const int global); +/** + * Perform a regex replacement on the given string. + * + * @param search the compiled pattern to search for + * @param replace the replacement string + * @param input the string to which the replacements shall be applied + * @param input_len the length of in characters, not bytes + * @param global flag indicating whether all occurrences shall be replaced (true) or just the first (false) + * @return a new string which is a copy of output with the replacements applied, or NULL on error + */ +uint32_t *u32_regex_replace(pcre2_code *search, uint32_t *replace, uint32_t *input, const size_t input_len, + const int global); + + #endif /*EOF*/ /* vim: set cindent sw=4: */ diff --git a/src/shape.c b/src/shape.c index bfa4d0d..735f17f 100644 --- a/src/shape.c +++ b/src/shape.c @@ -18,10 +18,13 @@ */ #include "config.h" + #include #include #include #include + +#include "bxstring.h" #include "shape.h" #include "boxes.h" #include "tools.h" @@ -109,13 +112,14 @@ int on_side(const shape_t s, const int idx) -int genshape(const size_t width, const size_t height, char ***chars) +int genshape(const size_t width, const size_t height, char ***chars, bxstr_t ***mbcs) /* * Generate a shape consisting of spaces only. * * width desired shape width * height desired shape height * chars pointer to the shape lines (should be NULL upon call) + * mbcs pointer to the shape lines, MBCS version (should be NULL upon call) * * Memory is allocated for the shape lines which must be freed by the caller. * @@ -138,15 +142,16 @@ int genshape(const size_t width, const size_t height, char ***chars) return 2; } + *mbcs = (bxstr_t **) calloc(height, sizeof(bxstr_t *)); + if (*mbcs == NULL) { + BFREE(*chars); + perror(PROJECT); + return 4; + } + for (j = 0; j < height; ++j) { - (*chars)[j] = (char *) calloc(width + 1, sizeof(char)); - if ((*chars)[j] == NULL) { - perror(PROJECT); - for (/*empty*/; j > 0; --j) BFREE ((*chars)[j - 1]); - BFREE (*chars); - return 3; - } - memset((*chars)[j], ' ', width); + (*chars)[j] = nspaces(width); + (*mbcs)[j] = bxs_from_ascii((*chars)[j]); } return 0; @@ -166,8 +171,10 @@ void freeshape(sentry_t *shape) for (j = 0; j < shape->height; ++j) { BFREE (shape->chars[j]); + bxs_free(shape->mbcs[j]); } BFREE (shape->chars); + BFREE (shape->mbcs); *shape = SENTRY_INITIALIZER; } diff --git a/src/shape.h b/src/shape.h index f85c175..5dce7e3 100644 --- a/src/shape.h +++ b/src/shape.h @@ -20,6 +20,8 @@ #ifndef SHAPE_H #define SHAPE_H +#include "bxstring.h" + typedef enum { NW, NNW, N, NNE, NE, ENE, E, ESE, SE, SSE, S, SSW, SW, WSW, W, WNW @@ -42,17 +44,18 @@ extern shape_t *sides[NUM_SIDES]; typedef struct { - char **chars; - size_t height; - size_t width; - int elastic; /* elastic is used only in original definition */ + char **chars; + bxstr_t **mbcs; + size_t height; + size_t width; + int elastic; /* elastic is used only in original definition */ } sentry_t; -#define SENTRY_INITIALIZER (sentry_t) {NULL, 0, 0, 0} +#define SENTRY_INITIALIZER (sentry_t) {NULL, NULL, 0, 0, 0} -int genshape (const size_t width, const size_t height, char ***chars); +int genshape (const size_t width, const size_t height, char ***chars, bxstr_t ***mbcs); void freeshape (sentry_t *shape); shape_t findshape (const sentry_t *sarr, const int num); diff --git a/src/tools.c b/src/tools.c index fb7c11f..f1ff7b7 100644 --- a/src/tools.c +++ b/src/tools.c @@ -25,18 +25,44 @@ #include #include #include +#include #include #include #include #include #include "boxes.h" +#include "regulex.h" #include "shape.h" #include "tools.h" #include "unicode.h" +static pcre2_code *pattern_ascii_id = NULL; +static pcre2_code *pattern_ascii_id_strict = NULL; + + +static pcre2_code *get_pattern_ascii_id(int strict) +{ + pcre2_code *result = NULL; + if (strict) { + if (pattern_ascii_id_strict == NULL) { + pattern_ascii_id_strict = compile_pattern("^(?!.*?--|none)[a-z][a-z0-9-]*(? 0) { + for (size_t i = 0; i < array_len; ++i) { + if (bxs_strcmp(array[i], s) == 0) { + result = 1; + break; + } + } + } + return result; +} + + + size_t array_count0(char **array) { size_t num_elems = 0; @@ -740,7 +783,7 @@ size_t array_count0(char **array) -char *trimdup(char *s, char *e) +char *trimdup(char *s, char *e) // TODO consider removing, as we have bxs_trimdup() { if (s > e || (s == e && *s == '\0')) { return strdup(""); @@ -756,7 +799,7 @@ char *trimdup(char *s, char *e) -int tag_is_valid(char *tag) +int tag_is_valid(char *tag) // TODO replace with is_ascii_id(strict) { if (tag == NULL) { return 0; @@ -773,6 +816,17 @@ int tag_is_valid(char *tag) +int is_ascii_id(bxstr_t *s, int strict) +{ + if (s == NULL || s->num_chars == 0) { + return 0; + } + pcre2_code *pattern = get_pattern_ascii_id(strict); + return u32_regex_match(pattern, s->memory); +} + + + char *bx_strndup(const char *s, size_t n) { if (s == NULL) { @@ -804,4 +858,24 @@ void bx_fprintf(FILE *stream, const char *format, ...) } + +FILE *bx_fopens(bxstr_t *pathname, char *mode) +{ + return bx_fopen(to_utf8(pathname->memory), mode); +} + + + +FILE *bx_fopen(char *pathname, char *mode) +{ + /* + * On Linux/UNIX and OS X (Mac), one can access files with non-ASCII file names by passing them to fopen() as UTF-8. + * On Windows, a different function must be called. (Info: https://stackoverflow.com/a/35065142/1005481) + */ + FILE *f = fopen(pathname, mode); + // TODO Windows + return f; +} + + /* vim: set sw=4: */ diff --git a/src/tools.h b/src/tools.h index 065e741..bf43a81 100644 --- a/src/tools.h +++ b/src/tools.h @@ -42,15 +42,20 @@ int empty_line(const line_t *line); + size_t expand_tabs_into(const uint32_t *input_buffer, const int tabstop, uint32_t **text, size_t **tabpos, size_t *tabpos_len); + void btrim(char *text, size_t *len); + void btrim32(uint32_t *text, size_t *len); + char *my_strnrstr(const char *s1, const char *s2, const size_t s2_len, int skip); + /** * Calculates the length (in bytes) of the segment at the end of `s` which consists entirely of bytes in `accept`. * This is like `strspn()`, but from the end of the string. @@ -60,10 +65,13 @@ char *my_strnrstr(const char *s1, const char *s2, const size_t s2_len, int skip) */ size_t my_strrspn(const char *s, const char *accept); + int strisyes(const char *s); + int strisno(const char *s); + /** * Concatenate variable number of strings into one. This would normally be achieved via snprintf(), but that's not * available on all platforms where boxes is compiled. @@ -73,18 +81,25 @@ int strisno(const char *s); */ char *concat_strings_alloc(size_t count, ...); + void concat_strings(char *dst, int max_len, int count, ...); + char *tabbify_indent(const size_t lineno, char *indentspc, const size_t indentspc_len); + char *nspaces(const size_t n); + void print_input_lines(const char *heading); + void analyze_line_ascii(input_t *input_ptr, line_t *line); + size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii, size_t **posmap); + /** * Determine whether the given sequence of characters is a CSI (also called "escape sequence") that resets all * modifications, typically `ESC[0m`. @@ -93,8 +108,10 @@ size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii, s */ int is_csi_reset(const uint32_t *csi); + int array_contains(char **array, const size_t array_len, const char *s); + /** * Determine if the given `array` contains the given string (case-insensitive!). * @param array an array of strings to search @@ -103,6 +120,10 @@ int array_contains(char **array, const size_t array_len, const char *s); */ int array_contains0(char **array, const char *s); + +int array_contains_bxs(bxstr_t **array, const size_t array_len, bxstr_t *s); + + /** * Count the number of elements in a zero-terminated array. * @param array a zero-terminated array of strings (can be NULL) @@ -110,6 +131,7 @@ int array_contains0(char **array, const char *s); */ size_t array_count0(char **array); + /** * Trim leading and trailing whitespace from a string and return the result in a new string, for which memory is * allocated. `s` and `e` may point into some other string, which will not be modified. `e` should be greater than `s`. @@ -119,6 +141,7 @@ size_t array_count0(char **array); */ char *trimdup(char *s, char *e); + /** * Determine if the given string is a valid tag. Valid tags are lower-case alphanumeric strings which may have * intermixed, single hyphens. A valid tag starts with a letter. Hyphens may also not appear as last character. @@ -147,6 +170,42 @@ char *bx_strndup(const char *s, size_t n); void bx_fprintf(FILE *stream, const char *format, ...); +/** + * Determine if the given string is an "ASCII ID", which means: + * - It consists only of the letters `abcdefghijklmnopqrstuvwxyz-0123456789`. If not in strict mode, upper case A-Z + * and underscores are also allowed. + * - The first letter is a-z (lower case). If not in strict mode, upper case A-Z is also allowed. + * - The last character may not be a hyphen or underscore. + * - No two hyphens or underscores, or mixture thereof, may appear in sequence. + * - The entire string must not be the word `none`. + * - Minimum length is 1 character. + * @param s the string to check + * @param strict flag indicating whether "strict checks" should be applied (1) or not (0) + * @return flag (1 or 0) + */ +int is_ascii_id(bxstr_t *s, int strict); + + +/** + * Open the file pointed to by `pathname` and associate a stream with it. Supports non-ASCII pathnames and encapsulates + * the logic for different operating systems. + * @param pathname the pathname of the file to open + * @param mode a mode sequence like for standard `fopen()` + * @return the file stream + */ +FILE *bx_fopens(bxstr_t *pathname, char *mode); + + +/** + * Open the file pointed to by `pathname` and associate a stream with it. Supports non-ASCII pathnames and encapsulates + * the logic for different operating systems. + * @param pathname the pathname of the file to open, as a UTF-8 or ASCII (single byte) byte sequence + * @param mode a mode sequence like for standard `fopen()` + * @return the file stream + */ +FILE *bx_fopen(char *pathname, char *mode); + + #endif /*EOF*/ /* vim: set cindent sw=4: */ diff --git a/utest/cmdline_test.c b/utest/cmdline_test.c index 5d676cd..edd3807 100644 --- a/utest/cmdline_test.c +++ b/utest/cmdline_test.c @@ -58,7 +58,7 @@ static opt_t *act(const int num_args, ...) void test_indentmode_none(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-i", "none"); @@ -69,7 +69,7 @@ void test_indentmode_none(void **state) void test_indentmode_invalid_long(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-i", "INVALID"); @@ -81,7 +81,7 @@ void test_indentmode_invalid_long(void **state) void test_indentmode_invalid_short(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-i", "X"); @@ -93,7 +93,7 @@ void test_indentmode_invalid_short(void **state) void test_indentmode_box(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-i", "BO"); @@ -104,7 +104,7 @@ void test_indentmode_box(void **state) void test_indentmode_text(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-i", "t"); @@ -115,7 +115,7 @@ void test_indentmode_text(void **state) void test_killblank_true(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-k", "true"); @@ -126,7 +126,7 @@ void test_killblank_true(void **state) void test_killblank_false(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-k", "false"); @@ -137,7 +137,7 @@ void test_killblank_false(void **state) void test_killblank_invalid(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-k", "INVALID"); @@ -149,7 +149,7 @@ void test_killblank_invalid(void **state) void test_killblank_multiple(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(4, "-k", "true", "-k", "false"); // first one wins @@ -160,7 +160,7 @@ void test_killblank_multiple(void **state) void test_padding_top_bottom(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-p", "t2b10"); @@ -172,7 +172,7 @@ void test_padding_top_bottom(void **state) void test_padding_invalid(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-p", "INVALID"); @@ -184,7 +184,7 @@ void test_padding_invalid(void **state) void test_padding_negative(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-p", "a-1"); @@ -196,7 +196,7 @@ void test_padding_negative(void **state) void test_padding_notset(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-p", ""); @@ -208,7 +208,7 @@ void test_padding_notset(void **state) void test_padding_invalid_value(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-p", "l2rX"); @@ -220,7 +220,7 @@ void test_padding_invalid_value(void **state) void test_padding_novalue(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-p", "a"); @@ -232,7 +232,7 @@ void test_padding_novalue(void **state) void test_tabstops_zero(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-t", "0"); @@ -244,7 +244,7 @@ void test_tabstops_zero(void **state) void test_tabstops_500(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-t", "500"); @@ -256,7 +256,7 @@ void test_tabstops_500(void **state) void test_tabstops_4X(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-t", "4X"); @@ -268,7 +268,7 @@ void test_tabstops_4X(void **state) void test_tabstops_4e(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-t", "4e"); @@ -280,7 +280,7 @@ void test_tabstops_4e(void **state) void test_tabstops_4ex(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-t", "4ex"); @@ -292,7 +292,7 @@ void test_tabstops_4ex(void **state) void test_tabstops_7(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-t", "7"); @@ -304,7 +304,7 @@ void test_tabstops_7(void **state) void test_alignment_invalid_hX(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-a", "hX"); @@ -316,7 +316,7 @@ void test_alignment_invalid_hX(void **state) void test_alignment_invalid_vX(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-a", "vX"); @@ -328,7 +328,7 @@ void test_alignment_invalid_vX(void **state) void test_alignment_invalid_jX(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-a", "jX"); @@ -340,7 +340,7 @@ void test_alignment_invalid_jX(void **state) void test_alignment_notset(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-a", ""); @@ -352,7 +352,7 @@ void test_alignment_notset(void **state) void test_alignment_incomplete(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-a", "v"); @@ -364,7 +364,7 @@ void test_alignment_incomplete(void **state) void test_inputfiles_illegal_third_file(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(3, "file1", "file2", "file3_ILLEGAL"); @@ -378,7 +378,7 @@ void test_inputfiles_illegal_third_file(void **state) void test_inputfiles_stdin_stdout(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-", "-"); @@ -390,7 +390,7 @@ void test_inputfiles_stdin_stdout(void **state) void test_inputfiles_stdin(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(1, "-"); @@ -402,7 +402,7 @@ void test_inputfiles_stdin(void **state) void test_inputfiles_input_nonexistent(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(1, "NON-EXISTENT"); @@ -414,7 +414,7 @@ void test_inputfiles_input_nonexistent(void **state) void test_inputfiles_actual_success(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "../utest/dummy_in.txt", "dummy_out.txt"); @@ -426,7 +426,7 @@ void test_inputfiles_actual_success(void **state) void test_command_line_design_empty(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(2, "-c", ""); @@ -438,7 +438,7 @@ void test_command_line_design_empty(void **state) void test_help(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(1, "-h"); @@ -450,7 +450,7 @@ void test_help(void **state) void test_version_requested(void **state) { - (void) state; /* unused */ + UNUSED(state); opt_t *actual = act(1, "-v"); diff --git a/utest/main.c b/utest/main.c index b2a14ce..3ce343f 100644 --- a/utest/main.c +++ b/utest/main.c @@ -18,9 +18,11 @@ */ #include "config.h" + #include #include #include + #include #include "global_mock.h" @@ -28,9 +30,12 @@ #include "cmdline_test.h" #include "tools_test.h" #include "regulex_test.h" +#include "unicode_test.h" -static int beforeTest(void** state) { + +static int beforeTest(void** state) +{ (void) state; /* unused */ collect_reset(); @@ -95,26 +100,69 @@ int main(void) cmocka_unit_test(test_strisno_false), cmocka_unit_test(test_my_strrspn_edge), cmocka_unit_test(test_my_strrspn), - cmocka_unit_test(test_is_csi_reset) + cmocka_unit_test(test_is_csi_reset), + cmocka_unit_test(test_is_ascii_id_valid), + cmocka_unit_test(test_is_ascii_id_invalid), + cmocka_unit_test(test_is_ascii_id_strict_valid), + cmocka_unit_test(test_is_ascii_id_strict_invalid) + }; + + const struct CMUnitTest unicode_tests[] = { + cmocka_unit_test(test_to_utf32), + cmocka_unit_test(test_is_blank), + cmocka_unit_test(test_is_allowed_in_sample), + cmocka_unit_test(test_is_allowed_in_shape), + cmocka_unit_test(test_is_allowed_in_filename), + cmocka_unit_test(test_is_allowed_in_kv_string) }; const struct CMUnitTest bxstring_tests[] = { cmocka_unit_test_setup(test_ascii_simple, beforeTest), - cmocka_unit_test_setup(test_ascii_tabs, beforeTest), + cmocka_unit_test_setup(test_ascii_illegalchar, beforeTest), cmocka_unit_test_setup(test_ascii_null, beforeTest), cmocka_unit_test_setup(test_ansi_unicode_book, beforeTest), cmocka_unit_test_setup(test_ansi_unicode_space_kinds, beforeTest), cmocka_unit_test_setup(test_ansi_unicode_chinese, beforeTest), cmocka_unit_test_setup(test_ansi_unicode_empty, beforeTest), + cmocka_unit_test_setup(test_ansi_unicode_blanks, beforeTest), + cmocka_unit_test_setup(test_ansi_unicode_invisible_only, beforeTest), + cmocka_unit_test_setup(test_ansi_unicode_illegalchar, beforeTest), cmocka_unit_test_setup(test_ansi_unicode_tabs, beforeTest), cmocka_unit_test_setup(test_ansi_unicode_null, beforeTest), - cmocka_unit_test_setup(test_bxsfree_null, beforeTest) + cmocka_unit_test_setup(test_bxs_strdup, beforeTest), + cmocka_unit_test_setup(test_bxs_trimdup_null, beforeTest), + cmocka_unit_test_setup(test_bxs_trimdup_invalid_startidx, beforeTest), + cmocka_unit_test_setup(test_bxs_trimdup_invalid_endidx, beforeTest), + cmocka_unit_test_setup(test_bxs_trimdup_invalid_endidx2, beforeTest), + cmocka_unit_test_setup(test_bxs_trimdup_normal, beforeTest), + cmocka_unit_test_setup(test_bxs_trimdup_vanish, beforeTest), + cmocka_unit_test_setup(test_bxs_trimdup_ansi, beforeTest), + cmocka_unit_test_setup(test_bxs_trimdup_ansi_same, beforeTest), + cmocka_unit_test_setup(test_bxs_strcat, beforeTest), + cmocka_unit_test_setup(test_bxs_strcat_empty, beforeTest), + cmocka_unit_test_setup(test_bxs_strcat_empty2, beforeTest), + cmocka_unit_test_setup(test_bxs_strcat_empty3, beforeTest), + cmocka_unit_test_setup(test_bxs_strchr, beforeTest), + cmocka_unit_test_setup(test_bxs_strchr_empty, beforeTest), + cmocka_unit_test_setup(test_bxs_strchr_cursor, beforeTest), + cmocka_unit_test_setup(test_bxs_trim, beforeTest), + cmocka_unit_test_setup(test_bxs_trim_blanks, beforeTest), + cmocka_unit_test_setup(test_bxs_trim_none, beforeTest), + cmocka_unit_test_setup(test_bxs_rtrim, beforeTest), + cmocka_unit_test_setup(test_bxs_rtrim_empty, beforeTest), + cmocka_unit_test_setup(test_bxs_to_output, beforeTest), + cmocka_unit_test_setup(test_bxs_is_empty_null, beforeTest), + cmocka_unit_test_setup(test_bxs_strcmp, beforeTest), + cmocka_unit_test_setup(test_bxs_valid_anywhere_error, beforeTest), + cmocka_unit_test_setup(test_bxs_valid_in_filename_error, beforeTest), + cmocka_unit_test_setup(test_bxs_free_null, beforeTest) }; int num_failed = 0; num_failed += cmocka_run_group_tests(cmdline_tests, NULL, NULL); num_failed += cmocka_run_group_tests(regulex_tests, NULL, NULL); num_failed += cmocka_run_group_tests(tools_tests, NULL, NULL); + num_failed += cmocka_run_group_tests(unicode_tests, NULL, NULL); num_failed += cmocka_run_group_tests(bxstring_tests, NULL, NULL); teardown(); diff --git a/utest/regulex_test.c b/utest/regulex_test.c index 6e54604..7a64d04 100644 --- a/utest/regulex_test.c +++ b/utest/regulex_test.c @@ -18,12 +18,15 @@ */ #include "config.h" + #include #include #include #include #include #include + +#include "boxes.h" #include "global_mock.h" #include "regulex.h" @@ -31,7 +34,7 @@ void test_compile_pattern_empty(void **state) { - (void) state; /* unused */ + UNUSED(state); assert_null(compile_pattern(NULL)); assert_non_null(compile_pattern("")); @@ -41,7 +44,7 @@ void test_compile_pattern_empty(void **state) void test_compile_pattern_error(void **state) { - (void) state; /* unused */ + UNUSED(state); assert_null(compile_pattern("incomplete[x")); assert_int_equal(1, collect_err_size); @@ -59,7 +62,7 @@ void test_compile_pattern_error(void **state) void test_regex_replace_invalid_utf(void **state) { - (void) state; /* unused */ + UNUSED(state); const char *input = "input"; assert_null(regex_replace(compile_pattern("search"), NULL, /* NULL is an invalid replacement string*/ @@ -72,7 +75,7 @@ void test_regex_replace_invalid_utf(void **state) void test_regex_replace_buffer_resize(void **state) { - (void) state; /* unused */ + UNUSED(state); const char *input = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; uint32_t *actual = regex_replace(compile_pattern("x"), "long_replacement_string_", @@ -95,7 +98,7 @@ void test_regex_replace_buffer_resize(void **state) void test_regex_replace_error(void **state) { - (void) state; /* unused */ + UNUSED(state); const char *input = "xxx"; uint32_t *actual = regex_replace(compile_pattern("x"), "INVALID $2", diff --git a/utest/tools_test.c b/utest/tools_test.c index e1b98d6..eb7ab0a 100644 --- a/utest/tools_test.c +++ b/utest/tools_test.c @@ -145,4 +145,120 @@ void test_is_csi_reset(void **state) } +void test_is_ascii_id_valid(void **state) +{ + (void) state; /* unused */ + + bxstr_t *s = bxs_from_ascii("valid"); + assert_int_equal(1, is_ascii_id(s, 0)); + bxs_free(s); + + s = bxs_from_ascii("also-valid"); + assert_int_equal(1, is_ascii_id(s, 0)); + bxs_free(s); + + s = bxs_from_ascii("fine2"); + assert_int_equal(1, is_ascii_id(s, 0)); + bxs_free(s); + + s = bxs_from_ascii("A"); + assert_int_equal(1, is_ascii_id(s, 0)); + bxs_free(s); + + s = bxs_from_ascii("A_2"); + assert_int_equal(1, is_ascii_id(s, 0)); + bxs_free(s); +} + + +void test_is_ascii_id_invalid(void **state) +{ + (void) state; /* unused */ + + assert_int_equal(0, is_ascii_id(NULL, 0)); + + bxstr_t *s = bxs_from_ascii(""); + assert_int_equal(0, is_ascii_id(s, 0)); + bxs_free(s); + + s = bxs_from_ascii("a--b"); + assert_int_equal(0, is_ascii_id(s, 0)); + bxs_free(s); + + s = bxs_from_ascii("also-_invalid"); + assert_int_equal(0, is_ascii_id(s, 0)); + bxs_free(s); + + s = bxs_from_ascii("invalid-"); + assert_int_equal(0, is_ascii_id(s, 0)); + bxs_free(s); + + s = bxs_from_ascii("42"); + assert_int_equal(0, is_ascii_id(s, 0)); + bxs_free(s); + + uint32_t *ustr32 = u32_strconv_from_arg( + "\xe5\x85\xac\xe7\x88\xb8\xe8\xa6\x81\xe9\x81\x93\xef\xbc\x81", /* 公爸要道! */ + "UTF-8"); + s = bxs_from_unicode(ustr32); + assert_int_equal(0, is_ascii_id(s, 0)); + bxs_free(s); + BFREE(ustr32); +} + + +void test_is_ascii_id_strict_valid(void **state) +{ + (void) state; /* unused */ + + bxstr_t *s = bxs_from_ascii("valid"); + assert_int_equal(1, is_ascii_id(s, 1)); + bxs_free(s); + + s = bxs_from_ascii("also-valid"); + assert_int_equal(1, is_ascii_id(s, 1)); + bxs_free(s); + + s = bxs_from_ascii("fine2"); + assert_int_equal(1, is_ascii_id(s, 1)); + bxs_free(s); + + s = bxs_from_ascii("a"); + assert_int_equal(1, is_ascii_id(s, 1)); + bxs_free(s); +} + + +void test_is_ascii_id_strict_invalid(void **state) +{ + (void) state; /* unused */ + + assert_int_equal(0, is_ascii_id(NULL, 1)); + + bxstr_t *s = bxs_from_ascii(""); + assert_int_equal(0, is_ascii_id(s, 1)); + bxs_free(s); + + s = bxs_from_ascii("a--b"); + assert_int_equal(0, is_ascii_id(s, 1)); + bxs_free(s); + + s = bxs_from_ascii("also-_invalid"); + assert_int_equal(0, is_ascii_id(s, 1)); + bxs_free(s); + + s = bxs_from_ascii("invalid-"); + assert_int_equal(0, is_ascii_id(s, 1)); + bxs_free(s); + + s = bxs_from_ascii("42"); + assert_int_equal(0, is_ascii_id(s, 1)); + bxs_free(s); + + s = bxs_from_ascii("A_2"); + assert_int_equal(0, is_ascii_id(s, 1)); + bxs_free(s); +} + + /* vim: set cindent sw=4: */ diff --git a/utest/tools_test.h b/utest/tools_test.h index d906710..6100159 100644 --- a/utest/tools_test.h +++ b/utest/tools_test.h @@ -32,6 +32,11 @@ void test_my_strrspn(void **state); void test_is_csi_reset(void **state); +void test_is_ascii_id_valid(void **state); +void test_is_ascii_id_invalid(void **state); +void test_is_ascii_id_strict_valid(void **state); +void test_is_ascii_id_strict_invalid(void **state); + #endif