From 5456fc236787f36f20018343e6758677ab38ef42 Mon Sep 17 00:00:00 2001 From: Thomas Jensen Date: Sat, 30 Dec 2023 14:48:12 +0100 Subject: [PATCH] Adapt Windows-specific code to recent changes This included renaming the `WORD` token in our lexer to `BXWORD`, in order to avoid a conflict with a symbol in windows.h. --- src/boxes.c | 24 ++++++++++++++++++++++-- src/lexer.l | 6 +++--- src/parser.y | 6 +++--- src/tools.c | 3 ++- utest/unicode_test.c | 1 + 5 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/boxes.c b/src/boxes.c index f6d0e52..832bcf4 100644 --- a/src/boxes.c +++ b/src/boxes.c @@ -24,6 +24,9 @@ #include #include #include +#ifdef _WIN32 +#include <windows.h> +#endif #include "boxes.h" #include "bxstring.h" @@ -103,7 +106,8 @@ static int build_design(design_t **adesigns, const char *cld) dp->tags = (char **) calloc(2, sizeof(char *)); dp->tags[0] = "transient"; - uint32_t *cld_u32 = u32_strconv_from_arg(cld, "UTF-8"); /* CHECK wrong on Windows (UTF-16) or different IME */ + /* We always use UTF-8, which is correct for Linux and macOS, and for modern Windows configured for UTF-8. */ + uint32_t *cld_u32 = u32_strconv_from_arg(cld, "UTF-8"); bxstr_t *cldW = bxs_from_unicode(cld_u32); BFREE(cld_u32); @@ -453,6 +457,22 @@ static int check_color_support(int opt_color) +/** + * Switch from default "C" encoding to system encoding. + */ +static void activateSystemEncoding() +{ + #ifdef _WIN32 + SetConsoleOutputCP(CP_ACP); + SetConsoleCP(CP_ACP); + /* If it should one day turn out that this doesn't have the desired effect, try setlocale(LC_ALL, ".UTF8"). */ + #else + setlocale(LC_ALL, ""); + #endif +} + + + /* _\|/_ (o o) +----oOO-{_}-OOo------------------------------------------------------------+ @@ -470,7 +490,7 @@ int main(int argc, char *argv[]) #endif /* Temporarily set the system encoding, for proper output of --help text etc. 
*/ - setlocale(LC_ALL, ""); /* switch from default "C" encoding to system encoding */ + activateSystemEncoding(); encoding = locale_charset(); handle_command_line(argc, argv); diff --git a/src/lexer.l b/src/lexer.l index ebb0eae..8dea639 100644 --- a/src/lexer.l +++ b/src/lexer.l @@ -137,7 +137,7 @@ PFILENAME [^\r\n]+ [^ \t\r\n]+ { /* - * String delimiter spec - like WORD, but allow any character + * String delimiter spec - like BXWORD, but allow any character */ yylval->s = bxs_from_ascii("IGNORED"); char *str = (char *) strdup(yytext); @@ -415,10 +415,10 @@ PFILENAME [^\r\n]+ exit (EXIT_FAILURE); } #ifdef LEXER_DEBUG - fprintf (stderr, " WORD: %s\n", u32_strconv_to_output(utf8)); + fprintf (stderr, " BXWORD: %s\n", u32_strconv_to_output(utf8)); #endif BFREE(utf8); - return WORD; + return BXWORD; } diff --git a/src/parser.y b/src/parser.y index b60b9e7..d842a04 100644 --- a/src/parser.y +++ b/src/parser.y @@ -127,7 +127,7 @@ typedef struct { %token YPARENT YSHAPES YELASTIC YPADDING YSAMPLE YENDSAMPLE YBOX YEND YUNREC %token YREPLACE YREVERSE YTO YWITH YCHGDEL YTAGS %token KEYWORD -%token WORD +%token BXWORD %token ASCII_ID %token STRING %token FILENAME @@ -224,7 +224,7 @@ alias_list: alias | alias_list ',' alias; design_id: ASCII_ID | ASCII_ID ',' alias_list -| WORD +| BXWORD { yyerror(bison_args, "box design name must consist of printable standard ASCII characters."); YYERROR; @@ -282,7 +282,7 @@ entry: KEYWORD STRING | YTAGS '(' tag_list ')' | YTAGS tag_entry -| WORD STRING | ASCII_ID STRING +| BXWORD STRING | ASCII_ID STRING { #ifdef PARSER_DEBUG fprintf (stderr, " Parser: Discarding entry [%s = %s].\n", $1, bxs_to_output($2)); diff --git a/src/tools.c b/src/tools.c index ccdc4c2..6fa6c2f 100644 --- a/src/tools.c +++ b/src/tools.c @@ -741,9 +741,10 @@ FILE *bx_fopen(char *pathname, char *mode) /* * On Linux/UNIX and OS X (Mac), one can access files with non-ASCII file names by passing them to fopen() as UTF-8. * On Windows, a different function must be called. 
(Info: https://stackoverflow.com/a/35065142/1005481) + * On newer Windows configured for UTF-8, plain fopen() should accept UTF-8 file names as well: + * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/setlocale-wsetlocale#utf-8-support */ FILE *f = fopen(pathname, mode); - // TODO Windows return f; } diff --git a/utest/unicode_test.c b/utest/unicode_test.c index 00108b2..54fdc0d 100644 --- a/utest/unicode_test.c +++ b/utest/unicode_test.c @@ -196,6 +196,7 @@ void test_u32_insert_space_at(void **state) u32_insert_space_at(&s, 1, 1); u32_insert_space_at(&s, 10000, 2); + assert_non_null(s); assert_int_equal(0, u32_strcmp(expected, s)); BFREE(s);