diff --git a/src/Makefile b/src/Makefile index f41d699..03a6c3e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -28,9 +28,11 @@ GEN_HDR = parser.h boxes.h lex.yy.h GEN_SRC = parser.c lex.yy.c GEN_FILES = $(GEN_SRC) $(GEN_HDR) ORIG_HDRCL = boxes.in.h config.h -ORIG_HDR = $(ORIG_HDRCL) cmdline.h discovery.h generate.h list.h parsecode.h parsing.h regulex.h remove.h shape.h tools.h unicode.h +ORIG_HDR = $(ORIG_HDRCL) cmdline.h discovery.h generate.h input.h list.h parsecode.h parsing.h regulex.h remove.h \ + shape.h tools.h unicode.h ORIG_GEN = lexer.l parser.y -ORIG_NORM = boxes.c cmdline.c discovery.c generate.c list.c parsecode.c parsing.c regulex.c remove.c shape.c tools.c unicode.c +ORIG_NORM = boxes.c cmdline.c discovery.c generate.c input.c list.c parsecode.c parsing.c regulex.c remove.c shape.c \ + tools.c unicode.c ORIG_SRC = $(ORIG_GEN) $(ORIG_NORM) ORIG_FILES = $(ORIG_SRC) $(ORIG_HDR) @@ -86,11 +88,12 @@ parser.c parser.h: parser.y lex.yy.h | check_dir lex.yy.c lex.yy.h: lexer.l | check_dir $(LEX) --header-file=lex.yy.h $< -boxes.o: boxes.c boxes.h cmdline.h discovery.h regulex.h shape.h tools.h unicode.h generate.h list.h remove.h config.h | check_dir +boxes.o: boxes.c boxes.h cmdline.h discovery.h generate.h input.h list.h remove.h tools.h unicode.h config.h | check_dir cmdline.o: cmdline.c cmdline.h boxes.h tools.h config.h | check_dir discovery.o: discovery.c discovery.h boxes.h tools.h config.h | check_dir generate.o: generate.c generate.h boxes.h shape.h tools.h unicode.h config.h | check_dir getopt.o: misc/getopt.c misc/getopt.h | check_dir +input.o: input.c boxes.h input.h regulex.h tools.h unicode.h config.h | check_dir lex.yy.o: lex.yy.c parser.h boxes.h parsing.h tools.h shape.h config.h | check_dir list.o: list.c list.h boxes.h parsing.h tools.h config.h | check_dir parsecode.o: parsecode.c parser.h boxes.h tools.h lex.yy.h regulex.h unicode.h config.h | check_dir diff --git a/src/boxes.c b/src/boxes.c index c85aefc..7116fdf 100644 --- 
a/src/boxes.c +++ b/src/boxes.c @@ -19,29 +19,20 @@ */ #include "config.h" -#include #include -#include #include #include -#include - -#include -#include -#include -#include -#include +#include #include "boxes.h" #include "cmdline.h" -#include "list.h" -#include "shape.h" -#include "tools.h" #include "discovery.h" #include "generate.h" +#include "input.h" +#include "list.h" #include "parsing.h" -#include "regulex.h" #include "remove.h" +#include "tools.h" #include "unicode.h" @@ -53,11 +44,11 @@ +--------------------------------------------------------------------------*/ design_t *designs = NULL; /* available box designs */ -int anz_designs = 0; /* no of designs after parsing */ +int anz_designs = 0; /* number of designs after parsing TODO rename to num_designs */ opt_t opt; /* command line options */ -input_t input = INPUT_INITIALIZER; /* input lines */ +input_t input; /* input lines */ /* _\|/_ @@ -232,358 +223,6 @@ static int query_by_tag() -static int get_indent(const line_t *lines, const size_t lines_size) -/* - * Determine indentation of given lines in spaces. - * - * lines the lines to examine - * lines_size number of lines to examine - * - * Lines are assumed to be free of trailing whitespace. 
- * - * RETURNS: >= 0 indentation in spaces - * < 0 error - * -* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - */ -{ - int res = LINE_MAX_BYTES; /* result */ - int nonblank = 0; /* true if one non-blank line found */ - - if (lines == NULL) { - fprintf(stderr, "%s: internal error\n", PROJECT); - return -1; - } - if (lines_size == 0) { - return 0; - } - - for (size_t j = 0; j < lines_size; ++j) { - if (lines[j].len > 0) { - nonblank = 1; - size_t ispc = strspn(lines[j].text, " "); - if ((int) ispc < res) { - res = ispc; - } - } - } - - if (nonblank) { - return res; /* success */ - } else { - return 0; /* success, but only blank lines */ - } -} - - - -static int apply_substitutions(const int mode) -/* - * Apply regular expression substitutions to input text. - * - * mode == 0 use replacement rules (box is being *drawn*) - * == 1 use reversion rules (box is being *removed*) - * - * Attn: This modifies the actual input array! - * - * RETURNS: == 0 success - * != 0 error - * -* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - */ -{ - size_t anz_rules; - reprule_t *rules; - size_t j, k; - - if (opt.design == NULL) { - return 1; - } - - if (mode == 0) { - anz_rules = opt.design->anz_reprules; - rules = opt.design->reprules; - } - else if (mode == 1) { - anz_rules = opt.design->anz_revrules; - rules = opt.design->revrules; - } - else { - fprintf(stderr, "%s: internal error\n", PROJECT); - return 2; - } - - /* - * Compile regular expressions - */ - #ifdef REGEXP_DEBUG - fprintf(stderr, "Compiling %d %s rule patterns\n", (int) anz_rules, mode ? 
"reversion" : "replacement"); - #endif - errno = 0; - opt.design->current_rule = rules; - for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) { - rules[j].prog = compile_pattern(rules[j].search); - if (rules[j].prog == NULL) { - return 5; - } - } - opt.design->current_rule = NULL; - if (errno) { - return 3; - } - - /* - * Apply regular expression substitutions to input lines - */ - for (k = 0; k < input.anz_lines; ++k) { - opt.design->current_rule = rules; - for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) { - #ifdef REGEXP_DEBUG - fprintf (stderr, "regex_replace(0x%p, \"%s\", \"%s\", %d, \'%c\') == ", - rules[j].prog, rules[j].repstr, u32_strconv_to_output(input.lines[k].mbtext), - (int) input.lines[k].num_chars, rules[j].mode); - #endif - uint32_t *newtext = regex_replace(rules[j].prog, rules[j].repstr, - input.lines[k].mbtext, input.lines[k].num_chars, rules[j].mode == 'g'); - #ifdef REGEXP_DEBUG - fprintf (stderr, "\"%s\"\n", newtext ? u32_strconv_to_output(newtext) : "NULL"); - #endif - if (newtext == NULL) { - return 1; - } - - BFREE(input.lines[k].mbtext_org); /* original address allocated for mbtext */ - input.lines[k].mbtext = newtext; - input.lines[k].mbtext_org = newtext; - - analyze_line_ascii(input.lines + k); - - #ifdef REGEXP_DEBUG - fprintf (stderr, "input.lines[%d] == {%d, \"%s\"}\n", (int) k, - (int) input.lines[k].num_chars, u32_strconv_to_output(input.lines[k].mbtext)); - #endif - } - opt.design->current_rule = NULL; - } - - /* - * If text indentation was part of the lines processed, indentation - * may now be different -> recalculate input.indent. - */ - if (opt.design->indentmode == 't') { - int rc; - rc = get_indent(input.lines, input.anz_lines); - if (rc >= 0) { - input.indent = (size_t) rc; - } else { - return 4; - } - } - - return 0; -} - - - -static int has_linebreak(const uint32_t *s, const int len) -/* - * Determine if the given line of raw text is ended by a line break. 
- * - * s: the string to check - * len: length of s in characters - * - * RETURNS: != 0 line break found - * == 0 line break not found - * -* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - */ -{ - int result = 0; - if (s != NULL && len > 0) { - ucs4_t the_last = s[len - 1]; - result = u32_cmp(&char_cr, &the_last, 1) == 0 || u32_cmp(&char_newline, &the_last, 1) == 0; - #if defined(DEBUG) - fprintf(stderr, "has_linebreak: (%#010x) %d\n", (int) the_last, result); - #endif - } - return result; -} - - - -static int read_all_input(const int use_stdin) -/* - * Read entire input (possibly from stdin) and store it in 'input' array. - * - * Tabs are expanded. - * Might allocate slightly more memory than it needs. Trade-off for speed. - * - * use_stdin: flag indicating whether to read from stdin (use_stdin != 0) - * or use the data currently present in input (use_stdin == 0). - * - * RETURNS: != 0 on error (out of memory) - * == 0 on success - * -* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - */ -{ - char buf[LINE_MAX_BYTES + 3]; /* input buffer incl. 
newline + zero terminator */ - size_t len_chars; - size_t input_size = 0; /* number of elements allocated */ - uint32_t *mbtemp = NULL; /* temp string for preparing the multi-byte input */ - size_t i; - int rc; - - input.indent = LINE_MAX_BYTES; - input.maxline = 0; - - if (use_stdin) { - input.anz_lines = 0; - - /* - * Start reading - */ - while (fgets(buf, LINE_MAX_BYTES + 2, opt.infile)) { - if (input.anz_lines % 100 == 0) { - input_size += 100; - line_t *tmp = (line_t *) realloc(input.lines, input_size * sizeof(line_t)); - if (tmp == NULL) { - perror(PROJECT); - BFREE (input.lines); - return 1; - } - input.lines = tmp; - } - - mbtemp = u32_strconv_from_input(buf); - len_chars = u32_strlen(mbtemp); - input.final_newline = has_linebreak(mbtemp, len_chars); - input.lines[input.anz_lines].posmap = NULL; - input.lines[input.anz_lines].tabpos = NULL; - input.lines[input.anz_lines].tabpos_len = 0; - - if (opt.r) { - if (is_char_at(mbtemp, len_chars - 1, char_newline)) { - set_char_at(mbtemp, len_chars - 1, char_nul); - --len_chars; - } - if (is_char_at(mbtemp, len_chars - 1, char_cr)) { - set_char_at(mbtemp, len_chars - 1, char_nul); - --len_chars; - } - } - else { - btrim32(mbtemp, &len_chars); - } - - /* - * Expand tabs - */ - if (len_chars > 0) { - uint32_t *temp = NULL; - len_chars = expand_tabs_into(mbtemp, opt.tabstop, &temp, - &(input.lines[input.anz_lines].tabpos), - &(input.lines[input.anz_lines].tabpos_len)); - if (len_chars == 0) { - perror(PROJECT); - BFREE (input.lines); - return 1; - } - input.lines[input.anz_lines].mbtext = temp; - BFREE(mbtemp); - temp = NULL; - } - else { - input.lines[input.anz_lines].mbtext = mbtemp; - } - input.lines[input.anz_lines].mbtext_org = input.lines[input.anz_lines].mbtext; - input.lines[input.anz_lines].num_chars = len_chars; - - /* - * Build ASCII equivalent of the multi-byte string, update line stats - */ - input.lines[input.anz_lines].text = NULL; /* we haven't used it yet! 
*/ - analyze_line_ascii(input.lines + input.anz_lines); - - ++input.anz_lines; - } - - if (ferror(stdin)) { - perror(PROJECT); - BFREE (input.lines); - return 1; - } - } - - else { - /* recalculate input statistics for redrawing the mended box */ - for (i = 0; i < input.anz_lines; ++i) { - analyze_line_ascii(input.lines + i); - } - } - - /* - * Exit if there was no input at all - */ - if (input.lines == NULL || input.lines[0].text == NULL) { - return 0; - } - - /* - * Compute indentation - */ - rc = get_indent(input.lines, input.anz_lines); - if (rc >= 0) { - input.indent = (size_t) rc; - } else { - return 1; - } - - /* - * Remove indentation, unless we want to preserve it (when removing - * a box or if the user wants to retain it inside the box) - */ - if (opt.design->indentmode != 't' && opt.r == 0) { - for (i = 0; i < input.anz_lines; ++i) { - #ifdef DEBUG - fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i, - u32_strconv_to_output(input.lines[i].mbtext), (int) input.lines[i].num_chars); - #endif - if (input.lines[i].num_chars >= input.indent) { - memmove(input.lines[i].text, input.lines[i].text + input.indent, - input.lines[i].len - input.indent + 1); - input.lines[i].len -= input.indent; - - input.lines[i].mbtext = advance32(input.lines[i].mbtext, input.indent); - input.lines[i].num_chars -= input.indent; - } - #ifdef DEBUG - fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i, - u32_strconv_to_output(input.lines[i].mbtext), (int) input.lines[i].num_chars); - #endif - } - input.maxline -= input.indent; - } - - /* - * Apply regular expression substitutions - */ - if (opt.r == 0) { - if (apply_substitutions(0) != 0) { - return 1; - } - } - -#ifdef DEBUG - fprintf (stderr, "Encoding: %s\n", encoding); - print_input_lines(NULL); -#endif - - return 0; -} - - - /* _\|/_ (o o) +----oOO-{_}-OOo------------------------------------------------------------+ @@ -720,10 +359,20 @@ int main(int argc, char *argv[]) #ifdef DEBUG fprintf (stderr, "Reading 
all input ...\n"); #endif - rc = read_all_input(opt.mend); - if (rc) { + input_t *raw_input = NULL; + if (opt.mend != 0) { + raw_input = read_all_input(); + if (raw_input == NULL) { + exit(EXIT_FAILURE); + } + } + if (analyze_input(raw_input ? raw_input : &input)) { exit(EXIT_FAILURE); } + if (raw_input) { + memcpy(&input, raw_input, sizeof(input_t)); + BFREE(raw_input); + } if (input.anz_lines == 0) { exit(EXIT_SUCCESS); } @@ -798,7 +447,7 @@ int main(int argc, char *argv[]) if (rc) { exit(EXIT_FAILURE); } - rc = apply_substitutions(1); + rc = apply_substitutions(&input, 1); if (rc) { exit(EXIT_FAILURE); } diff --git a/src/boxes.in.h b/src/boxes.in.h index d3a5716..fc29b3e 100644 --- a/src/boxes.in.h +++ b/src/boxes.in.h @@ -158,14 +158,12 @@ typedef struct { typedef struct { line_t *lines; - size_t anz_lines; /* number of entries in input */ + size_t anz_lines; /* number of entries in input TODO rename to num_lines */ size_t maxline; /* length of longest input line */ size_t indent; /* number of leading spaces found */ int final_newline; /* true if the last line of input ends with newline */ } input_t; -#define INPUT_INITIALIZER {NULL, 0, 0, LINE_MAX_BYTES, 0} - extern input_t input; diff --git a/src/input.c b/src/input.c new file mode 100644 index 0000000..c80fe50 --- /dev/null +++ b/src/input.c @@ -0,0 +1,350 @@ +/* + * boxes - Command line filter to draw/remove ASCII boxes around text + * Copyright (c) 1999-2021 Thomas Jensen and the boxes contributors + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License, version 2, as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ + +/* + * Read and analyze input text. + */ + +#include "config.h" +#include +#include +#include +#include + +#include "boxes.h" +#include "regulex.h" +#include "tools.h" +#include "unicode.h" +#include "input.h" + + + +/** + * Determine if the given line of raw text is ended by a line break. + * @param s the string to check + * @param len length of s in characters + * @returns != 0 if line break found; + * == 0 if line break not found + */ +static int has_linebreak(const uint32_t *s, const int len) +{ + int result = 0; + if (s != NULL && len > 0) { + ucs4_t the_last = s[len - 1]; + result = u32_cmp(&char_cr, &the_last, 1) == 0 || u32_cmp(&char_newline, &the_last, 1) == 0; + #if defined(DEBUG) + fprintf(stderr, "has_linebreak: (%#010x) %d\n", (int) the_last, result); + #endif + } + return result; +} + + + +/** + * Determine indentation of given lines in spaces. Lines are assumed to be free of trailing whitespace. 
+ * @param lines the lines to examine + * @param lines_size number of lines to examine + * @returns >= 0: indentation in spaces; < 0: error + */ +static int get_indent(const line_t *lines, const size_t lines_size) +{ + int res = LINE_MAX_BYTES; /* result */ + int nonblank = 0; /* true if one non-blank line found */ + + if (lines == NULL) { + fprintf(stderr, "%s: internal error\n", PROJECT); + return -1; + } + if (lines_size == 0) { + return 0; + } + + for (size_t j = 0; j < lines_size; ++j) { + if (lines[j].len > 0) { + nonblank = 1; + size_t ispc = strspn(lines[j].text, " "); + if ((int) ispc < res) { + res = ispc; + } + } + } + + if (nonblank) { + return res; /* success */ + } else { + return 0; /* success, but only blank lines */ + } +} + + + +int apply_substitutions(input_t *result, const int mode) +{ + size_t anz_rules; + reprule_t *rules; + size_t j, k; + + if (opt.design == NULL) { + return 1; + } + + if (mode == 0) { + anz_rules = opt.design->anz_reprules; + rules = opt.design->reprules; + } + else if (mode == 1) { + anz_rules = opt.design->anz_revrules; + rules = opt.design->revrules; + } + else { + fprintf(stderr, "%s: internal error\n", PROJECT); + return 2; + } + + /* + * Compile regular expressions + */ + #ifdef REGEXP_DEBUG + fprintf(stderr, "Compiling %d %s rule patterns\n", (int) anz_rules, mode ? 
"reversion" : "replacement"); + #endif + errno = 0; + opt.design->current_rule = rules; + for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) { + rules[j].prog = compile_pattern(rules[j].search); + if (rules[j].prog == NULL) { + return 5; + } + } + opt.design->current_rule = NULL; + if (errno) { + return 3; + } + + /* + * Apply regular expression substitutions to input lines + */ + for (k = 0; k < result->anz_lines; ++k) { + opt.design->current_rule = rules; + for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) { + #ifdef REGEXP_DEBUG + fprintf (stderr, "regex_replace(0x%p, \"%s\", \"%s\", %d, \'%c\') == ", + rules[j].prog, rules[j].repstr, u32_strconv_to_output(result->lines[k].mbtext), + (int) result->lines[k].num_chars, rules[j].mode); + #endif + uint32_t *newtext = regex_replace(rules[j].prog, rules[j].repstr, + result->lines[k].mbtext, result->lines[k].num_chars, rules[j].mode == 'g'); + #ifdef REGEXP_DEBUG + fprintf (stderr, "\"%s\"\n", newtext ? u32_strconv_to_output(newtext) : "NULL"); + #endif + if (newtext == NULL) { + return 1; + } + + BFREE(result->lines[k].mbtext_org); /* original address allocated for mbtext */ + result->lines[k].mbtext = newtext; + result->lines[k].mbtext_org = newtext; + + analyze_line_ascii(result, result->lines + k); + + #ifdef REGEXP_DEBUG + fprintf (stderr, "result->lines[%d] == {%d, \"%s\"}\n", (int) k, + (int) result->lines[k].num_chars, u32_strconv_to_output(result->lines[k].mbtext)); + #endif + } + opt.design->current_rule = NULL; + } + + /* + * If text indentation was part of the lines processed, indentation + * may now be different -> recalculate result->indent. 
+ */ + if (opt.design->indentmode == 't') { + int rc; + rc = get_indent(result->lines, result->anz_lines); + if (rc >= 0) { + result->indent = (size_t) rc; + } else { + return 4; + } + } + + return 0; +} + + + +static void trim_trailing_ws_carefully(uint32_t *mbtemp, size_t *len_chars) +{ + if (opt.r) { + /* remove only trailing line breaks, but keep the space */ + if (is_char_at(mbtemp, *len_chars - 1, char_newline)) { + set_char_at(mbtemp, *len_chars - 1, char_nul); + --(*len_chars); + } + if (is_char_at(mbtemp, *len_chars - 1, char_cr)) { + set_char_at(mbtemp, *len_chars - 1, char_nul); + --(*len_chars); + } + } + else { + /* remove all trailing whitespace, including unicode whitespace */ + btrim32(mbtemp, len_chars); + } +} + + + +input_t *read_all_input() +{ + char buf[LINE_MAX_BYTES + 3]; /* fixed-size input buffer incl. newline + zero terminator */ + size_t input_size = 0; /* number of elements allocated */ + + input_t *result = (input_t *) calloc(1, sizeof(input_t)); + result->indent = LINE_MAX_BYTES; + + while (fgets(buf, LINE_MAX_BYTES + 2, opt.infile)) + { + if (result->anz_lines % 100 == 0) { + input_size += 100; + line_t *tmp = (line_t *) realloc(result->lines, input_size * sizeof(line_t)); + if (tmp == NULL) { + perror(PROJECT); + BFREE (result->lines); + return NULL; + } + result->lines = tmp; + } + + memset(result->lines + result->anz_lines, 0, sizeof(line_t)); + + uint32_t *mbtemp = u32_strconv_from_input(buf); + size_t len_chars = u32_strlen(mbtemp); + result->final_newline = has_linebreak(mbtemp, len_chars); + trim_trailing_ws_carefully(mbtemp, &len_chars); + + /* + * Expand tabs + */ + if (len_chars > 0) { + uint32_t *temp = NULL; + len_chars = expand_tabs_into(mbtemp, opt.tabstop, &temp, + &(result->lines[result->anz_lines].tabpos), + &(result->lines[result->anz_lines].tabpos_len)); + if (len_chars == 0) { + perror(PROJECT); + BFREE (result->lines); + return NULL; + } + result->lines[result->anz_lines].mbtext = temp; + BFREE(mbtemp); + temp = NULL; 
+ } + else { + result->lines[result->anz_lines].mbtext = mbtemp; + } + result->lines[result->anz_lines].mbtext_org = result->lines[result->anz_lines].mbtext; + result->lines[result->anz_lines].num_chars = len_chars; + + ++result->anz_lines; + } + + if (ferror(opt.infile)) { /* check the stream fgets() actually read from */ + perror(PROJECT); + BFREE (result->lines); + return NULL; + } + return result; +} + + + +int analyze_input(input_t *result) +{ + result->indent = LINE_MAX_BYTES; + result->maxline = 0; + + /* + * Build ASCII equivalent of the multi-byte string, update line stats + */ + for (size_t i = 0; i < result->anz_lines; ++i) { + analyze_line_ascii(result, result->lines + i); + } + + /* + * Exit if there was no input at all + */ + if (result->lines == NULL || result->lines[0].text == NULL) { + return 0; + } + + /* + * Compute indentation + */ + int rc = get_indent(result->lines, result->anz_lines); + if (rc >= 0) { + result->indent = (size_t) rc; + } else { + return 1; + } + + /* + * Remove indentation, unless we want to preserve it (when removing + * a box or if the user wants to retain it inside the box) + */ + if (opt.design->indentmode != 't' && opt.r == 0) { + for (size_t i = 0; i < result->anz_lines; ++i) { + #ifdef DEBUG + fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i, + u32_strconv_to_output(result->lines[i].mbtext), (int) result->lines[i].num_chars); + #endif + if (result->lines[i].num_chars >= result->indent) { + memmove(result->lines[i].text, result->lines[i].text + result->indent, + result->lines[i].len - result->indent + 1); + result->lines[i].len -= result->indent; + + result->lines[i].mbtext = advance32(result->lines[i].mbtext, result->indent); + result->lines[i].num_chars -= result->indent; + } + #ifdef DEBUG + fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i, + u32_strconv_to_output(result->lines[i].mbtext), (int) result->lines[i].num_chars); + #endif + } + result->maxline -= result->indent; + } + + /* + * Apply regular expression substitutions + */ + if (opt.r == 0) 
{ + if (apply_substitutions(result, 0) != 0) { + return 1; + } + } + + #ifdef DEBUG + fprintf (stderr, "Effective encoding: %s\n", encoding); + print_input_lines(NULL); + #endif + return 0; +} + +/*EOF*/ /* vim: set sw=4: */ diff --git a/src/input.h b/src/input.h new file mode 100644 index 0000000..fb188e5 --- /dev/null +++ b/src/input.h @@ -0,0 +1,59 @@ +/* + * boxes - Command line filter to draw/remove ASCII boxes around text + * Copyright (c) 1999-2021 Thomas Jensen and the boxes contributors + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License, version 2, as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ + +/* + * Read and analyze input text. + */ + +#ifndef INPUT_H +#define INPUT_H + +#include "boxes.h" + + +/** + * Read the entire input from `opt.infile` until EOF is encountered. Tabs are expanded. + * @return a pointer to the read input data, for which new memory was allocated, or `NULL` on error + */ +input_t *read_all_input(); + + +/** + * Analyze and prepare the input text for further processing. Compute statistics, remove indentation, and apply + * regular expressions if specified in the design. + * @param input_data the input data to analyze and modify + * @returns == 0 on success; anything else on error + */ +int analyze_input(input_t *input_data); + + +/** + * Apply regular expression substitutions to input text. 
Attn: This modifies the lines of the given input data in place! + * @param input_data pointer to the input data where substitutions should be applied + * @param mode when 0: use replacement rules (box is being *drawn*); + * when 1: use reversion rules (box is being *removed*) + * @returns == 0 on success; anything else on error + */ +int apply_substitutions(input_t *input_data, const int mode); + + +#endif + +/*EOF*/ /* vim: set cindent sw=4: */ diff --git a/src/remove.c b/src/remove.c index 1d1b33c..f1b660a 100644 --- a/src/remove.c +++ b/src/remove.c @@ -811,7 +811,7 @@ static void add_spaces_to_line(line_t* line, const size_t n) u32_set(line->mbtext + line->num_chars, char_space, n); set_char_at(line->mbtext, line->num_chars + n, char_nul); line->num_chars += n; - analyze_line_ascii(line); + analyze_line_ascii(&input, line); } diff --git a/src/tools.c b/src/tools.c index 0c4f0b9..70cf805 100644 --- a/src/tools.c +++ b/src/tools.c @@ -35,8 +35,8 @@ #include #include -#include "shape.h" #include "boxes.h" +#include "shape.h" #include "unicode.h" #include "tools.h" @@ -616,7 +616,7 @@ static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **a -void analyze_line_ascii(line_t *line) +void analyze_line_ascii(input_t *input_ptr, line_t *line) { size_t num_esc = 0; char *ascii; @@ -631,8 +631,8 @@ void analyze_line_ascii(line_t *line) BFREE(line->posmap); line->posmap = map; - if (line->len > input.maxline) { - input.maxline = line->len; + if (line->len > input_ptr->maxline) { + input_ptr->maxline = line->len; } } diff --git a/src/tools.h b/src/tools.h index 2bbb5f6..26fc014 100644 --- a/src/tools.h +++ b/src/tools.h @@ -78,7 +78,7 @@ char *nspaces(const size_t n); void print_input_lines(const char *heading); -void analyze_line_ascii(line_t *line); +void analyze_line_ascii(input_t *input_ptr, line_t *line); int array_contains(char **array, const size_t array_len, const char *s);