Extract reading of input lines into its own, new 'input' module #78

This commit is contained in:
Thomas Jensen
2021-04-15 20:50:02 +02:00
parent fb010bc7d6
commit a7227c8aa8
8 changed files with 441 additions and 382 deletions

View File

@ -28,9 +28,11 @@ GEN_HDR = parser.h boxes.h lex.yy.h
GEN_SRC = parser.c lex.yy.c
GEN_FILES = $(GEN_SRC) $(GEN_HDR)
ORIG_HDRCL = boxes.in.h config.h
ORIG_HDR = $(ORIG_HDRCL) cmdline.h discovery.h generate.h list.h parsecode.h parsing.h regulex.h remove.h shape.h tools.h unicode.h
ORIG_HDR = $(ORIG_HDRCL) cmdline.h discovery.h generate.h input.h list.h parsecode.h parsing.h regulex.h remove.h \
shape.h tools.h unicode.h
ORIG_GEN = lexer.l parser.y
ORIG_NORM = boxes.c cmdline.c discovery.c generate.c list.c parsecode.c parsing.c regulex.c remove.c shape.c tools.c unicode.c
ORIG_NORM = boxes.c cmdline.c discovery.c generate.c input.c list.c parsecode.c parsing.c regulex.c remove.c shape.c \
tools.c unicode.c
ORIG_SRC = $(ORIG_GEN) $(ORIG_NORM)
ORIG_FILES = $(ORIG_SRC) $(ORIG_HDR)
@ -86,11 +88,12 @@ parser.c parser.h: parser.y lex.yy.h | check_dir
lex.yy.c lex.yy.h: lexer.l | check_dir
$(LEX) --header-file=lex.yy.h $<
boxes.o: boxes.c boxes.h cmdline.h discovery.h regulex.h shape.h tools.h unicode.h generate.h list.h remove.h config.h | check_dir
boxes.o: boxes.c boxes.h cmdline.h discovery.h generate.h input.h list.h remove.h tools.h unicode.h config.h | check_dir
cmdline.o: cmdline.c cmdline.h boxes.h tools.h config.h | check_dir
discovery.o: discovery.c discovery.h boxes.h tools.h config.h | check_dir
generate.o: generate.c generate.h boxes.h shape.h tools.h unicode.h config.h | check_dir
getopt.o: misc/getopt.c misc/getopt.h | check_dir
input.o: input.c boxes.h input.h regulex.h tools.h unicode.h config.h | check_dir
lex.yy.o: lex.yy.c parser.h boxes.h parsing.h tools.h shape.h config.h | check_dir
list.o: list.c list.h boxes.h parsing.h tools.h config.h | check_dir
parsecode.o: parsecode.c parser.h boxes.h tools.h lex.yy.h regulex.h unicode.h config.h | check_dir

View File

@ -19,29 +19,20 @@
*/
#include "config.h"
#include <errno.h>
#include <locale.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <unictype.h>
#include <unistdio.h>
#include <unistr.h>
#include <unitypes.h>
#include <uniwidth.h>
#include <uniconv.h>
#include "boxes.h"
#include "cmdline.h"
#include "list.h"
#include "shape.h"
#include "tools.h"
#include "discovery.h"
#include "generate.h"
#include "input.h"
#include "list.h"
#include "parsing.h"
#include "regulex.h"
#include "remove.h"
#include "tools.h"
#include "unicode.h"
@ -53,11 +44,11 @@
+--------------------------------------------------------------------------*/
design_t *designs = NULL; /* available box designs */
int anz_designs = 0; /* no of designs after parsing */
int anz_designs = 0; /* number of designs after parsing TODO rename to num_designs */
opt_t opt; /* command line options */
input_t input = INPUT_INITIALIZER; /* input lines */
input_t input; /* input lines */
/* _\|/_
@ -232,358 +223,6 @@ static int query_by_tag()
static int get_indent(const line_t *lines, const size_t lines_size)
/*
* Determine indentation of given lines in spaces.
*
* lines the lines to examine
* lines_size number of lines to examine
*
* Lines are assumed to be free of trailing whitespace.
*
* RETURNS: >= 0 indentation in spaces
* < 0 error
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
{
int res = LINE_MAX_BYTES; /* result */
int nonblank = 0; /* true if one non-blank line found */
if (lines == NULL) {
fprintf(stderr, "%s: internal error\n", PROJECT);
return -1;
}
if (lines_size == 0) {
return 0;
}
for (size_t j = 0; j < lines_size; ++j) {
if (lines[j].len > 0) {
nonblank = 1;
size_t ispc = strspn(lines[j].text, " ");
if ((int) ispc < res) {
res = ispc;
}
}
}
if (nonblank) {
return res; /* success */
} else {
return 0; /* success, but only blank lines */
}
}
static int apply_substitutions(const int mode)
/*
* Apply regular expression substitutions to input text.
*
* mode == 0 use replacement rules (box is being *drawn*)
* == 1 use reversion rules (box is being *removed*)
*
* Attn: This modifies the actual input array!
*
* RETURNS: == 0 success
* != 0 error
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
{
size_t anz_rules;
reprule_t *rules;
size_t j, k;
if (opt.design == NULL) {
return 1;
}
if (mode == 0) {
anz_rules = opt.design->anz_reprules;
rules = opt.design->reprules;
}
else if (mode == 1) {
anz_rules = opt.design->anz_revrules;
rules = opt.design->revrules;
}
else {
fprintf(stderr, "%s: internal error\n", PROJECT);
return 2;
}
/*
* Compile regular expressions
*/
#ifdef REGEXP_DEBUG
fprintf(stderr, "Compiling %d %s rule patterns\n", (int) anz_rules, mode ? "reversion" : "replacement");
#endif
errno = 0;
opt.design->current_rule = rules;
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
rules[j].prog = compile_pattern(rules[j].search);
if (rules[j].prog == NULL) {
return 5;
}
}
opt.design->current_rule = NULL;
if (errno) {
return 3;
}
/*
* Apply regular expression substitutions to input lines
*/
for (k = 0; k < input.anz_lines; ++k) {
opt.design->current_rule = rules;
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
#ifdef REGEXP_DEBUG
fprintf (stderr, "regex_replace(0x%p, \"%s\", \"%s\", %d, \'%c\') == ",
rules[j].prog, rules[j].repstr, u32_strconv_to_output(input.lines[k].mbtext),
(int) input.lines[k].num_chars, rules[j].mode);
#endif
uint32_t *newtext = regex_replace(rules[j].prog, rules[j].repstr,
input.lines[k].mbtext, input.lines[k].num_chars, rules[j].mode == 'g');
#ifdef REGEXP_DEBUG
fprintf (stderr, "\"%s\"\n", newtext ? u32_strconv_to_output(newtext) : "NULL");
#endif
if (newtext == NULL) {
return 1;
}
BFREE(input.lines[k].mbtext_org); /* original address allocated for mbtext */
input.lines[k].mbtext = newtext;
input.lines[k].mbtext_org = newtext;
analyze_line_ascii(input.lines + k);
#ifdef REGEXP_DEBUG
fprintf (stderr, "input.lines[%d] == {%d, \"%s\"}\n", (int) k,
(int) input.lines[k].num_chars, u32_strconv_to_output(input.lines[k].mbtext));
#endif
}
opt.design->current_rule = NULL;
}
/*
* If text indentation was part of the lines processed, indentation
* may now be different -> recalculate input.indent.
*/
if (opt.design->indentmode == 't') {
int rc;
rc = get_indent(input.lines, input.anz_lines);
if (rc >= 0) {
input.indent = (size_t) rc;
} else {
return 4;
}
}
return 0;
}
static int has_linebreak(const uint32_t *s, const int len)
/*
* Determine if the given line of raw text is ended by a line break.
*
* s: the string to check
* len: length of s in characters
*
* RETURNS: != 0 line break found
* == 0 line break not found
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
{
int result = 0;
if (s != NULL && len > 0) {
ucs4_t the_last = s[len - 1];
result = u32_cmp(&char_cr, &the_last, 1) == 0 || u32_cmp(&char_newline, &the_last, 1) == 0;
#if defined(DEBUG)
fprintf(stderr, "has_linebreak: (%#010x) %d\n", (int) the_last, result);
#endif
}
return result;
}
static int read_all_input(const int use_stdin)
/*
* Read entire input (possibly from stdin) and store it in 'input' array.
*
* Tabs are expanded.
* Might allocate slightly more memory than it needs. Trade-off for speed.
*
* use_stdin: flag indicating whether to read from stdin (use_stdin != 0)
* or use the data currently present in input (use_stdin == 0).
*
* RETURNS: != 0 on error (out of memory)
* == 0 on success
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
{
char buf[LINE_MAX_BYTES + 3]; /* input buffer incl. newline + zero terminator */
size_t len_chars;
size_t input_size = 0; /* number of elements allocated */
uint32_t *mbtemp = NULL; /* temp string for preparing the multi-byte input */
size_t i;
int rc;
input.indent = LINE_MAX_BYTES;
input.maxline = 0;
if (use_stdin) {
input.anz_lines = 0;
/*
* Start reading
*/
while (fgets(buf, LINE_MAX_BYTES + 2, opt.infile)) {
if (input.anz_lines % 100 == 0) {
input_size += 100;
line_t *tmp = (line_t *) realloc(input.lines, input_size * sizeof(line_t));
if (tmp == NULL) {
perror(PROJECT);
BFREE (input.lines);
return 1;
}
input.lines = tmp;
}
mbtemp = u32_strconv_from_input(buf);
len_chars = u32_strlen(mbtemp);
input.final_newline = has_linebreak(mbtemp, len_chars);
input.lines[input.anz_lines].posmap = NULL;
input.lines[input.anz_lines].tabpos = NULL;
input.lines[input.anz_lines].tabpos_len = 0;
if (opt.r) {
if (is_char_at(mbtemp, len_chars - 1, char_newline)) {
set_char_at(mbtemp, len_chars - 1, char_nul);
--len_chars;
}
if (is_char_at(mbtemp, len_chars - 1, char_cr)) {
set_char_at(mbtemp, len_chars - 1, char_nul);
--len_chars;
}
}
else {
btrim32(mbtemp, &len_chars);
}
/*
* Expand tabs
*/
if (len_chars > 0) {
uint32_t *temp = NULL;
len_chars = expand_tabs_into(mbtemp, opt.tabstop, &temp,
&(input.lines[input.anz_lines].tabpos),
&(input.lines[input.anz_lines].tabpos_len));
if (len_chars == 0) {
perror(PROJECT);
BFREE (input.lines);
return 1;
}
input.lines[input.anz_lines].mbtext = temp;
BFREE(mbtemp);
temp = NULL;
}
else {
input.lines[input.anz_lines].mbtext = mbtemp;
}
input.lines[input.anz_lines].mbtext_org = input.lines[input.anz_lines].mbtext;
input.lines[input.anz_lines].num_chars = len_chars;
/*
* Build ASCII equivalent of the multi-byte string, update line stats
*/
input.lines[input.anz_lines].text = NULL; /* we haven't used it yet! */
analyze_line_ascii(input.lines + input.anz_lines);
++input.anz_lines;
}
if (ferror(stdin)) {
perror(PROJECT);
BFREE (input.lines);
return 1;
}
}
else {
/* recalculate input statistics for redrawing the mended box */
for (i = 0; i < input.anz_lines; ++i) {
analyze_line_ascii(input.lines + i);
}
}
/*
* Exit if there was no input at all
*/
if (input.lines == NULL || input.lines[0].text == NULL) {
return 0;
}
/*
* Compute indentation
*/
rc = get_indent(input.lines, input.anz_lines);
if (rc >= 0) {
input.indent = (size_t) rc;
} else {
return 1;
}
/*
* Remove indentation, unless we want to preserve it (when removing
* a box or if the user wants to retain it inside the box)
*/
if (opt.design->indentmode != 't' && opt.r == 0) {
for (i = 0; i < input.anz_lines; ++i) {
#ifdef DEBUG
fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i,
u32_strconv_to_output(input.lines[i].mbtext), (int) input.lines[i].num_chars);
#endif
if (input.lines[i].num_chars >= input.indent) {
memmove(input.lines[i].text, input.lines[i].text + input.indent,
input.lines[i].len - input.indent + 1);
input.lines[i].len -= input.indent;
input.lines[i].mbtext = advance32(input.lines[i].mbtext, input.indent);
input.lines[i].num_chars -= input.indent;
}
#ifdef DEBUG
fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i,
u32_strconv_to_output(input.lines[i].mbtext), (int) input.lines[i].num_chars);
#endif
}
input.maxline -= input.indent;
}
/*
* Apply regular expression substitutions
*/
if (opt.r == 0) {
if (apply_substitutions(0) != 0) {
return 1;
}
}
#ifdef DEBUG
fprintf (stderr, "Encoding: %s\n", encoding);
print_input_lines(NULL);
#endif
return 0;
}
/* _\|/_
(o o)
+----oOO-{_}-OOo------------------------------------------------------------+
@ -720,10 +359,20 @@ int main(int argc, char *argv[])
#ifdef DEBUG
fprintf (stderr, "Reading all input ...\n");
#endif
rc = read_all_input(opt.mend);
if (rc) {
input_t *raw_input = NULL;
if (opt.mend != 0) {
raw_input = read_all_input();
if (raw_input == NULL) {
exit(EXIT_FAILURE);
}
}
if (analyze_input(raw_input ? raw_input : &input)) {
exit(EXIT_FAILURE);
}
if (raw_input) {
memcpy(&input, raw_input, sizeof(input_t));
BFREE(raw_input);
}
if (input.anz_lines == 0) {
exit(EXIT_SUCCESS);
}
@ -798,7 +447,7 @@ int main(int argc, char *argv[])
if (rc) {
exit(EXIT_FAILURE);
}
rc = apply_substitutions(1);
rc = apply_substitutions(&input, 1);
if (rc) {
exit(EXIT_FAILURE);
}

View File

@ -158,14 +158,12 @@ typedef struct {
typedef struct {
line_t *lines;
size_t anz_lines; /* number of entries in input */
size_t anz_lines; /* number of entries in input TODO rename to num_lines */
size_t maxline; /* length of longest input line */
size_t indent; /* number of leading spaces found */
int final_newline; /* true if the last line of input ends with newline */
} input_t;
#define INPUT_INITIALIZER {NULL, 0, 0, LINE_MAX_BYTES, 0}
extern input_t input;

350
src/input.c Normal file
View File

@ -0,0 +1,350 @@
/*
* boxes - Command line filter to draw/remove ASCII boxes around text
* Copyright (c) 1999-2021 Thomas Jensen and the boxes contributors
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License, version 2, as published
* by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
/*
* Read and analyze input text.
*/
#include "config.h"
#include <errno.h>
#include <string.h>
#include <unistr.h>
#include <unitypes.h>
#include "boxes.h"
#include "regulex.h"
#include "tools.h"
#include "unicode.h"
#include "input.h"
/**
* Determine if the given line of raw text is ended by a line break.
* @param s the string to check
* @param len length of s in characters
* @returns != 0 if line break found;
* == 0 if line break not found
*/
static int has_linebreak(const uint32_t *s, const int len)
{
int result = 0;
if (s != NULL && len > 0) {
ucs4_t the_last = s[len - 1];
result = u32_cmp(&char_cr, &the_last, 1) == 0 || u32_cmp(&char_newline, &the_last, 1) == 0;
#if defined(DEBUG)
fprintf(stderr, "has_linebreak: (%#010x) %d\n", (int) the_last, result);
#endif
}
return result;
}
/**
* Determine indentation of given lines in spaces. Lines are assumed to be free of trailing whitespace.
* @param lines the lines to examine
* @param lines_size number of lines to examine
* @returns >= 0: indentation in spaces; < 0: error
*/
static int get_indent(const line_t *lines, const size_t lines_size)
{
int res = LINE_MAX_BYTES; /* result */
int nonblank = 0; /* true if one non-blank line found */
if (lines == NULL) {
fprintf(stderr, "%s: internal error\n", PROJECT);
return -1;
}
if (lines_size == 0) {
return 0;
}
for (size_t j = 0; j < lines_size; ++j) {
if (lines[j].len > 0) {
nonblank = 1;
size_t ispc = strspn(lines[j].text, " ");
if ((int) ispc < res) {
res = ispc;
}
}
}
if (nonblank) {
return res; /* success */
} else {
return 0; /* success, but only blank lines */
}
}
int apply_substitutions(input_t *result, const int mode)
{
size_t anz_rules;
reprule_t *rules;
size_t j, k;
if (opt.design == NULL) {
return 1;
}
if (mode == 0) {
anz_rules = opt.design->anz_reprules;
rules = opt.design->reprules;
}
else if (mode == 1) {
anz_rules = opt.design->anz_revrules;
rules = opt.design->revrules;
}
else {
fprintf(stderr, "%s: internal error\n", PROJECT);
return 2;
}
/*
* Compile regular expressions
*/
#ifdef REGEXP_DEBUG
fprintf(stderr, "Compiling %d %s rule patterns\n", (int) anz_rules, mode ? "reversion" : "replacement");
#endif
errno = 0;
opt.design->current_rule = rules;
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
rules[j].prog = compile_pattern(rules[j].search);
if (rules[j].prog == NULL) {
return 5;
}
}
opt.design->current_rule = NULL;
if (errno) {
return 3;
}
/*
* Apply regular expression substitutions to input lines
*/
for (k = 0; k < result->anz_lines; ++k) {
opt.design->current_rule = rules;
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
#ifdef REGEXP_DEBUG
fprintf (stderr, "regex_replace(0x%p, \"%s\", \"%s\", %d, \'%c\') == ",
rules[j].prog, rules[j].repstr, u32_strconv_to_output(result->lines[k].mbtext),
(int) result->lines[k].num_chars, rules[j].mode);
#endif
uint32_t *newtext = regex_replace(rules[j].prog, rules[j].repstr,
result->lines[k].mbtext, result->lines[k].num_chars, rules[j].mode == 'g');
#ifdef REGEXP_DEBUG
fprintf (stderr, "\"%s\"\n", newtext ? u32_strconv_to_output(newtext) : "NULL");
#endif
if (newtext == NULL) {
return 1;
}
BFREE(result->lines[k].mbtext_org); /* original address allocated for mbtext */
result->lines[k].mbtext = newtext;
result->lines[k].mbtext_org = newtext;
analyze_line_ascii(result, result->lines + k);
#ifdef REGEXP_DEBUG
fprintf (stderr, "result->lines[%d] == {%d, \"%s\"}\n", (int) k,
(int) result->lines[k].num_chars, u32_strconv_to_output(result->lines[k].mbtext));
#endif
}
opt.design->current_rule = NULL;
}
/*
* If text indentation was part of the lines processed, indentation
* may now be different -> recalculate result->indent.
*/
if (opt.design->indentmode == 't') {
int rc;
rc = get_indent(result->lines, result->anz_lines);
if (rc >= 0) {
result->indent = (size_t) rc;
} else {
return 4;
}
}
return 0;
}
static void trim_trailing_ws_carefully(uint32_t *mbtemp, size_t *len_chars)
{
if (opt.r) {
/* remove only trailing line breaks, but keep the space */
if (is_char_at(mbtemp, *len_chars - 1, char_newline)) {
set_char_at(mbtemp, *len_chars - 1, char_nul);
--(*len_chars);
}
if (is_char_at(mbtemp, *len_chars - 1, char_cr)) {
set_char_at(mbtemp, *len_chars - 1, char_nul);
--(*len_chars);
}
}
else {
/* remove all trailing whitespace, including unicode whitespace */
btrim32(mbtemp, len_chars);
}
}
input_t *read_all_input()
{
char buf[LINE_MAX_BYTES + 3]; /* static input buffer incl. newline + zero terminator */
size_t input_size = 0; /* number of elements allocated */
input_t *result = (input_t *) calloc(1, sizeof(input_t));
result->indent = LINE_MAX_BYTES;
while (fgets(buf, LINE_MAX_BYTES + 2, opt.infile))
{
if (result->anz_lines % 100 == 0) {
input_size += 100;
line_t *tmp = (line_t *) realloc(result->lines, input_size * sizeof(line_t));
if (tmp == NULL) {
perror(PROJECT);
BFREE (result->lines);
return NULL;
}
result->lines = tmp;
}
memset(result->lines + result->anz_lines, 0, sizeof(line_t));
uint32_t *mbtemp = u32_strconv_from_input(buf);
size_t len_chars = u32_strlen(mbtemp);
result->final_newline = has_linebreak(mbtemp, len_chars);
trim_trailing_ws_carefully(mbtemp, &len_chars);
/*
* Expand tabs
*/
if (len_chars > 0) {
uint32_t *temp = NULL;
len_chars = expand_tabs_into(mbtemp, opt.tabstop, &temp,
&(result->lines[result->anz_lines].tabpos),
&(result->lines[result->anz_lines].tabpos_len));
if (len_chars == 0) {
perror(PROJECT);
BFREE (result->lines);
return NULL;
}
result->lines[result->anz_lines].mbtext = temp;
BFREE(mbtemp);
temp = NULL;
}
else {
result->lines[result->anz_lines].mbtext = mbtemp;
}
result->lines[result->anz_lines].mbtext_org = result->lines[result->anz_lines].mbtext;
result->lines[result->anz_lines].num_chars = len_chars;
++result->anz_lines;
}
if (ferror(stdin)) {
perror(PROJECT);
BFREE (result->lines);
return NULL;
}
return result;
}
int analyze_input(input_t *result)
{
result->indent = LINE_MAX_BYTES;
result->maxline = 0;
/*
* Build ASCII equivalent of the multi-byte string, update line stats
*/
for (size_t i = 0; i < result->anz_lines; ++i) {
analyze_line_ascii(result, result->lines + i);
}
/*
* Exit if there was no input at all
*/
if (result->lines == NULL || result->lines[0].text == NULL) {
return 0;
}
/*
* Compute indentation
*/
int rc = get_indent(result->lines, result->anz_lines);
if (rc >= 0) {
result->indent = (size_t) rc;
} else {
return 1;
}
/*
* Remove indentation, unless we want to preserve it (when removing
* a box or if the user wants to retain it inside the box)
*/
if (opt.design->indentmode != 't' && opt.r == 0) {
for (size_t i = 0; i < result->anz_lines; ++i) {
#ifdef DEBUG
fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i,
u32_strconv_to_output(result->lines[i].mbtext), (int) result->lines[i].num_chars);
#endif
if (result->lines[i].num_chars >= result->indent) {
memmove(result->lines[i].text, result->lines[i].text + result->indent,
result->lines[i].len - result->indent + 1);
result->lines[i].len -= result->indent;
result->lines[i].mbtext = advance32(result->lines[i].mbtext, result->indent);
result->lines[i].num_chars -= result->indent;
}
#ifdef DEBUG
fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i,
u32_strconv_to_output(result->lines[i].mbtext), (int) result->lines[i].num_chars);
#endif
}
result->maxline -= result->indent;
}
/*
* Apply regular expression substitutions
*/
if (opt.r == 0) {
if (apply_substitutions(result, 0) != 0) {
return 1;
}
}
#ifdef DEBUG
fprintf (stderr, "Effective encoding: %s\n", encoding);
print_input_lines(NULL);
#endif
return 0;
}
/*EOF*/ /* vim: set sw=4: */

59
src/input.h Normal file
View File

@ -0,0 +1,59 @@
/*
* boxes - Command line filter to draw/remove ASCII boxes around text
* Copyright (c) 1999-2021 Thomas Jensen and the boxes contributors
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License, version 2, as published
* by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
/*
* Read and analyze input text.
*/
#ifndef INPUT_H
#define INPUT_H
#include "boxes.h"
/**
* Read the entire input from `opt.infile` until EOF is encountered. Tabs are expanded.
* @return a pointer to the read input data, for which new memory was allocated, or `NULL` on error
*/
input_t *read_all_input();
/**
* Analyze and prepare the input text for further processing. Compute statistics, remove indentation, and apply
* regular expressions if specified in the design.
* @param input_data the input data to analyze and modify
* @returns == 0 on success; anything else on error
*/
int analyze_input(input_t *input_data);
/**
* Apply regular expression substitutions to input text. Attn: This modifies the global input array!
* @param input_data pointer to the input data where substitutions should be applied
* @param mode when 0: use replacement rules (box is being *drawn*);
* when 1: use reversion rules (box is being *removed*)
* @returns == 0 on success; anything else on error
*/
int apply_substitutions(input_t *input_data, const int mode);
#endif
/*EOF*/ /* vim: set cindent sw=4: */

View File

@ -811,7 +811,7 @@ static void add_spaces_to_line(line_t* line, const size_t n)
u32_set(line->mbtext + line->num_chars, char_space, n);
set_char_at(line->mbtext, line->num_chars + n, char_nul);
line->num_chars += n;
analyze_line_ascii(line);
analyze_line_ascii(&input, line);
}

View File

@ -35,8 +35,8 @@
#include <unitypes.h>
#include <uniwidth.h>
#include "shape.h"
#include "boxes.h"
#include "shape.h"
#include "unicode.h"
#include "tools.h"
@ -616,7 +616,7 @@ static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **a
void analyze_line_ascii(line_t *line)
void analyze_line_ascii(input_t *input_ptr, line_t *line)
{
size_t num_esc = 0;
char *ascii;
@ -631,8 +631,8 @@ void analyze_line_ascii(line_t *line)
BFREE(line->posmap);
line->posmap = map;
if (line->len > input.maxline) {
input.maxline = line->len;
if (line->len > input_ptr->maxline) {
input_ptr->maxline = line->len;
}
}

View File

@ -78,7 +78,7 @@ char *nspaces(const size_t n);
void print_input_lines(const char *heading);
void analyze_line_ascii(line_t *line);
void analyze_line_ascii(input_t *input_ptr, line_t *line);
int array_contains(char **array, const size_t array_len, const char *s);