mirror of
https://github.com/ascii-boxes/boxes.git
synced 2025-03-10 12:18:15 +01:00
parent
6a3d0e715c
commit
a579da13a5
2
Makefile
2
Makefile
@ -48,7 +48,7 @@ infomsg:
|
|||||||
|
|
||||||
replaceinfos: src/boxes.h doc/boxes.1
|
replaceinfos: src/boxes.h doc/boxes.1
|
||||||
|
|
||||||
src/boxes.h: src/boxes.h.in src/regexp/regexp.h Makefile
|
src/boxes.h: src/boxes.h.in src/regulex.h src/shape.h Makefile
|
||||||
sed -e 's/--BVERSION--/$(BVERSION) $(GIT_STATUS)/; s/--GLOBALCONF--/$(subst /,\/,$(GLOBALCONF))/' src/boxes.h.in > src/boxes.h
|
sed -e 's/--BVERSION--/$(BVERSION) $(GIT_STATUS)/; s/--GLOBALCONF--/$(subst /,\/,$(GLOBALCONF))/' src/boxes.h.in > src/boxes.h
|
||||||
|
|
||||||
doc/boxes.1: doc/boxes.1.in Makefile
|
doc/boxes.1: doc/boxes.1.in Makefile
|
||||||
|
11
boxes-config
11
boxes-config
@ -1874,14 +1874,11 @@ shapes {
|
|||||||
sw ("/*") s ("*") se ("*/")
|
sw ("/*") s ("*") se ("*/")
|
||||||
}
|
}
|
||||||
|
|
||||||
replace "^( *)([^ ])" with "\\1\\2 "
|
replace "(.)(?!$)" with "$1 "
|
||||||
replace "([^ ]) ([^ ])" with "\\1 \\2"
|
reverse "^( *)([^ ]*) " to "$1$2" # $1 to leave indentation untouched # TODO
|
||||||
reverse "^( *)([^ ]*) " to "\\1\\2" # \1 to leave indentation untouched
|
|
||||||
reverse "([^ ]) ([^ ])" to "\\1 \\2"
|
|
||||||
|
|
||||||
padding {
|
padding {
|
||||||
left 2
|
horiz 2
|
||||||
right 1
|
|
||||||
}
|
}
|
||||||
|
|
||||||
elastic (n,e,s,w)
|
elastic (n,e,s,w)
|
||||||
@ -2253,7 +2250,7 @@ shapes {
|
|||||||
elastic (n, s, e, w)
|
elastic (n, s, e, w)
|
||||||
|
|
||||||
padding {
|
padding {
|
||||||
left 1
|
left 2
|
||||||
}
|
}
|
||||||
|
|
||||||
END unicornsay
|
END unicornsay
|
||||||
|
23
src/Makefile
23
src/Makefile
@ -26,9 +26,9 @@ GEN_HDR = parser.h boxes.h
|
|||||||
GEN_SRC = parser.c lex.yy.c
|
GEN_SRC = parser.c lex.yy.c
|
||||||
GEN_FILES = $(GEN_SRC) $(GEN_HDR)
|
GEN_FILES = $(GEN_SRC) $(GEN_HDR)
|
||||||
ORIG_HDRCL = boxes.h.in config.h
|
ORIG_HDRCL = boxes.h.in config.h
|
||||||
ORIG_HDR = $(ORIG_HDRCL) lexer.h tools.h shape.h generate.h remove.h unicode.h
|
ORIG_HDR = $(ORIG_HDRCL) lexer.h tools.h shape.h generate.h remove.h unicode.h regulex.h
|
||||||
ORIG_GEN = lexer.l parser.y
|
ORIG_GEN = lexer.l parser.y
|
||||||
ORIG_NORM = boxes.c tools.c shape.c generate.c remove.c unicode.c
|
ORIG_NORM = boxes.c tools.c shape.c generate.c remove.c unicode.c regulex.c
|
||||||
ORIG_SRC = $(ORIG_GEN) $(ORIG_NORM)
|
ORIG_SRC = $(ORIG_GEN) $(ORIG_NORM)
|
||||||
ORIG_FILES = $(ORIG_SRC) $(ORIG_HDR)
|
ORIG_FILES = $(ORIG_SRC) $(ORIG_HDR)
|
||||||
OTH_FILES = Makefile
|
OTH_FILES = Makefile
|
||||||
@ -46,8 +46,7 @@ debug: flags_$(BOXES_PLATFORM)
|
|||||||
$(MAKE) BOXES_PLATFORM=$(BOXES_PLATFORM) ALL_OBJ="$(ALL_OBJ)" CFLAGS_ADDTL="-g $(CFLAGS_ADDTL)" STRIP=false flags_$(BOXES_PLATFORM) $(BOXES_EXECUTABLE_NAME)
|
$(MAKE) BOXES_PLATFORM=$(BOXES_PLATFORM) ALL_OBJ="$(ALL_OBJ)" CFLAGS_ADDTL="-g $(CFLAGS_ADDTL)" STRIP=false flags_$(BOXES_PLATFORM) $(BOXES_EXECUTABLE_NAME)
|
||||||
|
|
||||||
boxes: $(ALL_OBJ)
|
boxes: $(ALL_OBJ)
|
||||||
$(MAKE) -C regexp CC=$(CC) libregexp.a
|
$(CC) $(LDFLAGS) $(ALL_OBJ) -o $(BOXES_EXECUTABLE_NAME) -lunistring -lpcre2-32
|
||||||
$(CC) $(LDFLAGS) $(ALL_OBJ) -o $(BOXES_EXECUTABLE_NAME) -lunistring -lpcre2-32 -lregexp
|
|
||||||
if [ "$(STRIP)" = "true" ] ; then strip $(BOXES_EXECUTABLE_NAME) ; fi
|
if [ "$(STRIP)" = "true" ] ; then strip $(BOXES_EXECUTABLE_NAME) ; fi
|
||||||
|
|
||||||
boxes.exe: $(ALL_OBJ)
|
boxes.exe: $(ALL_OBJ)
|
||||||
@ -56,22 +55,22 @@ boxes.exe: $(ALL_OBJ)
|
|||||||
|
|
||||||
|
|
||||||
flags_unix:
|
flags_unix:
|
||||||
$(eval CFLAGS := -I. -Iregexp -Wall -W $(CFLAGS_ADDTL))
|
$(eval CFLAGS := -I. -Wall -W $(CFLAGS_ADDTL))
|
||||||
$(eval LDFLAGS := -Lregexp $(LDFLAGS_ADDTL))
|
$(eval LDFLAGS := $(LDFLAGS_ADDTL))
|
||||||
$(eval BOXES_EXECUTABLE_NAME := boxes)
|
$(eval BOXES_EXECUTABLE_NAME := boxes)
|
||||||
$(eval ALL_OBJ := $(GEN_SRC:.c=.o) $(ORIG_NORM:.c=.o))
|
$(eval ALL_OBJ := $(GEN_SRC:.c=.o) $(ORIG_NORM:.c=.o))
|
||||||
|
|
||||||
flags_win32:
|
flags_win32:
|
||||||
$(eval CFLAGS := -Os -s -m32 -I. -Iregexp -Wall -W $(CFLAGS_ADDTL))
|
$(eval CFLAGS := -Os -s -m32 -I. -Wall -W $(CFLAGS_ADDTL))
|
||||||
$(eval LDFLAGS := -s -m32)
|
$(eval LDFLAGS := -s -m32)
|
||||||
$(eval BOXES_EXECUTABLE_NAME := boxes.exe)
|
$(eval BOXES_EXECUTABLE_NAME := boxes.exe)
|
||||||
$(eval ALL_OBJ := $(GEN_SRC:.c=.o) $(ORIG_NORM:.c=.o) regexp/regexp.o regexp/regsub.o misc/getopt.o)
|
$(eval ALL_OBJ := $(GEN_SRC:.c=.o) $(ORIG_NORM:.c=.o) misc/getopt.o)
|
||||||
|
|
||||||
flags_:
|
flags_:
|
||||||
@echo Please call make from the top level directory.
|
@echo Please call make from the top level directory.
|
||||||
exit 1
|
exit 1
|
||||||
|
|
||||||
parser.c parser.h: parser.y boxes.h regexp/regexp.h
|
parser.c parser.h: parser.y boxes.h
|
||||||
$(YACC) -o parser.c -d parser.y
|
$(YACC) -o parser.c -d parser.y
|
||||||
|
|
||||||
lex.yy.c: lexer.l boxes.h
|
lex.yy.c: lexer.l boxes.h
|
||||||
@ -81,16 +80,15 @@ lex.yy.c: lexer.l boxes.h
|
|||||||
rm lexer.tmp.c
|
rm lexer.tmp.c
|
||||||
|
|
||||||
|
|
||||||
boxes.o: boxes.c boxes.h regexp/regexp.h shape.h tools.h unicode.h generate.h remove.h config.h
|
boxes.o: boxes.c boxes.h regulex.h shape.h tools.h unicode.h generate.h remove.h config.h
|
||||||
tools.o: tools.c tools.h boxes.h shape.h config.h
|
tools.o: tools.c tools.h boxes.h shape.h config.h
|
||||||
unicode.o: unicode.c unicode.h config.h
|
unicode.o: unicode.c unicode.h config.h
|
||||||
shape.o: shape.c shape.h boxes.h config.h tools.h
|
shape.o: shape.c shape.h boxes.h config.h tools.h
|
||||||
generate.o: generate.c generate.h boxes.h shape.h tools.h config.h
|
generate.o: generate.c generate.h boxes.h shape.h tools.h config.h
|
||||||
remove.o: remove.c remove.h boxes.h shape.h tools.h config.h
|
remove.o: remove.c remove.h boxes.h shape.h tools.h config.h
|
||||||
|
regulex.o: regulex.c regulex.h config.h
|
||||||
lex.yy.o: lex.yy.c parser.h tools.h shape.h lexer.h config.h
|
lex.yy.o: lex.yy.c parser.h tools.h shape.h lexer.h config.h
|
||||||
parser.o: parser.c parser.h tools.h shape.h lexer.h config.h
|
parser.o: parser.c parser.h tools.h shape.h lexer.h config.h
|
||||||
regexp/regexp.o: regexp/regexp.c
|
|
||||||
regexp/regsub.o: regexp/regsub.c
|
|
||||||
misc/getopt.o: misc/getopt.c
|
misc/getopt.o: misc/getopt.c
|
||||||
|
|
||||||
|
|
||||||
@ -102,7 +100,6 @@ clean: flags_unix
|
|||||||
rm -f $(ALL_OBJ)
|
rm -f $(ALL_OBJ)
|
||||||
rm -f $(GEN_FILES)
|
rm -f $(GEN_FILES)
|
||||||
rm -f core boxes boxes.exe
|
rm -f core boxes boxes.exe
|
||||||
$(MAKE) -C regexp clean
|
|
||||||
|
|
||||||
|
|
||||||
#EOF
|
#EOF
|
||||||
|
180
src/boxes.c
180
src/boxes.c
@ -41,8 +41,8 @@
|
|||||||
#include "shape.h"
|
#include "shape.h"
|
||||||
#include "boxes.h"
|
#include "boxes.h"
|
||||||
#include "tools.h"
|
#include "tools.h"
|
||||||
#include "regexp.h"
|
|
||||||
#include "generate.h"
|
#include "generate.h"
|
||||||
|
#include "regulex.h"
|
||||||
#include "remove.h"
|
#include "remove.h"
|
||||||
#include "unicode.h"
|
#include "unicode.h"
|
||||||
|
|
||||||
@ -1218,6 +1218,73 @@ static int get_indent(const line_t *lines, const size_t lines_size)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyze the multi-byte string in order to determine its metrics:
|
||||||
|
* - number of visible columns it occupies
|
||||||
|
* - number of escape characters (== number of escape sequences)
|
||||||
|
* - the ASCII equivalent of the string
|
||||||
|
* - the number of invisible characters in the string
|
||||||
|
*
|
||||||
|
* @param <s> the multi-byte string to analyze
|
||||||
|
* @param <num_esc> pointer to where the number of escape sequences should be stored
|
||||||
|
* @param <ascii> pointer to where the ASCII equivalent of the string should be stored
|
||||||
|
* @returns the number of invisible characters in <s>
|
||||||
|
*/
|
||||||
|
static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii)
|
||||||
|
{
|
||||||
|
size_t invis = 0; /* counts invisible column positions */
|
||||||
|
int ansipos = 0; /* progression of ansi sequence */
|
||||||
|
*num_esc = 0; /* counts the number of escape sequences found */
|
||||||
|
|
||||||
|
if (is_empty(s)) {
|
||||||
|
(*ascii) = (char *) strdup("");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
size_t buflen = (size_t) u32_strwidth(s, encoding);
|
||||||
|
(*ascii) = (char *) calloc(buflen, sizeof(char)); /* maybe a little too much, but certainly enough */
|
||||||
|
char *p = *ascii;
|
||||||
|
|
||||||
|
ucs4_t c;
|
||||||
|
const uint32_t *rest = s;
|
||||||
|
while ((rest = u32_next(&c, rest))) {
|
||||||
|
if (ansipos == 0 && c == char_esc) {
|
||||||
|
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
|
||||||
|
ansipos++;
|
||||||
|
invis++;
|
||||||
|
(*num_esc)++;
|
||||||
|
} else if (ansipos == 1 && c == '[') {
|
||||||
|
/* Found '[' char after ESC. A CSI sequence has started. */
|
||||||
|
ansipos++;
|
||||||
|
invis++;
|
||||||
|
} else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
|
||||||
|
/* Found a byte designating the end of a two-byte escape sequence */
|
||||||
|
invis++;
|
||||||
|
ansipos = 0;
|
||||||
|
} else if (ansipos == 2) {
|
||||||
|
/* Inside CSI sequence - Keep counting bytes as invisible */
|
||||||
|
invis++;
|
||||||
|
|
||||||
|
/* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
|
||||||
|
if (c >= 0x40 && c <= 0x7e) {
|
||||||
|
ansipos = 0;
|
||||||
|
}
|
||||||
|
} else if (is_ascii_printable(c)) {
|
||||||
|
*p = c & 0xff;
|
||||||
|
++p;
|
||||||
|
} else {
|
||||||
|
int cols = uc_width(c, encoding);
|
||||||
|
if (cols > 0) {
|
||||||
|
memset(p, (int) 'x', cols);
|
||||||
|
p += cols;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*p = '\0';
|
||||||
|
return invis;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static int apply_substitutions(const int mode)
|
static int apply_substitutions(const int mode)
|
||||||
/*
|
/*
|
||||||
* Apply regular expression substitutions to input text.
|
* Apply regular expression substitutions to input text.
|
||||||
@ -1236,8 +1303,6 @@ static int apply_substitutions(const int mode)
|
|||||||
size_t anz_rules;
|
size_t anz_rules;
|
||||||
reprule_t *rules;
|
reprule_t *rules;
|
||||||
size_t j, k;
|
size_t j, k;
|
||||||
char buf[LINE_MAX_BYTES * 2];
|
|
||||||
size_t buf_len; /* length of string in buf */
|
|
||||||
|
|
||||||
if (opt.design == NULL) {
|
if (opt.design == NULL) {
|
||||||
return 1;
|
return 1;
|
||||||
@ -1262,7 +1327,10 @@ static int apply_substitutions(const int mode)
|
|||||||
errno = 0;
|
errno = 0;
|
||||||
opt.design->current_rule = rules;
|
opt.design->current_rule = rules;
|
||||||
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
|
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
|
||||||
rules[j].prog = regcomp(rules[j].search);
|
rules[j].prog = compile_pattern(rules[j].search);
|
||||||
|
if (rules[j].prog == NULL) {
|
||||||
|
return 5;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
opt.design->current_rule = NULL;
|
opt.design->current_rule = NULL;
|
||||||
if (errno) {
|
if (errno) {
|
||||||
@ -1276,37 +1344,37 @@ static int apply_substitutions(const int mode)
|
|||||||
opt.design->current_rule = rules;
|
opt.design->current_rule = rules;
|
||||||
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
|
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
|
||||||
#ifdef REGEXP_DEBUG
|
#ifdef REGEXP_DEBUG
|
||||||
fprintf (stderr, "myregsub (0x%p, \"%s\", %d, \"%s\", buf, %d, \'%c\') == ",
|
fprintf (stderr, "regex_replace(0x%p, \"%s\", \"%s\", %d, \'%c\') == ",
|
||||||
rules[j].prog, input.lines[k].text,
|
rules[j].prog, rules[j].repstr, u32_strconv_to_locale(input.lines[k].mbtext),
|
||||||
input.lines[k].len, rules[j].repstr, LINE_MAX_BYTES*2,
|
(int) input.lines[k].num_chars, rules[j].mode);
|
||||||
rules[j].mode);
|
|
||||||
#endif
|
#endif
|
||||||
errno = 0;
|
uint32_t *newtext = regex_replace(rules[j].prog, rules[j].repstr,
|
||||||
buf_len = myregsub(rules[j].prog, input.lines[k].text,
|
input.lines[k].mbtext, input.lines[k].num_chars, rules[j].mode == 'g');
|
||||||
input.lines[k].len, rules[j].repstr, buf, LINE_MAX_BYTES * 2,
|
|
||||||
rules[j].mode);
|
|
||||||
#ifdef REGEXP_DEBUG
|
#ifdef REGEXP_DEBUG
|
||||||
fprintf (stderr, "%d\n", buf_len);
|
fprintf (stderr, "\"%s\"\n", newtext ? u32_strconv_to_locale(newtext) : "NULL");
|
||||||
#endif
|
#endif
|
||||||
if (errno) {
|
if (newtext == NULL) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
BFREE (input.lines[k].text);
|
BFREE(input.lines[k].mbtext_org); /* original address allocated for mbtext */
|
||||||
input.lines[k].text = (char *) strdup(buf);
|
input.lines[k].mbtext = newtext;
|
||||||
if (input.lines[k].text == NULL) {
|
input.lines[k].mbtext_org = newtext;
|
||||||
perror(PROJECT);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
input.lines[k].len = buf_len;
|
|
||||||
|
|
||||||
|
size_t num_esc = 0;
|
||||||
|
char *ascii; // TODO HERE extract into function analyze/asciify(line_t) ?
|
||||||
|
size_t invis = count_invisible_chars(input.lines[k].mbtext, &num_esc, &ascii);
|
||||||
|
input.lines[k].len = u32_strwidth(input.lines[k].mbtext, encoding) - invis + num_esc;
|
||||||
|
input.lines[k].num_chars = u32_strlen(input.lines[k].mbtext);
|
||||||
|
BFREE(input.lines[k].text);
|
||||||
|
input.lines[k].text = ascii;
|
||||||
if (input.lines[k].len > input.maxline) {
|
if (input.lines[k].len > input.maxline) {
|
||||||
input.maxline = input.lines[k].len;
|
input.maxline = input.lines[k].len;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef REGEXP_DEBUG
|
#ifdef REGEXP_DEBUG
|
||||||
fprintf (stderr, "input.lines[%d] == {%d, \"%s\"}\n", k, input.lines[k].len, input.lines[k].text);
|
fprintf (stderr, "input.lines[%d] == {%d, \"%s\"}\n", (int) k,
|
||||||
|
(int) input.lines[k].num_chars, u32_strconv_to_locale(input.lines[k].mbtext));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
opt.design->current_rule = NULL;
|
opt.design->current_rule = NULL;
|
||||||
@ -1357,60 +1425,6 @@ static int has_linebreak(const uint32_t *s, const int len)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
static size_t count_invisible_chars(const uint32_t *s, const size_t buflen, size_t *num_esc, char **ascii)
|
|
||||||
{
|
|
||||||
size_t invis = 0; /* counts invisible column positions */
|
|
||||||
int ansipos = 0; /* progression of ansi sequence */
|
|
||||||
*num_esc = 0; /* counts the number of escape sequences found */
|
|
||||||
|
|
||||||
if (is_empty(s)) {
|
|
||||||
(*ascii) = (char *) strdup("");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
(*ascii) = (char *) calloc(buflen, sizeof(char));
|
|
||||||
char *p = *ascii;
|
|
||||||
|
|
||||||
ucs4_t c;
|
|
||||||
const uint32_t *rest = s;
|
|
||||||
while ((rest = u32_next(&c, rest))) {
|
|
||||||
if (ansipos == 0 && c == char_esc) {
|
|
||||||
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
|
|
||||||
ansipos++;
|
|
||||||
invis++;
|
|
||||||
(*num_esc)++;
|
|
||||||
} else if (ansipos == 1 && c == '[') {
|
|
||||||
/* Found '[' char after ESC. A CSI sequence has started. */
|
|
||||||
ansipos++;
|
|
||||||
invis++;
|
|
||||||
} else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
|
|
||||||
/* Found a byte designating the end of a two-byte escape sequence */
|
|
||||||
invis++;
|
|
||||||
ansipos = 0;
|
|
||||||
} else if (ansipos == 2) {
|
|
||||||
/* Inside CSI sequence - Keep counting bytes as invisible */
|
|
||||||
invis++;
|
|
||||||
|
|
||||||
/* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
|
|
||||||
if (c >= 0x40 && c <= 0x7e) {
|
|
||||||
ansipos = 0;
|
|
||||||
}
|
|
||||||
} else if (is_ascii_printable(c)) {
|
|
||||||
*p = c & 0xff;
|
|
||||||
++p;
|
|
||||||
} else {
|
|
||||||
int cols = uc_width(c, encoding);
|
|
||||||
if (cols > 0) {
|
|
||||||
memset(p, (int) 'x', cols);
|
|
||||||
p += cols;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*p = '\0';
|
|
||||||
return invis;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static int read_all_input(const int use_stdin)
|
static int read_all_input(const int use_stdin)
|
||||||
/*
|
/*
|
||||||
* Read entire input (possibly from stdin) and store it in 'input' array.
|
* Read entire input (possibly from stdin) and store it in 'input' array.
|
||||||
@ -1444,7 +1458,7 @@ static int read_all_input(const int use_stdin)
|
|||||||
* Start reading
|
* Start reading
|
||||||
*/
|
*/
|
||||||
while (fgets(buf, LINE_MAX_BYTES + 1, opt.infile)) {
|
while (fgets(buf, LINE_MAX_BYTES + 1, opt.infile)) {
|
||||||
if (input_size % 100 == 0) {
|
if (input.anz_lines % 100 == 0) {
|
||||||
input_size += 100;
|
input_size += 100;
|
||||||
line_t *tmp = (line_t *) realloc(input.lines, input_size * sizeof(line_t));
|
line_t *tmp = (line_t *) realloc(input.lines, input_size * sizeof(line_t));
|
||||||
if (tmp == NULL) {
|
if (tmp == NULL) {
|
||||||
@ -1483,18 +1497,20 @@ static int read_all_input(const int use_stdin)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
input.lines[input.anz_lines].mbtext = temp;
|
input.lines[input.anz_lines].mbtext = temp;
|
||||||
|
BFREE(mbtemp);
|
||||||
temp = NULL;
|
temp = NULL;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
input.lines[input.anz_lines].mbtext = mbtemp;
|
input.lines[input.anz_lines].mbtext = mbtemp;
|
||||||
}
|
}
|
||||||
|
input.lines[input.anz_lines].mbtext_org = input.lines[input.anz_lines].mbtext;
|
||||||
input.lines[input.anz_lines].num_chars = len_chars;
|
input.lines[input.anz_lines].num_chars = len_chars;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find ANSI CSI/ESC sequences
|
* Find ANSI CSI/ESC sequences
|
||||||
*/
|
*/
|
||||||
size_t num_esc = 0;
|
size_t num_esc = 0;
|
||||||
size_t invis = count_invisible_chars(input.lines[input.anz_lines].mbtext, strlen(buf), &num_esc,
|
size_t invis = count_invisible_chars(input.lines[input.anz_lines].mbtext, &num_esc,
|
||||||
&(input.lines[input.anz_lines].text));
|
&(input.lines[input.anz_lines].text));
|
||||||
input.lines[input.anz_lines].invis = invis;
|
input.lines[input.anz_lines].invis = invis;
|
||||||
/* u32_strwidth() does not count control characters, i.e. ESC characters, for which we must correct */
|
/* u32_strwidth() does not count control characters, i.e. ESC characters, for which we must correct */
|
||||||
@ -1526,8 +1542,8 @@ static int read_all_input(const int use_stdin)
|
|||||||
/* recalculate input statistics for redrawing the mended box */
|
/* recalculate input statistics for redrawing the mended box */
|
||||||
for (i = 0; i < input.anz_lines; ++i) {
|
for (i = 0; i < input.anz_lines; ++i) {
|
||||||
size_t num_esc = 0;
|
size_t num_esc = 0;
|
||||||
char *dummy;
|
char *dummy; // TODO extract into function
|
||||||
size_t invis = count_invisible_chars(input.lines[i].mbtext, strlen(input.lines[i].text), &num_esc, &dummy);
|
size_t invis = count_invisible_chars(input.lines[i].mbtext, &num_esc, &dummy);
|
||||||
BFREE(dummy);
|
BFREE(dummy);
|
||||||
input.lines[i].len = u32_strwidth(input.lines[i].mbtext, encoding) - invis + num_esc;
|
input.lines[i].len = u32_strwidth(input.lines[i].mbtext, encoding) - invis + num_esc;
|
||||||
input.lines[i].num_chars = u32_strlen(input.lines[i].mbtext);
|
input.lines[i].num_chars = u32_strlen(input.lines[i].mbtext);
|
||||||
@ -1576,7 +1592,7 @@ static int read_all_input(const int use_stdin)
|
|||||||
* Apply regular expression substitutions
|
* Apply regular expression substitutions
|
||||||
*/
|
*/
|
||||||
if (opt.r == 0) {
|
if (opt.r == 0) {
|
||||||
if (apply_substitutions(0) != 0) { // TODO
|
if (apply_substitutions(0) != 0) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -25,13 +25,14 @@
|
|||||||
#ifndef BOXES_H
|
#ifndef BOXES_H
|
||||||
#define BOXES_H
|
#define BOXES_H
|
||||||
|
|
||||||
/* #define DEBUG */
|
/* #define DEBUG 1 */
|
||||||
/* #define REGEXP_DEBUG */
|
#define REGEXP_DEBUG 1
|
||||||
/* #define PARSER_DEBUG */
|
/* #define PARSER_DEBUG 1 */
|
||||||
/* #define LEXER_DEBUG */
|
/* #define LEXER_DEBUG 1 */
|
||||||
|
|
||||||
#include <unitypes.h>
|
#include <unitypes.h>
|
||||||
#include "regexp/regexp.h"
|
#include "regulex.h"
|
||||||
|
#include "shape.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -80,11 +81,11 @@
|
|||||||
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char *search;
|
char *search;
|
||||||
char *repstr;
|
char *repstr;
|
||||||
regexp *prog; /* compiled search pattern */
|
pcre2_code *prog; /* compiled search pattern */
|
||||||
int line; /* line of definition in config file */
|
int line; /* line of definition in config file */
|
||||||
char mode; /* 'g' or 'o' */
|
char mode; /* 'g' or 'o' */
|
||||||
} reprule_t;
|
} reprule_t;
|
||||||
|
|
||||||
|
|
||||||
@ -147,11 +148,12 @@ extern opt_t opt;
|
|||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
size_t len; /* length of visible text in columns (visible character positions in a text terminal), which is the same as the length of the 'text' field */
|
size_t len; /* length of visible text in columns (visible character positions in a text terminal), which is the same as the length of the 'text' field */
|
||||||
char *text; /* ASCII line content, tabs expanded, multi-byte chars replaced with one or more 'x' */
|
char *text; /* ASCII line content, tabs expanded, ansi escapes removed, multi-byte chars replaced with one or more 'x' */
|
||||||
size_t invis; /* number of invisble columns/characters (part of an ansi sequence) */
|
size_t invis; /* number of invisble columns/characters (part of an ansi sequence) */
|
||||||
|
|
||||||
uint32_t *mbtext; /* multi-byte (original) line content, tabs expanded. We use UTF-32 in order to enable pointer arithmetic. */
|
uint32_t *mbtext; /* multi-byte (original) line content, tabs expanded. We use UTF-32 in order to enable pointer arithmetic. */
|
||||||
size_t num_chars; /* total number of characters in mbtext, visible + invisible */
|
size_t num_chars; /* total number of characters in mbtext, visible + invisible */
|
||||||
|
uint32_t *mbtext_org; /* mbtext as originally allocated, so that we can free it again */
|
||||||
|
|
||||||
size_t *tabpos; /* tab positions in expanded work strings, or NULL if not needed */
|
size_t *tabpos; /* tab positions in expanded work strings, or NULL if not needed */
|
||||||
size_t tabpos_len; /* number of tabs in a line */
|
size_t tabpos_len; /* number of tabs in a line */
|
||||||
|
@ -1,49 +0,0 @@
|
|||||||
#
|
|
||||||
# boxes - Command line filter to draw/remove ASCII boxes around text
|
|
||||||
# Copyright (C) 1999 Thomas Jensen and the boxes contributors
|
|
||||||
#
|
|
||||||
# This program is free software; you can redistribute it and/or modify it
|
|
||||||
# under the terms of the GNU General Public License, version 2, as published
|
|
||||||
# by the Free Software Foundation.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful, but
|
|
||||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
||||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
||||||
# for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU General Public License along
|
|
||||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
||||||
#____________________________________________________________________________
|
|
||||||
#============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
CFLAGS = -O -I. $(CFLAGS_ADDTL)
|
|
||||||
|
|
||||||
ALL_CL = regexp/regexp.c regexp/regsub.c
|
|
||||||
C_SRC = $(notdir $(ALL_CL))
|
|
||||||
ALLFILES = Makefile $(C_SRC) regexp.h regmagic.h
|
|
||||||
ALLOBJ = $(C_SRC:.c=.o)
|
|
||||||
|
|
||||||
|
|
||||||
.PHONY: clean build debug
|
|
||||||
|
|
||||||
|
|
||||||
build: libregexp.a
|
|
||||||
debug: libregexp.a
|
|
||||||
|
|
||||||
libregexp.a: $(ALLOBJ)
|
|
||||||
ar cr libregexp.a $(ALLOBJ)
|
|
||||||
|
|
||||||
regexp.o: regexp.c regmagic.h regexp.h ../config.h
|
|
||||||
regsub.o: regsub.c regmagic.h regexp.h ../config.h
|
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) $(CFLAGS) -c $<
|
|
||||||
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f $(ALLOBJ) libregexp.a core
|
|
||||||
|
|
||||||
|
|
||||||
#EOF
|
|
1212
src/regexp/regexp.c
1212
src/regexp/regexp.c
File diff suppressed because it is too large
Load Diff
@ -1,30 +0,0 @@
|
|||||||
/*
|
|
||||||
* Definitions etc. for regexp(3) routines.
|
|
||||||
*
|
|
||||||
* Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
|
|
||||||
* not the System V one.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef REGEXP_H
|
|
||||||
#define REGEXP_H
|
|
||||||
|
|
||||||
|
|
||||||
#define NSUBEXP 10
|
|
||||||
typedef struct regexp {
|
|
||||||
char *startp[NSUBEXP];
|
|
||||||
char *endp[NSUBEXP];
|
|
||||||
char regstart; /* Internal use only. */
|
|
||||||
char reganch; /* Internal use only. */
|
|
||||||
char *regmust; /* Internal use only. */
|
|
||||||
int regmlen; /* Internal use only. */
|
|
||||||
char program[1]; /* Unwarranted chumminess with compiler. */
|
|
||||||
} regexp;
|
|
||||||
|
|
||||||
extern regexp *regcomp();
|
|
||||||
extern int regexec();
|
|
||||||
/* extern size_t regsub(); */
|
|
||||||
extern size_t myregsub();
|
|
||||||
/* extern void regerror(); */
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* REGEXP_H */
|
|
@ -1,5 +0,0 @@
|
|||||||
/*
|
|
||||||
* The first byte of the regexp internal "program" is actually this magic
|
|
||||||
* number; the start node begins in the second byte.
|
|
||||||
*/
|
|
||||||
#define MAGIC 0234
|
|
@ -1,187 +0,0 @@
|
|||||||
/*
|
|
||||||
* File: regsub.c
|
|
||||||
* Date created: Copyright (c) 1986 by University of Toronto.
|
|
||||||
* Author: Henry Spencer.
|
|
||||||
* Extensions and modifications by Thomas Jensen
|
|
||||||
* Language: K&R C (traditional)
|
|
||||||
* Purpose: Perform substitutions after a regexp match
|
|
||||||
* License: - Not derived from licensed software.
|
|
||||||
* - Permission is granted to anyone to use this
|
|
||||||
* software for any purpose on any computer system,
|
|
||||||
* and to redistribute it freely, subject to the
|
|
||||||
* following restrictions:
|
|
||||||
* 1. The author is not responsible for the
|
|
||||||
* consequences of use of this software, no matter
|
|
||||||
* how awful, even if they arise from defects in it.
|
|
||||||
* 2. The origin of this software must not be
|
|
||||||
* misrepresented, either by explicit claim or by
|
|
||||||
* omission.
|
|
||||||
* 3. Altered versions must be plainly marked as such,
|
|
||||||
* and must not be misrepresented as being the
|
|
||||||
* original software.
|
|
||||||
*
|
|
||||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include "regexp.h"
|
|
||||||
#include "regmagic.h"
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef CHARBITS
|
|
||||||
#define UCHARAT(p) ((int)*(unsigned char *)(p))
|
|
||||||
#else
|
|
||||||
#define UCHARAT(p) ((int)*(p)&CHARBITS)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
- regsub - perform substitutions after a regexp match
|
|
||||||
*/
|
|
||||||
size_t /* RETURNS length of dest str */
|
|
||||||
regsub (prog, source, dest, dest_size)
|
|
||||||
regexp *prog;
|
|
||||||
char *source;
|
|
||||||
char *dest;
|
|
||||||
size_t dest_size; /* size of destination buffer */
|
|
||||||
{
|
|
||||||
register char *src;
|
|
||||||
register char *dst;
|
|
||||||
register char c;
|
|
||||||
register int no;
|
|
||||||
register int len;
|
|
||||||
size_t fill; /* current number of chars in dest */
|
|
||||||
|
|
||||||
if (prog == NULL || source == NULL || dest == NULL) {
|
|
||||||
regerror("NULL parm to regsub");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (UCHARAT(prog->program) != MAGIC) {
|
|
||||||
regerror("damaged regexp fed to regsub");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
src = source;
|
|
||||||
dst = dest;
|
|
||||||
fill = 0;
|
|
||||||
|
|
||||||
while ((c = *src++) != '\0') {
|
|
||||||
if (c == '&')
|
|
||||||
no = 0;
|
|
||||||
else if (c == '\\' && '0' <= *src && *src <= '9')
|
|
||||||
no = *src++ - '0';
|
|
||||||
else
|
|
||||||
no = -1;
|
|
||||||
|
|
||||||
if (no < 0) { /* Ordinary character. */
|
|
||||||
if (c == '\\' && (*src == '\\' || *src == '&'))
|
|
||||||
c = *src++;
|
|
||||||
*dst++ = c;
|
|
||||||
++fill;
|
|
||||||
} else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
|
|
||||||
len = prog->endp[no] - prog->startp[no];
|
|
||||||
if (len < dest_size-fill) {
|
|
||||||
(void) strncpy(dst, prog->startp[no], len);
|
|
||||||
dst += len;
|
|
||||||
fill += len;
|
|
||||||
if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */
|
|
||||||
regerror("damaged match string");
|
|
||||||
return fill;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
(void) strncpy (dst, prog->startp[no], dest_size-fill);
|
|
||||||
dest[dest_size-1] = '\0';
|
|
||||||
return dest_size-1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (fill >= dest_size) {
|
|
||||||
dest[dest_size-1] = '\0';
|
|
||||||
return dest_size-1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*dst++ = '\0';
|
|
||||||
|
|
||||||
return fill;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
size_t /* RETURNS length of str in destination buffer */
|
|
||||||
myregsub (prog, orig, orig_len, repstr, dest, dest_size, mode)
|
|
||||||
regexp *prog; /* pointers for matched regexp to original text */
|
|
||||||
char *orig; /* original input line */
|
|
||||||
size_t orig_len; /* length of original input line */
|
|
||||||
char *repstr; /* source buffer for replaced parts */
|
|
||||||
char *dest; /* destination buffer */
|
|
||||||
size_t dest_size; /* size of destination buffer */
|
|
||||||
char mode; /* 'g' or 'o' */
|
|
||||||
{
|
|
||||||
size_t fill; /* current number of chars in dest */
|
|
||||||
char *sp, *dp; /* source rover, destination rover */
|
|
||||||
int rc; /* received return codes */
|
|
||||||
size_t rest_size; /* remaining space in dest */
|
|
||||||
size_t partlen; /* temp length of a piece handled */
|
|
||||||
|
|
||||||
fill = 0;
|
|
||||||
sp = orig;
|
|
||||||
dp = dest;
|
|
||||||
rest_size = dest_size;
|
|
||||||
|
|
||||||
do {
|
|
||||||
rc = regexec (prog, sp);
|
|
||||||
if (!rc) break;
|
|
||||||
|
|
||||||
partlen = prog->startp[0] - sp;
|
|
||||||
if (partlen < rest_size) {
|
|
||||||
strncpy (dp, sp, partlen);
|
|
||||||
fill += partlen;
|
|
||||||
sp = prog->startp[0];
|
|
||||||
dp += partlen;
|
|
||||||
rest_size -= partlen;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
strncpy (dp, sp, rest_size);
|
|
||||||
dest[dest_size-1] = '\0';
|
|
||||||
return dest_size - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* fprintf (stderr, "regsub (%p, \"%s\", \"%s\", %d);\n", */
|
|
||||||
/* prog, repstr, dp, rest_size); */
|
|
||||||
fill += regsub (prog, repstr, dp, rest_size);
|
|
||||||
dp = dest + fill;
|
|
||||||
sp = prog->endp[0];
|
|
||||||
rest_size = dest_size - fill;
|
|
||||||
|
|
||||||
if (fill >= dest_size) {
|
|
||||||
dest[dest_size-1] = '\0';
|
|
||||||
return dest_size - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* fprintf (stderr, "dest = \"%s\";\n", dest); */
|
|
||||||
if (prog->startp[0] == prog->endp[0])
|
|
||||||
break; /* match "^" or "$" only once */
|
|
||||||
|
|
||||||
} while (mode == 'g');
|
|
||||||
|
|
||||||
partlen = orig + orig_len - sp;
|
|
||||||
if (partlen < rest_size) {
|
|
||||||
strncpy (dp, sp, partlen);
|
|
||||||
fill += partlen;
|
|
||||||
dp[partlen] = '\0';
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
strncpy (dp, sp, rest_size);
|
|
||||||
dest[dest_size-1] = '\0';
|
|
||||||
fill = dest_size - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return fill;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*EOF*/ /* vim: set sw=4: */
|
|
114
src/regulex.c
Normal file
114
src/regulex.c
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
/*
|
||||||
|
* boxes - Command line filter to draw/remove ASCII boxes around text
|
||||||
|
* Copyright (C) 1999 Thomas Jensen and the boxes contributors
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License, version 2, as published
|
||||||
|
* by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but
|
||||||
|
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*
|
||||||
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convenience functions for PCRE2 regular expression processing
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <uniconv.h>
|
||||||
|
|
||||||
|
#include "tools.h"
|
||||||
|
#include "regulex.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
pcre2_code *compile_pattern(char *pattern)
|
||||||
|
{
|
||||||
|
int errornumber;
|
||||||
|
PCRE2_SIZE erroroffset;
|
||||||
|
PCRE2_SPTR pattern32 = u32_strconv_from_locale(pattern);
|
||||||
|
|
||||||
|
pcre2_code *re = pcre2_compile(
|
||||||
|
pattern32, /* the pattern */
|
||||||
|
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||||
|
0, /* default options */
|
||||||
|
&errornumber,
|
||||||
|
&erroroffset,
|
||||||
|
NULL); /* use default compile context */
|
||||||
|
|
||||||
|
if (re == NULL) {
|
||||||
|
PCRE2_UCHAR buffer[256];
|
||||||
|
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||||
|
fprintf(stderr, "Regular expression pattern \"%s\" failed to compile at offset %d: %s\n",
|
||||||
|
pattern, (int) erroroffset, u32_strconv_to_locale(buffer));
|
||||||
|
}
|
||||||
|
return re;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Perform a regex replacement on the given string.
 *
 * The output buffer starts out with an estimated size and is enlarged to the size requested by PCRE2 whenever
 * the substitution reports that the buffer was too small.
 *
 * @param <search> the compiled pattern to search for
 * @param <replace> the replacement string, in the character encoding of the current locale
 * @param <input> the string to which the replacements shall be applied
 * @param <input_len> the length of <input> in characters, not bytes
 * @param <global> flag indicating whether all occurrences shall be replaced (true) or just the first (false)
 * @return a newly allocated string which is a copy of <input> with the replacements applied, or NULL on error
 *      (in which case a message was printed on stderr); the caller is responsible for freeing the result
 */
uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, const size_t input_len, const int global)
{
    uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED
            | (global ? PCRE2_SUBSTITUTE_GLOBAL : 0);
    /* estimated length of output buffer in characters, fine if too small */
    PCRE2_SIZE bufsize = (input_len == 0) ? 16 : input_len * 2;
    uint32_t *output = NULL;
    int pcre2_rc = 0;

    uint32_t *replacement = u32_strconv_from_locale(replace);
    if (replacement == NULL) {
        fprintf(stderr, "Replacement string \"%s\" could not be converted to UTF-32\n", replace);
        return NULL;
    }

    output = (uint32_t *) malloc(sizeof(uint32_t) * bufsize);
    if (output == NULL) {
        fprintf(stderr, "out of memory\n");
        free(replacement);
        return NULL;
    }

    for (;;) {
        /* in: capacity of the output buffer; out: length used, or length required if the buffer was too small */
        PCRE2_SIZE outlen = bufsize;
        uint32_t *enlarged = NULL;

        pcre2_rc = pcre2_substitute(search, (PCRE2_SPTR) input, input_len,
                0,         /* start offset */
                options,
                NULL,      /* ptr to a match data block */
                NULL,      /* match context */
                (PCRE2_SPTR) replacement, PCRE2_ZERO_TERMINATED,
                output, &outlen);

        /* stop unless PCRE2 asks for a buffer that is really larger than the current one */
        if (pcre2_rc != PCRE2_ERROR_NOMEMORY || bufsize >= outlen) {
            break;
        }

        #ifdef REGEXP_DEBUG
            fprintf(stderr, "Reallocating output buffer from %lu to %lu UTF-32 chars\n",
                    (unsigned long) bufsize, (unsigned long) outlen);
        #endif
        /* keep the old pointer until realloc() has succeeded, so it can still be freed on failure */
        enlarged = (uint32_t *) realloc(output, sizeof(uint32_t) * outlen);
        if (enlarged == NULL) {
            fprintf(stderr, "out of memory\n");
            free(output);
            free(replacement);
            return NULL;
        }
        output = enlarged;
        bufsize = outlen;
    }
    free(replacement);

    if (pcre2_rc < 0) {
        PCRE2_UCHAR buffer[256] = {0};
        char *message = NULL;

        /* the buffer length is given in code units, NOT in bytes */
        pcre2_get_error_message(pcre2_rc, buffer, sizeof(buffer) / sizeof(buffer[0]));
        message = u32_strconv_to_locale(buffer);
        /* buffer will normally contain "invalid replacement string" */
        fprintf(stderr, "Error substituting \"%s\": %s\n", replace, message != NULL ? message : "(unknown error)");
        free(message);
        free(output);
        return NULL;
    }

    return output;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*EOF*/ /* vim: set sw=4: */
|
67
src/regulex.h
Normal file
67
src/regulex.h
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
/*
|
||||||
|
* boxes - Command line filter to draw/remove ASCII boxes around text
|
||||||
|
* Copyright (C) 1999 Thomas Jensen and the boxes contributors
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License, version 2, as published
|
||||||
|
* by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but
|
||||||
|
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*
|
||||||
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convenience functions for PCRE2 regular expression processing
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef REGULEX_H
|
||||||
|
#define REGULEX_H
|
||||||
|
|
||||||
|
/* Building under Windows: If you want to statically link this program against a non-dll .a file, you must define
|
||||||
|
* PCRE2_STATIC before including pcre2.h. */
|
||||||
|
#ifdef __MINGW32__
|
||||||
|
#define PCRE2_STATIC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. For a program that uses only one code unit
|
||||||
|
* width, setting it to 8, 16, or 32 makes it possible to use generic function names such as pcre2_compile(). Note that
|
||||||
|
* just changing 8 to 16 (for example) is not sufficient to convert this program to process 16-bit characters. Even in
|
||||||
|
* a fully 16-bit environment, where string-handling functions such as strcmp() and printf() work with 16-bit
|
||||||
|
* characters, the code for handling the table of named substrings will still need to be modified. */
|
||||||
|
#define PCRE2_CODE_UNIT_WIDTH 32
|
||||||
|
|
||||||
|
|
||||||
|
#include <pcre2.h>
|
||||||
|
#include <unitypes.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
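* Compile the given pattern into a PCRE2 regular expression.
*
* @param <pattern> the regular expression pattern, in the character encoding of the current locale
* @return the compiled pattern, or NULL if compilation failed (an error message is then printed on stderr)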
|
||||||
|
*/
|
||||||
|
pcre2_code *compile_pattern(char *pattern);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform a regex replacement on the given string.
|
||||||
|
*
|
||||||
|
* @param <search> the compiled pattern to search for
|
||||||
|
* @param <replace> the replacement string
|
||||||
|
* @param <input> the string to which the replacements shall be applied
|
||||||
|
* @param <input_len> the length of <input> in characters, not bytes
|
||||||
|
* @param <global> flag indicating whether all occurrences shall be replaced (true) or just the first (false)
|
||||||
|
* @return a new string which is a copy of <input> with the replacements applied, or NULL on error
|
||||||
|
*/
|
||||||
|
uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, const size_t input_len, const int global);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*EOF*/ /* vim: set cindent sw=4: */
|
@ -50,7 +50,7 @@ typedef struct {
|
|||||||
char **chars;
|
char **chars;
|
||||||
size_t height;
|
size_t height;
|
||||||
size_t width;
|
size_t width;
|
||||||
int elastic; /* elastic is used only in orginial definition */
|
int elastic; /* elastic is used only in original definition */
|
||||||
} sentry_t;
|
} sentry_t;
|
||||||
|
|
||||||
#define SENTRY_INITIALIZER (sentry_t) {NULL, 0, 0, 0}
|
#define SENTRY_INITIALIZER (sentry_t) {NULL, 0, 0, 0}
|
||||||
|
10
src/tools.h
10
src/tools.h
@ -27,14 +27,16 @@
|
|||||||
|
|
||||||
#include <unitypes.h>
|
#include <unitypes.h>
|
||||||
|
|
||||||
|
#include "boxes.h"
|
||||||
|
|
||||||
|
|
||||||
#define BMAX(a, b) ((a)>(b)? (a):(b)) /* return the larger value */
|
#define BMAX(a, b) ((a)>(b)? (a):(b)) /* return the larger value */
|
||||||
|
|
||||||
#define BFREE(p) { /* free memory and clear pointer */ \
|
#define BFREE(p) { /* free memory and clear pointer */ \
|
||||||
if (p) { \
|
if (p) { \
|
||||||
free (p); \
|
free((void *) p); \
|
||||||
(p) = NULL; \
|
(p) = NULL; \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -28,7 +28,6 @@
|
|||||||
|
|
||||||
#include <unictype.h>
|
#include <unictype.h>
|
||||||
#include <unistr.h>
|
#include <unistr.h>
|
||||||
#include <unitypes.h>
|
|
||||||
|
|
||||||
#include "unicode.h"
|
#include "unicode.h"
|
||||||
|
|
||||||
|
@ -25,6 +25,9 @@
|
|||||||
#ifndef UNICODE_H
|
#ifndef UNICODE_H
|
||||||
#define UNICODE_H
|
#define UNICODE_H
|
||||||
|
|
||||||
|
#include <unitypes.h>
|
||||||
|
|
||||||
|
|
||||||
extern const char *encoding; /* the character encoding that we use */
|
extern const char *encoding; /* the character encoding that we use */
|
||||||
|
|
||||||
extern const ucs4_t char_tab; /* ucs4_t character '\t' (tab) */
|
extern const ucs4_t char_tab; /* ucs4_t character '\t' (tab) */
|
||||||
|
Loading…
Reference in New Issue
Block a user