mirror of
https://github.com/ascii-boxes/boxes.git
synced 2025-03-10 04:08:17 +01:00
parent
6a3d0e715c
commit
a579da13a5
2
Makefile
2
Makefile
@ -48,7 +48,7 @@ infomsg:
|
||||
|
||||
replaceinfos: src/boxes.h doc/boxes.1
|
||||
|
||||
src/boxes.h: src/boxes.h.in src/regexp/regexp.h Makefile
|
||||
src/boxes.h: src/boxes.h.in src/regulex.h src/shape.h Makefile
|
||||
sed -e 's/--BVERSION--/$(BVERSION) $(GIT_STATUS)/; s/--GLOBALCONF--/$(subst /,\/,$(GLOBALCONF))/' src/boxes.h.in > src/boxes.h
|
||||
|
||||
doc/boxes.1: doc/boxes.1.in Makefile
|
||||
|
11
boxes-config
11
boxes-config
@ -1874,14 +1874,11 @@ shapes {
|
||||
sw ("/*") s ("*") se ("*/")
|
||||
}
|
||||
|
||||
replace "^( *)([^ ])" with "\\1\\2 "
|
||||
replace "([^ ]) ([^ ])" with "\\1 \\2"
|
||||
reverse "^( *)([^ ]*) " to "\\1\\2" # \1 to leave indentation untouched
|
||||
reverse "([^ ]) ([^ ])" to "\\1 \\2"
|
||||
replace "(.)(?!$)" with "$1 "
|
||||
reverse "^( *)([^ ]*) " to "$1$2" # $1 to leave indentation untouched # TODO
|
||||
|
||||
padding {
|
||||
left 2
|
||||
right 1
|
||||
horiz 2
|
||||
}
|
||||
|
||||
elastic (n,e,s,w)
|
||||
@ -2253,7 +2250,7 @@ shapes {
|
||||
elastic (n, s, e, w)
|
||||
|
||||
padding {
|
||||
left 1
|
||||
left 2
|
||||
}
|
||||
|
||||
END unicornsay
|
||||
|
23
src/Makefile
23
src/Makefile
@ -26,9 +26,9 @@ GEN_HDR = parser.h boxes.h
|
||||
GEN_SRC = parser.c lex.yy.c
|
||||
GEN_FILES = $(GEN_SRC) $(GEN_HDR)
|
||||
ORIG_HDRCL = boxes.h.in config.h
|
||||
ORIG_HDR = $(ORIG_HDRCL) lexer.h tools.h shape.h generate.h remove.h unicode.h
|
||||
ORIG_HDR = $(ORIG_HDRCL) lexer.h tools.h shape.h generate.h remove.h unicode.h regulex.h
|
||||
ORIG_GEN = lexer.l parser.y
|
||||
ORIG_NORM = boxes.c tools.c shape.c generate.c remove.c unicode.c
|
||||
ORIG_NORM = boxes.c tools.c shape.c generate.c remove.c unicode.c regulex.c
|
||||
ORIG_SRC = $(ORIG_GEN) $(ORIG_NORM)
|
||||
ORIG_FILES = $(ORIG_SRC) $(ORIG_HDR)
|
||||
OTH_FILES = Makefile
|
||||
@ -46,8 +46,7 @@ debug: flags_$(BOXES_PLATFORM)
|
||||
$(MAKE) BOXES_PLATFORM=$(BOXES_PLATFORM) ALL_OBJ="$(ALL_OBJ)" CFLAGS_ADDTL="-g $(CFLAGS_ADDTL)" STRIP=false flags_$(BOXES_PLATFORM) $(BOXES_EXECUTABLE_NAME)
|
||||
|
||||
boxes: $(ALL_OBJ)
|
||||
$(MAKE) -C regexp CC=$(CC) libregexp.a
|
||||
$(CC) $(LDFLAGS) $(ALL_OBJ) -o $(BOXES_EXECUTABLE_NAME) -lunistring -lpcre2-32 -lregexp
|
||||
$(CC) $(LDFLAGS) $(ALL_OBJ) -o $(BOXES_EXECUTABLE_NAME) -lunistring -lpcre2-32
|
||||
if [ "$(STRIP)" = "true" ] ; then strip $(BOXES_EXECUTABLE_NAME) ; fi
|
||||
|
||||
boxes.exe: $(ALL_OBJ)
|
||||
@ -56,22 +55,22 @@ boxes.exe: $(ALL_OBJ)
|
||||
|
||||
|
||||
flags_unix:
|
||||
$(eval CFLAGS := -I. -Iregexp -Wall -W $(CFLAGS_ADDTL))
|
||||
$(eval LDFLAGS := -Lregexp $(LDFLAGS_ADDTL))
|
||||
$(eval CFLAGS := -I. -Wall -W $(CFLAGS_ADDTL))
|
||||
$(eval LDFLAGS := $(LDFLAGS_ADDTL))
|
||||
$(eval BOXES_EXECUTABLE_NAME := boxes)
|
||||
$(eval ALL_OBJ := $(GEN_SRC:.c=.o) $(ORIG_NORM:.c=.o))
|
||||
|
||||
flags_win32:
|
||||
$(eval CFLAGS := -Os -s -m32 -I. -Iregexp -Wall -W $(CFLAGS_ADDTL))
|
||||
$(eval CFLAGS := -Os -s -m32 -I. -Wall -W $(CFLAGS_ADDTL))
|
||||
$(eval LDFLAGS := -s -m32)
|
||||
$(eval BOXES_EXECUTABLE_NAME := boxes.exe)
|
||||
$(eval ALL_OBJ := $(GEN_SRC:.c=.o) $(ORIG_NORM:.c=.o) regexp/regexp.o regexp/regsub.o misc/getopt.o)
|
||||
$(eval ALL_OBJ := $(GEN_SRC:.c=.o) $(ORIG_NORM:.c=.o) misc/getopt.o)
|
||||
|
||||
flags_:
|
||||
@echo Please call make from the top level directory.
|
||||
exit 1
|
||||
|
||||
parser.c parser.h: parser.y boxes.h regexp/regexp.h
|
||||
parser.c parser.h: parser.y boxes.h
|
||||
$(YACC) -o parser.c -d parser.y
|
||||
|
||||
lex.yy.c: lexer.l boxes.h
|
||||
@ -81,16 +80,15 @@ lex.yy.c: lexer.l boxes.h
|
||||
rm lexer.tmp.c
|
||||
|
||||
|
||||
boxes.o: boxes.c boxes.h regexp/regexp.h shape.h tools.h unicode.h generate.h remove.h config.h
|
||||
boxes.o: boxes.c boxes.h regulex.h shape.h tools.h unicode.h generate.h remove.h config.h
|
||||
tools.o: tools.c tools.h boxes.h shape.h config.h
|
||||
unicode.o: unicode.c unicode.h config.h
|
||||
shape.o: shape.c shape.h boxes.h config.h tools.h
|
||||
generate.o: generate.c generate.h boxes.h shape.h tools.h config.h
|
||||
remove.o: remove.c remove.h boxes.h shape.h tools.h config.h
|
||||
regulex.o: regulex.c regulex.h config.h
|
||||
lex.yy.o: lex.yy.c parser.h tools.h shape.h lexer.h config.h
|
||||
parser.o: parser.c parser.h tools.h shape.h lexer.h config.h
|
||||
regexp/regexp.o: regexp/regexp.c
|
||||
regexp/regsub.o: regexp/regsub.c
|
||||
misc/getopt.o: misc/getopt.c
|
||||
|
||||
|
||||
@ -102,7 +100,6 @@ clean: flags_unix
|
||||
rm -f $(ALL_OBJ)
|
||||
rm -f $(GEN_FILES)
|
||||
rm -f core boxes boxes.exe
|
||||
$(MAKE) -C regexp clean
|
||||
|
||||
|
||||
#EOF
|
||||
|
180
src/boxes.c
180
src/boxes.c
@ -41,8 +41,8 @@
|
||||
#include "shape.h"
|
||||
#include "boxes.h"
|
||||
#include "tools.h"
|
||||
#include "regexp.h"
|
||||
#include "generate.h"
|
||||
#include "regulex.h"
|
||||
#include "remove.h"
|
||||
#include "unicode.h"
|
||||
|
||||
@ -1218,6 +1218,73 @@ static int get_indent(const line_t *lines, const size_t lines_size)
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Analyze the multi-byte string in order to determine its metrics:
|
||||
* - number of visible columns it occupies
|
||||
* - number of escape characters (== number of escape sequences)
|
||||
* - the ASCII equivalent of the string
|
||||
* - the number of invisible characters in the string
|
||||
*
|
||||
* @param <s> the multi-byte string to analyze
|
||||
* @param <num_esc> pointer to where the number of escape sequences should be stored
|
||||
* @param <ascii> pointer to where the ASCII equivalent of the string should be stored
|
||||
* @returns the number of invisible characters in <s>
|
||||
*/
|
||||
static size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii)
|
||||
{
|
||||
size_t invis = 0; /* counts invisible column positions */
|
||||
int ansipos = 0; /* progression of ansi sequence */
|
||||
*num_esc = 0; /* counts the number of escape sequences found */
|
||||
|
||||
if (is_empty(s)) {
|
||||
(*ascii) = (char *) strdup("");
|
||||
return 0;
|
||||
}
|
||||
size_t buflen = (size_t) u32_strwidth(s, encoding);
|
||||
(*ascii) = (char *) calloc(buflen, sizeof(char)); /* maybe a little too much, but certainly enough */
|
||||
char *p = *ascii;
|
||||
|
||||
ucs4_t c;
|
||||
const uint32_t *rest = s;
|
||||
while ((rest = u32_next(&c, rest))) {
|
||||
if (ansipos == 0 && c == char_esc) {
|
||||
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
|
||||
ansipos++;
|
||||
invis++;
|
||||
(*num_esc)++;
|
||||
} else if (ansipos == 1 && c == '[') {
|
||||
/* Found '[' char after ESC. A CSI sequence has started. */
|
||||
ansipos++;
|
||||
invis++;
|
||||
} else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
|
||||
/* Found a byte designating the end of a two-byte escape sequence */
|
||||
invis++;
|
||||
ansipos = 0;
|
||||
} else if (ansipos == 2) {
|
||||
/* Inside CSI sequence - Keep counting bytes as invisible */
|
||||
invis++;
|
||||
|
||||
/* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
|
||||
if (c >= 0x40 && c <= 0x7e) {
|
||||
ansipos = 0;
|
||||
}
|
||||
} else if (is_ascii_printable(c)) {
|
||||
*p = c & 0xff;
|
||||
++p;
|
||||
} else {
|
||||
int cols = uc_width(c, encoding);
|
||||
if (cols > 0) {
|
||||
memset(p, (int) 'x', cols);
|
||||
p += cols;
|
||||
}
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
return invis;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int apply_substitutions(const int mode)
|
||||
/*
|
||||
* Apply regular expression substitutions to input text.
|
||||
@ -1236,8 +1303,6 @@ static int apply_substitutions(const int mode)
|
||||
size_t anz_rules;
|
||||
reprule_t *rules;
|
||||
size_t j, k;
|
||||
char buf[LINE_MAX_BYTES * 2];
|
||||
size_t buf_len; /* length of string in buf */
|
||||
|
||||
if (opt.design == NULL) {
|
||||
return 1;
|
||||
@ -1262,7 +1327,10 @@ static int apply_substitutions(const int mode)
|
||||
errno = 0;
|
||||
opt.design->current_rule = rules;
|
||||
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
|
||||
rules[j].prog = regcomp(rules[j].search);
|
||||
rules[j].prog = compile_pattern(rules[j].search);
|
||||
if (rules[j].prog == NULL) {
|
||||
return 5;
|
||||
}
|
||||
}
|
||||
opt.design->current_rule = NULL;
|
||||
if (errno) {
|
||||
@ -1276,37 +1344,37 @@ static int apply_substitutions(const int mode)
|
||||
opt.design->current_rule = rules;
|
||||
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
|
||||
#ifdef REGEXP_DEBUG
|
||||
fprintf (stderr, "myregsub (0x%p, \"%s\", %d, \"%s\", buf, %d, \'%c\') == ",
|
||||
rules[j].prog, input.lines[k].text,
|
||||
input.lines[k].len, rules[j].repstr, LINE_MAX_BYTES*2,
|
||||
rules[j].mode);
|
||||
fprintf (stderr, "regex_replace(0x%p, \"%s\", \"%s\", %d, \'%c\') == ",
|
||||
rules[j].prog, rules[j].repstr, u32_strconv_to_locale(input.lines[k].mbtext),
|
||||
(int) input.lines[k].num_chars, rules[j].mode);
|
||||
#endif
|
||||
errno = 0;
|
||||
buf_len = myregsub(rules[j].prog, input.lines[k].text,
|
||||
input.lines[k].len, rules[j].repstr, buf, LINE_MAX_BYTES * 2,
|
||||
rules[j].mode);
|
||||
uint32_t *newtext = regex_replace(rules[j].prog, rules[j].repstr,
|
||||
input.lines[k].mbtext, input.lines[k].num_chars, rules[j].mode == 'g');
|
||||
#ifdef REGEXP_DEBUG
|
||||
fprintf (stderr, "%d\n", buf_len);
|
||||
fprintf (stderr, "\"%s\"\n", newtext ? u32_strconv_to_locale(newtext) : "NULL");
|
||||
#endif
|
||||
if (errno) {
|
||||
if (newtext == NULL) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
BFREE (input.lines[k].text);
|
||||
input.lines[k].text = (char *) strdup(buf);
|
||||
if (input.lines[k].text == NULL) {
|
||||
perror(PROJECT);
|
||||
return 1;
|
||||
}
|
||||
|
||||
input.lines[k].len = buf_len;
|
||||
BFREE(input.lines[k].mbtext_org); /* original address allocated for mbtext */
|
||||
input.lines[k].mbtext = newtext;
|
||||
input.lines[k].mbtext_org = newtext;
|
||||
|
||||
size_t num_esc = 0;
|
||||
char *ascii; // TODO HERE extract into function analyze/asciify(line_t) ?
|
||||
size_t invis = count_invisible_chars(input.lines[k].mbtext, &num_esc, &ascii);
|
||||
input.lines[k].len = u32_strwidth(input.lines[k].mbtext, encoding) - invis + num_esc;
|
||||
input.lines[k].num_chars = u32_strlen(input.lines[k].mbtext);
|
||||
BFREE(input.lines[k].text);
|
||||
input.lines[k].text = ascii;
|
||||
if (input.lines[k].len > input.maxline) {
|
||||
input.maxline = input.lines[k].len;
|
||||
}
|
||||
|
||||
#ifdef REGEXP_DEBUG
|
||||
fprintf (stderr, "input.lines[%d] == {%d, \"%s\"}\n", k, input.lines[k].len, input.lines[k].text);
|
||||
fprintf (stderr, "input.lines[%d] == {%d, \"%s\"}\n", (int) k,
|
||||
(int) input.lines[k].num_chars, u32_strconv_to_locale(input.lines[k].mbtext));
|
||||
#endif
|
||||
}
|
||||
opt.design->current_rule = NULL;
|
||||
@ -1357,60 +1425,6 @@ static int has_linebreak(const uint32_t *s, const int len)
|
||||
|
||||
|
||||
|
||||
static size_t count_invisible_chars(const uint32_t *s, const size_t buflen, size_t *num_esc, char **ascii)
|
||||
{
|
||||
size_t invis = 0; /* counts invisible column positions */
|
||||
int ansipos = 0; /* progression of ansi sequence */
|
||||
*num_esc = 0; /* counts the number of escape sequences found */
|
||||
|
||||
if (is_empty(s)) {
|
||||
(*ascii) = (char *) strdup("");
|
||||
return 0;
|
||||
}
|
||||
(*ascii) = (char *) calloc(buflen, sizeof(char));
|
||||
char *p = *ascii;
|
||||
|
||||
ucs4_t c;
|
||||
const uint32_t *rest = s;
|
||||
while ((rest = u32_next(&c, rest))) {
|
||||
if (ansipos == 0 && c == char_esc) {
|
||||
/* Found an ESC char, count it as invisible and move 1 forward in the detection of CSI sequences */
|
||||
ansipos++;
|
||||
invis++;
|
||||
(*num_esc)++;
|
||||
} else if (ansipos == 1 && c == '[') {
|
||||
/* Found '[' char after ESC. A CSI sequence has started. */
|
||||
ansipos++;
|
||||
invis++;
|
||||
} else if (ansipos == 1 && c >= 0x40 && c <= 0x5f) {
|
||||
/* Found a byte designating the end of a two-byte escape sequence */
|
||||
invis++;
|
||||
ansipos = 0;
|
||||
} else if (ansipos == 2) {
|
||||
/* Inside CSI sequence - Keep counting bytes as invisible */
|
||||
invis++;
|
||||
|
||||
/* A char between 0x40 and 0x7e signals the end of an CSI or escape sequence */
|
||||
if (c >= 0x40 && c <= 0x7e) {
|
||||
ansipos = 0;
|
||||
}
|
||||
} else if (is_ascii_printable(c)) {
|
||||
*p = c & 0xff;
|
||||
++p;
|
||||
} else {
|
||||
int cols = uc_width(c, encoding);
|
||||
if (cols > 0) {
|
||||
memset(p, (int) 'x', cols);
|
||||
p += cols;
|
||||
}
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
return invis;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int read_all_input(const int use_stdin)
|
||||
/*
|
||||
* Read entire input (possibly from stdin) and store it in 'input' array.
|
||||
@ -1444,7 +1458,7 @@ static int read_all_input(const int use_stdin)
|
||||
* Start reading
|
||||
*/
|
||||
while (fgets(buf, LINE_MAX_BYTES + 1, opt.infile)) {
|
||||
if (input_size % 100 == 0) {
|
||||
if (input.anz_lines % 100 == 0) {
|
||||
input_size += 100;
|
||||
line_t *tmp = (line_t *) realloc(input.lines, input_size * sizeof(line_t));
|
||||
if (tmp == NULL) {
|
||||
@ -1483,18 +1497,20 @@ static int read_all_input(const int use_stdin)
|
||||
return 1;
|
||||
}
|
||||
input.lines[input.anz_lines].mbtext = temp;
|
||||
BFREE(mbtemp);
|
||||
temp = NULL;
|
||||
}
|
||||
else {
|
||||
input.lines[input.anz_lines].mbtext = mbtemp;
|
||||
}
|
||||
input.lines[input.anz_lines].mbtext_org = input.lines[input.anz_lines].mbtext;
|
||||
input.lines[input.anz_lines].num_chars = len_chars;
|
||||
|
||||
/*
|
||||
* Find ANSI CSI/ESC sequences
|
||||
*/
|
||||
size_t num_esc = 0;
|
||||
size_t invis = count_invisible_chars(input.lines[input.anz_lines].mbtext, strlen(buf), &num_esc,
|
||||
size_t invis = count_invisible_chars(input.lines[input.anz_lines].mbtext, &num_esc,
|
||||
&(input.lines[input.anz_lines].text));
|
||||
input.lines[input.anz_lines].invis = invis;
|
||||
/* u32_strwidth() does not count control characters, i.e. ESC characters, for which we must correct */
|
||||
@ -1526,8 +1542,8 @@ static int read_all_input(const int use_stdin)
|
||||
/* recalculate input statistics for redrawing the mended box */
|
||||
for (i = 0; i < input.anz_lines; ++i) {
|
||||
size_t num_esc = 0;
|
||||
char *dummy;
|
||||
size_t invis = count_invisible_chars(input.lines[i].mbtext, strlen(input.lines[i].text), &num_esc, &dummy);
|
||||
char *dummy; // TODO extract into function
|
||||
size_t invis = count_invisible_chars(input.lines[i].mbtext, &num_esc, &dummy);
|
||||
BFREE(dummy);
|
||||
input.lines[i].len = u32_strwidth(input.lines[i].mbtext, encoding) - invis + num_esc;
|
||||
input.lines[i].num_chars = u32_strlen(input.lines[i].mbtext);
|
||||
@ -1576,7 +1592,7 @@ static int read_all_input(const int use_stdin)
|
||||
* Apply regular expression substitutions
|
||||
*/
|
||||
if (opt.r == 0) {
|
||||
if (apply_substitutions(0) != 0) { // TODO
|
||||
if (apply_substitutions(0) != 0) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -25,13 +25,14 @@
|
||||
#ifndef BOXES_H
|
||||
#define BOXES_H
|
||||
|
||||
/* #define DEBUG */
|
||||
/* #define REGEXP_DEBUG */
|
||||
/* #define PARSER_DEBUG */
|
||||
/* #define LEXER_DEBUG */
|
||||
/* #define DEBUG 1 */
|
||||
#define REGEXP_DEBUG 1
|
||||
/* #define PARSER_DEBUG 1 */
|
||||
/* #define LEXER_DEBUG 1 */
|
||||
|
||||
#include <unitypes.h>
|
||||
#include "regexp/regexp.h"
|
||||
#include "regulex.h"
|
||||
#include "shape.h"
|
||||
|
||||
|
||||
|
||||
@ -80,11 +81,11 @@
|
||||
|
||||
|
||||
typedef struct {
|
||||
char *search;
|
||||
char *repstr;
|
||||
regexp *prog; /* compiled search pattern */
|
||||
int line; /* line of definition in config file */
|
||||
char mode; /* 'g' or 'o' */
|
||||
char *search;
|
||||
char *repstr;
|
||||
pcre2_code *prog; /* compiled search pattern */
|
||||
int line; /* line of definition in config file */
|
||||
char mode; /* 'g' or 'o' */
|
||||
} reprule_t;
|
||||
|
||||
|
||||
@ -147,11 +148,12 @@ extern opt_t opt;
|
||||
|
||||
typedef struct {
|
||||
size_t len; /* length of visible text in columns (visible character positions in a text terminal), which is the same as the length of the 'text' field */
|
||||
char *text; /* ASCII line content, tabs expanded, multi-byte chars replaced with one or more 'x' */
|
||||
char *text; /* ASCII line content, tabs expanded, ansi escapes removed, multi-byte chars replaced with one or more 'x' */
|
||||
size_t invis; /* number of invisble columns/characters (part of an ansi sequence) */
|
||||
|
||||
uint32_t *mbtext; /* multi-byte (original) line content, tabs expanded. We use UTF-32 in order to enable pointer arithmetic. */
|
||||
size_t num_chars; /* total number of characters in mbtext, visible + invisible */
|
||||
uint32_t *mbtext_org; /* mbtext as originally allocated, so that we can free it again */
|
||||
|
||||
size_t *tabpos; /* tab positions in expanded work strings, or NULL if not needed */
|
||||
size_t tabpos_len; /* number of tabs in a line */
|
||||
|
@ -1,49 +0,0 @@
|
||||
#
|
||||
# boxes - Command line filter to draw/remove ASCII boxes around text
|
||||
# Copyright (C) 1999 Thomas Jensen and the boxes contributors
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License, version 2, as published
|
||||
# by the Free Software Foundation.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#____________________________________________________________________________
|
||||
#============================================================================
|
||||
|
||||
|
||||
CFLAGS = -O -I. $(CFLAGS_ADDTL)
|
||||
|
||||
ALL_CL = regexp/regexp.c regexp/regsub.c
|
||||
C_SRC = $(notdir $(ALL_CL))
|
||||
ALLFILES = Makefile $(C_SRC) regexp.h regmagic.h
|
||||
ALLOBJ = $(C_SRC:.c=.o)
|
||||
|
||||
|
||||
.PHONY: clean build debug
|
||||
|
||||
|
||||
build: libregexp.a
|
||||
debug: libregexp.a
|
||||
|
||||
libregexp.a: $(ALLOBJ)
|
||||
ar cr libregexp.a $(ALLOBJ)
|
||||
|
||||
regexp.o: regexp.c regmagic.h regexp.h ../config.h
|
||||
regsub.o: regsub.c regmagic.h regexp.h ../config.h
|
||||
|
||||
.c.o:
|
||||
$(CC) $(CFLAGS) -c $<
|
||||
|
||||
|
||||
clean:
|
||||
rm -f $(ALLOBJ) libregexp.a core
|
||||
|
||||
|
||||
#EOF
|
1212
src/regexp/regexp.c
1212
src/regexp/regexp.c
File diff suppressed because it is too large
Load Diff
@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Definitions etc. for regexp(3) routines.
|
||||
*
|
||||
* Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
|
||||
* not the System V one.
|
||||
*/
|
||||
|
||||
#ifndef REGEXP_H
|
||||
#define REGEXP_H
|
||||
|
||||
|
||||
#define NSUBEXP 10
|
||||
typedef struct regexp {
|
||||
char *startp[NSUBEXP];
|
||||
char *endp[NSUBEXP];
|
||||
char regstart; /* Internal use only. */
|
||||
char reganch; /* Internal use only. */
|
||||
char *regmust; /* Internal use only. */
|
||||
int regmlen; /* Internal use only. */
|
||||
char program[1]; /* Unwarranted chumminess with compiler. */
|
||||
} regexp;
|
||||
|
||||
extern regexp *regcomp();
|
||||
extern int regexec();
|
||||
/* extern size_t regsub(); */
|
||||
extern size_t myregsub();
|
||||
/* extern void regerror(); */
|
||||
|
||||
|
||||
#endif /* REGEXP_H */
|
@ -1,5 +0,0 @@
|
||||
/*
|
||||
* The first byte of the regexp internal "program" is actually this magic
|
||||
* number; the start node begins in the second byte.
|
||||
*/
|
||||
#define MAGIC 0234
|
@ -1,187 +0,0 @@
|
||||
/*
|
||||
* File: regsub.c
|
||||
* Date created: Copyright (c) 1986 by University of Toronto.
|
||||
* Author: Henry Spencer.
|
||||
* Extensions and modifications by Thomas Jensen
|
||||
* Language: K&R C (traditional)
|
||||
* Purpose: Perform substitutions after a regexp match
|
||||
* License: - Not derived from licensed software.
|
||||
* - Permission is granted to anyone to use this
|
||||
* software for any purpose on any computer system,
|
||||
* and to redistribute it freely, subject to the
|
||||
* following restrictions:
|
||||
* 1. The author is not responsible for the
|
||||
* consequences of use of this software, no matter
|
||||
* how awful, even if they arise from defects in it.
|
||||
* 2. The origin of this software must not be
|
||||
* misrepresented, either by explicit claim or by
|
||||
* omission.
|
||||
* 3. Altered versions must be plainly marked as such,
|
||||
* and must not be misrepresented as being the
|
||||
* original software.
|
||||
*
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "regexp.h"
|
||||
#include "regmagic.h"
|
||||
|
||||
|
||||
#ifndef CHARBITS
|
||||
#define UCHARAT(p) ((int)*(unsigned char *)(p))
|
||||
#else
|
||||
#define UCHARAT(p) ((int)*(p)&CHARBITS)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*
|
||||
- regsub - perform substitutions after a regexp match
|
||||
*/
|
||||
size_t /* RETURNS length of dest str */
|
||||
regsub (prog, source, dest, dest_size)
|
||||
regexp *prog;
|
||||
char *source;
|
||||
char *dest;
|
||||
size_t dest_size; /* size of destination buffer */
|
||||
{
|
||||
register char *src;
|
||||
register char *dst;
|
||||
register char c;
|
||||
register int no;
|
||||
register int len;
|
||||
size_t fill; /* current number of chars in dest */
|
||||
|
||||
if (prog == NULL || source == NULL || dest == NULL) {
|
||||
regerror("NULL parm to regsub");
|
||||
return 0;
|
||||
}
|
||||
if (UCHARAT(prog->program) != MAGIC) {
|
||||
regerror("damaged regexp fed to regsub");
|
||||
return 0;
|
||||
}
|
||||
|
||||
src = source;
|
||||
dst = dest;
|
||||
fill = 0;
|
||||
|
||||
while ((c = *src++) != '\0') {
|
||||
if (c == '&')
|
||||
no = 0;
|
||||
else if (c == '\\' && '0' <= *src && *src <= '9')
|
||||
no = *src++ - '0';
|
||||
else
|
||||
no = -1;
|
||||
|
||||
if (no < 0) { /* Ordinary character. */
|
||||
if (c == '\\' && (*src == '\\' || *src == '&'))
|
||||
c = *src++;
|
||||
*dst++ = c;
|
||||
++fill;
|
||||
} else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
|
||||
len = prog->endp[no] - prog->startp[no];
|
||||
if (len < dest_size-fill) {
|
||||
(void) strncpy(dst, prog->startp[no], len);
|
||||
dst += len;
|
||||
fill += len;
|
||||
if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */
|
||||
regerror("damaged match string");
|
||||
return fill;
|
||||
}
|
||||
}
|
||||
else {
|
||||
(void) strncpy (dst, prog->startp[no], dest_size-fill);
|
||||
dest[dest_size-1] = '\0';
|
||||
return dest_size-1;
|
||||
}
|
||||
}
|
||||
if (fill >= dest_size) {
|
||||
dest[dest_size-1] = '\0';
|
||||
return dest_size-1;
|
||||
}
|
||||
}
|
||||
*dst++ = '\0';
|
||||
|
||||
return fill;
|
||||
}
|
||||
|
||||
|
||||
|
||||
size_t /* RETURNS length of str in destination buffer */
|
||||
myregsub (prog, orig, orig_len, repstr, dest, dest_size, mode)
|
||||
regexp *prog; /* pointers for matched regexp to original text */
|
||||
char *orig; /* original input line */
|
||||
size_t orig_len; /* length of original input line */
|
||||
char *repstr; /* source buffer for replaced parts */
|
||||
char *dest; /* destination buffer */
|
||||
size_t dest_size; /* size of destination buffer */
|
||||
char mode; /* 'g' or 'o' */
|
||||
{
|
||||
size_t fill; /* current number of chars in dest */
|
||||
char *sp, *dp; /* source rover, destination rover */
|
||||
int rc; /* received return codes */
|
||||
size_t rest_size; /* remaining space in dest */
|
||||
size_t partlen; /* temp length of a piece handled */
|
||||
|
||||
fill = 0;
|
||||
sp = orig;
|
||||
dp = dest;
|
||||
rest_size = dest_size;
|
||||
|
||||
do {
|
||||
rc = regexec (prog, sp);
|
||||
if (!rc) break;
|
||||
|
||||
partlen = prog->startp[0] - sp;
|
||||
if (partlen < rest_size) {
|
||||
strncpy (dp, sp, partlen);
|
||||
fill += partlen;
|
||||
sp = prog->startp[0];
|
||||
dp += partlen;
|
||||
rest_size -= partlen;
|
||||
}
|
||||
else {
|
||||
strncpy (dp, sp, rest_size);
|
||||
dest[dest_size-1] = '\0';
|
||||
return dest_size - 1;
|
||||
}
|
||||
|
||||
/* fprintf (stderr, "regsub (%p, \"%s\", \"%s\", %d);\n", */
|
||||
/* prog, repstr, dp, rest_size); */
|
||||
fill += regsub (prog, repstr, dp, rest_size);
|
||||
dp = dest + fill;
|
||||
sp = prog->endp[0];
|
||||
rest_size = dest_size - fill;
|
||||
|
||||
if (fill >= dest_size) {
|
||||
dest[dest_size-1] = '\0';
|
||||
return dest_size - 1;
|
||||
}
|
||||
|
||||
/* fprintf (stderr, "dest = \"%s\";\n", dest); */
|
||||
if (prog->startp[0] == prog->endp[0])
|
||||
break; /* match "^" or "$" only once */
|
||||
|
||||
} while (mode == 'g');
|
||||
|
||||
partlen = orig + orig_len - sp;
|
||||
if (partlen < rest_size) {
|
||||
strncpy (dp, sp, partlen);
|
||||
fill += partlen;
|
||||
dp[partlen] = '\0';
|
||||
}
|
||||
else {
|
||||
strncpy (dp, sp, rest_size);
|
||||
dest[dest_size-1] = '\0';
|
||||
fill = dest_size - 1;
|
||||
}
|
||||
|
||||
return fill;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*EOF*/ /* vim: set sw=4: */
|
114
src/regulex.c
Normal file
114
src/regulex.c
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* boxes - Command line filter to draw/remove ASCII boxes around text
|
||||
* Copyright (C) 1999 Thomas Jensen and the boxes contributors
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License, version 2, as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
*/
|
||||
|
||||
/*
|
||||
* Convenience functions for PCRE2 regular expression processing
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <uniconv.h>
|
||||
|
||||
#include "tools.h"
|
||||
#include "regulex.h"
|
||||
|
||||
|
||||
|
||||
pcre2_code *compile_pattern(char *pattern)
|
||||
{
|
||||
int errornumber;
|
||||
PCRE2_SIZE erroroffset;
|
||||
PCRE2_SPTR pattern32 = u32_strconv_from_locale(pattern);
|
||||
|
||||
pcre2_code *re = pcre2_compile(
|
||||
pattern32, /* the pattern */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
0, /* default options */
|
||||
&errornumber,
|
||||
&erroroffset,
|
||||
NULL); /* use default compile context */
|
||||
|
||||
if (re == NULL) {
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||
fprintf(stderr, "Regular expression pattern \"%s\" failed to compile at offset %d: %s\n",
|
||||
pattern, (int) erroroffset, u32_strconv_to_locale(buffer));
|
||||
}
|
||||
return re;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Perform a regex replacement on the given string.
 *
 * @param <search> the compiled pattern to search for
 * @param <replace> the replacement string, encoded in the locale's encoding
 * @param <input> the string to which the replacements shall be applied
 * @param <input_len> the length of <input> in characters, not bytes
 * @param <global> flag indicating whether all occurrences shall be replaced (true) or just the first (false)
 * @return a newly allocated string which is a copy of <input> with the replacements applied, to be freed by the
 *      caller, or NULL on error, in which case an error message was printed on stderr
 */
uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, const size_t input_len, const int global)
{
    /* The converted replacement string is a temporary copy which we own. */
    uint32_t *replacement = u32_strconv_from_locale(replace);
    if (replacement == NULL) {
        fprintf(stderr, "Error substituting \"%s\": replacement string could not be converted to UTF-32\n",
                replace);
        return NULL;
    }

    uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED
            | (global ? PCRE2_SUBSTITUTE_GLOBAL : 0);

    /* Estimated size of the output buffer in characters. Fine if too small, because we enlarge it on demand. */
    PCRE2_SIZE bufsize = (input_len == 0) ? 16 : input_len * 2;
    uint32_t *output = (uint32_t *) malloc(sizeof(uint32_t) * bufsize);
    int pcre2_rc = 0;

    int done = 0;
    while (!done) {
        if (output == NULL) {
            fprintf(stderr, "out of memory");
            free(replacement);
            return NULL;
        }
        PCRE2_SIZE outlen = bufsize;    /* in: size of output buffer; out: length of result or required size */

        pcre2_rc = pcre2_substitute(search, (PCRE2_SPTR) input, input_len,
                0,                      /* start offset */
                options,
                NULL,                   /* ptr to a match data block */
                NULL,                   /* match context */
                (PCRE2_SPTR) replacement, PCRE2_ZERO_TERMINATED,
                output, &outlen);

        if (pcre2_rc != PCRE2_ERROR_NOMEMORY || bufsize >= outlen) {
            done = 1;
        } else {
            /* Buffer too small. PCRE2_SUBSTITUTE_OVERFLOW_LENGTH made PCRE2 store the required size in outlen. */
            #ifdef REGEXP_DEBUG
                fprintf(stderr, "Reallocating output buffer from %lu to %lu UTF-32 chars\n",
                        (unsigned long) bufsize, (unsigned long) outlen);
            #endif
            bufsize = outlen;
            uint32_t *enlarged = (uint32_t *) realloc(output, sizeof(uint32_t) * bufsize);
            if (enlarged == NULL) {
                free(output);           /* realloc() leaves the old buffer allocated on failure */
            }
            output = enlarged;          /* a NULL is reported at the top of the loop */
        }
    }
    free(replacement);

    if (pcre2_rc < 0) {
        PCRE2_UCHAR buffer[256] = {0};
        /* The buffer length is given in code units, not in bytes. */
        pcre2_get_error_message(pcre2_rc, buffer, sizeof(buffer) / sizeof(buffer[0]));
        /* buffer will normally contain "invalid replacement string" */
        char *msg = u32_strconv_to_locale(buffer);
        fprintf(stderr, "Error substituting \"%s\": %s\n", replace,
                msg != NULL ? msg : "(no error message available)");
        free(msg);
        free(output);
        return NULL;
    }

    return output;
}
|
||||
|
||||
|
||||
|
||||
/*EOF*/ /* vim: set sw=4: */
|
67
src/regulex.h
Normal file
67
src/regulex.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* boxes - Command line filter to draw/remove ASCII boxes around text
|
||||
* Copyright (C) 1999 Thomas Jensen and the boxes contributors
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License, version 2, as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
*/
|
||||
|
||||
/*
|
||||
* Convenience functions for PCRE2 regular expression processing
|
||||
*/
|
||||
|
||||
#ifndef REGULEX_H
|
||||
#define REGULEX_H
|
||||
|
||||
/* Building under Windows: If you want to statically link this program against a non-dll .a file, you must define
|
||||
* PCRE2_STATIC before including pcre2.h. */
|
||||
#ifdef __MINGW32__
|
||||
#define PCRE2_STATIC
|
||||
#endif
|
||||
|
||||
/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. For a program that uses only one code unit
|
||||
* width, setting it to 8, 16, or 32 makes it possible to use generic function names such as pcre2_compile(). Note that
|
||||
* just changing 8 to 16 (for example) is not sufficient to convert this program to process 16-bit characters. Even in
|
||||
* a fully 16-bit environment, where string-handling functions such as strcmp() and printf() work with 16-bit
|
||||
* characters, the code for handling the table of named substrings will still need to be modified. */
|
||||
#define PCRE2_CODE_UNIT_WIDTH 32
|
||||
|
||||
|
||||
#include <pcre2.h>
|
||||
#include <unitypes.h>
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Compile the given pattern into a PCRE2 regular expression.
|
||||
*/
|
||||
pcre2_code *compile_pattern(char *pattern);
|
||||
|
||||
/*
|
||||
* Perform a regex replacement on the given string.
|
||||
*
|
||||
* @param <search> the compiled pattern to search for
|
||||
* @param <replace> the replacement string
|
||||
* @param <input> the string to which the replacements shall be applied
|
||||
* @param <input_len> the length of <input> in characters, not bytes
|
||||
* @param <global> flag indicating whether all occurrences shall be replaced (true) or just the first (false)
|
||||
* @return a new string which is a copy of <input> with the replacements applied, or NULL on error
|
||||
*/
|
||||
uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, const size_t input_len, const int global);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
/*EOF*/ /* vim: set cindent sw=4: */
|
@ -50,7 +50,7 @@ typedef struct {
|
||||
char **chars;
|
||||
size_t height;
|
||||
size_t width;
|
||||
int elastic; /* elastic is used only in orginial definition */
|
||||
int elastic; /* elastic is used only in original definition */
|
||||
} sentry_t;
|
||||
|
||||
#define SENTRY_INITIALIZER (sentry_t) {NULL, 0, 0, 0}
|
||||
|
10
src/tools.h
10
src/tools.h
@ -27,14 +27,16 @@
|
||||
|
||||
#include <unitypes.h>
|
||||
|
||||
#include "boxes.h"
|
||||
|
||||
|
||||
#define BMAX(a, b) ((a)>(b)? (a):(b)) /* return the larger value */
|
||||
|
||||
#define BFREE(p) { /* free memory and clear pointer */ \
|
||||
if (p) { \
|
||||
free (p); \
|
||||
(p) = NULL; \
|
||||
} \
|
||||
if (p) { \
|
||||
free((void *) p); \
|
||||
(p) = NULL; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
|
@ -28,7 +28,6 @@
|
||||
|
||||
#include <unictype.h>
|
||||
#include <unistr.h>
|
||||
#include <unitypes.h>
|
||||
|
||||
#include "unicode.h"
|
||||
|
||||
|
@ -25,6 +25,9 @@
|
||||
#ifndef UNICODE_H
|
||||
#define UNICODE_H
|
||||
|
||||
#include <unitypes.h>
|
||||
|
||||
|
||||
extern const char *encoding; /* the character encoding that we use */
|
||||
|
||||
extern const ucs4_t char_tab; /* ucs4_t character '\t' (tab) */
|
||||
|
Loading…
Reference in New Issue
Block a user