Extract a new module 'detect' from 'remove' module

This is the "design autodetect" functionality.
This commit is contained in:
Thomas Jensen 2023-06-08 22:28:52 +02:00
parent a759026790
commit 47c32efa5f
No known key found for this signature in database
GPG Key ID: A4ACEE270D0FB7DB
8 changed files with 725 additions and 341 deletions

View File

@ -23,10 +23,10 @@ GEN_HDR = parser.h boxes.h lex.yy.h
GEN_SRC = parser.c lex.yy.c
GEN_FILES = $(GEN_SRC) $(GEN_HDR)
ORIG_HDRCL = boxes.in.h config.h
ORIG_HDR = $(ORIG_HDRCL) bxstring.h cmdline.h discovery.h generate.h input.h list.h parsecode.h parsing.h query.h \
regulex.h remove.h shape.h tools.h unicode.h
ORIG_HDR = $(ORIG_HDRCL) bxstring.h cmdline.h detect.h discovery.h generate.h input.h list.h parsecode.h parsing.h \
query.h regulex.h remove.h shape.h tools.h unicode.h
ORIG_GEN = lexer.l parser.y
ORIG_NORM = boxes.c bxstring.c cmdline.c discovery.c generate.c input.c list.c parsecode.c parsing.c query.c \
ORIG_NORM = boxes.c bxstring.c cmdline.c detect.c discovery.c generate.c input.c list.c parsecode.c parsing.c query.c \
regulex.c remove.c shape.c tools.c unicode.c
ORIG_SRC = $(ORIG_GEN) $(ORIG_NORM)
ORIG_FILES = $(ORIG_SRC) $(ORIG_HDR)
@ -110,6 +110,7 @@ lex.yy.c lex.yy.h: lexer.l | check_dir
boxes.o: boxes.c boxes.h cmdline.h discovery.h generate.h input.h list.h parsing.h query.h remove.h tools.h unicode.h config.h | check_dir
bxstring.o: bxstring.c bxstring.h tools.h unicode.h config.h | check_dir
cmdline.o: cmdline.c cmdline.h boxes.h discovery.h query.h tools.h config.h | check_dir
detect.o: detect.c detect.h boxes.h bxstring.h shape.h tools.h config.h | check_dir
discovery.o: discovery.c discovery.h boxes.h tools.h unicode.h config.h | check_dir
generate.o: generate.c generate.h boxes.h shape.h tools.h unicode.h config.h | check_dir
input.o: input.c boxes.h input.h regulex.h tools.h unicode.h config.h | check_dir

View File

@ -147,6 +147,7 @@ extern int color_output_enabled;
/* A single line of text as a boxes string, together with a cache of its visible characters and tab positions. */
typedef struct {
bxstr_t *text; /* text content of the line as a boxes string */
uint32_t *cache_visible; /* only the visible characters of `text`, initially NULL. This is a cache. */
size_t *tabpos; /* tab positions in expanded work strings, or NULL if not needed */
size_t tabpos_len; /* number of tabs in a line */
} line_t;

View File

@ -366,6 +366,16 @@ uint32_t *bxs_first_char_ptr(bxstr_t *pString, size_t n)
/* Return a pointer into the memory of `pString` at the position just behind its indent, or NULL if `pString` is NULL. */
uint32_t *bxs_unindent_ptr(bxstr_t *pString)
{
    uint32_t *after_indent = NULL;
    if (pString != NULL) {
        /* `first_char` maps the index of a visible character to its offset in `memory` */
        after_indent = &pString->memory[pString->first_char[pString->indent]];
    }
    return after_indent;
}
bxstr_t *bxs_trim(bxstr_t *pString)
{
if (pString == NULL) {

View File

@ -24,7 +24,8 @@
/**
* A boxes-internal string. Should be treated as immutable.
* A boxes-internal string. Should be treated as immutable, although some functions DO modify an instance. At the very
* least, make sure never to change the values of an instance from outside this module.
*/
typedef struct {
/** Pointer to the original memory area for the string, NUL terminated */
@ -161,6 +162,15 @@ bxstr_t *bxs_cut_front(bxstr_t *pString, size_t n);
uint32_t *bxs_first_char_ptr(bxstr_t *pString, size_t n);
/**
* Return a pointer to the first character of the visible character directly following the indent (for example a
* letter following some spaces).
* @param pString the string to use
* @return a pointer into existing memory
*/
uint32_t *bxs_unindent_ptr(bxstr_t *pString);
/**
* Create a new string from which all leading and trailing whitespace have been removed.
* @param pString the string to trim, which will not be modified

623
src/detect.c Normal file
View File

@ -0,0 +1,623 @@
/*
* boxes - Command line filter to draw/remove ASCII boxes around text
* Copyright (c) 1999-2023 Thomas Jensen and the boxes contributors
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
* License, version 3, as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
* You should have received a copy of the GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
/*
* Autodetect design used by box in input.
*
* When detecting box shapes in input lines, we can find (in order of precedence):
* 1. (box design: colored, input: colored)
* Colored boxes which are colored as per their design. In that case, matching all the invisible characters
* increases our confidence that we have found the right shape.
* -> use both shapes and input as-is, comparison type `literal`
* 2. (box design: monochrome, input: monochrome)
* Boxes which have no invisible characters because color was never involved. This one is the classic case.
* -> use both shapes and input as-is, comparison type `literal` (same as 1.)
* 3. (box design: monochrome, input: colored)
* Colored boxes which are colored because of lolcat processing or similar. In that case, we can ignore the
* invisible characters in the input.
* -> use shapes as-is, and ignore invisible characters in input, comparison type `ignore_invisible_input`
* 4. (box design: colored, input: monochrome)
* Boxes which have no invisible characters because they have been removed (for example via --no-color), even though
* the original design was colored. In that case, we must ignore the invisible characters in the design.
* -> ignore invisible characters in shapes, use input as-is, comparison type `ignore_invisible_shape`
* 5. Fallback: We do not expect to see a colored box design PLUS lolcat-induced codes, or a case where a colored box
* design had its color removed and replaced with lolcat colors. That's just messy, so we ignore colors on both
* input and box design, which effectively reduces it to case number two.
* -> ignore invisible characters in both shapes and input, comparison type `ignore_invisible_all`
*/
#include "config.h"
#include <unistr.h>
#include <unitypes.h>
#include "boxes.h"
#include "bxstring.h"
#include "shape.h"
#include "tools.h"
#include "detect.h"
#define NUM_COMPARISON_TYPES 4   /* number of elements in `comparison_t` */

/**
 * The ways in which invisible characters are treated when box shapes are compared to input lines. The autodetection
 * tries the values in the order in which they are listed here. When changing this list, keep `NUM_COMPARISON_TYPES`
 * and `comparison_name` in sync.
 */
typedef enum {
    /** leave invisible characters in both shapes and input */
    literal,

    /** leave invisible characters in shapes, but ignore them in input */
    ignore_invisible_input,

    /** ignore invisible characters in shapes, but leave them in input */
    ignore_invisible_shape,

    /** ignore all invisible characters in both shapes and input */
    ignore_invisible_all
} comparison_t;

/**
 * Printable names of the `comparison_t` values, indexed by enum value. Used only in debug statements. The table is
 * read-only, and its size is tied to `NUM_COMPARISON_TYPES` so that surplus entries are rejected by the compiler.
 */
static const char *const comparison_name[NUM_COMPARISON_TYPES] = {
    "literal", "ignore_invisible_input", "ignore_invisible_shape", "ignore_invisible_all"
};
/**
* Determine whether the input text contains ANSI escape codes (i.e. it is potentially colored) or not.
* @return 1 if no invisible characters are in the input, 0 if there are any
*/
static int input_is_mono()
{
for (size_t line_no = 0; line_no < input.num_lines; line_no++) {
if (input.lines[line_no].text->num_chars_invisible > 0) {
return 0;
}
}
return 1;
}
/**
 * Determine whether the given box design contains ANSI escape codes in any of its shapes (i.e. it is potentially
 * colored) or not. Empty shapes are skipped.
 * @param current_design the box design to check
 * @return 1 if no invisible characters are found in the box design, 0 if there are any
 */
static int design_is_mono(design_t *current_design)
{
    for (shape_t scnt = 0; scnt < NUM_SHAPES; ++scnt) {
        if (isempty(current_design->shape + scnt)) {
            continue;
        }
        /* Walk the lines of the shape itself. Its line count is the shape height, not the number of input lines. */
        for (size_t line_no = 0; line_no < current_design->shape[scnt].height; line_no++) {
            bxstr_t *shape_line = current_design->shape[scnt].mbcs[line_no];
            if (shape_line->num_chars_invisible > 0) {
                return 0;
            }
        }
    }
    return 1;
}
/**
 * Find out which sides of the given box design are empty.
 * @param current_design the box design to inspect
 * @return a newly allocated array of `NUM_SIDES` flags as reported by `empty_side()`, indexed by side (`BTOP`,
 *      `BLEF`, `BBOT`, `BRIG`). The caller is expected to free it.
 */
static int *determine_empty_sides(design_t *current_design)
{
    int *side_flags = (int *) calloc(NUM_SIDES, sizeof(int));

    size_t side = 0;
    while (side < NUM_SIDES) {
        side_flags[side] = empty_side(current_design->shape, side);
        ++side;
    }

    #ifdef DEBUG
        fprintf (stderr, "Empty sides: TOP %d, LEFT %d, BOTTOM %d, RIGHT %d\n",
                side_flags[BTOP], side_flags[BLEF], side_flags[BBOT], side_flags[BRIG]);
    #endif
    return side_flags;
}
/**
 * Obtain the visible characters of a line. The filtered text is computed via `bxs_filter_visible()` on first use and
 * stored in the line's `cache_visible` field, from where it is served on subsequent calls.
 * @param line the line whose visible text is requested, may be NULL
 * @return the cached visible text of the line, or NULL if `line` is NULL
 */
static uint32_t *get_visible_text(line_t *line)
{
    if (line == NULL) {
        return NULL;
    }
    if (line->cache_visible == NULL) {
        line->cache_visible = bxs_filter_visible(line->text);
    }
    return line->cache_visible;
}
/**
 * Look for a west corner shape (NW or SW) in the input. Each non-blank line of the corner shape is compared against
 * the start (behind the indent) of each input line in the corner's vertical range: the first `height` input lines
 * for NW, the last `height` input lines for SW. Every match counts as one hit.
 * @param current_design the current design to check
 * @param comp_type the comparison type (how to compare colored strings)
 * @param empty information on which box sides are empty in that design
 * @param corner which west corner to search for
 * @return the number of hits for this corner
 */
static size_t find_west_corner(design_t *current_design, comparison_t comp_type, int *empty, shape_t corner)
{
    size_t hits = 0;

    /* nothing to look for if the design has no such corner */
    const int corner_missing = empty[BLEF] || (corner == NW && empty[BTOP]) || (corner == SW && empty[BBOT]);
    if (corner_missing) {
        return hits;
    }

    const int strip_shape = (comp_type == ignore_invisible_shape || comp_type == ignore_invisible_all);
    const int strip_input = (comp_type == ignore_invisible_input || comp_type == ignore_invisible_all);
    const size_t corner_height = current_design->shape[corner].height;

    for (size_t shape_idx = 0; shape_idx < corner_height; ++shape_idx) {
        bxstr_t *shape_line = current_design->shape[corner].mbcs[shape_idx];
        if (bxs_is_blank(shape_line)) {
            continue;
        }

        /* determine the part of the shape line which is compared (everything behind the indent) */
        uint32_t *filtered_shape = NULL;    /* only set if we allocated a filtered copy */
        uint32_t *shape_relevant = NULL;
        size_t length_relevant;
        if (strip_shape && shape_line->num_chars_invisible > 0) {
            filtered_shape = bxs_filter_visible(shape_line);
            shape_relevant = filtered_shape + shape_line->indent;
            length_relevant = shape_line->num_chars_visible - shape_line->indent;
        }
        else {
            shape_relevant = bxs_unindent_ptr(shape_line);
            length_relevant = shape_line->num_chars - (shape_relevant - shape_line->memory);
        }

        for (size_t k = 0; k < corner_height; ++k) {
            /* the SW corner is expected at the bottom of the input */
            const size_t input_idx = (corner == SW) ? k + (input.num_lines - corner_height) : k;
            if (input_idx >= input.num_lines) {
                break;
            }

            uint32_t *input_relevant = strip_input
                    ? get_visible_text(input.lines + input_idx) + input.lines[input_idx].text->indent
                    : bxs_unindent_ptr(input.lines[input_idx].text);

            /* CHECK more hit points for longer matches, or simple boxes might match too easily */
            if (u32_strncmp(input_relevant, shape_relevant, length_relevant) == 0) {
                ++hits;
            }
        }

        BFREE(filtered_shape);
    }

    #ifdef DEBUG
        fprintf(stderr, "Checking %s corner produced %d hits.\n", shape_name[corner], (int) hits);
    #endif
    return hits;
}
/**
 * Try and find east corner shapes. Every non-empty shape line (with trailing whitespace removed) is compared against
 * the end (before trailing whitespace) of every input line in the corner's vertical range: the first `height` input
 * lines for NE, the last `height` input lines for SE. A hit is generated whenever a match is found.
 * @param current_design the current design to check
 * @param comp_type the comparison type (how to compare colored strings)
 * @param empty information on which box sides are empty in that design
 * @param corner which east corner to search for
 * @return the number of hits for this corner
 */
static size_t find_east_corner(design_t *current_design, comparison_t comp_type, int *empty, shape_t corner)
{
    size_t hits = 0;
    if (empty[BRIG] || (empty[BTOP] && corner == NE) || (empty[BBOT] && corner == SE)) {
        return hits;
    }

    for (size_t j = 0; j < current_design->shape[corner].height; ++j) {
        bxstr_t *shape_line = current_design->shape[corner].mbcs[j];
        if (bxs_is_blank(shape_line)) {
            continue;
        }
        bxstr_t *shape_line_rtrimmed = bxs_rtrim(shape_line);

        uint32_t *shape_relevant_for_freeing = NULL;
        uint32_t *shape_relevant = NULL;
        size_t length_relevant;
        if ((comp_type == ignore_invisible_shape || comp_type == ignore_invisible_all)
            && shape_line_rtrimmed->num_chars_invisible > 0)
        {
            shape_relevant_for_freeing = bxs_filter_visible(shape_line_rtrimmed);
            shape_relevant = shape_relevant_for_freeing;
            length_relevant = shape_line_rtrimmed->num_chars_visible;
        }
        else {
            shape_relevant = shape_line_rtrimmed->memory;
            length_relevant = shape_line_rtrimmed->num_chars;
        }

        for (size_t k = 0; k < current_design->shape[corner].height; ++k) {
            size_t a = k;
            if (corner == SE) {
                a += input.num_lines - current_design->shape[corner].height;
            }
            if (a >= input.num_lines) {
                break;
            }
            bxstr_t *input_line = input.lines[a].text;

            /* number of visible characters in front of the trailing whitespace */
            size_t visible_end = input_line->num_chars_visible - input_line->trailing;

            /*
             * Position `input_relevant` so that exactly `length_relevant` characters remain up to the end of the
             * line's content. Lengths are compared BEFORE any pointer is moved, so that we never form a pointer in
             * front of the buffer, and never squeeze an index through a narrower signed type.
             */
            uint32_t *input_relevant = NULL;
            if (comp_type == ignore_invisible_input || comp_type == ignore_invisible_all) {
                input_relevant = get_visible_text(input.lines + a);
                if (length_relevant > visible_end) {
                    continue;   /* shape line is longer than the input line */
                }
                input_relevant += visible_end - length_relevant;
            }
            else {
                size_t end_idx = input_line->first_char[visible_end];
                if (length_relevant > end_idx) {
                    continue;   /* shape line is longer than the input line */
                }
                input_relevant = input_line->memory + (end_idx - length_relevant);
            }

            if (u32_strncmp(input_relevant, shape_relevant, length_relevant) == 0) {
                ++hits;  /* CHECK more hit points for longer matches, or simple boxes might match too easily */
            }
        }

        BFREE(shape_relevant_for_freeing);
        bxs_free(shape_line_rtrimmed);
    }

    #ifdef DEBUG
        fprintf(stderr, "Checking %s corner produced %d hits.\n", shape_name[corner], (int) hits);
    #endif
    return hits;
}
/**
 * Look for a horizontal shape (one of NNW, N, NNE, SSE, S, SSW) between the box corners. Each non-blank line of the
 * shape is searched for in each input line of the shape's vertical range: the first `height` input lines, or the
 * last `height` input lines for the shapes from SSE to SSW. A non-elastic shape generates a hit when it is found. An
 * elastic shape must be followed immediately by a second occurrence of itself to generate a hit.
 * @param current_design the current design to check
 * @param comp_type the comparison type (how to compare colored strings)
 * @param empty information on which box sides are empty in that design
 * @param hshape which horizontal shape to search for
 * @return the number of hits for this horizontal shape
 */
static size_t find_horizontal_shape(design_t *current_design, comparison_t comp_type, int *empty, shape_t hshape)
{
    /*
     * NOTE(review): A single hit is granted as soon as EITHER the top or the bottom side is empty, no matter which
     * of the two sides `hshape` belongs to. Presumably only the side of `hshape` should count - confirm.
     */
    if (empty[BTOP] || empty[BBOT]) {
        return 1;   /* horizontal box part is empty */
    }

    size_t hits = 0;
    const int strip_shape = (comp_type == ignore_invisible_shape || comp_type == ignore_invisible_all);
    const int strip_input = (comp_type == ignore_invisible_input || comp_type == ignore_invisible_all);
    const int at_bottom = (hshape >= SSE && hshape <= SSW);
    const size_t shape_height = current_design->shape[hshape].height;

    for (size_t shape_idx = 0; shape_idx < shape_height; ++shape_idx) {
        bxstr_t *shape_line = current_design->shape[hshape].mbcs[shape_idx];
        if (bxs_is_blank(shape_line)) {
            continue;
        }

        /* horizontal shape lines are compared in full, including any leading or trailing whitespace */
        uint32_t *filtered_shape = NULL;    /* only set if we allocated a filtered copy */
        uint32_t *shape_relevant = NULL;
        size_t length_relevant;
        if (strip_shape && shape_line->num_chars_invisible > 0) {
            filtered_shape = bxs_filter_visible(shape_line);
            shape_relevant = filtered_shape;
            length_relevant = shape_line->num_chars_visible;
        }
        else {
            shape_relevant = shape_line->memory;
            length_relevant = shape_line->num_chars;
        }

        for (size_t k = 0; k < shape_height; ++k) {
            const size_t input_idx = at_bottom ? k + (input.num_lines - shape_height) : k;
            if (input_idx >= input.num_lines) {
                break;
            }

            /* CHECK this eats blank NW corners, too */
            uint32_t *input_relevant = strip_input
                    ? get_visible_text(input.lines + input_idx) + input.lines[input_idx].text->indent
                    : bxs_unindent_ptr(input.lines[input_idx].text);

            uint32_t *found = u32_strstr(input_relevant, shape_relevant);
            if (found == NULL) {
                continue;
            }
            if (!current_design->shape[hshape].elastic) {
                ++hits;
                continue;
            }

            /* elastic shape: require a second occurrence directly behind the first one */
            found += length_relevant;
            if (found - input_relevant >= (long) u32_strlen(input_relevant)) {
                continue;
            }
            if (u32_strncmp(found, shape_relevant, length_relevant) == 0) {
                ++hits;
            }
        }

        BFREE(filtered_shape);
    }

    #ifdef DEBUG
        fprintf(stderr, "Checking %s shape produced %d hits.\n", shape_name[hshape], (int) hits);
    #endif
    return hits;
}
/**
 * Iterate over all input lines except for potential top and bottom box parts. Check if west line starts match a
 * non-empty shape line. If so, generate a hit. Each input line generates at most one hit.
 * @param current_design the current design to check
 * @param comp_type the comparison type (how to compare colored strings)
 * @param empty information on which box sides are empty in that design
 * @param vshape which vertical shape to search for
 * @return the number of hits for this vertical shape
 */
static size_t find_vertical_west(design_t *current_design, comparison_t comp_type, int *empty, shape_t vshape)
{
    size_t hits = 0;
    if (((empty[BTOP] ? 0 : current_design->shape[NW].height) + (empty[BBOT] ? 0 : current_design->shape[SW].height))
            >= input.num_lines) {
        return hits;    /* top and bottom box parts leave no room for a box body */
    }
    if (isempty(current_design->shape + vshape)) {
        return hits;
    }

    for (size_t k = empty[BTOP] ? 0 : current_design->shape[NW].height;
            k < input.num_lines - (empty[BBOT] ? 0 : current_design->shape[SW].height); ++k)
    {
        uint32_t *input_relevant = NULL;
        if (comp_type == ignore_invisible_input || comp_type == ignore_invisible_all) {
            input_relevant = get_visible_text(input.lines + k) + input.lines[k].text->indent;
        }
        else {
            input_relevant = bxs_unindent_ptr(input.lines[k].text);
        }

        for (size_t j = 0; j < current_design->shape[vshape].height; ++j) {
            bxstr_t *shape_line = current_design->shape[vshape].mbcs[j];
            if (bxs_is_blank(shape_line)) {
                continue;
            }

            uint32_t *shape_relevant_for_freeing = NULL;
            uint32_t *shape_relevant = NULL;
            size_t length_relevant;
            if ((comp_type == ignore_invisible_shape || comp_type == ignore_invisible_all)
                && shape_line->num_chars_invisible > 0)
            {
                shape_relevant_for_freeing = bxs_filter_visible(shape_line);
                shape_relevant = shape_relevant_for_freeing + shape_line->indent;
                length_relevant = shape_line->num_chars_visible - shape_line->indent;
            }
            else {
                shape_relevant = bxs_unindent_ptr(shape_line);
                length_relevant = shape_line->num_chars - (shape_relevant - shape_line->memory);
            }

            /* Release the filtered copy BEFORE possibly leaving the loop, or it would leak on every match. */
            int matched = (u32_strncmp(input_relevant, shape_relevant, length_relevant) == 0);
            BFREE(shape_relevant_for_freeing);
            if (matched) {
                ++hits;
                break;
            }
        }
    }

    #ifdef DEBUG
        fprintf(stderr, "Checking %s shape produced %d hits.\n", shape_name[vshape], (int) hits);
    #endif
    return hits;
}
/**
 * Iterate over all input lines except for potential top and bottom box parts. Check if east line ends match a
 * non-empty shape line. If so, generate a hit. Each shape line generates at most one hit, because the search for a
 * shape line stops at the first input line that matches.
 * @param current_design the current design to check
 * @param comp_type the comparison type (how to compare colored strings)
 * @param empty information on which box sides are empty in that design
 * @param vshape which vertical shape to search for
 * @return the number of hits for this vertical shape
 */
static size_t find_vertical_east(design_t *current_design, comparison_t comp_type, int *empty, shape_t vshape)
{
    size_t hits = 0;
    if (((empty[BTOP] ? 0 : current_design->shape[NW].height) + (empty[BBOT] ? 0 : current_design->shape[SW].height))
            >= input.num_lines) {
        return hits;    /* top and bottom box parts leave no room for a box body */
    }
    if (isempty(current_design->shape + vshape)) {
        return hits;
    }

    for (size_t j = 0; j < current_design->shape[vshape].height; ++j) {
        bxstr_t *shape_line = current_design->shape[vshape].mbcs[j];
        if (bxs_is_blank(shape_line)) {
            continue;
        }
        bxstr_t *shape_line_trimmed = bxs_trim(shape_line);

        uint32_t *shape_relevant_for_freeing = NULL;
        uint32_t *shape_relevant = NULL;
        size_t length_relevant;
        if ((comp_type == ignore_invisible_shape || comp_type == ignore_invisible_all)
            && shape_line_trimmed->num_chars_invisible > 0)
        {
            shape_relevant_for_freeing = bxs_filter_visible(shape_line_trimmed);
            shape_relevant = shape_relevant_for_freeing;
            length_relevant = shape_line_trimmed->num_chars_visible;
        }
        else {
            shape_relevant = shape_line_trimmed->memory;
            length_relevant = shape_line_trimmed->num_chars;
        }

        for (size_t k = empty[BTOP] ? 0 : current_design->shape[NW].height;
                k < input.num_lines - (empty[BBOT] ? 0 : current_design->shape[SW].height); ++k)
        {
            bxstr_t *input_line = input.lines[k].text;

            /* number of visible characters in front of the trailing whitespace */
            size_t visible_end = input_line->num_chars_visible - input_line->trailing;

            /*
             * Position `input_relevant` so that exactly `length_relevant` characters remain up to the end of the
             * line's content. Lengths are compared BEFORE any pointer is moved, so that we never form a pointer in
             * front of the buffer, and never squeeze an index through a narrower signed type.
             */
            uint32_t *input_relevant = NULL;
            if (comp_type == ignore_invisible_input || comp_type == ignore_invisible_all) {
                input_relevant = get_visible_text(input.lines + k);
                if (length_relevant > visible_end) {
                    continue;   /* shape line is longer than the input line */
                }
                input_relevant += visible_end - length_relevant;
            }
            else {
                size_t end_idx = input_line->first_char[visible_end];
                if (length_relevant > end_idx) {
                    continue;   /* shape line is longer than the input line */
                }
                input_relevant = input_line->memory + (end_idx - length_relevant);
            }

            if (u32_strncmp(input_relevant, shape_relevant, length_relevant) == 0) {
                ++hits;
                break;
            }
        }

        BFREE(shape_relevant_for_freeing);
        bxs_free(shape_line_trimmed);
    }

    #ifdef DEBUG
        fprintf(stderr, "Checking %s shape produced %d hits.\n", shape_name[vshape], (int) hits);
    #endif
    return hits;
}
/**
 * Calculate how well the given box design matches the input, by summing up the hits which the individual shapes of
 * the design produce.
 * @param current_design the design to match against the input
 * @param comp_type the comparison type (how to compare colored strings)
 * @return the total number of hits for the design, or 0 in case of an internal error (unknown shape)
 */
static long match_design(design_t *current_design, comparison_t comp_type)
{
    int *empty = determine_empty_sides(current_design);
    long hits = 0;

    for (shape_t scnt = 0; scnt < NUM_SHAPES; ++scnt) {
        switch (scnt) {
            case NW:
            case SW:
                hits += find_west_corner(current_design, comp_type, empty, scnt);
                break;

            case NE:
            case SE:
                hits += find_east_corner(current_design, comp_type, empty, scnt);
                break;

            case NNW: case N: case NNE:
            case SSE: case S: case SSW:
                hits += find_horizontal_shape(current_design, comp_type, empty, scnt);
                break;

            case ENE: case E: case ESE:
                hits += find_vertical_east(current_design, comp_type, empty, scnt);
                break;

            case WSW: case W: case WNW:
                hits += find_vertical_west(current_design, comp_type, empty, scnt);
                break;

            default:
                fprintf(stderr, "%s: internal error (scnt=%d)\n", PROJECT, (int) scnt);
                BFREE(empty);   /* do not leak the side information on the error path */
                return 0;
        }
    }

    BFREE(empty);
    return hits;
}
/*
 * Autodetect the design used by the box in the input. The comparison types are tried one after the other. For each
 * of them, only those designs are matched whose coloring (mono or not), combined with the coloring of the input,
 * fits the comparison type. The design with the most hits wins. Once the best score exceeds 2, no further
 * comparison types are tried.
 * Returns the design with the most hits, or NULL if no design produced a single hit.
 */
design_t *autodetect_design()
{
    design_t *best_design = NULL;   /* design with the highest score so far */
    long best_score = 0;            /* the score of `best_design` */
    const int mono_input = input_is_mono();

    (void) comparison_name;   /* used only in debug statements */

    for (comparison_t comp_type = 0; comp_type < NUM_COMPARISON_TYPES; comp_type++) {
        for (size_t dcnt = 0; ((int) dcnt) < num_designs; ++dcnt) {
            design_t *current_design = designs + dcnt;
            const int mono_design = design_is_mono(current_design);

            /* Does this comparison type make sense for the coloring of input and design? */
            int applicable;
            switch (comp_type) {
                case literal:
                    applicable = (mono_input == mono_design);
                    break;
                case ignore_invisible_input:
                    applicable = (!mono_input && mono_design);
                    break;
                case ignore_invisible_shape:
                    applicable = (mono_input && !mono_design);
                    break;
                case ignore_invisible_all:
                    applicable = (!mono_input && !mono_design);
                    break;
                default:
                    applicable = 1;
                    break;
            }
            if (!applicable) {
                #ifdef DEBUG
                    fprintf(stderr, "Design \"%s\" skipped for comparison type '%s' because mono_input=%d and "
                        "mono_design=%d\n", current_design->name, comparison_name[comp_type], mono_input, mono_design);
                #endif
                continue;
            }

            #ifdef DEBUG
                fprintf(stderr, "CONSIDERING DESIGN ---- \"%s\" ---------------\n", current_design->name);
                fprintf(stderr, "    comparison_type = '%s'\n", comparison_name[comp_type]);
            #endif
            const long score = match_design(current_design, comp_type);
            #ifdef DEBUG
                fprintf(stderr, "Design \"%s\" scored %ld points\n", current_design->name, score);
            #endif

            if (score > best_score) {
                best_score = score;
                best_design = current_design;
            }
        }

        if (best_score > 2) {
            break;   /* do not try other comparison types if one found something */
        }
    }

    #ifdef DEBUG
        if (best_design) {
            fprintf(stderr, "CHOOSING \"%s\" design (%ld hits).\n", best_design->name, best_score);
        }
        else {
            fprintf(stderr, "NO DESIGN FOUND WITH EVEN ONE HIT POINT!\n");
        }
    #endif
    return best_design;
}
/* vim: set cindent sw=4: */

38
src/detect.h Normal file
View File

@ -0,0 +1,38 @@
/*
* boxes - Command line filter to draw/remove ASCII boxes around text
* Copyright (c) 1999-2023 Thomas Jensen and the boxes contributors
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
* License, version 3, as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
* You should have received a copy of the GNU General Public License along with this program.
* If not, see <https://www.gnu.org/licenses/>.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
/*
* Autodetect design used by box in input.
*/
#ifndef BOXES_DETECT_H
#define BOXES_DETECT_H
#include "boxes.h"
/**
* Autodetect design used by box in input.
* This requires knowledge about ALL designs, so the entire config file had to be parsed at some earlier time.
* @return != NULL: success, pointer to the design which produced the most hits;
* == NULL: no design produced any hits
*/
design_t *autodetect_design();
#endif /* BOXES_DETECT_H */
/* vim: set cindent sw=4: */

View File

@ -24,6 +24,7 @@
#include <string.h>
#include <unistr.h>
#include "detect.h"
#include "shape.h"
#include "boxes.h"
#include "tools.h"
@ -32,20 +33,18 @@
static int best_match(const line_t *line,
char **ws, char **we, char **es, char **ee)
/*
* Find positions of west and east box parts in line.
*
* line line to examine
* ws etc. result parameters (west start, west end, east start, east end)
*
* RETURNS: > 0 a match was found (ws etc are set to indicate positions)
* == 0 no match was found
* < 0 internal error (out of memory)
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
/**
* Find positions of west and east box parts in line.
* @param line line to examine
* @param ws result parameter: west start as pointer to a character in `line->text->memory`
* @param we result parameter: west end as pointer to a character in `line->text->memory`
* @param es result parameter: east start as pointer to a character in `line->text->memory`
* @param ee result parameter: east end as pointer to a character in `line->text->memory`
* @return > 0: a match was found (ws etc. are set to indicate positions);
* == 0: no match was found;
* < 0: internal error (out of memory)
*/
static int best_match(const line_t *line, uint32_t **ws, uint32_t **we, uint32_t **es, uint32_t **ee)
{
size_t numw = 0; /* number of shape lines on west side */
size_t nume = 0; /* number of shape lines on east side */
@ -70,8 +69,8 @@ static int best_match(const line_t *line,
nume += opt.design->shape[ESE].height;
#ifdef DEBUG
fprintf (stderr, "Number of WEST side shape lines: %d\n", (int) numw);
fprintf (stderr, "Number of EAST side shape lines: %d\n", (int) nume);
fprintf (stderr, "Number of WEST side shape lines: %d\n", (int) numw);
fprintf (stderr, "Number of EAST side shape lines: %d\n", (int) nume);
#endif
/*
@ -498,301 +497,6 @@ static int detect_horiz(const int aside, size_t *hstart, size_t *hend)
/*
 *  Autodetect design used by box in input.
 *
 *  This requires knowledge about ALL designs, so the entire config file had
 *  to be parsed at some earlier time. Every design is awarded hit points for
 *  each of its shape lines that is found in the expected place of the input.
 *  The design with the most hit points wins.
 *
 *  RETURNS:  != NULL   success, pointer to detected design
 *            == NULL   no design produced a hit, or internal error
 */
static design_t *detect_design()
{
    design_t *d = designs;          /* ptr to currently tested design */
    long hits;                      /* hit points of the current design */
    long maxhits = 0;               /* maximum no. of hits so far */
    design_t *res = NULL;           /* ptr to design with the most hits */
    int dcnt;                       /* design loop counter */
    shape_t scnt;                   /* shape loop counter */
    size_t j, k;
    char *p;
    char *s;
    line_t shpln;                   /* a line which is part of a shape */
    size_t a;
    int empty[NUM_SIDES];

    for (dcnt = 0; dcnt < num_designs; ++dcnt, ++d) {
        #ifdef DEBUG
            fprintf(stderr, "CONSIDERING DESIGN ---- \"%s\" ---------------\n", d->name);
        #endif
        hits = 0;

        for (j = 0; j < NUM_SIDES; ++j) {
            empty[j] = empty_side(d->shape, j);
        }
        #ifdef DEBUG
            fprintf (stderr, "Empty sides: TOP %d, LEFT %d, BOTTOM %d, RIGHT %d\n",
                    empty[BTOP], empty[BLEF], empty[BBOT], empty[BRIG]);
        #endif

        for (scnt = 0; scnt < NUM_SHAPES; ++scnt) {

            switch (scnt) {
                case NW:
                case SW:
                    /*
                     *  Try and find west corner shapes. Every non-empty shape
                     *  line is searched for on every input line. A hit is
                     *  generated whenever a match is found.
                     */
                    if (empty[BLEF] || (empty[BTOP] && scnt == NW)
                            || (empty[BBOT] && scnt == SW)) {
                        break;
                    }
                    for (j = 0; j < d->shape[scnt].height; ++j) {
                        shpln.text = d->shape[scnt].chars[j];  // TODO
                        shpln.len = d->shape[scnt].width;
                        if (empty_line(&shpln)) {
                            continue;
                        }
                        for (s = shpln.text; *s == ' ' || *s == '\t'; ++s) {
                        }
                        for (k = 0; k < d->shape[scnt].height; ++k) {
                            a = k;
                            if (scnt == SW) {
                                a += input.num_lines - d->shape[scnt].height;
                            }
                            if (a >= input.num_lines) {
                                break;
                            }
                            for (p = input.lines[a].text; *p == ' ' || *p == '\t'; ++p) {
                            }
                            if (strncmp(p, s, shpln.len - (s - shpln.text)) == 0) {
                                ++hits;
                            }
                        }
                    }
                    #ifdef DEBUG
                        fprintf(stderr, "After %s corner check:\t%ld hits.\n", shape_name[scnt], hits);
                    #endif
                    break;

                case NE:
                case SE:
                    /*
                     *  Try and find east corner shapes. Every non-empty shape
                     *  line is searched for on every input line. A hit is
                     *  generated whenever a match is found.
                     */
                    if (empty[BRIG] || (empty[BTOP] && scnt == NE)
                            || (empty[BBOT] && scnt == SE)) {
                        break;
                    }
                    for (j = 0; j < d->shape[scnt].height; ++j) {
                        shpln.text = d->shape[scnt].chars[j];
                        shpln.len = d->shape[scnt].width;
                        if (empty_line(&shpln)) {
                            continue;
                        }
                        for (s = shpln.text + shpln.len - 1;
                                (*s == ' ' || *s == '\t') && shpln.len;
                                --s, --(shpln.len)) {
                        }
                        for (k = 0; k < d->shape[scnt].height; ++k) {
                            a = k;
                            if (scnt == SE) {
                                a += input.num_lines - d->shape[scnt].height;
                            }
                            if (a >= input.num_lines) {
                                break;
                            }
                            for (p = input.lines[a].text + input.lines[a].len - 1;
                                    p >= input.lines[a].text && (*p == ' ' || *p == '\t');
                                    --p) {
                            }
                            p = p - shpln.len + 1;
                            if (p < input.lines[a].text) {
                                continue;
                            }
                            if (strncmp(p, shpln.text, shpln.len) == 0) {
                                ++hits;
                            }
                        }
                    }
                    #ifdef DEBUG
                        fprintf (stderr, "After %s corner check:\t%ld hits.\n", shape_name[scnt], hits);
                    #endif
                    break;

                default:
                    if (isempty(d->shape + scnt)) {
                        continue;
                    }
                    if ((scnt >= NNW && scnt <= NNE)
                            || (scnt >= SSE && scnt <= SSW)) {
                        /*
                         *  Try and find horizontal shapes between the box
                         *  corners. Every non-empty shape line is searched for
                         *  on every input line. Elastic shapes must occur
                         *  twice in an uninterrupted row to generate a hit.
                         */
                        if ((scnt >= NNW && scnt <= NNE && empty[BTOP])
                                || (scnt >= SSE && scnt <= SSW && empty[BBOT])) {
                            ++hits;
                            break;                /* horizontal box part is empty */
                        }
                        for (j = 0; j < d->shape[scnt].height; ++j) {
                            shpln.text = d->shape[scnt].chars[j];
                            shpln.len = d->shape[scnt].width;
                            if (empty_line(&shpln)) {
                                continue;
                            }
                            for (k = 0; k < d->shape[scnt].height; ++k) {
                                a = k;
                                if (scnt >= SSE && scnt <= SSW) {
                                    a += input.num_lines - d->shape[scnt].height;
                                }
                                if (a >= input.num_lines) {
                                    break;
                                }
                                for (p = input.lines[a].text;
                                        *p == ' ' || *p == '\t'; ++p) {
                                }
                                p += d->shape[NW].width;
                                if (p - input.lines[a].text
                                        >= (long) input.lines[a].len) {
                                    continue;
                                }
                                p = strstr(p, shpln.text);
                                if (p) {
                                    if (d->shape[scnt].elastic) {
                                        p += shpln.len;
                                        if (p - input.lines[a].text
                                                >= (long) input.lines[a].len) {
                                            continue;
                                        }
                                        if (!strncmp(p, shpln.text, shpln.len)) {
                                            ++hits;
                                        }
                                    }
                                    else {
                                        ++hits;
                                    }
                                }
                            }
                        }
                    }
                    else if ((scnt >= ENE && scnt <= ESE)
                            || (scnt >= WSW && scnt <= WNW)) {
                        /* handle later */
                        break;
                    }
                    else {
                        fprintf(stderr, "%s: internal error\n", PROJECT);
                        return NULL;
                    }
                    #ifdef DEBUG
                        fprintf (stderr, "After %s shape check:\t%ld hits.\n",
                                shape_name[scnt], hits);
                    #endif
            }
        }

        /*
         *  Now iterate over all input lines except for potential top and
         *  bottom box parts. Check if east and west line ends match a
         *  non-empty shape line. If so, generate a hit.
         */
        if (((empty[BTOP] ? 0 : d->shape[NW].height)
                + (empty[BBOT] ? 0 : d->shape[SW].height)) < input.num_lines) {
            for (k = empty[BTOP] ? 0 : d->shape[NW].height;
                    k < input.num_lines - (empty[BBOT] ? 0 : d->shape[SW].height);
                    ++k) {
                for (p = input.lines[k].text; *p == ' ' || *p == '\t'; ++p) {
                }

                /* west side: `a` serves as a flag telling that the current input line produced a hit */
                for (scnt = WSW; scnt <= WNW; ++scnt) {
                    a = 0;
                    if (isempty(d->shape + scnt)) {
                        continue;
                    }
                    for (j = 0; j < d->shape[scnt].height; ++j) {
                        shpln.text = d->shape[scnt].chars[j];
                        shpln.len = d->shape[scnt].width;
                        if (empty_line(&shpln)) {
                            continue;
                        }
                        for (s = shpln.text; *s == ' ' || *s == '\t'; ++s) {
                        }
                        if (strncmp(p, s, shpln.len - (s - shpln.text)) == 0) {
                            ++hits;
                            a = 1;
                            break;
                        }
                    }
                    if (a) {
                        break;
                    }
                }

                /* east side: `a` is a flag here, too, so it must not be used as a line index */
                for (scnt = ENE; scnt <= ESE; ++scnt) {
                    a = 0;
                    if (isempty(d->shape + scnt)) {
                        continue;
                    }
                    for (j = 0; j < d->shape[scnt].height; ++j) {
                        shpln.text = d->shape[scnt].chars[j];
                        shpln.len = d->shape[scnt].width;
                        if (empty_line(&shpln)) {
                            continue;
                        }
                        /* skip trailing whitespace of the CURRENT input line (index k) */
                        for (p = input.lines[k].text + input.lines[k].len - 1;
                                p >= input.lines[k].text && (*p == ' ' || *p == '\t');
                                --p) {
                        }
                        for (s = shpln.text + shpln.len - 1;
                                (*s == ' ' || *s == '\t') && shpln.len;
                                --s, --(shpln.len)) {
                        }
                        p = p - shpln.len + 1;
                        if (p < input.lines[k].text) {
                            continue;   /* shape line is longer than the input line */
                        }
                        if (strncmp(p, shpln.text, shpln.len) == 0) {
                            ++hits;
                            a = 1;
                            break;
                        }
                    }
                    if (a) {
                        break;
                    }
                }
            }
        }
        #ifdef DEBUG
            fprintf (stderr, "After side checks:\t%ld hits.\n", hits);
        #endif

        if (hits > maxhits) {
            maxhits = hits;
            res = d;
        }
    }

    #ifdef DEBUG
        if (res) {
            fprintf (stderr, "CHOOSING \"%s\" design (%ld hits).\n", res->name, maxhits);
        } else {
            fprintf (stderr, "NO DESIGN FOUND WITH EVEN ONE HIT!\n");
        }
    #endif

    return res;
}
static void add_spaces_to_line(line_t* line, const size_t n)
{
if (n == 0) {
@ -828,7 +532,7 @@ int remove_box()
* config file had to be parsed (earlier).
*/
if (opt.design_choice_by_user == 0) {
design_t *tmp = detect_design();
design_t *tmp = autodetect_design();
if (tmp) {
opt.design = tmp;
#ifdef DEBUG
@ -905,8 +609,8 @@ int remove_box()
* Phase 3: Iterate over body lines, removing box sides where applicable
*/
for (j = textstart; j < textend; ++j) {
char *ws, *we, *es, *ee; /* west start & end, east start&end */
char *p;
uint32_t *ws, *we, *es, *ee; /* west start & end, east start & end */
uint32_t *p;
#ifdef DEBUG
fprintf(stderr, "Calling best_match() for line %d:\n", (int) j);
@ -921,22 +625,22 @@ int remove_box()
fprintf(stderr, "\033[00;33;01mline %2d: no side match\033[00m\n", (int) j);
#endif
}
else { // TODO HERE
else {
#ifdef DEBUG
fprintf(stderr, "\033[00;33;01mline %2d: west: %d (\'%c\') to %d (\'%c\') [len %d]; "
"east: %d (\'%c\') to %d (\'%c\') [len %d]\033[00m\n", (int) j,
(int) (ws ? ws - input.lines[j].text : 0), ws ? ws[0] : '?',
(int) (we ? we - input.lines[j].text - 1 : 0), we ? we[-1] : '?',
(int) (ws && we ? (we - input.lines[j].text - (ws - input.lines[j].text)) : 0),
(int) (es ? es - input.lines[j].text : 0), es ? es[0] : '?',
(int) (ee ? ee - input.lines[j].text - 1 : 0), ee ? ee[-1] : '?',
(int) (es && ee ? (ee - input.lines[j].text - (es - input.lines[j].text)) : 0));
fprintf(stderr, "\033[00;33;01mline %2d: west: %d (\'%lc\') to %d (\'%lc\') [len %d]; "
"east: %d (\'%lc\') to %d (\'%lc\') [len %d]\033[00m\n", (int) j,
(int) (ws ? ws - input.lines[j].text->memory : 0), ws ? ws[0] : to_utf32('?'),
(int) (we ? we - input.lines[j].text->memory - 1 : 0), we ? we[-1] : to_utf32('?'),
(int) (ws && we ? (we - input.lines[j].text->memory - (ws - input.lines[j].text->memory)) : 0),
(int) (es ? es - input.lines[j].text->memory : 0), es ? es[0] : to_utf32('?'),
(int) (ee ? ee - input.lines[j].text->memory - 1 : 0), ee ? ee[-1] : to_utf32('?'),
(int) (es && ee ? (ee - input.lines[j].text->memory - (es - input.lines[j].text->memory)) : 0));
#endif
if (ws && we) {
did_something = 1;
for (p = ws; p < we; ++p) {
size_t idx = p - input.lines[j].text;
*p = ' ';
size_t idx = p - input.lines[j].text->memory; // TODO HERE
*p = char_space; // TODO this might be wrong
set_char_at(input.lines[j].mbtext, input.lines[j].posmap[idx], char_space);
}
}

View File

@ -190,7 +190,7 @@ int isempty(const sentry_t *shape)
{
if (shape == NULL) {
return 1;
} else if (shape->chars == NULL) {
} else if (shape->chars == NULL || shape->mbcs == NULL) {
return 1;
} else if (shape->width == 0 || shape->height == 0) {
return 1;
@ -358,19 +358,16 @@ shape_t leftmost(const int aside, const int cnt)
int empty_side(sentry_t *sarr, const int aside)
/*
* Return true if the shapes on the given side consist entirely out of
* spaces - and spaces only, tabs are considered non-empty.
/**
* Return true if the shapes on the given side consist entirely of spaces - and spaces only, tabs are considered
* non-empty.
*
* sarr pointer to shape list of design to check
* aside the box side (one of BTOP etc.)
*
* RETURNS: == 0 side is not empty
* != 0 side is empty
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* @param sarr pointer to shape list of design to check
* @param aside the box side (one of `BTOP` etc.)
* @return == 0: side is not empty;
* \!= 0: side is empty
*/
int empty_side(sentry_t *sarr, const int aside)
{
int i;
@ -387,4 +384,4 @@ int empty_side(sentry_t *sarr, const int aside)
/*EOF*/ /* vim: set sw=4: */
/* vim: set sw=4: */