From 79555dbf297f827844af2ae16041b5f7b1a8fc38 Mon Sep 17 00:00:00 2001 From: Thomas Jensen Date: Wed, 23 Jun 1999 19:14:53 +0000 Subject: [PATCH] Initial revision --- src/remove.c | 967 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/remove.h | 28 ++ 2 files changed, 995 insertions(+) create mode 100644 src/remove.c create mode 100644 src/remove.h diff --git a/src/remove.c b/src/remove.c new file mode 100644 index 0000000..f9466db --- /dev/null +++ b/src/remove.c @@ -0,0 +1,967 @@ +/* + * File: remove.c + * Project Main: boxes.c + * Date created: June 23, 1999 (Wednesday, 20:59h) + * Author: Thomas Jensen + * tsjensen@stud.informatik.uni-erlangen.de + * Version: $Id$ + * Language: ANSI C + * World Wide Web: http://home.pages.de/~jensen/boxes/ + * Purpose: Box removal, i.e. the deletion of boxes + * Remarks: --- + * + * Revision History: + * + * $Log$ +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ + +#include +#include +#include +#include "shape.h" +#include "boxes.h" +#include "tools.h" +#include "remove.h" + +static const char rcsid_remove_c[] = + "$Id$"; + + + +static int best_match (const line_t *line, + char **ws, char **we, char **es, char **ee) +/* + * Find positions of west and east box parts in line. + * + * line line to examine + * ws etc. result parameters (west start, west end, east start, east end) + * + * RETURNS: > 0 a match was found (ws etc are set to indicate positions) + * == 0 no match was found + * < 0 internal error (out of memory) + * +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ +{ + size_t numw = 0; /* number of shape lines on west side */ + size_t nume = 0; /* number of shape lines on east side */ + size_t j; /* counts number of lines of all shapes tested */ + size_t k; /* line counter within shape */ + int w; /* shape counter */ + sentry_t *cs; /* current shape */ + char *s; /* duplicate of current shape part */ + char *p; /* position found by strstr */ + size_t cq; /* current quality */ + char *q; /* space check rover */ + size_t quality; + + *ws = *we = *es = *ee = NULL; + + numw = opt.design->shape[WNW].height; + numw += opt.design->shape[ W ].height; + numw += opt.design->shape[WSW].height; + + nume = opt.design->shape[ENE].height; + nume += opt.design->shape[ E ].height; + nume += opt.design->shape[ESE].height; + + #ifdef DEBUG + fprintf (stderr, "Number of WEST side shape lines: %d\n", numw); + fprintf (stderr, "Number of EAST side shape lines: %d\n", nume); + #endif + + /* + * Find match for WEST side + */ + quality = 0; + cs = opt.design->shape + WNW; + for (j=0,k=0,w=3; jheight) { + k = 0; + cs = opt.design->shape + west_side[--w]; + } + + s = (char *) strdup (cs->chars[k]); + if (s == NULL) { + perror (PROJECT); + return -1; + } + cq = cs->width; + + do { + p = strstr (line->text, s); + if (p) { + q = p-1; + while (q >= line->text) { + if (*q-- != ' ') { + p = NULL; + break; + } + } + if (p) + break; + } + if (!p && cq) { + if (*s == ' ') + memmove (s, s+1, cq--); + else if (s[cq-1] == ' ') + s[--cq] = '\0'; + else { + cq = 0; + break; + } + } + } while (cq && !p); + + if (cq == 0) { + BFREE (s); + continue; + } + + /* + * If the current match is the best yet, adjust result values + */ + if (cq > quality) { + quality = cq; + *ws = p; + *we = p + cq; + } + + BFREE (s); + } + + /* + * Find match for EAST side + */ + quality = 0; + cs = opt.design->shape + ENE; + for (j=0,k=0,w=1; jheight) { + k = 0; + cs = opt.design->shape + east_side[++w]; + } + #ifdef DEBUG + fprintf (stderr, "\nj %d, k %d, w %d, cs->chars[k] = \"%s\"\n", + j, k, w, cs->chars[k]?cs->chars[k]:"(null)"); + #endif + + s = (char *) strdup (cs->chars[k]); + if (s == NULL) { + perror (PROJECT); + return -1; + } + cq = cs->width; + + do { + p = my_strnrstr (line->text, s, cq, 0); + if (p) { + q = p + cq; + while (*q) { + if (*q++ != ' ') { + p = NULL; + break; + } + } + if (p) + break; + } + if (!p && cq) { + if (*s == ' ') + memmove (s, s+1, cq--); + else if (s[cq-1] == ' ') + s[--cq] = '\0'; + else { + cq = 0; + break; + } + } + } while (cq && !p); + + if (cq == 0) { + BFREE (s); + continue; + } + + /* + * If the current match is the best yet, adjust result values + */ + if (cq > quality) { + quality = cq; + *es = p; + *ee = p + cq; + } + + BFREE (s); + } + + return *ws || *es ? 1:0; +} + + + +static int hmm (const int aside, const size_t follow, + const char *p, const char *ecs, const int cnt) +/* + * (horizontal middle match) + * + * aside box part to check (BTOP or BBOT) + * follow index of line number in shape spec to check + * p current check position + * ecs pointer to first char of east corner shape + * cnt current shape to check (0 == leftmost middle shape) + * + * Recursive helper function for detect_horiz() + * + * RETURNS: == 0 success + * != 0 error + * +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ +{ + int cmp; + sentry_t *cs; + shape_t sh; + int rc; + + #ifdef DEBUG + fprintf (stderr, "hmm (%s, %d, \'%c\', \'%c\', %d)\n", + aside==BTOP?"BTOP":"BBOT", follow, p[0], *ecs, cnt); + #endif + + if (p > ecs) /* last shape tried was too long */ + return 2; + + sh = leftmost (aside, cnt); + if (sh == ANZ_SHAPES) + return 1; + + cs = opt.design->shape + sh; + + cmp = strncmp (p, cs->chars[follow], cs->width); + + if (cmp == 0) { + if (p+cs->width == ecs) { + if (leftmost (aside, cnt+1) == ANZ_SHAPES) + return 0; /* good! all clear, it matched */ + else + return 3; /* didn't use all shapes to do it */ + } + if (cs->elastic) { + rc = hmm (aside, follow, p+cs->width, ecs, cnt); + #ifdef DEBUG + fprintf (stderr, "hmm returned %d\n", rc); + #endif + if (rc) { + rc = hmm (aside, follow, p+cs->width, ecs, cnt+1); + #ifdef DEBUG + fprintf (stderr, "hmm returned %d\n", rc); + #endif + } + } + else { + rc = hmm (aside, follow, p+cs->width, ecs, cnt+1); + #ifdef DEBUG + fprintf (stderr, "hmm returned %d\n", rc); + #endif + } + if (rc == 0) + return 0; /* we're on the way back */ + else + return 4; /* can't continue on this path */ + } + else { + return 5; /* no match */ + } +} + + + +static int detect_horiz (const int aside, size_t *hstart, size_t *hend) +/* + * Detect which part of the input belongs to the top of the box + * + * aside part of box to detect (BTOP or BBOT) + * hstart index of first line of detected box part (result) + * hend index of first line following detected box part (result) + * + * We assume the horizontal parts of the box to be in one piece, i.e. no + * blank lines inserted. Lines may be missing, though. Lines may not be + * duplicated. They may be shifted left and right by inserting whitespace, + * but whitespace which is part of the box must not have been deleted. + * Unfortunately, they may even differ in length as long as each line is + * in itself a valid horizontal box line. + * + * RETURNS: == 0 success (hstart & hend are set) + * != 0 error + * +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ +{ + size_t follow; /* possible box line */ + sentry_t *cs; /* current shape */ + line_t *line; /* currently processed input line */ + size_t lcnt; /* index of currently proc.inp.line */ + char *p; /* middle line part scanner */ + char *q; /* space check rover */ + char *wcs = NULL; /* west corner shape position */ + char *ecs = NULL; /* east corner shape position */ + char *ecs_save; /* temp copy of ecs */ + int mmok; /* true if middle match was ok */ + size_t mheight; /* regular height of box part */ + int result_init = 0; /* true if hstart was set */ + int goeast, gowest; + + *hstart = *hend = 0; + + mheight = opt.design->shape[sides[aside][0]].height; + if (aside == BTOP) { + follow = 0; + line=input.lines; + } + else { + follow = mheight - 1; + line = input.lines + input.anz_lines - 1; + } + + for (lcnt=0; lcnt= input.lines; ++lcnt) + { + goeast = gowest = 0; + + #ifdef DEBUG + fprintf (stderr, "----- Processing line index %2d ----------" + "-------------------------------------\n", + aside == BTOP? lcnt: input.anz_lines - lcnt - 1); + #endif + + do { + /* + * Look for west corner shape + */ + cs = opt.design->shape + sides[aside][aside==BTOP?0:SHAPES_PER_SIDE-1]; + if (gowest) { + wcs = strstr (wcs+1, cs->chars[follow]); + gowest = 0; + } + else if (!wcs) { + wcs = strstr (line->text, cs->chars[follow]); + } + if (wcs) { + for (q=wcs-1; q>=line->text; --q) { + if (*q != ' ' && *q != '\t') + break; + } + if (q >= line->text) + wcs = NULL; + } + #ifdef DEBUG + if (wcs) + fprintf (stderr, "West corner shape matched at position %d.\n", + wcs - line->text); + else + fprintf (stderr, "West corner shape not found.\n"); + #endif + + p = wcs + cs->width; + + /* + * Look for east corner shape + */ + if (wcs) { + cs = opt.design->shape + sides[aside][aside==BTOP?SHAPES_PER_SIDE-1:0]; + ecs_save = ecs; + ecs = my_strnrstr (p, cs->chars[follow], cs->width, goeast); + if (ecs) { + for (q=ecs+cs->width; *q; ++q) { + if (*q != ' ' && *q != '\t') + break; + } + if (*q) + ecs = NULL; + } + if (!ecs) { + gowest = 1; + goeast = 0; + ecs = ecs_save; + } + } + #ifdef DEBUG + if (ecs) + fprintf (stderr, "East corner shape matched at position %d.\n", + ecs-line->text); + else + fprintf (stderr, "East corner shape not found.\n"); + #endif + + /* + * Check if text between corner shapes is valid + */ + if (wcs && ecs) { + mmok = !hmm (aside, follow, p, ecs, 0); + #ifdef DEBUG + fprintf (stderr, "Text between corner shapes is%s valid.\n", + mmok? "": " NOT"); + #endif + if (!mmok) + ++goeast; + } + + } while (!mmok && wcs); + + /* + * Proceed to next line + */ + if (wcs && ecs && mmok) { /* match found */ + if (!result_init) { + result_init = 1; + if (aside == BTOP) + *hstart = lcnt; + else + *hend = (input.anz_lines - lcnt - 1) + 1; + } + if (aside == BTOP) + *hend = lcnt + 1; + else + *hstart = input.anz_lines - lcnt - 1; + } + else { + if (result_init) + break; + } + wcs = NULL; + ecs = NULL; + + if (aside == BTOP) { + ++follow; + ++line; + } + else { + --follow; + --line; + } + } + + return result_init? 0: 1; +} + + + +static design_t *detect_design() +/* + * Autodetect design used by box in input. + * + * This requires knowledge about ALL designs, so the entire config file had + * to be parsed at some earlier time. + * + * RETURNS: != NULL success, pointer to detected design + * == NULL on error + * +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ +{ + design_t *d = designs; /* ptr to currently tested design */ + long hits; /* hit points of the current design */ + long maxhits = 0; /* maximum no. of hits so far */ + design_t *res = NULL; /* ptr to design with the most hits */ + int dcnt; /* design loop counter */ + shape_t scnt; /* shape loop counter */ + size_t j, k; + char *p; + char *s; + line_t shpln; /* a line which is part of a shape */ + size_t a; + int empty[ANZ_SIDES]; + + for (dcnt=0; dcntname); + #endif + hits = 0; + + for (j=0; jshape[scnt].height; ++j) { + shpln.text = d->shape[scnt].chars[j]; + shpln.len = d->shape[scnt].width; + if (empty_line (&shpln)) + continue; + for (s=shpln.text; *s==' ' || *s=='\t'; ++s); + for (k=0; kshape[scnt].height; ++k) { + a = k; + if (scnt == SW) + a += input.anz_lines - d->shape[scnt].height; + if (a >= input.anz_lines) + break; + for (p=input.lines[a].text; *p==' '||*p=='\t'; ++p); + if (strncmp (p, s, shpln.len-(s-shpln.text)) == 0) + ++hits; + } + } + #ifdef DEBUG + fprintf (stderr, "After %s corner check:\t%ld hits.\n", + shape_name[scnt], hits); + #endif + break; + + case NE: case SE: + /* + * Try and find east corner shapes. Every non-empty shape + * line is searched for on every input line. A hit is + * generated whenever a match is found. + */ + if (empty[BRIG] || (empty[BTOP] && scnt == NE) + || (empty[BBOT] && scnt == SE)) + break; + for (j=0; jshape[scnt].height; ++j) { + shpln.text = d->shape[scnt].chars[j]; + shpln.len = d->shape[scnt].width; + if (empty_line (&shpln)) + continue; + for (s = shpln.text + shpln.len -1; + (*s==' ' || *s=='\t') && shpln.len; + --s, --(shpln.len)); + for (k=0; kshape[scnt].height; ++k) { + a = k; + if (scnt == SE) + a += input.anz_lines - d->shape[scnt].height; + if (a >= input.anz_lines) + break; + for (p=input.lines[a].text + input.lines[a].len -1; + p>=input.lines[a].text && (*p==' ' || *p=='\t'); + --p); + p = p - shpln.len + 1; + if (p < input.lines[a].text) + continue; + if (strncmp (p, shpln.text, shpln.len) == 0) + ++hits; + } + } + #ifdef DEBUG + fprintf (stderr, "After %s corner check:\t%ld hits.\n", + shape_name[scnt], hits); + #endif + break; + + default: + if (isempty (d->shape+scnt)) + continue; + + if ((scnt >= NNW && scnt <= NNE) + || (scnt >= SSE && scnt <= SSW)) { + /* + * Try and find horizontal shapes between the box + * corners. Every non-empty shape line is searched for + * on every input line. Elastic shapes must occur + * twice in an uninterrupted row to generate a hit. + */ + if ((scnt >= NNW && scnt <= NNE && empty[BTOP]) + || (scnt >= SSE && scnt <= SSW && empty[BBOT])) { + ++hits; + break; /* horizontal box part is empty */ + } + for (j=0; jshape[scnt].height; ++j) { + shpln.text = d->shape[scnt].chars[j]; + shpln.len = d->shape[scnt].width; + if (empty_line (&shpln)) + continue; + for (k=0; kshape[scnt].height; ++k) { + a = k; + if (scnt >= SSE && scnt <= SSW) + a += input.anz_lines-d->shape[scnt].height; + if (a >= input.anz_lines) + break; + for (p=input.lines[a].text; + *p == ' ' || *p == '\t'; ++p); + p += d->shape[NW].width; + if (p-input.lines[a].text + >= (long) input.lines[a].len) + continue; + p = strstr (p, shpln.text); + if (p) { + if (d->shape[scnt].elastic) { + p += shpln.len; + if (p-input.lines[a].text + >= (long) input.lines[a].len) + continue; + if (!strncmp (p, shpln.text, shpln.len)) + ++hits; + } + else { + ++hits; + } + } + } + } + } + + else if ((scnt >= ENE && scnt <= ESE) + || (scnt >= WSW && scnt <= WNW)) { + /* handle later */ + break; + } + else { + fprintf (stderr, "%s: internal error\n", PROJECT); + return NULL; + } + #ifdef DEBUG + fprintf (stderr, "After %s shape check:\t%ld hits.\n", + shape_name[scnt], hits); + #endif + } + } + + /* + * Now iterate over all input lines except for potential top and + * bottom box parts. Check if east and west line ends match a + * non-empty shape line. If so, generate a hit. + */ + if (((empty[BTOP]? 0: d->shape[NW].height) + + (empty[BBOT]? 0: d->shape[SW].height)) < input.anz_lines) + { + for (k = empty[BTOP]? 0: d->shape[NW].height; + k < input.anz_lines -(empty[BBOT]? 0: d->shape[SW].height); + ++k) + { + for (p=input.lines[k].text; *p==' ' || *p=='\t'; ++p); + for (scnt=WSW; scnt<=WNW; ++scnt) { + a = 0; + if (isempty (d->shape + scnt)) + continue; + for (j=0; jshape[scnt].height; ++j) { + shpln.text = d->shape[scnt].chars[j]; + shpln.len = d->shape[scnt].width; + if (empty_line (&shpln)) + continue; + for (s=shpln.text; *s==' ' || *s=='\t'; ++s); + if (strncmp (p, s, shpln.len-(s-shpln.text)) == 0) { + ++hits; + a = 1; + break; + } + } + if (a) + break; + } + + for (scnt=ENE; scnt<=ESE; ++scnt) { + a = 0; + if (isempty (d->shape + scnt)) + continue; + for (j=0; jshape[scnt].height; ++j) { + shpln.text = d->shape[scnt].chars[j]; + shpln.len = d->shape[scnt].width; + if (empty_line (&shpln)) + continue; + for (p=input.lines[k].text + input.lines[k].len -1; + p>=input.lines[a].text && (*p==' ' || *p=='\t'); + --p); + for (s = shpln.text + shpln.len -1; + (*s==' ' || *s=='\t') && shpln.len; + --s, --(shpln.len)); + p = p - shpln.len + 1; + if (strncmp (p, shpln.text, shpln.len) == 0) { + ++hits; + a = 1; + break; + } + } + if (a) + break; + } + } + } + #ifdef DEBUG + fprintf (stderr, "After side checks:\t%ld hits.\n", hits); + #endif + + if (hits > maxhits) { + maxhits = hits; + res = d; + } + } + + #ifdef DEBUG + if (res) + fprintf (stderr, "CHOOSING \"%s\" design (%ld hits).\n", + res->name, maxhits); + else + fprintf (stderr, "NO DESIGN FOUND WITH EVEN ONE HIT!\n"); + #endif + + return res; +} + + + +int remove_box() +/* + * foo + * + * RETURNS: == 0 success + * != 0 error + * +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ +{ + size_t textstart = 0; /* index of 1st line of box body */ + size_t textend = 0; /* index of 1st line of south side */ + size_t boxstart = 0; /* index of 1st line of box */ + size_t boxend = 0; /* index of 1st line trailing the box */ + int m; /* true if a match was found */ + size_t j; /* loop counter */ + int did_something = 0; /* true if there was something to remove */ + + /* + * If the user didn't specify a design to remove, autodetect it. + * Since this requires knowledge of all available designs, the entire + * config file had to be parsed (earlier). + */ + if (opt.design_choice_by_user == 0) { + design_t *tmp = detect_design(); + if (tmp) { + opt.design = tmp; + #ifdef DEBUG + fprintf (stderr, "Design autodetection: Removing box of " + "design \"%s\".\n", opt.design->name); + #endif + } + else { + fprintf (stderr, "%s: Box design autodetection failed. Use -d " + "option.\n", PROJECT); + return 1; + } + } + + /* + * Make all lines the same length by adding trailing spaces (needed + * for recognition). + * Also append a number of spaces to ALL input lines. A greater number + * takes more space and time, but enables the correct removal of boxes + * whose east sides consist of lots of spaces (the given value). So we + * add a number of spaces equal to the east side width. + */ + input.maxline += opt.design->shape[NE].width; + for (j=0; j First line of box is %d, ", boxstart); + fprintf (stderr, "first line of box body (text) is %d.\n", textstart); + #endif + + + /* + * Phase 2: Find out how many lines belong to the bottom of the box + */ + textend = 0; + boxend = 0; + detect_horiz (BBOT, &textend, &boxend); + if (textend == 0 && boxend == 0) { + textend = input.anz_lines; + boxend = input.anz_lines; + } + #ifdef DEBUG + fprintf (stderr, "----> Last line of box body (text) is %d, ", textend-1); + fprintf (stderr, "last line of box is %d.\n", boxend-1); + #endif + + /* + * Phase 3: Iterate over body lines, removing box sides where applicable + */ + for (j=textstart; jshape[NW].width + opt.design->padding[BLEF]; + for (c=0; c textstart) { + #ifdef DEBUG + fprintf (stderr, "Killing trailing blank line in box body.\n"); + #endif + --textend; + } + + if (textstart > boxstart) { + for (j=boxstart; j textend) { + for (j=textend; j input.maxline) + input.maxline = input.lines[j].len; + } + memset (input.lines + input.anz_lines, 0, + (BMAX (textstart - boxstart, 0) + BMAX (boxend - textend, 0)) * + sizeof(line_t)); + + #ifdef DEBUG + #if 0 + for (j=0; j