/*
 * boxes - Command line filter to draw/remove ASCII boxes around text
 * Copyright (c) 1999-2023 Thomas Jensen and the boxes contributors
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
 * License, version 3, as published by the Free Software Foundation.
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 * You should have received a copy of the GNU General Public License along with this program.
 * If not, see <https://www.gnu.org/licenses/>.
 *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 */

/*
 * Box removal, i.e. the deletion of boxes
 */

#include "config.h"

/* NOTE(review): the six system header names were lost from this file; they are reconstructed here from the
 * identifiers used below (uint32_t/SIZE_MAX, fprintf, calloc/exit, memcpy/memset/memmove, u32_str*()). Confirm. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistr.h>
#include <unitypes.h>

#include "boxes.h"
#include "detect.h"
#include "remove.h"
#include "shape.h"
#include "tools.h"
#include "unicode.h"



typedef struct _line_ctx_t {
    /** index of the first character of the west shape */
    size_t west_start;

    /** index of the character following the last character of the west shape. If equal to `west_start`, then no west
     *  shape was detected. */
    size_t west_end;

    /** the length in characters of the matched west shape part */
    size_t west_quality;

    /** index of the first character of the east shape */
    size_t east_start;

    /** index of the character following the last character of the east shape. If equal to `east_start`, then no east
     *  shape was detected. */
    size_t east_end;

    /** the length in characters of the matched east shape part */
    size_t east_quality;

    /** the input line to which the above values refer. Will look very different depending on comparison type. */
    uint32_t *input_line_used;
} line_ctx_t;



typedef struct _remove_ctx_t {
    /** Array of flags indicating which sides of the box design are defined as empty. Access via `BTOP` etc. constants.
     */
    int empty_side[NUM_SIDES];

    /** Flag indicating that there are no invisible characters in the definition of the design we are removing. */
    int design_is_mono;

    /** Flag indicating that there are no invisible characters in the input. */
    int input_is_mono;

    /** Index into `input.lines` of the first line of the box (topmost box line). Lines above are blank. */
    size_t top_start_idx;

    /** Index into `input.lines` of the line following the last line of the top part of the box. If the top part of the
     *  box is empty or missing, this value will be equal to `top_start_idx`. */
    size_t top_end_idx;

    /** Index into `input.lines` of the first line of the bottom side of the box. */
    size_t bottom_start_idx;

    /** Index into `input.lines` of the line following the last line of the bottom part of the box. If the bottom part
     *  of the box is empty or missing, this value will be equal to `bottom_start_idx`. Lines below are blank. */
    size_t bottom_end_idx;

    /** The current comparison type. This changes whenever another comparison type is tried. */
    comparison_t comp_type;

    /** number of lines in `body` */
    size_t body_num_lines;

    /** Information on the vertical east and west shapes in body lines, one entry for each line between `top_end_idx`
     *  (inclusive) and `bottom_start_idx` (exclusive) */
    line_ctx_t *body;
} remove_ctx_t;



/**
 * Dump the remove context to stderr. Only does something when compiled with `DEBUG`.
 * @param ctx the remove context to print
 * @param heading text appended to the "Remove Context" title line
 */
static void debug_print_remove_ctx(remove_ctx_t *ctx, char *heading)
{
    #ifdef DEBUG
        fprintf(stderr, "Remove Context %s:\n", heading);
        fprintf(stderr, " - empty_side[BTOP] = %s\n", ctx->empty_side[BTOP] ? "true" : "false");
        fprintf(stderr, " - empty_side[BRIG] = %s\n", ctx->empty_side[BRIG] ? "true" : "false");
        fprintf(stderr, " - empty_side[BBOT] = %s\n", ctx->empty_side[BBOT] ? "true" : "false");
        fprintf(stderr, " - empty_side[BLEF] = %s\n", ctx->empty_side[BLEF] ? "true" : "false");
        fprintf(stderr, " - design_is_mono = %s\n", ctx->design_is_mono ? "true" : "false");
        fprintf(stderr, " - input_is_mono = %s\n", ctx->input_is_mono ? "true" : "false");
        fprintf(stderr, " - top_start_idx = %d\n", (int) ctx->top_start_idx);
        fprintf(stderr, " - top_end_idx = %d\n", (int) ctx->top_end_idx);
        fprintf(stderr, " - bottom_start_idx = %d\n", (int) ctx->bottom_start_idx);
        fprintf(stderr, " - bottom_end_idx = %d\n", (int) ctx->bottom_end_idx);
        fprintf(stderr, " - comp_type = %s\n", comparison_name[ctx->comp_type]);
        fprintf(stderr, " - body (%d lines):\n", (int) ctx->body_num_lines);
        for (size_t i = 0; i < ctx->body_num_lines; i++) {
            if (ctx->body[i].input_line_used != NULL) {
                char *out_input_line_used = u32_strconv_to_output(ctx->body[i].input_line_used);
                fprintf(stderr, " - lctx: \"%s\" (%d characters)\n", out_input_line_used,
                    (int) u32_strlen(ctx->body[i].input_line_used));
                BFREE(out_input_line_used);
            }
            else {
                fprintf(stderr, " - lctx: (null)\n");
            }
            bxstr_t *orgline = input.lines[ctx->top_end_idx + i].text;
            if (orgline != NULL) {
                char *out_orgline = bxs_to_output(orgline);
                fprintf(stderr, " orgl: \"%s\" (%d characters, %d columns)\n", out_orgline,
                    (int) orgline->num_chars, (int) orgline->num_columns);
                BFREE(out_orgline);
            }
            else {
                fprintf(stderr, " orgl: (null)\n");
            }
            fprintf(stderr, " west: %d-%d (quality: %d), east: %d-%d (quality: %d)\n",
                (int) ctx->body[i].west_start, (int) ctx->body[i].west_end, (int) ctx->body[i].west_quality,
                (int) ctx->body[i].east_start, (int) ctx->body[i].east_end, (int) ctx->body[i].east_quality);
        }
    #else
        UNUSED(ctx);
        UNUSED(heading);
    #endif
}



/**
 * Dump the prepared shape lines of one horizontal side to stderr. Only does something when compiled with `DEBUG`.
 * @param shapes_relevant array of `SHAPES_PER_SIDE` shape line records
 */
static void debug_print_shapes_relevant(shape_line_ctx_t *shapes_relevant)
{
    #ifdef DEBUG
        fprintf(stderr, " shapes_relevant = {");
        for (size_t ds = 0; ds < SHAPES_PER_SIDE; ds++) {
            if (shapes_relevant[ds].empty) {
                fprintf(stderr, "-");
            }
            else {
                char *out_shp_text = bxs_to_output(shapes_relevant[ds].text);
                fprintf(stderr, "\"%s\"(%d%s)", out_shp_text, (int) shapes_relevant[ds].text->num_chars,
                    shapes_relevant[ds].elastic ? "E" : "");
                BFREE(out_shp_text);
            }
            if (ds < SHAPES_PER_SIDE - 1) {
                fprintf(stderr, ", ");
            }
        }
        fprintf(stderr, "}\n");
    #else
        UNUSED(shapes_relevant);
    #endif
}



/**
 * Find the first input line that is not blank.
 * @return index into `input.lines` of that line, or `input.num_lines` if every line is blank
 */
static size_t find_first_line(void)
{
    size_t result = input.num_lines;
    for (size_t line_idx = 0; line_idx < input.num_lines; line_idx++) {
        if (!bxs_is_blank(input.lines[line_idx].text)) {
            result = line_idx;
            break;
        }
    }
    return result;
}



/**
 * Find the last input line that is not blank.
 * @return index into `input.lines` of that line, or `input.num_lines - 1` if every line is blank. With zero input
 *      lines that expression wraps around; remove_box() returns before calling this in that case.
 */
static size_t find_last_line(void)
{
    size_t result = input.num_lines - 1;
    for (long line_idx = (long) input.num_lines - 1; line_idx >= 0; line_idx--) {
        if (!bxs_is_blank(input.lines[line_idx].text)) {
            result = (size_t) line_idx;
            break;
        }
    }
    return result;
}



/**
 * Tell whether the shape line at `shape_idx` is flagged empty or consists of blanks only.
 * @return 1 if empty/blank or if `shape_idx` is out of range, 0 otherwise
 */
static int is_shape_line_empty(shape_line_ctx_t *shapes_relevant, size_t shape_idx)
{
    if (shape_idx < SHAPES_PER_SIDE) {
        return shapes_relevant[shape_idx].empty || bxs_is_blank(shapes_relevant[shape_idx].text);
    }
    return 1;
}



/**
 * Tell whether any shape line after `shape_idx` is non-empty. The last entry (the east corner) is not considered.
 * @return 1 if such a shape line exists, 0 otherwise
 */
static int non_empty_shapes_after(shape_line_ctx_t *shapes_relevant, size_t shape_idx)
{
    /* CHECK Can we use shape->is_blank_rightward? */
    for (size_t i = shape_idx + 1; i < SHAPES_PER_SIDE - 1; i++) {
        if (!is_shape_line_empty(shapes_relevant, i)) {
            return 1;
        }
    }
    return 0;
}



/**
 * Tell whether all characters in `[start, end)` are blank. An empty range counts as blank.
 */
static int is_blank_between(uint32_t *start, uint32_t *end)
{
    for (uint32_t *p = start; p < end; p++) {
        if (!is_blank(*p)) {
            return 0;
        }
    }
    return 1;
}



/**
 * Take a shape line and shorten it by cutting off blanks from both ends.
 * @param shape_line_ctx info record on the shape line to work on. Contains the original shape line, unshortened.
 * @param quality (IN/OUT) the current quality, here the value that was last tested. We will reduce this by one.
 * @param prefer_left if 1, first cut all blanks from the start of the shape line, if 0, first cut at the end
 * @param allow_left if 1, blanks may be cut from the left of the shape line, if 0, we never cut from the left
 * @param allow_right if 1, blanks may be cut from the right of the shape line, if 0, we never cut from the right
 * @return the shortened shape line, in new memory, or NULL if further shortening was not possible
 */
uint32_t *shorten(shape_line_ctx_t *shape_line_ctx, size_t *quality, int prefer_left, int allow_left, int allow_right)
{
    if (shape_line_ctx == NULL || shape_line_ctx->text == NULL || quality == NULL
            || *quality > shape_line_ctx->text->num_chars) {
        return NULL;
    }

    uint32_t *s = shape_line_ctx->text->memory;
    uint32_t *e = shape_line_ctx->text->memory + shape_line_ctx->text->num_chars;
    prefer_left = allow_left ? prefer_left : 0;
    /* always start over from the full shape line and cut one character more than the last attempt did */
    size_t reduction_steps = shape_line_ctx->text->num_chars - *quality + 1;
    for (size_t i = 0; i < reduction_steps; i++) {
        if (prefer_left) {
            if (s < e && is_blank(*s)) {
                s++;
            }
            else if (e > s && allow_right && is_blank(*(e - 1))) {
                e--;
            }
            else {
                break;
            }
        }
        else {
            if (e > s && allow_right && is_blank(*(e - 1))) {
                e--;
            }
            else if (s < e && allow_left && is_blank(*s)) {
                s++;
            }
            else {
                break;
            }
        }
    }

    uint32_t *result = NULL;
    size_t new_quality = e - s;
    if (new_quality < *quality) {
        result = u32_strdup(s);
        set_char_at(result, new_quality, char_nul);
        *quality = new_quality;
    }
    return result;
}



static int hmm_shiftable(shape_line_ctx_t *shapes_relevant, uint32_t *cur_pos, size_t shape_idx, uint32_t *end_pos,
    int anchored_right);


/**
 * (horizontal middle match)
 * Recursive helper function for match_horiz_line(), uses backtracking.
 * @param shapes_relevant the prepared shape lines to be concatenated
 * @param cur_pos current position in the input line being matched
 * @param shape_idx index into `shapes_relevant` indicating which shape to try now
 * @param end_pos first character of the east corner
 * @param anchored_left flag indicating that `cur_pos` is already "anchored" or still "shiftable". "Anchored" means
 *      that we have matched a non-blank shape line already (corner shape line was not blank). Else "shiftable".
 * @param anchored_right flag indicating that the east corner shape was not blank. If this is `false`, it means that
 *      a shape may be shortened right if only blank shape lines follow.
 * @return `== 1`: success;
 *         `== 0`: failed to match
 */
int hmm(shape_line_ctx_t *shapes_relevant, uint32_t *cur_pos, size_t shape_idx, uint32_t *end_pos, int anchored_left,
    int anchored_right)
{
    #ifdef DEBUG
        char *out_cur_pos = u32_strconv_to_output(cur_pos);
        char *out_end_pos = u32_strconv_to_output(end_pos);
        fprintf(stderr, "hmm(shapes_relevant, \"%s\", %d, \"%s\", %s, %s) - enter\n", out_cur_pos,
            (int) shape_idx, out_end_pos, anchored_left ? "true" : "false", anchored_right ? "true" : "false");
        BFREE(out_cur_pos);
        BFREE(out_end_pos);
    #endif

    int result = 0;
    if (!anchored_left) {
        result = hmm_shiftable(shapes_relevant, cur_pos, shape_idx, end_pos, anchored_right);
    }
    else if (cur_pos > end_pos) {
        /* invalid input */
        result = 0;
    }
    else if (cur_pos == end_pos) {
        /* we are at the end, which is fine if there is nothing else to match */
        result = (shape_idx == (SHAPES_PER_SIDE - 1) && anchored_right)
                || ((shapes_relevant[shape_idx].empty || bxs_is_blank(shapes_relevant[shape_idx].text))
                    && !non_empty_shapes_after(shapes_relevant, shape_idx) ? 1 : 0);
    }
    else if (shape_idx >= SHAPES_PER_SIDE - 1) {
        /* no more shapes to try, which is fine if the rest of the line is blank */
        result = u32_is_blank(cur_pos);
    }
    else if (shapes_relevant[shape_idx].empty) {
        /* the current shape line is empty, try the next one */
        result = hmm(shapes_relevant, cur_pos, shape_idx + 1, end_pos, 1, anchored_right);
    }
    else {
        uint32_t *shape_line = u32_strdup(shapes_relevant[shape_idx].text->memory);
        size_t quality = shapes_relevant[shape_idx].text->num_chars;
        while (shape_line != NULL && quality > 0) {
            if (u32_strncmp(cur_pos, shape_line, quality) == 0) {
                BFREE(shape_line);   /* also ends the loop */
                cur_pos = cur_pos + quality;
                if (cur_pos == end_pos && !non_empty_shapes_after(shapes_relevant, shape_idx)) {
                    result = 1; /* success */
                }
                else {
                    int rc = 0;
                    if (shapes_relevant[shape_idx].elastic) {
                        rc = hmm(shapes_relevant, cur_pos, shape_idx, end_pos, 1, anchored_right);
                    }
                    if (rc == 0) {
                        result = hmm(shapes_relevant, cur_pos, shape_idx + 1, end_pos, 1, anchored_right);
                    }
                    else {
                        result = rc;
                    }
                }
            }
            else if (!anchored_right) {
                /* shorten() builds its result from the original shape text, so the previous candidate can be
                 * released first; without this, every shortening step leaked one copy */
                BFREE(shape_line);
                shape_line = shorten(shapes_relevant + shape_idx, &quality, 0, 0, 1);
                #ifdef DEBUG
                    char *out_shape_line = u32_strconv_to_output(shape_line);
                    fprintf(stderr, "hmm() - shape_line shortened to %d (\"%s\")\n", (int) quality, out_shape_line);
                    BFREE(out_shape_line);
                #endif
            }
            else {
                BFREE(shape_line);
            }
        }
        /* the loop may also end because quality dropped to 0 while we still own a (now empty) copy */
        BFREE(shape_line);
    }

    #ifdef DEBUG
        fprintf(stderr, "hmm() - exit, result = %d\n", result);
    #endif
    return result;
}



/**
 * Variant of hmm() for the case that `cur_pos` is not anchored yet: the first non-empty shape line at or after
 * `shape_idx` may be preceded by blanks in the input, and may be shortened by cutting off blanks.
 * @param shapes_relevant the prepared shape lines to be concatenated
 * @param cur_pos current position in the input line being matched
 * @param shape_idx index into `shapes_relevant` of the first shape to consider
 * @param end_pos first character of the east corner
 * @param anchored_right flag passed through to hmm()
 * @return `== 1`: success;
 *         `== 0`: failed to match
 */
static int hmm_shiftable(shape_line_ctx_t *shapes_relevant, uint32_t *cur_pos, size_t shape_idx, uint32_t *end_pos,
    int anchored_right)
{
    int result = 0;
    int shapes_are_empty = 1;
    for (size_t i = shape_idx; i < SHAPES_PER_SIDE - 1; i++) {
        if (!is_shape_line_empty(shapes_relevant, i)) {
            shapes_are_empty = 0;
            int can_shorten_right = -1;
            size_t quality = shapes_relevant[i].text->num_chars;
            uint32_t *shape_line = shapes_relevant[i].text->memory;
            uint32_t *to_free = NULL;   /* shortened copy owned by us; the initial shape_line is borrowed */
            while (shape_line != NULL) {
                uint32_t *p = u32_strstr(cur_pos, shape_line);
                if (p != NULL && p < end_pos && is_blank_between(cur_pos, p)) {
                    result = hmm(shapes_relevant, p + quality, i + (shapes_relevant[i].elastic ? 0 : 1),
                            end_pos, 1, anchored_right);
                    if (result == 0 && shapes_relevant[i].elastic) {
                        result = hmm(shapes_relevant, p + quality, i + 1, end_pos, 1, anchored_right);
                    }
                    break;
                }
                if (can_shorten_right == -1) {
                    /* we can only shorten right if the east corner shape line is also empty */
                    can_shorten_right = non_empty_shapes_after(shapes_relevant, i)
                            || !is_shape_line_empty(shapes_relevant, SHAPES_PER_SIDE - 1) ? 0 : 1;
                }
                /* p points into the input, not into shape_line, so the old copy is no longer needed */
                BFREE(to_free);
                shape_line = shorten(shapes_relevant + i, &quality, 0, 1, can_shorten_right);
                to_free = shape_line;
            }
            BFREE(to_free);
            break;
        }
    }
    if (shapes_are_empty) {
        /* all shapes were empty, which is fine if line was blank */
        result = is_blank_between(cur_pos, end_pos);
    }
    return result;
}



/**
 * Build the array of shape line records for one line of a horizontal side.
 * @param hside `BTOP` selects `north_side`; any other value selects `south_side_rev`
 * @param comp_type the comparison type to prepare the shape lines for
 * @param shape_line_idx index of the line within the shapes
 * @return new array of `SHAPES_PER_SIDE` records; the caller frees each `text` and the array itself
 */
static shape_line_ctx_t *prepare_comp_shapes_horiz(int hside, comparison_t comp_type, size_t shape_line_idx)
{
    shape_t *side_shapes = hside == BTOP ? north_side : south_side_rev;
    shape_line_ctx_t *shapes_relevant = (shape_line_ctx_t *) calloc(SHAPES_PER_SIDE, sizeof(shape_line_ctx_t));

    for (size_t i = 0; i < SHAPES_PER_SIDE; i++) {
        shapes_relevant[i].elastic = opt.design->shape[side_shapes[i]].elastic;
        shapes_relevant[i].empty = isempty(opt.design->shape + side_shapes[i]);
        if (!shapes_relevant[i].empty) {
            uint32_t *s = prepare_comp_shape(opt.design, side_shapes[i], shape_line_idx, comp_type, 0,
                    i == SHAPES_PER_SIDE - 1);
            shapes_relevant[i].text = bxs_from_unicode(s);
            BFREE(s);
        }
    }
    return shapes_relevant;
}



/**
 * Allocate a match result record and fill it with the given values.
 * @return the new record, to be freed by the caller
 */
static match_result_t *new_match_result(uint32_t *p, size_t p_idx, size_t len, int shiftable)
{
    match_result_t *result = (match_result_t *) calloc(1, sizeof(match_result_t));
    result->p = p;
    result->p_idx = p_idx;
    result->len = len;
    result->shiftable = shiftable;
    return result;
}



/**
 * Match a `shape_line` at the beginning (`vside` == `BLEF`) or the end (`vside` == `BRIG`) of an `input_line`.
 * Both `input_line` and `shape_line` may contain invisible characters, who are then matched, too, just like any other
 * characters.
 * @param vside BLEF or BRIG
 * @param input_line the input line to examine. We expect that it was NOT trimmed.
 * @param shape_line the shape line to match, also NOT trimmed
 * @return pointer to the match result (in existing memory of `input_line->memory`), or `NULL` if no match
 */
match_result_t *match_outer_shape(int vside, bxstr_t *input_line, bxstr_t *shape_line)
{
    if (input_line == NULL || input_line->num_chars == 0 || shape_line == NULL || shape_line->num_chars == 0) {
        return NULL;
    }

    if (vside == BLEF) {
        if (bxs_is_blank(shape_line)) {
            return new_match_result(input_line->memory, 0, 0, 1);
        }
        for (uint32_t *s = shape_line->memory; s == shape_line->memory || is_blank(*s); s++) {
            uint32_t *p = u32_strstr(input_line->memory, s);
            size_t p_idx = p != NULL ? p - input_line->memory : 0;
            if (p == NULL || p_idx > input_line->first_char[input_line->indent]) {
                continue; /* not found or found too far in */
            }
            return new_match_result(p, p_idx, shape_line->num_chars - (s - shape_line->memory), 0);
        }
    }
    else {
        if (bxs_is_blank(shape_line)) {
            uint32_t *p = bxs_last_char_ptr(input_line);
            size_t p_idx = p - input_line->memory;
            return new_match_result(p, p_idx, 0, 1);
        }
        int slen = shape_line->num_chars;
        uint32_t *s = u32_strdup(shape_line->memory);
        for (; slen == (int) shape_line->num_chars || is_blank(s[slen]); slen--) {
            s[slen] = char_nul;
            uint32_t *p = u32_strnrstr(input_line->memory, s, slen);
            size_t p_idx = p != NULL ? p - input_line->memory : 0;
            if (p == NULL || p_idx + slen
                    < input_line->first_char[input_line->num_chars_visible - input_line->trailing]) {
                continue; /* not found or found too far in */
            }
            BFREE(s);
            return new_match_result(p, p_idx, (size_t) slen, 0);
        }
        BFREE(s);
    }
    return NULL;
}



/**
 * Try to match one input line against one line of a horizontal box side, trying the viable comparison types in order
 * until one matches. Sets `ctx->comp_type` to the comparison type tried last.
 * @param ctx the remove context
 * @param hside `BTOP` or `BBOT`
 * @param input_line_idx index into `input.lines` of the line to examine
 * @param shape_line_idx index of the shape line to match against
 * @return 1 if the line matched, 0 otherwise
 */
static int match_horiz_line(remove_ctx_t *ctx, int hside, size_t input_line_idx, size_t shape_line_idx)
{
    #ifdef DEBUG
        fprintf(stderr, "match_horiz_line(ctx, %s, %d, %d)\n",
            hside == BTOP ? "BTOP" : "BBOT", (int) input_line_idx, (int) shape_line_idx);
    #endif

    int result = 0;
    for (comparison_t comp_type = 0; comp_type < NUM_COMPARISON_TYPES; comp_type++) {
        if (!comp_type_is_viable(comp_type, ctx->input_is_mono, ctx->design_is_mono)) {
            continue;
        }
        ctx->comp_type = comp_type;
        #ifdef DEBUG
            fprintf(stderr, " Setting comparison type to: %s\n", comparison_name[comp_type]);
        #endif

        shape_line_ctx_t *shapes_relevant = prepare_comp_shapes_horiz(hside, comp_type, shape_line_idx);
        debug_print_shapes_relevant(shapes_relevant);

        bxstr_t *input_prepped1 = bxs_from_unicode(prepare_comp_input(input_line_idx, 0, comp_type, 0, NULL, NULL));
        bxstr_t *input_prepped = bxs_rtrim(input_prepped1);
        bxs_append_spaces(input_prepped, opt.design->shape[NW].width + opt.design->shape[NE].width);
        bxs_free(input_prepped1);
        #ifdef DEBUG
            char *out_input_prepped = bxs_to_output(input_prepped);
            fprintf(stderr, " input_prepped = \"%s\"\n", out_input_prepped);
            BFREE(out_input_prepped);
        #endif

        uint32_t *cur_pos = input_prepped->memory;
        match_result_t *mrl = NULL;
        if (!ctx->empty_side[BLEF]) {
            mrl = match_outer_shape(BLEF, input_prepped, shapes_relevant[0].text);
            if (mrl != NULL) {
                cur_pos = mrl->p + mrl->len;
            }
        }

        uint32_t *end_pos = bxs_last_char_ptr(input_prepped);
        match_result_t *mrr = NULL;
        if (!ctx->empty_side[BRIG]) {
            mrr = match_outer_shape(BRIG, input_prepped, shapes_relevant[SHAPES_PER_SIDE - 1].text);
            if (mrr != NULL) {
                end_pos = mrr->p;
            }
        }
        #ifdef DEBUG
            char *out_cur_pos = u32_strconv_to_output(cur_pos);
            char *out_end_pos = u32_strconv_to_output(end_pos);
            fprintf(stderr, " cur_pos = \"%s\" (index %d)\n", out_cur_pos,
                (int) BMAX(cur_pos - input_prepped->memory, 0));
            fprintf(stderr, " end_pos = \"%s\" (index %d)\n", out_end_pos,
                (int) BMAX(end_pos - input_prepped->memory, 0));
            BFREE(out_cur_pos);
            BFREE(out_end_pos);
        #endif

        result = hmm(shapes_relevant, cur_pos, 1, end_pos, (mrl == NULL) || mrl->shiftable ? 0 : 1,
            (mrr == NULL) || mrr->shiftable ? 0 : 1);

        BFREE(mrl);
        BFREE(mrr);
        for (size_t i = 0; i < SHAPES_PER_SIDE; i++) {
            bxs_free(shapes_relevant[i].text);
        }
        BFREE(shapes_relevant);
        /* cur_pos, end_pos and the match results pointed into this string; all are done with now.
         * It was previously leaked once per comparison type tried. */
        bxs_free(input_prepped);

        if (result) {
            #ifdef DEBUG
                fprintf(stderr, "Matched %s side line using comp_type=%s and shape_line_idx=%d\n",
                    hside == BTOP ? "top" : "bottom", comparison_name[comp_type], (int) shape_line_idx);
            #endif
            break;
        }
    }

    return result;
}



/**
 * Determine how many input lines, starting at `ctx->top_start_idx`, belong to the top side of the box. At most
 * `shape[NE].height` lines are examined, and the scan stops at the first line matching none of the shape lines.
 * @return index of the input line following the last matched top line (`ctx->top_start_idx` if none matched)
 */
static size_t find_top_side(remove_ctx_t *ctx)
{
    size_t result = ctx->top_start_idx;
    sentry_t *shapes = opt.design->shape;
    for (size_t input_line_idx = ctx->top_start_idx;
            input_line_idx < input.num_lines && input_line_idx < ctx->top_start_idx + shapes[NE].height;
            input_line_idx++)
    {
        int matched = 0;
        size_t shape_lines_tested = 0;
        /* start with the shape line at the same offset as the input line, then wrap around through the others */
        for (size_t shape_line_idx = (input_line_idx - ctx->top_start_idx) % shapes[NE].height;
                shape_lines_tested < shapes[NE].height;
                shape_line_idx = (shape_line_idx + 1) % shapes[NE].height, shape_lines_tested++)
        {
            if (match_horiz_line(ctx, BTOP, input_line_idx, shape_line_idx)) {
                matched = 1;
                break;
            }
        }
        if (!matched) {
            break;
        }
        result = input_line_idx + 1;
    }
    return result;
}



/**
 * Determine how many input lines, going upward from `ctx->bottom_end_idx - 1`, belong to the bottom side of the box.
 * At most `shape[SE].height` lines are examined, and the scan stops at the first line matching none of the shape
 * lines.
 * @return index of the topmost matched bottom line (`ctx->bottom_end_idx` if none matched)
 */
static size_t find_bottom_side(remove_ctx_t *ctx)
{
    size_t result = ctx->bottom_end_idx;
    sentry_t *shapes = opt.design->shape;
    for (long input_line_idx = (long) ctx->bottom_end_idx - 1;
            input_line_idx >= 0 && input_line_idx >= (long) ctx->bottom_end_idx - (long) shapes[SE].height;
            input_line_idx--)
    {
        int matched = 0;
        size_t shape_lines_tested = 0;
        /* start with the shape line at the same offset from the bottom, then wrap around through the others */
        for (long shape_line_idx = shapes[SE].height - (ctx->bottom_end_idx - input_line_idx);
                shape_line_idx >= 0 && shape_lines_tested < shapes[SE].height;
                shape_lines_tested++,
                shape_line_idx = shape_line_idx == 0 ? (long) (shapes[SE].height - 1) : (long) (shape_line_idx - 1))
        {
            if (match_horiz_line(ctx, BBOT, input_line_idx, shape_line_idx)) {
                matched = 1;
                break;
            }
        }
        if (!matched) {
            break;
        }
        result = input_line_idx;
    }
    return result;
}



/**
 * Sum up the heights of the non-empty shapes among the given side shapes.
 * @param side_shapes array of `SHAPES_PER_SIDE - CORNERS_PER_SIDE` shapes
 * @return total number of shape lines
 */
static size_t count_shape_lines(shape_t side_shapes[])
{
    size_t result = 0;
    for (size_t i = 0; i < SHAPES_PER_SIDE - CORNERS_PER_SIDE; i++) {
        if (!isempty(opt.design->shape + side_shapes[i])) {
            result += opt.design->shape[side_shapes[i]].height;
        }
    }
    return result;
}



/**
 * Build the list of shape lines of a vertical side (corner shapes excluded), prepared for `comp_type`.
 * @param vside `BLEF` for the west side shapes; any other value selects the east side shapes
 * @param comp_type the comparison type to prepare the shape lines for
 * @return new NULL-terminated array of shape line records, to be released with free_shape_lines()
 */
static shape_line_ctx_t **prepare_comp_shapes_vert(int vside, comparison_t comp_type)
{
    shape_t west_side_shapes[SHAPES_PER_SIDE - CORNERS_PER_SIDE] = {WNW, W, WSW};
    shape_t east_side_shapes[SHAPES_PER_SIDE - CORNERS_PER_SIDE] = {ENE, E, ESE};
    shape_t side_shapes[SHAPES_PER_SIDE - CORNERS_PER_SIDE];
    if (vside == BLEF) {
        memcpy(side_shapes, west_side_shapes, (SHAPES_PER_SIDE - CORNERS_PER_SIDE) * sizeof(shape_t));
    }
    else {
        memcpy(side_shapes, east_side_shapes, (SHAPES_PER_SIDE - CORNERS_PER_SIDE) * sizeof(shape_t));
    }

    size_t num_shape_lines = count_shape_lines(side_shapes);

    /* one extra, zeroed slot serves as the NULL terminator */
    shape_line_ctx_t **shape_lines = (shape_line_ctx_t **) calloc(num_shape_lines + 1, sizeof(shape_line_ctx_t *));
    for (size_t i = 0; i < num_shape_lines; i++) {
        shape_lines[i] = (shape_line_ctx_t *) calloc(1, sizeof(shape_line_ctx_t));
    }

    for (size_t shape_idx = 0, i = 0; shape_idx < SHAPES_PER_SIDE - CORNERS_PER_SIDE; shape_idx++) {
        if (!isempty(opt.design->shape + side_shapes[shape_idx])) {
            int deep_empty = isdeepempty(opt.design->shape + side_shapes[shape_idx]);
            for (size_t slno = 0; slno < opt.design->shape[side_shapes[shape_idx]].height; slno++, i++) {
                uint32_t *s = prepare_comp_shape(opt.design, side_shapes[shape_idx], slno, comp_type, 0, 0);
                shape_lines[i]->text = bxs_from_unicode(s);
                shape_lines[i]->empty = deep_empty;
                shape_lines[i]->elastic = opt.design->shape[side_shapes[shape_idx]].elastic;
                BFREE(s);
            }
        }
    }

    return shape_lines;
}
static void free_shape_lines(shape_line_ctx_t **shape_lines) { if (shape_lines != NULL) { for (shape_line_ctx_t **p = shape_lines; *p != NULL; p++) { bxs_free((*p)->text); BFREE(*p); } BFREE(shape_lines); } } static void match_vertical_side(remove_ctx_t *ctx, int vside, shape_line_ctx_t **shape_lines, uint32_t *input_line, size_t line_idx, size_t input_length, size_t input_indent, size_t input_trailing) { line_ctx_t *line_ctx = ctx->body + (line_idx - ctx->top_end_idx); for (shape_line_ctx_t **shape_line_ctx = shape_lines; *shape_line_ctx != NULL; shape_line_ctx++) { if ((*shape_line_ctx)->empty) { continue; } size_t max_quality = (*shape_line_ctx)->text->num_chars; size_t quality = max_quality; uint32_t *shape_text = (*shape_line_ctx)->text->memory; uint32_t *to_free = NULL; while(shape_text != NULL) { uint32_t *p; if (vside == BLEF) { p = u32_strstr(input_line, shape_text); } else { p = u32_strnrstr(input_line, shape_text, quality); } BFREE(to_free); shape_text = NULL; if ((p == NULL) || (vside == BLEF && ((size_t) (p - input_line) > input_indent + (max_quality - quality))) || (vside == BRIG && ((size_t) (p - input_line) < input_length - input_trailing - quality))) { shape_text = shorten(*shape_line_ctx, &quality, vside == BLEF, 1, 1); to_free = shape_text; } else if (vside == BLEF) { if (quality > line_ctx->west_quality) { line_ctx->west_start = (size_t) (p - input_line); line_ctx->west_end = line_ctx->west_start + quality; line_ctx->west_quality = quality; BFREE(line_ctx->input_line_used); line_ctx->input_line_used = u32_strdup(input_line); break; } } else if (vside == BRIG) { if (quality > line_ctx->east_quality) { line_ctx->east_start = (size_t) (p - input_line); line_ctx->east_end = line_ctx->east_start + quality; line_ctx->east_quality = quality; BFREE(line_ctx->input_line_used); line_ctx->input_line_used = u32_strdup(input_line); break; } } } } } static int sufficient_body_quality(remove_ctx_t *ctx) { size_t num_body_lines = ctx->bottom_start_idx - 
ctx->top_end_idx; size_t total_quality = 0; line_ctx_t *body = ctx->body; for (size_t body_line_idx = 0; body_line_idx < num_body_lines; body_line_idx++) { line_ctx_t line_ctx = body[body_line_idx]; total_quality += line_ctx.east_quality + line_ctx.west_quality; } size_t max_quality = 0; if (!ctx->empty_side[BLEF]) { max_quality += opt.design->shape[NW].width; } if (!ctx->empty_side[BRIG]) { max_quality += opt.design->shape[NE].width; } max_quality = max_quality * num_body_lines; /* If we manage to match 50%, then it is unlikely to improve with a different comparison mode. */ int sufficient = (max_quality == 0 && total_quality == 0) || (max_quality > 0 && (total_quality > 0.5 * max_quality)); #ifdef DEBUG fprintf(stderr, "sufficient_body_quality() found body match quality of %d/%d (%s).\n", (int) total_quality, (int) max_quality, sufficient ? "sufficient" : "NOT sufficient"); #endif return sufficient; } static void reset_body(remove_ctx_t *ctx) { if (ctx->body != NULL) { for (size_t i = 0; i < ctx->body_num_lines; i++) { BFREE(ctx->body[i].input_line_used); } memset(ctx->body, 0, ctx->body_num_lines * sizeof(line_ctx_t)); } } static void find_vertical_shapes(remove_ctx_t *ctx) { int west_empty = ctx->empty_side[BLEF]; int east_empty = ctx->empty_side[BRIG]; if (west_empty && east_empty) { return; } for (comparison_t comp_type = 0; comp_type < NUM_COMPARISON_TYPES; comp_type++) { if (!comp_type_is_viable(comp_type, ctx->input_is_mono, ctx->design_is_mono)) { continue; } ctx->comp_type = comp_type; #ifdef DEBUG fprintf(stderr, "find_vertical_shapes(): comp_type = %s\n", comparison_name[comp_type]); #endif reset_body(ctx); shape_line_ctx_t **shape_lines_west = NULL; if (!west_empty) { shape_lines_west = prepare_comp_shapes_vert(BLEF, comp_type); } shape_line_ctx_t **shape_lines_east = NULL; if (!east_empty) { shape_lines_east = prepare_comp_shapes_vert(BRIG, comp_type); } for (size_t input_line_idx = ctx->top_end_idx; input_line_idx < ctx->bottom_start_idx; 
input_line_idx++) { size_t input_indent = 0; size_t input_trailing = 0; uint32_t *input_line = prepare_comp_input(input_line_idx, 0, comp_type, 0, &input_indent, &input_trailing); size_t input_length = u32_strlen(input_line); if (!west_empty) { match_vertical_side(ctx, BLEF, shape_lines_west, input_line, input_line_idx, input_length, input_indent, input_trailing); } if (!east_empty) { match_vertical_side(ctx, BRIG, shape_lines_east, input_line, input_line_idx, input_length, input_indent, input_trailing); } } free_shape_lines(shape_lines_west); free_shape_lines(shape_lines_east); if (sufficient_body_quality(ctx)) { break; } } } /** * If the user didn't specify a design to remove, autodetect it. * Since this requires knowledge of all available designs, the entire config file had to be parsed (earlier). */ static void detect_design_if_needed() { if (opt.design_choice_by_user == 0) { design_t *tmp = autodetect_design(); if (tmp) { opt.design = tmp; #ifdef DEBUG fprintf(stderr, "Design autodetection: Removing box of design \"%s\".\n", opt.design->name); #endif } else { fprintf(stderr, "%s: Box design autodetection failed. Use -d option.\n", PROJECT); exit(EXIT_FAILURE); } } #ifdef DEBUG else { fprintf(stderr, "Design was chosen by user: %s\n", opt.design->name); } #endif } static void free_line_text(line_t *line) { BFREE(line->cache_visible); bxs_free(line->text); line->text = NULL; } static void free_line(line_t *line) { free_line_text(line); BFREE(line->tabpos); line->tabpos_len = 0; } static void killblank(remove_ctx_t *ctx) { size_t lines_removed = 0; size_t max_lines_removable = opt.mend && !opt.killblank ? 
(size_t) BMAX(opt.design->padding[BTOP], 0) : SIZE_MAX; while (ctx->top_end_idx < ctx->bottom_start_idx && lines_removed < max_lines_removable && empty_line(input.lines + ctx->top_end_idx)) { #ifdef DEBUG fprintf(stderr, "Killing leading blank line in box body.\n"); #endif ++(ctx->top_end_idx); --(ctx->body_num_lines); ++lines_removed; } lines_removed = 0; max_lines_removable = opt.mend && !opt.killblank ? (size_t) BMAX(opt.design->padding[BBOT], 0) : SIZE_MAX; while (ctx->bottom_start_idx > ctx->top_end_idx && lines_removed < max_lines_removable && empty_line(input.lines + ctx->bottom_start_idx - 1)) { #ifdef DEBUG fprintf(stderr, "Killing trailing blank line in box body.\n"); #endif --(ctx->bottom_start_idx); --(ctx->body_num_lines); ++lines_removed; } } static int org_is_not_blank(bxstr_t *org_line, comparison_t comp_type, size_t idx) { if (comp_type == literal || comp_type == ignore_invisible_shape) { return !is_blank(org_line->memory[idx]); } return !is_blank(org_line->memory[org_line->visible_char[idx]]); } static size_t max_chars_line(bxstr_t *org_line, comparison_t comp_type) { return (comp_type == literal || comp_type == ignore_invisible_shape) ? 
org_line->num_chars : org_line->num_chars_visible; } static size_t confirmed_padding(bxstr_t *org_line, comparison_t comp_type, size_t start_idx, size_t num_padding) { size_t result = 0; size_t max_chars = max_chars_line(org_line, comp_type); for (size_t i = start_idx; i < BMIN(max_chars, start_idx + num_padding); i++) { if (org_is_not_blank(org_line, comp_type, i)) { break; } result++; } return result; } static void remove_top_from_input(remove_ctx_t *ctx) { if (ctx->top_end_idx > ctx->top_start_idx) { for (size_t j = ctx->top_start_idx; j < ctx->top_end_idx; ++j) { free_line(input.lines + j); } memmove(input.lines + ctx->top_start_idx, input.lines + ctx->top_end_idx, (input.num_lines - ctx->top_end_idx) * sizeof(line_t)); input.num_lines -= ctx->top_end_idx - ctx->top_start_idx; } } static size_t calculate_start_idx(remove_ctx_t *ctx, size_t body_line_idx) { size_t input_line_idx = ctx->top_end_idx + body_line_idx; line_ctx_t *lctx = ctx->body + body_line_idx; bxstr_t *org_line = input.lines[input_line_idx].text; size_t s_idx = 0; if (lctx->west_quality > 0) { s_idx = lctx->west_end + confirmed_padding(org_line, ctx->comp_type, lctx->west_end, opt.design->padding[BLEF]); } if (ctx->comp_type == ignore_invisible_input || ctx->comp_type == ignore_invisible_all) { /* our line context worked with visible characters only, convert back to org_line */ s_idx = org_line->first_char[s_idx]; } return s_idx; } static size_t calculate_end_idx(remove_ctx_t *ctx, size_t body_line_idx) { size_t input_line_idx = ctx->top_end_idx + body_line_idx; line_ctx_t *lctx = ctx->body + body_line_idx; bxstr_t *org_line = input.lines[input_line_idx].text; size_t e_idx = lctx->east_quality > 0 ? 
lctx->east_start : max_chars_line(org_line, ctx->comp_type); if (ctx->comp_type == ignore_invisible_input || ctx->comp_type == ignore_invisible_all) { e_idx = org_line->first_char[e_idx]; } return e_idx; } static void remove_vertical_from_input(remove_ctx_t *ctx) { for (size_t body_line_idx = 0; body_line_idx < ctx->body_num_lines; body_line_idx++) { size_t input_line_idx = ctx->top_end_idx + body_line_idx; bxstr_t *org_line = input.lines[input_line_idx].text; size_t s_idx = calculate_start_idx(ctx, body_line_idx); size_t e_idx = calculate_end_idx(ctx, body_line_idx); #ifdef DEBUG fprintf(stderr, "remove_vertical_from_input(): body_line_idx=%d, input_line_idx=%d, s_idx=%d, e_idx=%d, " "input.indent=%d\n", (int) body_line_idx, (int) input_line_idx, (int) s_idx, (int) e_idx, (int) input.indent); #endif bxstr_t *temp2 = bxs_substr(org_line, s_idx, e_idx); if (opt.indentmode == 'b' || opt.indentmode == '\0') { /* restore indentation */ bxstr_t *temp = bxs_prepend_spaces(temp2, input.indent); free_line_text(input.lines + input_line_idx); input.lines[input_line_idx].text = temp; bxs_free(temp2); } else { /* remove indentation */ free_line_text(input.lines + input_line_idx); input.lines[input_line_idx].text = temp2; } } } static void remove_bottom_from_input(remove_ctx_t *ctx) { if (ctx->bottom_end_idx > ctx->bottom_start_idx) { for (size_t j = ctx->bottom_start_idx; j < ctx->bottom_end_idx; ++j) { free_line(input.lines + j); } if (ctx->bottom_end_idx < input.num_lines) { memmove(input.lines + ctx->bottom_start_idx, input.lines + ctx->bottom_end_idx, (input.num_lines - ctx->bottom_end_idx) * sizeof(line_t)); } input.num_lines -= ctx->bottom_end_idx - ctx->bottom_start_idx; } } static void remove_default_padding(remove_ctx_t *ctx, int num_blanks) { if (num_blanks > 0) { for (size_t body_line_idx = 0; body_line_idx < ctx->body_num_lines; body_line_idx++) { size_t input_line_idx = ctx->top_start_idx + body_line_idx; /* top_start_idx, because top was removed! 
*/ bxstr_t *temp = bxs_cut_front(input.lines[input_line_idx].text, (size_t) num_blanks); free_line_text(input.lines + input_line_idx); input.lines[input_line_idx].text = temp; } input.indent -= (size_t) num_blanks; input.maxline -= (size_t) num_blanks; } } static void apply_results_to_input(remove_ctx_t *ctx) { remove_vertical_from_input(ctx); if (opt.killblank || opt.mend) { killblank(ctx); } remove_bottom_from_input(ctx); remove_top_from_input(ctx); input.maxline = 0; input.indent = SIZE_MAX; for (size_t j = 0; j < input.num_lines; ++j) { if (input.lines[j].text->num_columns > input.maxline) { input.maxline = input.lines[j].text->num_columns; } if (input.lines[j].text->indent < input.indent) { input.indent = input.lines[j].text->indent; } } if (ctx->empty_side[BLEF]) { /* If the side were not open, default padding would have been removed when the side was removed. */ remove_default_padding(ctx, BMIN((int) input.indent, opt.design->padding[BLEF])); } size_t num_lines_removed = BMAX(ctx->top_end_idx - ctx->top_start_idx, (size_t) 0) + BMAX(ctx->bottom_end_idx - ctx->bottom_start_idx, (size_t) 0); memset(input.lines + input.num_lines, 0, num_lines_removed * sizeof(line_t)); #ifdef DEBUG print_input_lines(" (remove_box) after box removal"); fprintf(stderr, "Number of lines shrunk by %d.\n", (int) num_lines_removed); #endif } int remove_box() { detect_design_if_needed(); remove_ctx_t *ctx = (remove_ctx_t *) calloc(1, sizeof(remove_ctx_t)); ctx->empty_side[BTOP] = empty_side(opt.design->shape, BTOP); ctx->empty_side[BRIG] = empty_side(opt.design->shape, BRIG); ctx->empty_side[BBOT] = empty_side(opt.design->shape, BBOT); ctx->empty_side[BLEF] = empty_side(opt.design->shape, BLEF); #ifdef DEBUG fprintf(stderr, "Empty sides? 
Top: %d, Right: %d, Bottom: %d, Left: %d\n", ctx->empty_side[BTOP], ctx->empty_side[BRIG], ctx->empty_side[BBOT], ctx->empty_side[BLEF]); #endif ctx->design_is_mono = design_is_mono(opt.design); ctx->input_is_mono = input_is_mono(); ctx->top_start_idx = find_first_line(); if (ctx->top_start_idx >= input.num_lines) { return 0; /* all lines were already blank, so there is no box to remove */ } if (ctx->empty_side[BTOP]) { ctx->top_end_idx = ctx->top_start_idx; } else { ctx->top_end_idx = find_top_side(ctx); } #ifdef DEBUG fprintf(stderr, "ctx->top_start_idx = %d, ctx->top_end_idx = %d\n", (int) ctx->top_start_idx, (int) ctx->top_end_idx); #endif ctx->bottom_end_idx = find_last_line() + 1; if (ctx->empty_side[BBOT]) { ctx->bottom_start_idx = ctx->bottom_end_idx; } else { ctx->bottom_start_idx = find_bottom_side(ctx); } #ifdef DEBUG fprintf(stderr, "ctx->bottom_start_idx = %d, ctx->bottom_end_idx = %d\n", (int) ctx->bottom_start_idx, (int) ctx->bottom_end_idx); #endif if (ctx->bottom_start_idx > ctx->top_end_idx) { ctx->body_num_lines = ctx->bottom_start_idx - ctx->top_end_idx; } if (ctx->body_num_lines > 0) { ctx->body = (line_ctx_t *) calloc(ctx->body_num_lines, sizeof(line_ctx_t)); find_vertical_shapes(ctx); } debug_print_remove_ctx(ctx, "before apply_results_to_input()"); apply_results_to_input(ctx); if (ctx->body != NULL) { for (size_t i = 0; i < ctx->body_num_lines; i++) { BFREE(ctx->body[i].input_line_used); } BFREE(ctx->body); } BFREE(ctx); return 0; } void output_input(const int trim_only) { size_t indent; int ntabs, nspcs; #ifdef DEBUG fprintf(stderr, "output_input() - enter (trim_only=%d)\n", trim_only); #endif for (size_t j = 0; j < input.num_lines; ++j) { if (input.lines[j].text == NULL) { continue; } bxstr_t *temp = bxs_rtrim(input.lines[j].text); bxs_free(input.lines[j].text); input.lines[j].text = temp; if (trim_only) { continue; } char *indentspc = NULL; if (opt.tabexp == 'u') { indent = input.lines[j].text->indent; ntabs = indent / opt.tabstop; nspcs 
= indent % opt.tabstop; indentspc = (char *) malloc(ntabs + nspcs + 1); if (indentspc == NULL) { perror(PROJECT); return; } memset(indentspc, (int) '\t', ntabs); memset(indentspc + ntabs, (int) ' ', nspcs); indentspc[ntabs + nspcs] = '\0'; } else if (opt.tabexp == 'k') { uint32_t *indent32 = tabbify_indent(j, NULL, input.indent); indentspc = u32_strconv_to_output(indent32); BFREE(indent32); indent = input.indent; } else { indentspc = (char *) strdup(""); indent = 0; } char *outtext = u32_strconv_to_output(bxs_first_char_ptr(input.lines[j].text, indent)); fprintf(opt.outfile, "%s%s%s", indentspc, outtext, (input.final_newline || j < input.num_lines - 1 ? opt.eol : "")); BFREE(outtext); BFREE(indentspc); } } /* vim: set sw=4: */