From 1fcd08f6d5d0236898fc9797dd135b675c8ff89f Mon Sep 17 00:00:00 2001 From: Johannes Zillmann Date: Mon, 27 Feb 2017 21:19:29 +0100 Subject: [PATCH] [WIP] small fixes --- src/javascript/models/TextItemCombiner.jsx | 3 +- .../transformations/DetectFootnotes.jsx | 2 +- .../models/transformations/DetectTOC.jsx | 29 ++++++++++++------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/javascript/models/TextItemCombiner.jsx b/src/javascript/models/TextItemCombiner.jsx index 07453cc..4333fbc 100644 --- a/src/javascript/models/TextItemCombiner.jsx +++ b/src/javascript/models/TextItemCombiner.jsx @@ -1,5 +1,5 @@ import TextItem from './TextItem.jsx'; -import { isNumber, isDigit } from '../functions.jsx' +import { isNumber } from '../functions.jsx' import { sortByX } from '../textItemFunctions.jsx' //Combines text items which are on the same Y at the same time doing inline transformations like @@ -39,6 +39,7 @@ export default class TextItemCombiner { text += item.text; widthSum += item.width; lastItem = item; + maxHeight = Math.max(maxHeight, item.height); }); resultItems.push(new TextItem({ ...itemGroup[0], diff --git a/src/javascript/models/transformations/DetectFootnotes.jsx b/src/javascript/models/transformations/DetectFootnotes.jsx index f3ba1af..96282a8 100644 --- a/src/javascript/models/transformations/DetectFootnotes.jsx +++ b/src/javascript/models/transformations/DetectFootnotes.jsx @@ -34,7 +34,7 @@ export default class DetectFootnotes extends ToPdfBlockViewTransformation { type: FOOTNOTE_BLOCK, annotation: ADDED_ANNOTATION, parsedElements: combineResult.parsedElements - }) + }); newBlocks.push(lastFootnote); } else if (lastFootnote) { // likely to be the second line of aboves footnote diff --git a/src/javascript/models/transformations/DetectTOC.jsx b/src/javascript/models/transformations/DetectTOC.jsx index 933ec31..b16a102 100644 --- a/src/javascript/models/transformations/DetectTOC.jsx +++ b/src/javascript/models/transformations/DetectTOC.jsx @@ -4,7 +4,7 @@ import TextItem from '../TextItem.jsx'; import PdfBlock from '../PdfBlock.jsx'; import TextItemCombiner from '../TextItemCombiner.jsx'; import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx'; -import { TOC_BLOCK } from '../MarkdownElements.jsx'; +import { TOC_BLOCK, HEADLINE2 } from '../MarkdownElements.jsx'; import { isDigit } from '../../functions.jsx' //Detect table of contents pages @@ -17,13 +17,13 @@ export default class DetectTOC extends ToPdfBlockViewTransformation { transform(parseResult:ParseResult) { const {mostUsedDistance} = parseResult.globals; var foundTocPages = 0; - var x = Math.min(12, parseResult.content.length); + const maxPagesToEvaluate = Math.min(20, parseResult.content.length); const textCombiner = new TextItemCombiner({ mostUsedDistance: mostUsedDistance }); - parseResult.content.slice(0, x).forEach(page => { + parseResult.content.slice(0, maxPagesToEvaluate).forEach(page => { var linesCount = 0; var linesWithDigitsCount = 0; var lineItemsWithDigits = []; @@ -49,25 +49,32 @@ export default class DetectTOC extends ToPdfBlockViewTransformation { })); } }); - if (!blockHasLinesWithDigits) { - if (!headlineBlock) { - headlineBlock = block; - } + if (!headlineBlock && !blockHasLinesWithDigits) { + headlineBlock = block; } }); if (linesWithDigitsCount * 100 / linesCount > 75) { foundTocPages++; - page.blocks.forEach(block => { - if (block !== headlineBlock) { - block.annotation = REMOVED_ANNOTATION; + const newBlocks = []; + page.blocks.forEach((block) => { + block.annotation = REMOVED_ANNOTATION; + newBlocks.push(block); + + if (block === headlineBlock) { + newBlocks.push(new PdfBlock({ + textItems: textCombiner.combine(block.textItems).textItems, + type: HEADLINE2, + annotation: ADDED_ANNOTATION + })); } }); - page.blocks.push(new PdfBlock({ + newBlocks.push(new PdfBlock({ textItems: lineItemsWithDigits, type: TOC_BLOCK, annotation: ADDED_ANNOTATION })); + page.blocks = newBlocks; } });