[WIP] small fixes

This commit is contained in:
Johannes Zillmann 2017-02-27 21:19:29 +01:00
parent 5827379d1b
commit 1fcd08f6d5
3 changed files with 21 additions and 13 deletions

View File

@ -1,5 +1,5 @@
import TextItem from './TextItem.jsx'; import TextItem from './TextItem.jsx';
import { isNumber, isDigit } from '../functions.jsx' import { isNumber } from '../functions.jsx'
import { sortByX } from '../textItemFunctions.jsx' import { sortByX } from '../textItemFunctions.jsx'
//Combines text items which are on the same Y at the same time doing inline transformations like //Combines text items which are on the same Y at the same time doing inline transformations like
@ -39,6 +39,7 @@ export default class TextItemCombiner {
text += item.text; text += item.text;
widthSum += item.width; widthSum += item.width;
lastItem = item; lastItem = item;
maxHeight = Math.max(maxHeight, item.height);
}); });
resultItems.push(new TextItem({ resultItems.push(new TextItem({
...itemGroup[0], ...itemGroup[0],

View File

@ -34,7 +34,7 @@ export default class DetectFootnotes extends ToPdfBlockViewTransformation {
type: FOOTNOTE_BLOCK, type: FOOTNOTE_BLOCK,
annotation: ADDED_ANNOTATION, annotation: ADDED_ANNOTATION,
parsedElements: combineResult.parsedElements parsedElements: combineResult.parsedElements
}) });
newBlocks.push(lastFootnote); newBlocks.push(lastFootnote);
} else if (lastFootnote) { } else if (lastFootnote) {
// likely to be the second line of the above footnote // likely to be the second line of the above footnote

View File

@ -4,7 +4,7 @@ import TextItem from '../TextItem.jsx';
import PdfBlock from '../PdfBlock.jsx'; import PdfBlock from '../PdfBlock.jsx';
import TextItemCombiner from '../TextItemCombiner.jsx'; import TextItemCombiner from '../TextItemCombiner.jsx';
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx'; import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
import { TOC_BLOCK } from '../MarkdownElements.jsx'; import { TOC_BLOCK, HEADLINE2 } from '../MarkdownElements.jsx';
import { isDigit } from '../../functions.jsx' import { isDigit } from '../../functions.jsx'
//Detect table of contents pages //Detect table of contents pages
@ -17,13 +17,13 @@ export default class DetectTOC extends ToPdfBlockViewTransformation {
transform(parseResult:ParseResult) { transform(parseResult:ParseResult) {
const {mostUsedDistance} = parseResult.globals; const {mostUsedDistance} = parseResult.globals;
var foundTocPages = 0; var foundTocPages = 0;
var x = Math.min(12, parseResult.content.length); const maxPagesToEvaluate = Math.min(20, parseResult.content.length);
const textCombiner = new TextItemCombiner({ const textCombiner = new TextItemCombiner({
mostUsedDistance: mostUsedDistance mostUsedDistance: mostUsedDistance
}); });
parseResult.content.slice(0, x).forEach(page => { parseResult.content.slice(0, maxPagesToEvaluate).forEach(page => {
var linesCount = 0; var linesCount = 0;
var linesWithDigitsCount = 0; var linesWithDigitsCount = 0;
var lineItemsWithDigits = []; var lineItemsWithDigits = [];
@ -49,25 +49,32 @@ export default class DetectTOC extends ToPdfBlockViewTransformation {
})); }));
} }
}); });
if (!blockHasLinesWithDigits) { if (!headlineBlock && !blockHasLinesWithDigits) {
if (!headlineBlock) { headlineBlock = block;
headlineBlock = block;
}
} }
}); });
if (linesWithDigitsCount * 100 / linesCount > 75) { if (linesWithDigitsCount * 100 / linesCount > 75) {
foundTocPages++; foundTocPages++;
page.blocks.forEach(block => { const newBlocks = [];
if (block !== headlineBlock) { page.blocks.forEach((block) => {
block.annotation = REMOVED_ANNOTATION; block.annotation = REMOVED_ANNOTATION;
newBlocks.push(block);
if (block === headlineBlock) {
newBlocks.push(new PdfBlock({
textItems: textCombiner.combine(block.textItems).textItems,
type: HEADLINE2,
annotation: ADDED_ANNOTATION
}));
} }
}); });
page.blocks.push(new PdfBlock({ newBlocks.push(new PdfBlock({
textItems: lineItemsWithDigits, textItems: lineItemsWithDigits,
type: TOC_BLOCK, type: TOC_BLOCK,
annotation: ADDED_ANNOTATION annotation: ADDED_ANNOTATION
})); }));
page.blocks = newBlocks;
} }
}); });