From df07968c4de03b3d7a40c5b1867f6fd0b8d493af Mon Sep 17 00:00:00 2001 From: Johannes Zillmann Date: Sat, 28 Jan 2017 21:23:11 +0100 Subject: [PATCH] CombineSameY => Switch to annotation strategy --- .../CombineSameYTransformation.jsx | 37 +++++++++++++++---- .../RemoveRepetitiveElements.jsx | 2 +- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/javascript/models/transformations/CombineSameYTransformation.jsx b/src/javascript/models/transformations/CombineSameYTransformation.jsx index 5f41af6..59440fd 100644 --- a/src/javascript/models/transformations/CombineSameYTransformation.jsx +++ b/src/javascript/models/transformations/CombineSameYTransformation.jsx @@ -2,6 +2,7 @@ import Transformation from './Transformation.jsx'; import TextItem from '../TextItem.jsx'; import PdfPage from '../PdfPage.jsx'; import ContentView from '../ContentView.jsx'; +import Annotation from '../Annotation.jsx'; export default class CombineSameYTransformation extends Transformation { @@ -14,6 +15,16 @@ export default class CombineSameYTransformation extends Transformation { } transform(pages:PdfPage[]) { + + const removedAnnotation = new Annotation({ + category: 'removed', + color: 'red' + }); + const combinedAnnotation = new Annotation({ + category: 'combined', + color: 'green' + }); + return pages.map(pdfPage => { const newTextItems = []; var lastTextItem; @@ -21,12 +32,14 @@ export default class CombineSameYTransformation extends Transformation { if (!lastTextItem) { lastTextItem = textItem; } else { - if (textItem.y == lastTextItem.y) { - //combine + if (textItem.y == lastTextItem.y) { //combine - // console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width); - // console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width); - // console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width)); + if (!lastTextItem.annotation) { + lastTextItem.annotation = removedAnnotation; + newTextItems.push(lastTextItem); + } + textItem.annotation = removedAnnotation; + newTextItems.push(textItem); var combinedText = lastTextItem.text; //TODO make 5 dependent on text size or biggest gap? @@ -40,10 +53,10 @@ export default class CombineSameYTransformation extends Transformation { y: lastTextItem.y, width: textItem.x - lastTextItem.x + textItem.width, height: lastTextItem.height, //might this cause problems ? - text: combinedText + text: combinedText, + annotation: combinedAnnotation }); - } else { - //rotate + } else { //rotate newTextItems.push(lastTextItem); lastTextItem = textItem; } @@ -60,4 +73,12 @@ export default class CombineSameYTransformation extends Transformation { }); } + processAnnotations(pages:PdfPage[]) { + pages.forEach(page => { + page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); + page.textItems.forEach(textItem => textItem.annotation = null) + }); + return pages; + } + } \ No newline at end of file diff --git a/src/javascript/models/transformations/RemoveRepetitiveElements.jsx b/src/javascript/models/transformations/RemoveRepetitiveElements.jsx index f341f38..8e84bb4 100644 --- a/src/javascript/models/transformations/RemoveRepetitiveElements.jsx +++ b/src/javascript/models/transformations/RemoveRepetitiveElements.jsx @@ -1,7 +1,7 @@ import Transformation from './Transformation.jsx'; -import Annotation from '../Annotation.jsx'; import PdfPage from '../PdfPage.jsx'; import ContentView from '../ContentView.jsx'; +import Annotation from '../Annotation.jsx'; function hashCodeIgnoringNumbers(string) {