CombineSameY => Switch to annotation strategy

This commit is contained in:
Johannes Zillmann 2017-01-28 21:23:11 +01:00
parent 201753a2e0
commit df07968c4d
2 changed files with 30 additions and 9 deletions

View File

@ -2,6 +2,7 @@ import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx'; import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
export default class CombineSameYTransformation extends Transformation { export default class CombineSameYTransformation extends Transformation {
@ -14,6 +15,16 @@ export default class CombineSameYTransformation extends Transformation {
} }
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {
const removedAnnotation = new Annotation({
category: 'removed',
color: 'red'
});
const combinedAnnotation = new Annotation({
category: 'combined',
color: 'green'
});
return pages.map(pdfPage => { return pages.map(pdfPage => {
const newTextItems = []; const newTextItems = [];
var lastTextItem; var lastTextItem;
@ -21,12 +32,14 @@ export default class CombineSameYTransformation extends Transformation {
if (!lastTextItem) { if (!lastTextItem) {
lastTextItem = textItem; lastTextItem = textItem;
} else { } else {
if (textItem.y == lastTextItem.y) { if (textItem.y == lastTextItem.y) { //combine
//combine
// console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width); if (!lastTextItem.annotation) {
// console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width); lastTextItem.annotation = removedAnnotation;
// console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width)); newTextItems.push(lastTextItem);
}
textItem.annotation = removedAnnotation;
newTextItems.push(textItem);
var combinedText = lastTextItem.text; var combinedText = lastTextItem.text;
//TODO make 5 dependent on text size or biggest gap? //TODO make 5 dependent on text size or biggest gap?
@ -40,10 +53,10 @@ export default class CombineSameYTransformation extends Transformation {
y: lastTextItem.y, y: lastTextItem.y,
width: textItem.x - lastTextItem.x + textItem.width, width: textItem.x - lastTextItem.x + textItem.width,
height: lastTextItem.height, //might this cause problems ? height: lastTextItem.height, //might this cause problems ?
text: combinedText text: combinedText,
annotation: combinedAnnotation
}); });
} else { } else { //rotate
//rotate
newTextItems.push(lastTextItem); newTextItems.push(lastTextItem);
lastTextItem = textItem; lastTextItem = textItem;
} }
@ -60,4 +73,12 @@ export default class CombineSameYTransformation extends Transformation {
}); });
} }
/**
 * Applies the annotations attached to the text items of each page.
 *
 * Every text item whose annotation has the category 'removed' is dropped
 * from its page; the annotation of every remaining item is then reset to
 * null. The pages are modified in place.
 *
 * @param pages the pages whose text items should be cleaned up
 * @returns the same `pages` array that was passed in
 */
processAnnotations(pages:PdfPage[]) {
    // An item survives unless it carries an annotation of category 'removed'.
    const isKept = item => !(item.annotation && item.annotation.category === 'removed');

    pages.forEach(pdfPage => {
        pdfPage.textItems = pdfPage.textItems.filter(isKept);
        // Survivors may still carry another annotation; clear it.
        pdfPage.textItems.forEach(item => {
            item.annotation = null;
        });
    });

    return pages;
}
} }

View File

@ -1,7 +1,7 @@
import Transformation from './Transformation.jsx'; import Transformation from './Transformation.jsx';
import Annotation from '../Annotation.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx'; import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
function hashCodeIgnoringNumbers(string) { function hashCodeIgnoringNumbers(string) {