CombineSameY => Switch to annotation strategy

This commit is contained in:
Johannes Zillmann 2017-01-28 21:23:11 +01:00
parent 201753a2e0
commit df07968c4d
2 changed files with 30 additions and 9 deletions

View File

@ -2,6 +2,7 @@ import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
export default class CombineSameYTransformation extends Transformation {
@ -14,6 +15,16 @@ export default class CombineSameYTransformation extends Transformation {
}
transform(pages:PdfPage[]) {
const removedAnnotation = new Annotation({
category: 'removed',
color: 'red'
});
const combinedAnnotation = new Annotation({
category: 'combined',
color: 'green'
});
return pages.map(pdfPage => {
const newTextItems = [];
var lastTextItem;
@ -21,12 +32,14 @@ export default class CombineSameYTransformation extends Transformation {
if (!lastTextItem) {
lastTextItem = textItem;
} else {
if (textItem.y == lastTextItem.y) {
//combine
if (textItem.y == lastTextItem.y) { //combine
// console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width);
// console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width);
// console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width));
if (!lastTextItem.annotation) {
lastTextItem.annotation = removedAnnotation;
newTextItems.push(lastTextItem);
}
textItem.annotation = removedAnnotation;
newTextItems.push(textItem);
var combinedText = lastTextItem.text;
//TODO make 5 dependent on text size or biggest gap?
@ -40,10 +53,10 @@ export default class CombineSameYTransformation extends Transformation {
y: lastTextItem.y,
width: textItem.x - lastTextItem.x + textItem.width,
height: lastTextItem.height, //might this cause problems ?
text: combinedText
text: combinedText,
annotation: combinedAnnotation
});
} else {
//rotate
} else { //rotate
newTextItems.push(lastTextItem);
lastTextItem = textItem;
}
@ -60,4 +73,12 @@ export default class CombineSameYTransformation extends Transformation {
});
}
processAnnotations(pages:PdfPage[]) {
pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed');
page.textItems.forEach(textItem => textItem.annotation = null)
});
return pages;
}
}

View File

@ -1,7 +1,7 @@
import Transformation from './Transformation.jsx';
import Annotation from '../Annotation.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
function hashCodeIgnoringNumbers(string) {