mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-28 10:43:46 +01:00
CombineSameY => Switch to annotation strategy
This commit is contained in:
parent
201753a2e0
commit
df07968c4d
@ -2,6 +2,7 @@ import Transformation from './Transformation.jsx';
|
||||
import TextItem from '../TextItem.jsx';
|
||||
import PdfPage from '../PdfPage.jsx';
|
||||
import ContentView from '../ContentView.jsx';
|
||||
import Annotation from '../Annotation.jsx';
|
||||
|
||||
export default class CombineSameYTransformation extends Transformation {
|
||||
|
||||
@ -14,6 +15,16 @@ export default class CombineSameYTransformation extends Transformation {
|
||||
}
|
||||
|
||||
transform(pages:PdfPage[]) {
|
||||
|
||||
const removedAnnotation = new Annotation({
|
||||
category: 'removed',
|
||||
color: 'red'
|
||||
});
|
||||
const combinedAnnotation = new Annotation({
|
||||
category: 'combined',
|
||||
color: 'green'
|
||||
});
|
||||
|
||||
return pages.map(pdfPage => {
|
||||
const newTextItems = [];
|
||||
var lastTextItem;
|
||||
@ -21,12 +32,14 @@ export default class CombineSameYTransformation extends Transformation {
|
||||
if (!lastTextItem) {
|
||||
lastTextItem = textItem;
|
||||
} else {
|
||||
if (textItem.y == lastTextItem.y) {
|
||||
//combine
|
||||
if (textItem.y == lastTextItem.y) { //combine
|
||||
|
||||
// console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width);
|
||||
// console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width);
|
||||
// console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width));
|
||||
if (!lastTextItem.annotation) {
|
||||
lastTextItem.annotation = removedAnnotation;
|
||||
newTextItems.push(lastTextItem);
|
||||
}
|
||||
textItem.annotation = removedAnnotation;
|
||||
newTextItems.push(textItem);
|
||||
|
||||
var combinedText = lastTextItem.text;
|
||||
//TODO make 5 dependent on text size or biggest gap?
|
||||
@ -40,10 +53,10 @@ export default class CombineSameYTransformation extends Transformation {
|
||||
y: lastTextItem.y,
|
||||
width: textItem.x - lastTextItem.x + textItem.width,
|
||||
height: lastTextItem.height, //might this cause problems ?
|
||||
text: combinedText
|
||||
text: combinedText,
|
||||
annotation: combinedAnnotation
|
||||
});
|
||||
} else {
|
||||
//rotate
|
||||
} else { //rotate
|
||||
newTextItems.push(lastTextItem);
|
||||
lastTextItem = textItem;
|
||||
}
|
||||
@ -60,4 +73,12 @@ export default class CombineSameYTransformation extends Transformation {
|
||||
});
|
||||
}
|
||||
|
||||
processAnnotations(pages:PdfPage[]) {
|
||||
pages.forEach(page => {
|
||||
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed');
|
||||
page.textItems.forEach(textItem => textItem.annotation = null)
|
||||
});
|
||||
return pages;
|
||||
}
|
||||
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
import Transformation from './Transformation.jsx';
|
||||
import Annotation from '../Annotation.jsx';
|
||||
import PdfPage from '../PdfPage.jsx';
|
||||
import ContentView from '../ContentView.jsx';
|
||||
import Annotation from '../Annotation.jsx';
|
||||
|
||||
|
||||
function hashCodeIgnoringNumbers(string) {
|
||||
|
Loading…
Reference in New Issue
Block a user