Outsource annotation definitions

This commit is contained in:
Johannes Zillmann 2017-02-11 15:42:30 +01:00
parent 996e5fae62
commit 1ca9fa4362
8 changed files with 41 additions and 73 deletions

View File

@ -7,3 +7,18 @@ export default class Annotation {
} }
} }
/**
 * Shared annotation for text items that a transformation newly created
 * (for example a combined or rewritten item).
 *
 * This is a single shared instance: code that needs to recognise it should
 * compare by identity (`annotation === ADDED_ANNOTATION`).
 */
export const ADDED_ANNOTATION = new Annotation({
    category: 'Added',
    color: 'green'
});

/**
 * Shared annotation for text items that a transformation replaced or dropped.
 *
 * Transformations filter these items out by identity
 * (`textItem.annotation !== REMOVED_ANNOTATION`), so always assign this exact
 * instance rather than constructing an equivalent `Annotation`.
 */
export const REMOVED_ANNOTATION = new Annotation({
    category: 'Removed',
    color: 'red'
});

/**
 * Shared annotation for text items that a transformation inspected but left
 * as they were.
 */
export const UNCHANGED_ANNOTATION = new Annotation({
    category: 'Unchanged',
    color: 'brown'
});

View File

@ -2,7 +2,7 @@ import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx'; import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx'; import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
function combineTextItems(textItems:TextItem[]) { function combineTextItems(textItems:TextItem[]) {
var numChars = 0; var numChars = 0;
@ -37,11 +37,7 @@ function combineTextItems(textItems:TextItem[]) {
width: sumWidthWithWhitespaces, width: sumWidthWithWhitespaces,
height: maxHeight, height: maxHeight,
text: combinedText, text: combinedText,
annotation: new Annotation({ annotation: ADDED_ANNOTATION
category: 'combined',
color: 'green'
})
}); });
} }
@ -57,11 +53,6 @@ export default class CombineSameY extends Transformation {
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {
const removedAnnotation = new Annotation({
category: 'removed',
color: 'red'
});
return pages.map(pdfPage => { return pages.map(pdfPage => {
const newTextItems = []; const newTextItems = [];
var textItemsWithSameY = []; var textItemsWithSameY = [];
@ -72,7 +63,7 @@ export default class CombineSameY extends Transformation {
} else { } else {
// add removed text-items // add removed text-items
textItemsWithSameY.forEach(textItem => { textItemsWithSameY.forEach(textItem => {
textItem.annotation = removedAnnotation; textItem.annotation = REMOVED_ANNOTATION;
newTextItems.push(textItem); newTextItems.push(textItem);
}); });
newTextItems.push(combineTextItems(textItemsWithSameY)); newTextItems.push(combineTextItems(textItemsWithSameY));
@ -102,7 +93,7 @@ export default class CombineSameY extends Transformation {
processAnnotations(pages:PdfPage[]) { processAnnotations(pages:PdfPage[]) {
pages.forEach(page => { pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null) page.textItems.forEach(textItem => textItem.annotation = null)
}); });
return pages; return pages;

View File

@ -2,7 +2,7 @@ import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx'; import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx'; import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
import { isNumber } from '../../functions.jsx' import { isNumber } from '../../functions.jsx'
@ -21,19 +21,14 @@ export default class DetectFootnotes extends Transformation {
var nextFooterNumber = 1; var nextFooterNumber = 1;
var potentialFootnoteItem; var potentialFootnoteItem;
const removedAnnotation = new Annotation({
category: 'removed',
color: 'red'
});
return pages.map(page => { return pages.map(page => {
const newTextItems = []; const newTextItems = [];
for (var i = 0; i < page.textItems.length; i++) { for (var i = 0; i < page.textItems.length; i++) {
const item = page.textItems[i]; const item = page.textItems[i];
if (potentialFootnoteItem) { if (potentialFootnoteItem) {
if (potentialFootnoteItem.y - item.y < item.height) { if (potentialFootnoteItem.y - item.y < item.height) {
potentialFootnoteItem.annotation = removedAnnotation; potentialFootnoteItem.annotation = REMOVED_ANNOTATION;
item.annotation = removedAnnotation; item.annotation = REMOVED_ANNOTATION;
newTextItems.push(potentialFootnoteItem); newTextItems.push(potentialFootnoteItem);
newTextItems.push(item); newTextItems.push(item);
newTextItems.push(new TextItem({ newTextItems.push(new TextItem({
@ -42,10 +37,7 @@ export default class DetectFootnotes extends Transformation {
width: potentialFootnoteItem.width + item.width, width: potentialFootnoteItem.width + item.width,
height: item.height, height: item.height,
text: '[' + potentialFootnoteItem.text + '] ' + item.text, text: '[' + potentialFootnoteItem.text + '] ' + item.text,
annotation: new Annotation({ annotation: ADDED_ANNOTATION
category: 'footnote',
color: 'green'
})
})); }));
//TODO respect multiline!! //TODO respect multiline!!
nextFooterNumber++; nextFooterNumber++;
@ -66,7 +58,7 @@ export default class DetectFootnotes extends Transformation {
processAnnotations(pages:PdfPage[]) { processAnnotations(pages:PdfPage[]) {
pages.forEach(page => { pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null) page.textItems.forEach(textItem => textItem.annotation = null)
}); });
return pages; return pages;

View File

@ -3,7 +3,7 @@ import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx'; import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx'; import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
export default class DetectLinks extends Transformation { export default class DetectLinks extends Transformation {
@ -16,15 +16,6 @@ export default class DetectLinks extends Transformation {
} }
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {
const addedAnnotation = new Annotation({
category: 'added',
color: 'green'
});
const removedAnnotation = new Annotation({
category: 'removed',
color: 'red'
});
pages.forEach(page => { pages.forEach(page => {
const newTextItems = []; const newTextItems = [];
page.textItems.forEach(item => { page.textItems.forEach(item => {
@ -47,9 +38,9 @@ export default class DetectLinks extends Transformation {
newTextItems.push(new TextItem({ newTextItems.push(new TextItem({
...item, ...item,
text: changedWords.join(' '), text: changedWords.join(' '),
annotation: addedAnnotation, annotation: ADDED_ANNOTATION,
})); }));
item.annotation = removedAnnotation; item.annotation = REMOVED_ANNOTATION;
} }
}); });
page.textItems = newTextItems; page.textItems = newTextItems;
@ -59,7 +50,7 @@ export default class DetectLinks extends Transformation {
processAnnotations(pages:PdfPage[]) { processAnnotations(pages:PdfPage[]) {
pages.forEach(page => { pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null) page.textItems.forEach(textItem => textItem.annotation = null)
}); });
return pages; return pages;

View File

@ -132,7 +132,7 @@ export default class HeadlineDetector extends Transformation {
...item, ...item,
text: item.text, text: item.text,
annotation: new Annotation({ annotation: new Annotation({
category: "Headline " + headlineLevel, category: "Headline-" + headlineLevel,
color: 'green' color: 'green'
}), }),
markdownElement: new Headline({ markdownElement: new Headline({

View File

@ -2,7 +2,7 @@ import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx'; import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx'; import { ADDED_ANNOTATION, REMOVED_ANNOTATION, UNCHANGED_ANNOTATION } from '../Annotation.jsx';
import { hasUpperCaseCharacterInMiddleOfWord } from '../../functions.jsx' import { hasUpperCaseCharacterInMiddleOfWord } from '../../functions.jsx'
@ -27,24 +27,15 @@ export default class HeadlineToUppercase extends Transformation {
if (item.markdownElement && item.markdownElement.constructor.name === 'Headline') { if (item.markdownElement && item.markdownElement.constructor.name === 'Headline') {
const headline = item.text.trim(); const headline = item.text.trim();
if (hasUpperCaseCharacterInMiddleOfWord(headline)) { if (hasUpperCaseCharacterInMiddleOfWord(headline)) {
item.annotation = new Annotation({ item.annotation = REMOVED_ANNOTATION;
category: 'removed',
color: 'red'
});
newTextItems.push(item); newTextItems.push(item);
newTextItems.push(new TextItem({ newTextItems.push(new TextItem({
...item, ...item,
text: item.text.toUpperCase(), text: item.text.toUpperCase(),
annotation: new Annotation({ annotation: ADDED_ANNOTATION
category: "Uppercased",
color: 'green'
})
})); }));
} else { } else {
item.annotation = new Annotation({ item.annotation = UNCHANGED_ANNOTATION;
category: 'Untouched',
color: 'brown'
});
newTextItems.push(item); newTextItems.push(item);
} }
} else { } else {
@ -60,7 +51,7 @@ export default class HeadlineToUppercase extends Transformation {
processAnnotations(pages:PdfPage[]) { processAnnotations(pages:PdfPage[]) {
pages.forEach(page => { pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null) page.textItems.forEach(textItem => textItem.annotation = null)
}); });
return pages; return pages;

View File

@ -1,7 +1,7 @@
import Transformation from './Transformation.jsx'; import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx'; import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx'; import { REMOVED_ANNOTATION } from '../Annotation.jsx';
import { isDigit } from '../../functions.jsx' import { isDigit } from '../../functions.jsx'
@ -51,10 +51,7 @@ export default class RemoveRepetitiveElements extends Transformation {
var combinedCoordinates = combineCoordinates(textItem); var combinedCoordinates = combineCoordinates(textItem);
if (repetitionCounts[combinedCoordinates] > 1) { if (repetitionCounts[combinedCoordinates] > 1) {
// console.debug("page " + pdfPage.index + " removed :" + repetitionCounts[combinedCoordinates] + " :" + textItem.text); // console.debug("page " + pdfPage.index + " removed :" + repetitionCounts[combinedCoordinates] + " :" + textItem.text);
textItem.annotation = new Annotation({ textItem.annotation = REMOVED_ANNOTATION;
category: 'removed',
color: 'red'
});
} }
}); });
}); });
@ -63,7 +60,7 @@ export default class RemoveRepetitiveElements extends Transformation {
processAnnotations(pages:PdfPage[]) { processAnnotations(pages:PdfPage[]) {
pages.forEach(page => { pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
}); });
return pages; return pages;
} }

View File

@ -3,7 +3,7 @@ import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx'; import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx'; import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
export default class RemoveWhitespaces extends Transformation { export default class RemoveWhitespaces extends Transformation {
@ -16,15 +16,6 @@ export default class RemoveWhitespaces extends Transformation {
} }
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {
const addedAnnotation = new Annotation({
category: 'added',
color: 'green'
});
const removedAnnotation = new Annotation({
category: 'removed',
color: 'red'
});
pages.forEach(page => { pages.forEach(page => {
const newTextItems = []; const newTextItems = [];
page.textItems.forEach(item => { page.textItems.forEach(item => {
@ -43,9 +34,9 @@ export default class RemoveWhitespaces extends Transformation {
newTextItems.push(new TextItem({ newTextItems.push(new TextItem({
...item, ...item,
text: changedWords.join(' '), text: changedWords.join(' '),
annotation: addedAnnotation, annotation: ADDED_ANNOTATION,
})); }));
item.annotation = removedAnnotation; item.annotation = REMOVED_ANNOTATION;
} }
}); });
page.textItems = newTextItems; page.textItems = newTextItems;
@ -55,7 +46,7 @@ export default class RemoveWhitespaces extends Transformation {
processAnnotations(pages:PdfPage[]) { processAnnotations(pages:PdfPage[]) {
pages.forEach(page => { pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null) page.textItems.forEach(textItem => textItem.annotation = null)
}); });
return pages; return pages;