Outsource annotation definitions

This commit is contained in:
Johannes Zillmann 2017-02-11 15:42:30 +01:00
parent 996e5fae62
commit 1ca9fa4362
8 changed files with 41 additions and 73 deletions

View File

@ -7,3 +7,18 @@ export default class Annotation {
}
}
// Shared annotation singletons used by the transformations.
//
// Consumers detect removed items by comparing against REMOVED_ANNOTATION by
// reference (`textItem.annotation !== REMOVED_ANNOTATION`), so these instances
// must be reused as-is; a freshly constructed Annotation with the same
// category/color would NOT be recognised by that comparison.

// Marks a text item that a transformation newly created.
export const ADDED_ANNOTATION = new Annotation({
    category: 'Added',
    color: 'green'
});

// Marks a text item that a transformation flagged for removal.
export const REMOVED_ANNOTATION = new Annotation({
    category: 'Removed',
    color: 'red'
});

// Marks a text item that a transformation inspected but left as it was.
export const UNCHANGED_ANNOTATION = new Annotation({
    category: 'Unchanged',
    color: 'brown'
});

View File

@ -2,7 +2,7 @@ import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
function combineTextItems(textItems:TextItem[]) {
var numChars = 0;
@ -37,11 +37,7 @@ function combineTextItems(textItems:TextItem[]) {
width: sumWidthWithWhitespaces,
height: maxHeight,
text: combinedText,
annotation: new Annotation({
category: 'combined',
color: 'green'
})
annotation: ADDED_ANNOTATION
});
}
@ -57,11 +53,6 @@ export default class CombineSameY extends Transformation {
transform(pages:PdfPage[]) {
const removedAnnotation = new Annotation({
category: 'removed',
color: 'red'
});
return pages.map(pdfPage => {
const newTextItems = [];
var textItemsWithSameY = [];
@ -72,7 +63,7 @@ export default class CombineSameY extends Transformation {
} else {
// add removed text-items
textItemsWithSameY.forEach(textItem => {
textItem.annotation = removedAnnotation;
textItem.annotation = REMOVED_ANNOTATION;
newTextItems.push(textItem);
});
newTextItems.push(combineTextItems(textItemsWithSameY));
@ -102,7 +93,7 @@ export default class CombineSameY extends Transformation {
processAnnotations(pages:PdfPage[]) {
pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed');
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null)
});
return pages;

View File

@ -2,7 +2,7 @@ import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
import { isNumber } from '../../functions.jsx'
@ -21,19 +21,14 @@ export default class DetectFootnotes extends Transformation {
var nextFooterNumber = 1;
var potentialFootnoteItem;
const removedAnnotation = new Annotation({
category: 'removed',
color: 'red'
});
return pages.map(page => {
const newTextItems = [];
for (var i = 0; i < page.textItems.length; i++) {
const item = page.textItems[i];
if (potentialFootnoteItem) {
if (potentialFootnoteItem.y - item.y < item.height) {
potentialFootnoteItem.annotation = removedAnnotation;
item.annotation = removedAnnotation;
potentialFootnoteItem.annotation = REMOVED_ANNOTATION;
item.annotation = REMOVED_ANNOTATION;
newTextItems.push(potentialFootnoteItem);
newTextItems.push(item);
newTextItems.push(new TextItem({
@ -42,10 +37,7 @@ export default class DetectFootnotes extends Transformation {
width: potentialFootnoteItem.width + item.width,
height: item.height,
text: '[' + potentialFootnoteItem.text + '] ' + item.text,
annotation: new Annotation({
category: 'footnote',
color: 'green'
})
annotation: ADDED_ANNOTATION
}));
//TODO respect multiline!!
nextFooterNumber++;
@ -66,7 +58,7 @@ export default class DetectFootnotes extends Transformation {
processAnnotations(pages:PdfPage[]) {
pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed');
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null)
});
return pages;

View File

@ -3,7 +3,7 @@ import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
export default class DetectLinks extends Transformation {
@ -16,15 +16,6 @@ export default class DetectLinks extends Transformation {
}
transform(pages:PdfPage[]) {
const addedAnnotation = new Annotation({
category: 'added',
color: 'green'
});
const removedAnnotation = new Annotation({
category: 'removed',
color: 'red'
});
pages.forEach(page => {
const newTextItems = [];
page.textItems.forEach(item => {
@ -47,9 +38,9 @@ export default class DetectLinks extends Transformation {
newTextItems.push(new TextItem({
...item,
text: changedWords.join(' '),
annotation: addedAnnotation,
annotation: ADDED_ANNOTATION,
}));
item.annotation = removedAnnotation;
item.annotation = REMOVED_ANNOTATION;
}
});
page.textItems = newTextItems;
@ -59,7 +50,7 @@ export default class DetectLinks extends Transformation {
processAnnotations(pages:PdfPage[]) {
pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed');
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null)
});
return pages;

View File

@ -132,7 +132,7 @@ export default class HeadlineDetector extends Transformation {
...item,
text: item.text,
annotation: new Annotation({
category: "Headline " + headlineLevel,
category: "Headline-" + headlineLevel,
color: 'green'
}),
markdownElement: new Headline({

View File

@ -2,7 +2,7 @@ import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION, UNCHANGED_ANNOTATION } from '../Annotation.jsx';
import { hasUpperCaseCharacterInMiddleOfWord } from '../../functions.jsx'
@ -27,24 +27,15 @@ export default class HeadlineToUppercase extends Transformation {
if (item.markdownElement && item.markdownElement.constructor.name === 'Headline') {
const headline = item.text.trim();
if (hasUpperCaseCharacterInMiddleOfWord(headline)) {
item.annotation = new Annotation({
category: 'removed',
color: 'red'
});
item.annotation = REMOVED_ANNOTATION;
newTextItems.push(item);
newTextItems.push(new TextItem({
...item,
text: item.text.toUpperCase(),
annotation: new Annotation({
category: "Uppercased",
color: 'green'
})
annotation: ADDED_ANNOTATION
}));
} else {
item.annotation = new Annotation({
category: 'Untouched',
color: 'brown'
});
item.annotation = UNCHANGED_ANNOTATION;
newTextItems.push(item);
}
} else {
@ -60,7 +51,7 @@ export default class HeadlineToUppercase extends Transformation {
processAnnotations(pages:PdfPage[]) {
pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed');
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null)
});
return pages;

View File

@ -1,7 +1,7 @@
import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
import { isDigit } from '../../functions.jsx'
@ -51,10 +51,7 @@ export default class RemoveRepetitiveElements extends Transformation {
var combinedCoordinates = combineCoordinates(textItem);
if (repetitionCounts[combinedCoordinates] > 1) {
// console.debug("page " + pdfPage.index + " removed :" + repetitionCounts[combinedCoordinates] + " :" + textItem.text);
textItem.annotation = new Annotation({
category: 'removed',
color: 'red'
});
textItem.annotation = REMOVED_ANNOTATION;
}
});
});
@ -63,7 +60,7 @@ export default class RemoveRepetitiveElements extends Transformation {
processAnnotations(pages:PdfPage[]) {
pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed');
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
});
return pages;
}

View File

@ -3,7 +3,7 @@ import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
export default class RemoveWhitespaces extends Transformation {
@ -16,15 +16,6 @@ export default class RemoveWhitespaces extends Transformation {
}
transform(pages:PdfPage[]) {
const addedAnnotation = new Annotation({
category: 'added',
color: 'green'
});
const removedAnnotation = new Annotation({
category: 'removed',
color: 'red'
});
pages.forEach(page => {
const newTextItems = [];
page.textItems.forEach(item => {
@ -43,9 +34,9 @@ export default class RemoveWhitespaces extends Transformation {
newTextItems.push(new TextItem({
...item,
text: changedWords.join(' '),
annotation: addedAnnotation,
annotation: ADDED_ANNOTATION,
}));
item.annotation = removedAnnotation;
item.annotation = REMOVED_ANNOTATION;
}
});
page.textItems = newTextItems;
@ -55,7 +46,7 @@ export default class RemoveWhitespaces extends Transformation {
processAnnotations(pages:PdfPage[]) {
pages.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed');
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null)
});
return pages;