From 1ca9fa4362a7dced3e18dfa3af21415f3a4be34a Mon Sep 17 00:00:00 2001 From: Johannes Zillmann Date: Sat, 11 Feb 2017 15:42:30 +0100 Subject: [PATCH] Outsource annotation definitions --- src/javascript/models/Annotation.jsx | 15 +++++++++++++++ .../models/transformations/CombineSameY.jsx | 17 ++++------------- .../transformations/DetectFootnotes.jsx | 18 +++++------------- .../models/transformations/DetectLinks.jsx | 17 ++++------------- .../transformations/HeadlineDetector.jsx | 2 +- .../transformations/HeadlineToUppercase.jsx | 19 +++++-------------- .../RemoveRepetitiveElements.jsx | 9 +++------ .../transformations/RemoveWhitespaces.jsx | 17 ++++------------- 8 files changed, 41 insertions(+), 73 deletions(-) diff --git a/src/javascript/models/Annotation.jsx b/src/javascript/models/Annotation.jsx index 65eb890..21bda9b 100644 --- a/src/javascript/models/Annotation.jsx +++ b/src/javascript/models/Annotation.jsx @@ -7,3 +7,18 @@ export default class Annotation { } } + +export const ADDED_ANNOTATION = new Annotation({ + category: 'Added', + color: 'green' +}); + +export const REMOVED_ANNOTATION = new Annotation({ + category: 'Removed', + color: 'red' +}); + +export const UNCHANGED_ANNOTATION = new Annotation({ + category: 'Unchanged', + color: 'brown' +}) diff --git a/src/javascript/models/transformations/CombineSameY.jsx b/src/javascript/models/transformations/CombineSameY.jsx index 6310176..959d3da 100644 --- a/src/javascript/models/transformations/CombineSameY.jsx +++ b/src/javascript/models/transformations/CombineSameY.jsx @@ -2,7 +2,7 @@ import Transformation from './Transformation.jsx'; import TextItem from '../TextItem.jsx'; import PdfPage from '../PdfPage.jsx'; import ContentView from '../ContentView.jsx'; -import Annotation from '../Annotation.jsx'; +import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx'; function combineTextItems(textItems:TextItem[]) { var numChars = 0; @@ -37,11 +37,7 @@ function combineTextItems(textItems:TextItem[]) { width: sumWidthWithWhitespaces, height: maxHeight, text: combinedText, - annotation: new Annotation({ - category: 'combined', - color: 'green' - }) - + annotation: ADDED_ANNOTATION }); } @@ -57,11 +53,6 @@ export default class CombineSameY extends Transformation { transform(pages:PdfPage[]) { - const removedAnnotation = new Annotation({ - category: 'removed', - color: 'red' - }); - return pages.map(pdfPage => { const newTextItems = []; var textItemsWithSameY = []; @@ -72,7 +63,7 @@ export default class CombineSameY extends Transformation { } else { // add removed text-items textItemsWithSameY.forEach(textItem => { - textItem.annotation = removedAnnotation; + textItem.annotation = REMOVED_ANNOTATION; newTextItems.push(textItem); }); newTextItems.push(combineTextItems(textItemsWithSameY)); @@ -102,7 +93,7 @@ export default class CombineSameY extends Transformation { processAnnotations(pages:PdfPage[]) { pages.forEach(page => { - page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); + page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION); page.textItems.forEach(textItem => textItem.annotation = null) }); return pages; diff --git a/src/javascript/models/transformations/DetectFootnotes.jsx b/src/javascript/models/transformations/DetectFootnotes.jsx index e307361..2f4ba5e 100644 --- a/src/javascript/models/transformations/DetectFootnotes.jsx +++ b/src/javascript/models/transformations/DetectFootnotes.jsx @@ -2,7 +2,7 @@ import Transformation from './Transformation.jsx'; import TextItem from '../TextItem.jsx'; import PdfPage from '../PdfPage.jsx'; import ContentView from '../ContentView.jsx'; -import Annotation from '../Annotation.jsx'; +import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx'; import { isNumber } from '../../functions.jsx' @@ -21,19 +21,14 @@ export default class DetectFootnotes extends Transformation { var nextFooterNumber = 1; var potentialFootnoteItem; - const removedAnnotation = new Annotation({ - category: 'removed', - color: 'red' - }); - return pages.map(page => { const newTextItems = []; for (var i = 0; i < page.textItems.length; i++) { const item = page.textItems[i]; if (potentialFootnoteItem) { if (potentialFootnoteItem.y - item.y < item.height) { - potentialFootnoteItem.annotation = removedAnnotation; - item.annotation = removedAnnotation; + potentialFootnoteItem.annotation = REMOVED_ANNOTATION; + item.annotation = REMOVED_ANNOTATION; newTextItems.push(potentialFootnoteItem); newTextItems.push(item); newTextItems.push(new TextItem({ @@ -42,10 +37,7 @@ export default class DetectFootnotes extends Transformation { width: potentialFootnoteItem.width + item.width, height: item.height, text: '[' + potentialFootnoteItem.text + '] ' + item.text, - annotation: new Annotation({ - category: 'footnote', - color: 'green' - }) + annotation: ADDED_ANNOTATION })); //TODO repsect multiline!! nextFooterNumber++; @@ -66,7 +58,7 @@ export default class DetectFootnotes extends Transformation { processAnnotations(pages:PdfPage[]) { pages.forEach(page => { - page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); + page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION); page.textItems.forEach(textItem => textItem.annotation = null) }); return pages; diff --git a/src/javascript/models/transformations/DetectLinks.jsx b/src/javascript/models/transformations/DetectLinks.jsx index aa79919..d675fc7 100644 --- a/src/javascript/models/transformations/DetectLinks.jsx +++ b/src/javascript/models/transformations/DetectLinks.jsx @@ -3,7 +3,7 @@ import TextItem from '../TextItem.jsx'; import PdfPage from '../PdfPage.jsx'; import ContentView from '../ContentView.jsx'; -import Annotation from '../Annotation.jsx'; +import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx'; export default class DetectLinks extends Transformation { @@ -16,15 +16,6 @@ export default class DetectLinks extends Transformation { } transform(pages:PdfPage[]) { - const addedAnnotation = new Annotation({ - category: 'added', - color: 'green' - }); - const removedAnnotation = new Annotation({ - category: 'removed', - color: 'red' - }); - pages.forEach(page => { const newTextItems = []; page.textItems.forEach(item => { @@ -47,9 +38,9 @@ export default class DetectLinks extends Transformation { newTextItems.push(new TextItem({ ...item, text: changedWords.join(' '), - annotation: addedAnnotation, + annotation: ADDED_ANNOTATION, })); - item.annotation = removedAnnotation; + item.annotation = REMOVED_ANNOTATION; } }); page.textItems = newTextItems; @@ -59,7 +50,7 @@ export default class DetectLinks extends Transformation { processAnnotations(pages:PdfPage[]) { pages.forEach(page => { - page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); + page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION); page.textItems.forEach(textItem => textItem.annotation = null) }); return pages; diff --git a/src/javascript/models/transformations/HeadlineDetector.jsx b/src/javascript/models/transformations/HeadlineDetector.jsx index 65591c7..d08ab4b 100644 --- a/src/javascript/models/transformations/HeadlineDetector.jsx +++ b/src/javascript/models/transformations/HeadlineDetector.jsx @@ -132,7 +132,7 @@ export default class HeadlineDetector extends Transformation { ...item, text: item.text, annotation: new Annotation({ - category: "Headline " + headlineLevel, + category: "Headline-" + headlineLevel, color: 'green' }), markdownElement: new Headline({ diff --git a/src/javascript/models/transformations/HeadlineToUppercase.jsx b/src/javascript/models/transformations/HeadlineToUppercase.jsx index 5d4f8b4..5c45cbe 100644 --- a/src/javascript/models/transformations/HeadlineToUppercase.jsx +++ b/src/javascript/models/transformations/HeadlineToUppercase.jsx @@ -2,7 +2,7 @@ import Transformation from './Transformation.jsx'; import TextItem from '../TextItem.jsx'; import PdfPage from '../PdfPage.jsx'; import ContentView from '../ContentView.jsx'; -import Annotation from '../Annotation.jsx'; +import { ADDED_ANNOTATION, REMOVED_ANNOTATION, UNCHANGED_ANNOTATION } from '../Annotation.jsx'; import { hasUpperCaseCharacterInMiddleOfWord } from '../../functions.jsx' @@ -27,24 +27,15 @@ export default class HeadlineToUppercase extends Transformation { if (item.markdownElement && item.markdownElement.constructor.name === 'Headline') { const headline = item.text.trim(); if (hasUpperCaseCharacterInMiddleOfWord(headline)) { - item.annotation = new Annotation({ - category: 'removed', - color: 'red' - }); + item.annotation = REMOVED_ANNOTATION; newTextItems.push(item); newTextItems.push(new TextItem({ ...item, text: item.text.toUpperCase(), - annotation: new Annotation({ - category: "Uppercased", - color: 'green' - }) + annotation: ADDED_ANNOTATION })); } else { - item.annotation = new Annotation({ - category: 'Untouched', - color: 'brown' - }); + item.annotation = UNCHANGED_ANNOTATION; newTextItems.push(item); } } else { @@ -60,7 +51,7 @@ export default class HeadlineToUppercase extends Transformation { processAnnotations(pages:PdfPage[]) { pages.forEach(page => { - page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); + page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION); page.textItems.forEach(textItem => textItem.annotation = null) }); return pages; diff --git a/src/javascript/models/transformations/RemoveRepetitiveElements.jsx b/src/javascript/models/transformations/RemoveRepetitiveElements.jsx index 6523ca0..bbf3a58 100644 --- a/src/javascript/models/transformations/RemoveRepetitiveElements.jsx +++ b/src/javascript/models/transformations/RemoveRepetitiveElements.jsx @@ -1,7 +1,7 @@ import Transformation from './Transformation.jsx'; import PdfPage from '../PdfPage.jsx'; import ContentView from '../ContentView.jsx'; -import Annotation from '../Annotation.jsx'; +import { REMOVED_ANNOTATION } from '../Annotation.jsx'; import { isDigit } from '../../functions.jsx' @@ -51,10 +51,7 @@ export default class RemoveRepetitiveElements extends Transformation { var combinedCoordinates = combineCoordinates(textItem); if (repetitionCounts[combinedCoordinates] > 1) { // console.debug("page " + pdfPage.index + " removed :" + repetitionCounts[combinedCoordinates] + " :" + textItem.text); - textItem.annotation = new Annotation({ - category: 'removed', - color: 'red' - }); + textItem.annotation = REMOVED_ANNOTATION; } }); }); @@ -63,7 +60,7 @@ export default class RemoveRepetitiveElements extends Transformation { processAnnotations(pages:PdfPage[]) { pages.forEach(page => { - page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); + page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION); }); return pages; } diff --git a/src/javascript/models/transformations/RemoveWhitespaces.jsx b/src/javascript/models/transformations/RemoveWhitespaces.jsx index 7f4ba33..972e0df 100644 --- a/src/javascript/models/transformations/RemoveWhitespaces.jsx +++ b/src/javascript/models/transformations/RemoveWhitespaces.jsx @@ -3,7 +3,7 @@ import TextItem from '../TextItem.jsx'; import PdfPage from '../PdfPage.jsx'; import ContentView from '../ContentView.jsx'; -import Annotation from '../Annotation.jsx'; +import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx'; export default class RemoveWhitespaces extends Transformation { @@ -16,15 +16,6 @@ export default class RemoveWhitespaces extends Transformation { } transform(pages:PdfPage[]) { - const addedAnnotation = new Annotation({ - category: 'added', - color: 'green' - }); - const removedAnnotation = new Annotation({ - category: 'removed', - color: 'red' - }); - pages.forEach(page => { const newTextItems = []; page.textItems.forEach(item => { @@ -43,9 +34,9 @@ export default class RemoveWhitespaces extends Transformation { newTextItems.push(new TextItem({ ...item, text: changedWords.join(' '), - annotation: addedAnnotation, + annotation: ADDED_ANNOTATION, })); - item.annotation = removedAnnotation; + item.annotation = REMOVED_ANNOTATION; } }); page.textItems = newTextItems; @@ -55,7 +46,7 @@ export default class RemoveWhitespaces extends Transformation { processAnnotations(pages:PdfPage[]) { pages.forEach(page => { - page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation.category !== 'removed'); + page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION); page.textItems.forEach(textItem => textItem.annotation = null) }); return pages;