Move pageView construction into Transformer

This commit is contained in:
Johannes Zillmann 2017-02-14 21:47:54 +01:00
parent 92a4337387
commit 41bc2f6c34
15 changed files with 66 additions and 106 deletions

View File

@ -9,11 +9,6 @@ import MenuItem from 'react-bootstrap/lib/MenuItem'
import Label from 'react-bootstrap/lib/Label' import Label from 'react-bootstrap/lib/Label'
import Checkbox from 'react-bootstrap/lib/Checkbox' import Checkbox from 'react-bootstrap/lib/Checkbox'
import ContentView from '../models/ContentView.jsx';
import PdfPageView from './debug/PdfPageView.jsx';
import BlockPageView from './debug/BlockPageView.jsx';
import MarkdownPageView from './debug/MarkdownPageView.jsx';
// A view which displays the content of the given pages transformed by the given transformations // A view which displays the content of the given pages transformed by the given transformations
export default class DebugView extends React.Component { export default class DebugView extends React.Component {
@ -69,32 +64,18 @@ export default class DebugView extends React.Component {
const currentTransformationName = transformations[currentTransformation].name; const currentTransformationName = transformations[currentTransformation].name;
var transformedPages = pdfPages; var transformedPages = pdfPages;
var contentView;
var lastTransformation; var lastTransformation;
for (var i = 0; i <= currentTransformation; i++) { for (var i = 0; i <= currentTransformation; i++) {
if (lastTransformation) { if (lastTransformation) {
transformedPages = lastTransformation.processAnnotations(transformedPages); transformedPages = lastTransformation.processAnnotations(transformedPages);
} }
transformedPages = transformations[i].transform(transformedPages); transformedPages = transformations[i].transform(transformedPages);
contentView = transformations[i].contentView();
lastTransformation = transformations[i]; lastTransformation = transformations[i];
} }
transformedPages = transformedPages.filter((elem, i) => pageNr == -1 || i == pageNr); transformedPages = transformedPages.filter((elem, i) => pageNr == -1 || i == pageNr);
var pageComponents; const pageComponents = transformedPages.map(page => lastTransformation.createPageView(page, this.state.modificationsOnly));
var showModificationCheckbox = false; const showModificationCheckbox = lastTransformation.showModificationCheckbox();
switch (contentView) {
case ContentView.PDF:
pageComponents = transformedPages.map(page => <PdfPageView key={ page.index } pdfPage={ page } modificationsOnly={ this.state.modificationsOnly } />);
showModificationCheckbox = true;
break;
case ContentView.BLOCK:
pageComponents = transformedPages.map(page => <BlockPageView key={ page.index } page={ page } />);
break;
case ContentView.MARKDOWN:
pageComponents = transformedPages.map(page => <MarkdownPageView key={ page.index } page={ page } />);
break;
}
return ( return (
<div> <div>

View File

@ -1,5 +0,0 @@
import { Enum } from 'enumify';
// Enumeration of the kinds of view a transformation result can be displayed with.
// The class body is intentionally empty; the enum values are attached by the initEnum() call below.
export default class ContentView extends Enum {
}
// Registers the values PDF, BLOCK and MARKDOWN (referenced by callers as ContentView.PDF, etc.).
ContentView.initEnum(['PDF', 'BLOCK', 'MARKDOWN'])

View File

@ -1,7 +1,6 @@
import Transformation from './Transformation.jsx'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx'; import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
function combineTextItems(textItems:TextItem[]) { function combineTextItems(textItems:TextItem[]) {
@ -41,16 +40,12 @@ function combineTextItems(textItems:TextItem[]) {
}); });
} }
export default class CombineSameY extends Transformation { export default class CombineSameY extends ToPdfViewTransformation {
constructor() { constructor() {
super("Combine Text On Same Y"); super("Combine Text On Same Y");
} }
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {
return pages.map(pdfPage => { return pages.map(pdfPage => {

View File

@ -1,21 +1,16 @@
import Transformation from './Transformation.jsx'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx'; import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
import { isNumber } from '../../functions.jsx' import { isNumber } from '../../functions.jsx'
export default class DetectFootnotes extends Transformation { export default class DetectFootnotes extends ToPdfViewTransformation {
constructor() { constructor() {
super("Detect Footnotes"); super("Detect Footnotes");
} }
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {
var nextFooterNumber = 1; var nextFooterNumber = 1;

View File

@ -1,20 +1,15 @@
import Transformation from './Transformation.jsx'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx'; import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
export default class DetectLinks extends Transformation { export default class DetectLinks extends ToPdfViewTransformation {
constructor() { constructor() {
super("Detect Links"); super("Detect Links");
} }
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {
pages.forEach(page => { pages.forEach(page => {
const newTextItems = []; const newTextItems = [];

View File

@ -1,7 +1,6 @@
import Transformation from './Transformation.jsx'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx'; import Annotation from '../Annotation.jsx';
import Headline from '../markdown/Headline.jsx'; import Headline from '../markdown/Headline.jsx';
@ -59,16 +58,12 @@ function findNextMajorHeight(heights, currentHeight, headlineLevels) {
} }
export default class HeadlineDetector extends Transformation { export default class HeadlineDetector extends ToPdfViewTransformation {
constructor() { constructor() {
super("Detect Headlines"); super("Detect Headlines");
} }
contentView() {
return ContentView.PDF;
}
// Strategy: // Strategy:
// - find most used height => this & every height below is paragraph // - find most used height => this & every height below is paragraph
// - heights which start a page are likely to be headlines // - heights which start a page are likely to be headlines

View File

@ -1,23 +1,18 @@
import Transformation from './Transformation.jsx'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION, UNCHANGED_ANNOTATION } from '../Annotation.jsx'; import { ADDED_ANNOTATION, REMOVED_ANNOTATION, UNCHANGED_ANNOTATION } from '../Annotation.jsx';
import { hasUpperCaseCharacterInMiddleOfWord } from '../../functions.jsx' import { hasUpperCaseCharacterInMiddleOfWord } from '../../functions.jsx'
// Uppercase headlines are often parsed with very mixed character casing by pdf.js, like 'A heAdLine'. // Uppercase headlines are often parsed with very mixed character casing by pdf.js, like 'A heAdLine'.
// This tries to detect them and make them all uppercase. // This tries to detect them and make them all uppercase.
export default class HeadlineToUppercase extends Transformation { export default class HeadlineToUppercase extends ToPdfViewTransformation {
constructor() { constructor() {
super("Headlines Uppercase"); super("Headlines Uppercase");
} }
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {

View File

@ -1,17 +1,12 @@
import Transformation from './Transformation.jsx'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
export default class NoOp extends Transformation { export default class NoOp extends ToPdfViewTransformation {
constructor() { constructor() {
super("Original"); super("Original");
} }
contentView() {
return ContentView.PDF;
}
transform(pdfPages:PdfPage[]) { transform(pdfPages:PdfPage[]) {
return pdfPages; return pdfPages;
} }

View File

@ -1,6 +1,5 @@
import Transformation from './Transformation.jsx'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { REMOVED_ANNOTATION } from '../Annotation.jsx'; import { REMOVED_ANNOTATION } from '../Annotation.jsx';
import { isDigit } from '../../functions.jsx' import { isDigit } from '../../functions.jsx'
@ -25,16 +24,12 @@ function combineCoordinates(textItem) {
} }
// Remove elements with similar content on same page positions, like page numbers, license information, etc... // Remove elements with similar content on same page positions, like page numbers, license information, etc...
export default class RemoveRepetitiveElements extends Transformation { export default class RemoveRepetitiveElements extends ToPdfViewTransformation {
constructor() { constructor() {
super("Remove Repetitive Elements"); super("Remove Repetitive Elements");
} }
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {
//build repetition counts for every element //build repetition counts for every element
const repetitionCounts = {}; const repetitionCounts = {};

View File

@ -1,20 +1,15 @@
import Transformation from './Transformation.jsx'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx'; import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
export default class RemoveWhitespaces extends Transformation { export default class RemoveWhitespaces extends ToPdfViewTransformation {
constructor() { constructor() {
super("Remove Whitespaces"); super("Remove Whitespaces");
} }
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {
pages.forEach(page => { pages.forEach(page => {
const newTextItems = []; const newTextItems = [];

View File

@ -1,17 +1,12 @@
import Transformation from './Transformation.jsx'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
export default class RoundCoordinates extends Transformation { export default class RoundCoordinates extends ToPdfViewTransformation {
constructor() { constructor() {
super("Round Coordinates"); super("Round Coordinates");
} }
contentView() {
return ContentView.PDF;
}
transform(pdfPages:PdfPage[]) { transform(pdfPages:PdfPage[]) {
return pdfPages.map(pdfPage => { return pdfPages.map(pdfPage => {
return { return {

View File

@ -1,7 +1,8 @@
import React from 'react';
import Transformation from './Transformation.jsx'; import Transformation from './Transformation.jsx';
import BlockPageView from '../../components/debug/BlockPageView.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import BlockPage from '../BlockPage.jsx'; import BlockPage from '../BlockPage.jsx';
import ContentView from '../ContentView.jsx';
export default class ToBlockSystem extends Transformation { export default class ToBlockSystem extends Transformation {
@ -9,12 +10,8 @@ export default class ToBlockSystem extends Transformation {
super("To Block System"); super("To Block System");
} }
contentView() { createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
return ContentView.BLOCK; return <BlockPageView key={ page.index } page={ page } />;
}
showPageSelection() {
return false;
} }
transform(pages:PdfPage[]) { transform(pages:PdfPage[]) {

View File

@ -1,6 +1,7 @@
import React from 'react';
import MarkdownPageView from '../../components/debug/MarkdownPageView.jsx';
import Transformation from './Transformation.jsx'; import Transformation from './Transformation.jsx';
import TextPage from '../TextPage.jsx'; import TextPage from '../TextPage.jsx';
import ContentView from '../ContentView.jsx';
export default class ToMarkdown extends Transformation { export default class ToMarkdown extends Transformation {
@ -8,12 +9,8 @@ export default class ToMarkdown extends Transformation {
super("To Markdown"); super("To Markdown");
} }
showPageSelection() { createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
return false; return <MarkdownPageView key={ page.index } page={ page } />;
}
contentView() {
return ContentView.MARKDOWN;
} }
transform(pages:TextPage[]) { transform(pages:TextPage[]) {

View File

@ -0,0 +1,32 @@
import React from 'react';
import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx';
import PdfPageView from '../../components/debug/PdfPageView.jsx';
// Abstract pdfView transformation
export default class ToPdfViewTransformation extends Transformation {
constructor(name) {
super(name);
if (this.constructor === ToPdfViewTransformation) {
throw new TypeError("Can not construct abstract class.");
}
}
showPageSelection() {
return true;
}
showModificationCheckbox() {
return true;
}
createPageView(page, modificationsOnly) {
return <PdfPageView key={ page.index } pdfPage={ page } modificationsOnly={ modificationsOnly } />;
}
transform(pdfPages:PdfPage[]) {
return pdfPages;
}
}

View File

@ -12,11 +12,14 @@ export default class Transformation {
} }
showPageSelection() { showPageSelection() {
return true; return false;
} }
// Returns with which type the transformed pages can be viewed showModificationCheckbox() {
contentView() { return false;
}
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
throw new TypeError("Do not call abstract method foo from child."); throw new TypeError("Do not call abstract method foo from child.");
} }