Move pageView construction into Transformer

This commit is contained in:
Johannes Zillmann 2017-02-14 21:47:54 +01:00
parent 92a4337387
commit 41bc2f6c34
15 changed files with 66 additions and 106 deletions

View File

@ -9,11 +9,6 @@ import MenuItem from 'react-bootstrap/lib/MenuItem'
import Label from 'react-bootstrap/lib/Label'
import Checkbox from 'react-bootstrap/lib/Checkbox'
import ContentView from '../models/ContentView.jsx';
import PdfPageView from './debug/PdfPageView.jsx';
import BlockPageView from './debug/BlockPageView.jsx';
import MarkdownPageView from './debug/MarkdownPageView.jsx';
// A view which displays the content of the given pages transformed by the given transformations
export default class DebugView extends React.Component {
@ -69,32 +64,18 @@ export default class DebugView extends React.Component {
const currentTransformationName = transformations[currentTransformation].name;
var transformedPages = pdfPages;
var contentView;
var lastTransformation;
for (var i = 0; i <= currentTransformation; i++) {
if (lastTransformation) {
transformedPages = lastTransformation.processAnnotations(transformedPages);
}
transformedPages = transformations[i].transform(transformedPages);
contentView = transformations[i].contentView();
lastTransformation = transformations[i];
}
transformedPages = transformedPages.filter((elem, i) => pageNr == -1 || i == pageNr);
var pageComponents;
var showModificationCheckbox = false;
switch (contentView) {
case ContentView.PDF:
pageComponents = transformedPages.map(page => <PdfPageView key={ page.index } pdfPage={ page } modificationsOnly={ this.state.modificationsOnly } />);
showModificationCheckbox = true;
break;
case ContentView.BLOCK:
pageComponents = transformedPages.map(page => <BlockPageView key={ page.index } page={ page } />);
break;
case ContentView.MARKDOWN:
pageComponents = transformedPages.map(page => <MarkdownPageView key={ page.index } page={ page } />);
break;
}
const pageComponents = transformedPages.map(page => lastTransformation.createPageView(page, this.state.modificationsOnly));
const showModificationCheckbox = lastTransformation.showModificationCheckbox();
return (
<div>

View File

@ -1,5 +0,0 @@
import { Enum } from 'enumify';
// Enumeration of the kinds of view a transformation's output can be shown with.
// The members are referenced as ContentView.PDF, ContentView.BLOCK and ContentView.MARKDOWN.
export default class ContentView extends Enum {
}
// Registers the enum members by name (presumably via enumify's initEnum, which
// creates one static instance per listed name — confirm against the enumify docs).
ContentView.initEnum(['PDF', 'BLOCK', 'MARKDOWN'])

View File

@ -1,7 +1,6 @@
import Transformation from './Transformation.jsx';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
function combineTextItems(textItems:TextItem[]) {
@ -41,16 +40,12 @@ function combineTextItems(textItems:TextItem[]) {
});
}
export default class CombineSameY extends Transformation {
export default class CombineSameY extends ToPdfViewTransformation {
constructor() {
super("Combine Text On Same Y");
}
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) {
return pages.map(pdfPage => {

View File

@ -1,21 +1,16 @@
import Transformation from './Transformation.jsx';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
import { isNumber } from '../../functions.jsx'
export default class DetectFootnotes extends Transformation {
export default class DetectFootnotes extends ToPdfViewTransformation {
constructor() {
super("Detect Footnotes");
}
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) {
var nextFooterNumber = 1;

View File

@ -1,20 +1,15 @@
import Transformation from './Transformation.jsx';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
export default class DetectLinks extends Transformation {
export default class DetectLinks extends ToPdfViewTransformation {
constructor() {
super("Detect Links");
}
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) {
pages.forEach(page => {
const newTextItems = [];

View File

@ -1,7 +1,6 @@
import Transformation from './Transformation.jsx';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import Annotation from '../Annotation.jsx';
import Headline from '../markdown/Headline.jsx';
@ -59,16 +58,12 @@ function findNextMajorHeight(heights, currentHeight, headlineLevels) {
}
export default class HeadlineDetector extends Transformation {
export default class HeadlineDetector extends ToPdfViewTransformation {
constructor() {
super("Detect Headlines");
}
contentView() {
return ContentView.PDF;
}
// Strategy:
// - find most used height => this & every height below is paragraph
// - heights which start a page are likely to be headlines

View File

@ -1,23 +1,18 @@
import Transformation from './Transformation.jsx';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION, UNCHANGED_ANNOTATION } from '../Annotation.jsx';
import { hasUpperCaseCharacterInMiddleOfWord } from '../../functions.jsx'
// Uppercase headlines are often parsed by pdf.js with mixed character casing, like 'A heAdLine'.
// This tries to detect them and make them all uppercase.
export default class HeadlineToUppercase extends Transformation {
export default class HeadlineToUppercase extends ToPdfViewTransformation {
constructor() {
super("Headlines Uppercase");
}
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) {

View File

@ -1,17 +1,12 @@
import Transformation from './Transformation.jsx';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
export default class NoOp extends Transformation {
export default class NoOp extends ToPdfViewTransformation {
constructor() {
super("Original");
}
contentView() {
return ContentView.PDF;
}
transform(pdfPages:PdfPage[]) {
return pdfPages;
}

View File

@ -1,6 +1,5 @@
import Transformation from './Transformation.jsx';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
import { isDigit } from '../../functions.jsx'
@ -25,16 +24,12 @@ function combineCoordinates(textItem) {
}
// Remove elements with similar content on same page positions, like page numbers, license information, etc...
export default class RemoveRepetitiveElements extends Transformation {
export default class RemoveRepetitiveElements extends ToPdfViewTransformation {
constructor() {
super("Remove Repetitive Elements");
}
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) {
//build repetition counts for every element
const repetitionCounts = {};

View File

@ -1,20 +1,15 @@
import Transformation from './Transformation.jsx';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
export default class RemoveWhitespaces extends Transformation {
export default class RemoveWhitespaces extends ToPdfViewTransformation {
constructor() {
super("Remove Whitespaces");
}
contentView() {
return ContentView.PDF;
}
transform(pages:PdfPage[]) {
pages.forEach(page => {
const newTextItems = [];

View File

@ -1,17 +1,12 @@
import Transformation from './Transformation.jsx';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
export default class RoundCoordinates extends Transformation {
export default class RoundCoordinates extends ToPdfViewTransformation {
constructor() {
super("Round Coordinates");
}
contentView() {
return ContentView.PDF;
}
transform(pdfPages:PdfPage[]) {
return pdfPages.map(pdfPage => {
return {

View File

@ -1,7 +1,8 @@
import React from 'react';
import Transformation from './Transformation.jsx';
import BlockPageView from '../../components/debug/BlockPageView.jsx';
import PdfPage from '../PdfPage.jsx';
import BlockPage from '../BlockPage.jsx';
import ContentView from '../ContentView.jsx';
export default class ToBlockSystem extends Transformation {
@ -9,12 +10,8 @@ export default class ToBlockSystem extends Transformation {
super("To Block System");
}
contentView() {
return ContentView.BLOCK;
}
showPageSelection() {
return false;
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
return <BlockPageView key={ page.index } page={ page } />;
}
transform(pages:PdfPage[]) {

View File

@ -1,6 +1,7 @@
import React from 'react';
import MarkdownPageView from '../../components/debug/MarkdownPageView.jsx';
import Transformation from './Transformation.jsx';
import TextPage from '../TextPage.jsx';
import ContentView from '../ContentView.jsx';
export default class ToMarkdown extends Transformation {
@ -8,12 +9,8 @@ export default class ToMarkdown extends Transformation {
super("To Markdown");
}
showPageSelection() {
return false;
}
contentView() {
return ContentView.MARKDOWN;
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
return <MarkdownPageView key={ page.index } page={ page } />;
}
transform(pages:TextPage[]) {

View File

@ -0,0 +1,32 @@
import React from 'react';
import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx';
import PdfPageView from '../../components/debug/PdfPageView.jsx';
// Abstract pdfView transformation
export default class ToPdfViewTransformation extends Transformation {
constructor(name) {
super(name);
if (this.constructor === ToPdfViewTransformation) {
throw new TypeError("Can not construct abstract class.");
}
}
showPageSelection() {
return true;
}
showModificationCheckbox() {
return true;
}
createPageView(page, modificationsOnly) {
return <PdfPageView key={ page.index } pdfPage={ page } modificationsOnly={ modificationsOnly } />;
}
transform(pdfPages:PdfPage[]) {
return pdfPages;
}
}

View File

@ -12,11 +12,14 @@ export default class Transformation {
}
showPageSelection() {
return true;
return false;
}
// Returns with which type the transformed pages can be viewed
contentView() {
showModificationCheckbox() {
return false;
}
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
throw new TypeError("Do not call abstract method foo from child.");
}