From e43cf9a6a9ed6a9e745b94ccc38ded15ddbc86e2 Mon Sep 17 00:00:00 2001 From: Johannes Zillmann Date: Sun, 22 Jan 2017 19:04:23 +0100 Subject: [PATCH] Add text view --- src/javascript/components/App.jsx | 6 +- .../components/{PdfView.jsx => DebugView.jsx} | 74 +++++++++------- src/javascript/components/TextPageView.jsx | 20 +++++ src/javascript/models/AppState.jsx | 8 +- src/javascript/models/ContentView.jsx | 5 ++ src/javascript/models/TextPage.jsx | 9 ++ .../CombineSameYTransformation.jsx | 88 ++++++++++--------- .../transformations/NoOpTransformation.jsx | 9 +- .../RoundCoordinatesTransformation.jsx | 33 ++++--- .../ToSingleTextPageTransformation.jsx | 28 ++++++ .../ToTextPagesTransformation.jsx | 27 ++++++ .../models/transformations/Transformation.jsx | 16 +++- 12 files changed, 228 insertions(+), 95 deletions(-) rename src/javascript/components/{PdfView.jsx => DebugView.jsx} (58%) create mode 100644 src/javascript/components/TextPageView.jsx create mode 100644 src/javascript/models/ContentView.jsx create mode 100644 src/javascript/models/TextPage.jsx create mode 100644 src/javascript/models/transformations/ToSingleTextPageTransformation.jsx create mode 100644 src/javascript/models/transformations/ToTextPagesTransformation.jsx diff --git a/src/javascript/components/App.jsx b/src/javascript/components/App.jsx index c3bf0a4..3786c6f 100644 --- a/src/javascript/components/App.jsx +++ b/src/javascript/components/App.jsx @@ -6,7 +6,7 @@ import TopBar from './TopBar.jsx'; import { View } from '../models/AppState.jsx'; import PdfUploadView from './PdfUploadView.jsx'; import LoadingView from './LoadingView.jsx'; -import PdfView from './PdfView.jsx'; +import DebugView from './DebugView.jsx'; export default class App extends React.Component { @@ -26,8 +26,8 @@ export default class App extends React.Component { case View.LOADING: mainView = break; - case View.PDF_VIEW: - mainView = + case View.DEBUG: + mainView = break; } diff --git a/src/javascript/components/PdfView.jsx b/src/javascript/components/DebugView.jsx similarity index 58% rename from src/javascript/components/PdfView.jsx rename to src/javascript/components/DebugView.jsx index 291ba6e..d78163b 100644 --- a/src/javascript/components/PdfView.jsx +++ b/src/javascript/components/DebugView.jsx @@ -6,10 +6,12 @@ import Button from 'react-bootstrap/lib/Button' import DropdownButton from 'react-bootstrap/lib/DropdownButton' import MenuItem from 'react-bootstrap/lib/MenuItem' +import ContentView from '../models/ContentView.jsx'; import PdfPageView from './PdfPageView.jsx'; +import TextPageView from './TextPageView.jsx'; -// A view which displays the TextItems of multiple PdfPages -export default class PdfView extends React.Component { +// A view which displays the content of the given pages transformed by the given transformations +export default class DebugView extends React.Component { static propTypes = { pdfPages: React.PropTypes.array.isRequired, @@ -55,39 +57,51 @@ export default class PdfView extends React.Component { const currentTransformationName = transformations[currentTransformation].name; - const transformedPdfPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr).map(pdfPage => { - for (var i = 0; i <= currentTransformation; i++) { - pdfPage = transformations[i].transform(pdfPage); - } - return pdfPage; - }); + var transformedPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr); + var contentView; + var lastTransformation; + for (var i = 0; i <= currentTransformation; i++) { + transformedPages = transformations[i].transform(transformedPages); + lastTransformation = transformations[i]; + contentView = transformations[i].contentView(); + } - var pageComponents = transformedPdfPages.map(page => ); + var pageComponents; + switch (contentView) { + case ContentView.PDF: + pageComponents = transformedPages.map(page => ); + break; + case ContentView.TEXT: + //transformedPages.forEach(p => console.debug(p)); + pageComponents = transformedPages.map(page => ); + break; + } return (
- - - - - - - -
- Pages -
- - - - { pdfPages.map((pdfPage, i) => ) } - - -
+ { lastTransformation.showPageSelection() && + + + + + + + +
+ Pages +
+ + + + { pdfPages.map((pdfPage, i) => ) } + + +
}
diff --git a/src/javascript/components/TextPageView.jsx b/src/javascript/components/TextPageView.jsx new file mode 100644 index 0000000..8bdea2f --- /dev/null +++ b/src/javascript/components/TextPageView.jsx @@ -0,0 +1,20 @@ +import React from 'react'; + +export default class TextPageView extends React.Component { + + static propTypes = { + page: React.PropTypes.object.isRequired, + }; + + render() { + const header = "Page " + (this.props.page.index + 1); + return ( +
+

{ header }

+ +
+ ); + } + +} \ No newline at end of file diff --git a/src/javascript/models/AppState.jsx b/src/javascript/models/AppState.jsx index 878883d..79acc82 100644 --- a/src/javascript/models/AppState.jsx +++ b/src/javascript/models/AppState.jsx @@ -3,6 +3,8 @@ import { Enum } from 'enumify'; import NoOpTransformation from './transformations/NoOpTransformation.jsx'; import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx'; import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx'; +import ToTextPagesTransformation from './transformations/ToTextPagesTransformation.jsx'; +import ToSingleTextPageTransformation from './transformations/ToSingleTextPageTransformation.jsx' // Holds the state of the Application export default class AppState { @@ -12,7 +14,7 @@ export default class AppState { this.mainView = View.UPLOAD; this.fileBuffer; this.pdfPages = []; - this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation()]; + this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation(), new ToTextPagesTransformation(), new ToSingleTextPageTransformation()]; //bind functions this.render = this.render.bind(this); @@ -34,7 +36,7 @@ export default class AppState { storePdfPages(pdfPages) { this.pdfPages = pdfPages; this.fileBuffer = null; - this.mainView = View.PDF_VIEW; + this.mainView = View.DEBUG; this.render(); } @@ -42,4 +44,4 @@ export default class AppState { export class View extends Enum { } -View.initEnum(['UPLOAD', 'LOADING', 'PDF_VIEW']) \ No newline at end of file +View.initEnum(['UPLOAD', 'LOADING', 'DEBUG']) \ No newline at end of file diff --git a/src/javascript/models/ContentView.jsx b/src/javascript/models/ContentView.jsx new file mode 100644 index 0000000..96e92bf --- /dev/null +++ b/src/javascript/models/ContentView.jsx @@ -0,0 +1,5 @@ +import { Enum } from 'enumify'; + +export default class ContentView extends Enum { +} +ContentView.initEnum(['PDF', 'TEXT']) \ No newline at end of file diff --git a/src/javascript/models/TextPage.jsx b/src/javascript/models/TextPage.jsx new file mode 100644 index 0000000..88f2806 --- /dev/null +++ b/src/javascript/models/TextPage.jsx @@ -0,0 +1,9 @@ +// A page which holds TextItems displayable via PdfPageView +export default class TextPage { + + constructor(options) { + this.index = options.index; + this.text = options.text; + } + +} diff --git a/src/javascript/models/transformations/CombineSameYTransformation.jsx b/src/javascript/models/transformations/CombineSameYTransformation.jsx index d33c433..941f695 100644 --- a/src/javascript/models/transformations/CombineSameYTransformation.jsx +++ b/src/javascript/models/transformations/CombineSameYTransformation.jsx @@ -1,6 +1,7 @@ import Transformation from './Transformation.jsx'; import TextItem from '../TextItem.jsx'; import PdfPage from '../PdfPage.jsx'; +import ContentView from '../ContentView.jsx'; export default class CombineSameYTransformation extends Transformation { @@ -8,50 +9,55 @@ export default class CombineSameYTransformation extends Transformation { super("Combine text on same Y"); } - transform(pdfPage:PdfPage) { + contentView() { + return ContentView.PDF; + } - const newTextItems = []; - var lastTextItem; - pdfPage.textItems.forEach(textItem => { - if (!lastTextItem) { - lastTextItem = textItem; - } else { - if (textItem.y == lastTextItem.y) { - //combine - - // console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width); - // console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width); - // console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width)); - - var combinedText = lastTextItem.text; - //TODO make 5 dependent on text size or biggest gap? - if (textItem.x - lastTextItem.x - lastTextItem.width > 7) { - combinedText += ' '; - } - combinedText += textItem.text; - - lastTextItem = new TextItem({ - x: lastTextItem.x, - y: lastTextItem.y, - width: textItem.x - lastTextItem.x + textItem.width, - height: lastTextItem.height, //might this cause problems ? - text: combinedText - }); - } else { - //rotate - newTextItems.push(lastTextItem); + transform(pages:PdfPage[]) { + return pages.map(pdfPage => { + const newTextItems = []; + var lastTextItem; + pdfPage.textItems.forEach(textItem => { + if (!lastTextItem) { lastTextItem = textItem; - } - } - }); - if (lastTextItem) { - newTextItems.push(lastTextItem); - } + } else { + if (textItem.y == lastTextItem.y) { + //combine - return { - ...pdfPage, - textItems: newTextItems - }; + // console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width); + // console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width); + // console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width)); + + var combinedText = lastTextItem.text; + //TODO make 5 dependent on text size or biggest gap? + if (textItem.x - lastTextItem.x - lastTextItem.width > 7) { + combinedText += ' '; + } + combinedText += textItem.text; + + lastTextItem = new TextItem({ + x: lastTextItem.x, + y: lastTextItem.y, + width: textItem.x - lastTextItem.x + textItem.width, + height: lastTextItem.height, //might this cause problems ? + text: combinedText + }); + } else { + //rotate + newTextItems.push(lastTextItem); + lastTextItem = textItem; + } + } + }); + if (lastTextItem) { + newTextItems.push(lastTextItem); + } + + return { + ...pdfPage, + textItems: newTextItems + }; + }); } } \ No newline at end of file diff --git a/src/javascript/models/transformations/NoOpTransformation.jsx b/src/javascript/models/transformations/NoOpTransformation.jsx index c2b0dc9..cb538b0 100644 --- a/src/javascript/models/transformations/NoOpTransformation.jsx +++ b/src/javascript/models/transformations/NoOpTransformation.jsx @@ -1,5 +1,6 @@ import Transformation from './Transformation.jsx'; import PdfPage from '../PdfPage.jsx'; +import ContentView from '../ContentView.jsx'; export default class NoOpTransformation extends Transformation { @@ -7,8 +8,12 @@ export default class NoOpTransformation extends Transformation { super("Original"); } - transform(pdfPage:PdfPage) { - return pdfPage; + contentView() { + return ContentView.PDF; + } + + transform(pdfPages:PdfPage[]) { + return pdfPages; } } \ No newline at end of file diff --git a/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx b/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx index 3085818..fa30a7a 100644 --- a/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx +++ b/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx @@ -1,5 +1,6 @@ import Transformation from './Transformation.jsx'; import PdfPage from '../PdfPage.jsx'; +import ContentView from '../ContentView.jsx'; export default class RoundCoordinatesTransformation extends Transformation { @@ -7,19 +8,25 @@ export default class RoundCoordinatesTransformation extends Transformation { super("Round coordinates"); } - transform(pdfPage:PdfPage) { - return { - ...pdfPage, - textItems: pdfPage.textItems.map(textItem => { - return { - ...textItem, - x: Math.round(textItem.x), - y: Math.round(textItem.y), - width: Math.round(textItem.width), - height: Math.round(textItem.height) - } - }) - }; + contentView() { + return ContentView.PDF; + } + + transform(pdfPages:PdfPage[]) { + return pdfPages.map(pdfPage => { + return { + ...pdfPage, + textItems: pdfPage.textItems.map(textItem => { + return { + ...textItem, + x: Math.round(textItem.x), + y: Math.round(textItem.y), + width: Math.round(textItem.width), + height: Math.round(textItem.height) + } + }) + }; + }); } } \ No newline at end of file diff --git a/src/javascript/models/transformations/ToSingleTextPageTransformation.jsx b/src/javascript/models/transformations/ToSingleTextPageTransformation.jsx new file mode 100644 index 0000000..7773904 --- /dev/null +++ b/src/javascript/models/transformations/ToSingleTextPageTransformation.jsx @@ -0,0 +1,28 @@ +import Transformation from './Transformation.jsx'; +import TextPage from '../TextPage.jsx'; +import ContentView from '../ContentView.jsx'; + +export default class ToSingleTextPageTransformation extends Transformation { + + constructor() { + super("To Single Text Page"); + } + + showPageSelection() { + return false; + } + + contentView() { + return ContentView.TEXT; + } + + transform(pages:TextPage[]) { + var text = ''; + pages.forEach(page => text += page.text + '\n'); + return [new TextPage({ + index: 0, + text: text + })]; + } + +} \ No newline at end of file diff --git a/src/javascript/models/transformations/ToTextPagesTransformation.jsx b/src/javascript/models/transformations/ToTextPagesTransformation.jsx new file mode 100644 index 0000000..f6deb7e --- /dev/null +++ b/src/javascript/models/transformations/ToTextPagesTransformation.jsx @@ -0,0 +1,27 @@ +import Transformation from './Transformation.jsx'; +import PdfPage from '../PdfPage.jsx'; +import TextPage from '../TextPage.jsx'; +import ContentView from '../ContentView.jsx'; + +export default class ToTextPagesTransformation extends Transformation { + + constructor() { + super("To Text Pages"); + } + + contentView() { + return ContentView.TEXT; + } + + transform(pdfPages:PdfPage[]) { + return pdfPages.map(pdfPage => { + var text = ''; + pdfPage.textItems.forEach(textItem => text += textItem.text + '\n'); + return new TextPage({ + index: pdfPage.index, + text: text + }); + }); + } + +} \ No newline at end of file diff --git a/src/javascript/models/transformations/Transformation.jsx b/src/javascript/models/transformations/Transformation.jsx index afeab11..4b0b8d4 100644 --- a/src/javascript/models/transformations/Transformation.jsx +++ b/src/javascript/models/transformations/Transformation.jsx @@ -1,5 +1,3 @@ -import PdfPage from '../PdfPage.jsx'; - // A transformation from an PdfPage to an PdfPage export default class Transformation { @@ -13,7 +11,19 @@ export default class Transformation { this.name = name; } - transform(pdfPage:PdfPage) { // eslint-disable-line no-unused-vars + showPageSelection() { + return true; + } + + // Returns with which type the transformed pages can be viewed + contentView() { throw new TypeError("Do not call abstract method foo from child."); } + + // Transform incoming pages (like PdfPage[]) into different pages (either PdfPages[] or TextPages[]) + transform(pages) { // eslint-disable-line no-unused-vars + throw new TypeError("Do not call abstract method foo from child."); + } + + } \ No newline at end of file