diff --git a/src/javascript/components/App.jsx b/src/javascript/components/App.jsx
index c3bf0a4..3786c6f 100644
--- a/src/javascript/components/App.jsx
+++ b/src/javascript/components/App.jsx
@@ -6,7 +6,7 @@ import TopBar from './TopBar.jsx';
import { View } from '../models/AppState.jsx';
import PdfUploadView from './PdfUploadView.jsx';
import LoadingView from './LoadingView.jsx';
-import PdfView from './PdfView.jsx';
+import DebugView from './DebugView.jsx';
export default class App extends React.Component {
@@ -26,8 +26,8 @@ export default class App extends React.Component {
case View.LOADING:
mainView =
break;
- case View.PDF_VIEW:
- mainView =
+ case View.DEBUG:
+ mainView =
break;
}
diff --git a/src/javascript/components/PdfView.jsx b/src/javascript/components/DebugView.jsx
similarity index 58%
rename from src/javascript/components/PdfView.jsx
rename to src/javascript/components/DebugView.jsx
index 291ba6e..d78163b 100644
--- a/src/javascript/components/PdfView.jsx
+++ b/src/javascript/components/DebugView.jsx
@@ -6,10 +6,12 @@ import Button from 'react-bootstrap/lib/Button'
import DropdownButton from 'react-bootstrap/lib/DropdownButton'
import MenuItem from 'react-bootstrap/lib/MenuItem'
+import ContentView from '../models/ContentView.jsx';
import PdfPageView from './PdfPageView.jsx';
+import TextPageView from './TextPageView.jsx';
-// A view which displays the TextItems of multiple PdfPages
-export default class PdfView extends React.Component {
+// A view which displays the content of the given pages transformed by the given transformations
+export default class DebugView extends React.Component {
static propTypes = {
pdfPages: React.PropTypes.array.isRequired,
@@ -55,39 +57,51 @@ export default class PdfView extends React.Component {
const currentTransformationName = transformations[currentTransformation].name;
- const transformedPdfPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr).map(pdfPage => {
- for (var i = 0; i <= currentTransformation; i++) {
- pdfPage = transformations[i].transform(pdfPage);
- }
- return pdfPage;
- });
+ var transformedPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr);
+ var contentView;
+ var lastTransformation;
+ for (var i = 0; i <= currentTransformation; i++) {
+ transformedPages = transformations[i].transform(transformedPages);
+ lastTransformation = transformations[i];
+ contentView = transformations[i].contentView();
+ }
- var pageComponents = transformedPdfPages.map(page => );
+ var pageComponents;
+ switch (contentView) {
+ case ContentView.PDF:
+ pageComponents = transformedPages.map(page => );
+ break;
+ case ContentView.TEXT:
+ //transformedPages.forEach(p => console.debug(p));
+ pageComponents = transformedPages.map(page => );
+ break;
+ }
return (
-
-
- Pages
-
-
-
-
-
-
-
- { pdfPages.map((pdfPage, i) => ) }
-
-
- |
-
-
-
+ { lastTransformation.showPageSelection() &&
+
+
+ Pages
+
+
+
+
+
+
+
+ { pdfPages.map((pdfPage, i) => ) }
+
+
+ |
+
+
+
}
diff --git a/src/javascript/components/TextPageView.jsx b/src/javascript/components/TextPageView.jsx
new file mode 100644
index 0000000..8bdea2f
--- /dev/null
+++ b/src/javascript/components/TextPageView.jsx
@@ -0,0 +1,20 @@
+import React from 'react';
+
+export default class TextPageView extends React.Component {
+
+ static propTypes = {
+ page: React.PropTypes.object.isRequired,
+ };
+
+ render() {
+ const header = "Page " + (this.props.page.index + 1);
+ return (
+
+
{ header }
+
+
+ );
+ }
+
+}
\ No newline at end of file
diff --git a/src/javascript/models/AppState.jsx b/src/javascript/models/AppState.jsx
index 878883d..79acc82 100644
--- a/src/javascript/models/AppState.jsx
+++ b/src/javascript/models/AppState.jsx
@@ -3,6 +3,8 @@ import { Enum } from 'enumify';
import NoOpTransformation from './transformations/NoOpTransformation.jsx';
import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx';
import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx';
+import ToTextPagesTransformation from './transformations/ToTextPagesTransformation.jsx';
+import ToSingleTextPageTransformation from './transformations/ToSingleTextPageTransformation.jsx'
// Holds the state of the Application
export default class AppState {
@@ -12,7 +14,7 @@ export default class AppState {
this.mainView = View.UPLOAD;
this.fileBuffer;
this.pdfPages = [];
- this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation()];
+ this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation(), new ToTextPagesTransformation(), new ToSingleTextPageTransformation()];
//bind functions
this.render = this.render.bind(this);
@@ -34,7 +36,7 @@ export default class AppState {
storePdfPages(pdfPages) {
this.pdfPages = pdfPages;
this.fileBuffer = null;
- this.mainView = View.PDF_VIEW;
+ this.mainView = View.DEBUG;
this.render();
}
@@ -42,4 +44,4 @@ export default class AppState {
export class View extends Enum {
}
-View.initEnum(['UPLOAD', 'LOADING', 'PDF_VIEW'])
\ No newline at end of file
+View.initEnum(['UPLOAD', 'LOADING', 'DEBUG'])
\ No newline at end of file
diff --git a/src/javascript/models/ContentView.jsx b/src/javascript/models/ContentView.jsx
new file mode 100644
index 0000000..96e92bf
--- /dev/null
+++ b/src/javascript/models/ContentView.jsx
@@ -0,0 +1,5 @@
+import { Enum } from 'enumify';
+
+export default class ContentView extends Enum {
+}
+ContentView.initEnum(['PDF', 'TEXT'])
\ No newline at end of file
diff --git a/src/javascript/models/TextPage.jsx b/src/javascript/models/TextPage.jsx
new file mode 100644
index 0000000..88f2806
--- /dev/null
+++ b/src/javascript/models/TextPage.jsx
@@ -0,0 +1,9 @@
+// A page, identified by its index, whose content is held as a single text string
+export default class TextPage {
+
+ constructor(options) {
+ this.index = options.index;
+ this.text = options.text;
+ }
+
+}
diff --git a/src/javascript/models/transformations/CombineSameYTransformation.jsx b/src/javascript/models/transformations/CombineSameYTransformation.jsx
index d33c433..941f695 100644
--- a/src/javascript/models/transformations/CombineSameYTransformation.jsx
+++ b/src/javascript/models/transformations/CombineSameYTransformation.jsx
@@ -1,6 +1,7 @@
import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
+import ContentView from '../ContentView.jsx';
export default class CombineSameYTransformation extends Transformation {
@@ -8,50 +9,55 @@ export default class CombineSameYTransformation extends Transformation {
super("Combine text on same Y");
}
- transform(pdfPage:PdfPage) {
+ contentView() {
+ return ContentView.PDF;
+ }
- const newTextItems = [];
- var lastTextItem;
- pdfPage.textItems.forEach(textItem => {
- if (!lastTextItem) {
- lastTextItem = textItem;
- } else {
- if (textItem.y == lastTextItem.y) {
- //combine
-
- // console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width);
- // console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width);
- // console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width));
-
- var combinedText = lastTextItem.text;
- //TODO make 5 dependent on text size or biggest gap?
- if (textItem.x - lastTextItem.x - lastTextItem.width > 7) {
- combinedText += ' ';
- }
- combinedText += textItem.text;
-
- lastTextItem = new TextItem({
- x: lastTextItem.x,
- y: lastTextItem.y,
- width: textItem.x - lastTextItem.x + textItem.width,
- height: lastTextItem.height, //might this cause problems ?
- text: combinedText
- });
- } else {
- //rotate
- newTextItems.push(lastTextItem);
+ transform(pages:PdfPage[]) {
+ return pages.map(pdfPage => {
+ const newTextItems = [];
+ var lastTextItem;
+ pdfPage.textItems.forEach(textItem => {
+ if (!lastTextItem) {
lastTextItem = textItem;
- }
- }
- });
- if (lastTextItem) {
- newTextItems.push(lastTextItem);
- }
+ } else {
+ if (textItem.y == lastTextItem.y) {
+ //combine
- return {
- ...pdfPage,
- textItems: newTextItems
- };
+ // console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width);
+ // console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width);
+ // console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width));
+
+ var combinedText = lastTextItem.text;
+ //TODO make 7 dependent on text size or biggest gap?
+ if (textItem.x - lastTextItem.x - lastTextItem.width > 7) {
+ combinedText += ' ';
+ }
+ combinedText += textItem.text;
+
+ lastTextItem = new TextItem({
+ x: lastTextItem.x,
+ y: lastTextItem.y,
+ width: textItem.x - lastTextItem.x + textItem.width,
+ height: lastTextItem.height, //might this cause problems ?
+ text: combinedText
+ });
+ } else {
+ //rotate
+ newTextItems.push(lastTextItem);
+ lastTextItem = textItem;
+ }
+ }
+ });
+ if (lastTextItem) {
+ newTextItems.push(lastTextItem);
+ }
+
+ return {
+ ...pdfPage,
+ textItems: newTextItems
+ };
+ });
}
}
\ No newline at end of file
diff --git a/src/javascript/models/transformations/NoOpTransformation.jsx b/src/javascript/models/transformations/NoOpTransformation.jsx
index c2b0dc9..cb538b0 100644
--- a/src/javascript/models/transformations/NoOpTransformation.jsx
+++ b/src/javascript/models/transformations/NoOpTransformation.jsx
@@ -1,5 +1,6 @@
import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx';
+import ContentView from '../ContentView.jsx';
export default class NoOpTransformation extends Transformation {
@@ -7,8 +8,12 @@ export default class NoOpTransformation extends Transformation {
super("Original");
}
- transform(pdfPage:PdfPage) {
- return pdfPage;
+ contentView() {
+ return ContentView.PDF;
+ }
+
+ transform(pdfPages:PdfPage[]) {
+ return pdfPages;
}
}
\ No newline at end of file
diff --git a/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx b/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx
index 3085818..fa30a7a 100644
--- a/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx
+++ b/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx
@@ -1,5 +1,6 @@
import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx';
+import ContentView from '../ContentView.jsx';
export default class RoundCoordinatesTransformation extends Transformation {
@@ -7,19 +8,25 @@ export default class RoundCoordinatesTransformation extends Transformation {
super("Round coordinates");
}
- transform(pdfPage:PdfPage) {
- return {
- ...pdfPage,
- textItems: pdfPage.textItems.map(textItem => {
- return {
- ...textItem,
- x: Math.round(textItem.x),
- y: Math.round(textItem.y),
- width: Math.round(textItem.width),
- height: Math.round(textItem.height)
- }
- })
- };
+ contentView() {
+ return ContentView.PDF;
+ }
+
+ transform(pdfPages:PdfPage[]) {
+ return pdfPages.map(pdfPage => {
+ return {
+ ...pdfPage,
+ textItems: pdfPage.textItems.map(textItem => {
+ return {
+ ...textItem,
+ x: Math.round(textItem.x),
+ y: Math.round(textItem.y),
+ width: Math.round(textItem.width),
+ height: Math.round(textItem.height)
+ }
+ })
+ };
+ });
}
}
\ No newline at end of file
diff --git a/src/javascript/models/transformations/ToSingleTextPageTransformation.jsx b/src/javascript/models/transformations/ToSingleTextPageTransformation.jsx
new file mode 100644
index 0000000..7773904
--- /dev/null
+++ b/src/javascript/models/transformations/ToSingleTextPageTransformation.jsx
@@ -0,0 +1,28 @@
+import Transformation from './Transformation.jsx';
+import TextPage from '../TextPage.jsx';
+import ContentView from '../ContentView.jsx';
+
+export default class ToSingleTextPageTransformation extends Transformation {
+
+ constructor() {
+ super("To Single Text Page");
+ }
+
+ showPageSelection() {
+ return false;
+ }
+
+ contentView() {
+ return ContentView.TEXT;
+ }
+
+ transform(pages:TextPage[]) {
+ var text = '';
+ pages.forEach(page => text += page.text + '\n');
+ return [new TextPage({
+ index: 0,
+ text: text
+ })];
+ }
+
+}
\ No newline at end of file
diff --git a/src/javascript/models/transformations/ToTextPagesTransformation.jsx b/src/javascript/models/transformations/ToTextPagesTransformation.jsx
new file mode 100644
index 0000000..f6deb7e
--- /dev/null
+++ b/src/javascript/models/transformations/ToTextPagesTransformation.jsx
@@ -0,0 +1,27 @@
+import Transformation from './Transformation.jsx';
+import PdfPage from '../PdfPage.jsx';
+import TextPage from '../TextPage.jsx';
+import ContentView from '../ContentView.jsx';
+
+export default class ToTextPagesTransformation extends Transformation {
+
+ constructor() {
+ super("To Text Pages");
+ }
+
+ contentView() {
+ return ContentView.TEXT;
+ }
+
+ transform(pdfPages:PdfPage[]) {
+ return pdfPages.map(pdfPage => {
+ var text = '';
+ pdfPage.textItems.forEach(textItem => text += textItem.text + '\n');
+ return new TextPage({
+ index: pdfPage.index,
+ text: text
+ });
+ });
+ }
+
+}
\ No newline at end of file
diff --git a/src/javascript/models/transformations/Transformation.jsx b/src/javascript/models/transformations/Transformation.jsx
index afeab11..4b0b8d4 100644
--- a/src/javascript/models/transformations/Transformation.jsx
+++ b/src/javascript/models/transformations/Transformation.jsx
@@ -1,5 +1,3 @@
-import PdfPage from '../PdfPage.jsx';
-
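-// A transformation from an PdfPage to an PdfPage
+// A transformation from a list of pages (PdfPage[] or TextPage[]) to a list of pages (PdfPage[] or TextPage[])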
export default class Transformation {
@@ -13,7 +11,19 @@ export default class Transformation {
this.name = name;
}
- transform(pdfPage:PdfPage) { // eslint-disable-line no-unused-vars
+ showPageSelection() {
+ return true;
+ }
+
+ // Returns the ContentView type with which the transformed pages can be viewed
+ contentView() {
throw new TypeError("Do not call abstract method foo from child.");
}
+
+ // Transforms the incoming pages (e.g. PdfPage[]) into different pages (either PdfPage[] or TextPage[])
+ transform(pages) { // eslint-disable-line no-unused-vars
+ throw new TypeError("Do not call abstract method foo from child.");
+ }
+
+
}
\ No newline at end of file