Add text view

This commit is contained in:
Johannes Zillmann 2017-01-22 19:04:23 +01:00
parent 5f500ad110
commit e43cf9a6a9
12 changed files with 228 additions and 95 deletions

View File

@ -6,7 +6,7 @@ import TopBar from './TopBar.jsx';
import { View } from '../models/AppState.jsx'; import { View } from '../models/AppState.jsx';
import PdfUploadView from './PdfUploadView.jsx'; import PdfUploadView from './PdfUploadView.jsx';
import LoadingView from './LoadingView.jsx'; import LoadingView from './LoadingView.jsx';
import PdfView from './PdfView.jsx'; import DebugView from './DebugView.jsx';
export default class App extends React.Component { export default class App extends React.Component {
@ -26,8 +26,8 @@ export default class App extends React.Component {
case View.LOADING: case View.LOADING:
mainView = <LoadingView fileBuffer={ appState.fileBuffer } storePdfPagesFunction={ appState.storePdfPages } /> mainView = <LoadingView fileBuffer={ appState.fileBuffer } storePdfPagesFunction={ appState.storePdfPages } />
break; break;
case View.PDF_VIEW: case View.DEBUG:
mainView = <PdfView pdfPages={ appState.pdfPages } transformations={ appState.transformations } /> mainView = <DebugView pdfPages={ appState.pdfPages } transformations={ appState.transformations } />
break; break;
} }

View File

@ -6,10 +6,12 @@ import Button from 'react-bootstrap/lib/Button'
import DropdownButton from 'react-bootstrap/lib/DropdownButton' import DropdownButton from 'react-bootstrap/lib/DropdownButton'
import MenuItem from 'react-bootstrap/lib/MenuItem' import MenuItem from 'react-bootstrap/lib/MenuItem'
import ContentView from '../models/ContentView.jsx';
import PdfPageView from './PdfPageView.jsx'; import PdfPageView from './PdfPageView.jsx';
import TextPageView from './TextPageView.jsx';
// A view which displays the TextItems of multiple PdfPages // A view which displays the content of the given pages transformed by the given transformations
export default class PdfView extends React.Component { export default class DebugView extends React.Component {
static propTypes = { static propTypes = {
pdfPages: React.PropTypes.array.isRequired, pdfPages: React.PropTypes.array.isRequired,
@ -55,39 +57,51 @@ export default class PdfView extends React.Component {
const currentTransformationName = transformations[currentTransformation].name; const currentTransformationName = transformations[currentTransformation].name;
const transformedPdfPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr).map(pdfPage => { var transformedPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr);
for (var i = 0; i <= currentTransformation; i++) { var contentView;
pdfPage = transformations[i].transform(pdfPage); var lastTransformation;
} for (var i = 0; i <= currentTransformation; i++) {
return pdfPage; transformedPages = transformations[i].transform(transformedPages);
}); lastTransformation = transformations[i];
contentView = transformations[i].contentView();
}
var pageComponents = transformedPdfPages.map(page => <PdfPageView key={ page.index } pdfPage={ page } />); var pageComponents;
switch (contentView) {
case ContentView.PDF:
pageComponents = transformedPages.map(page => <PdfPageView key={ page.index } pdfPage={ page } />);
break;
case ContentView.TEXT:
//transformedPages.forEach(p => console.debug(p));
pageComponents = transformedPages.map(page => <TextPageView key={ page.index } page={ page } />);
break;
}
return ( return (
<div> <div>
<div> <div>
<table style={ { width: '100%' } }> { lastTransformation.showPageSelection() &&
<caption> <table style={ { width: '100%' } }>
Pages <caption>
</caption> Pages
<tbody> </caption>
<tr> <tbody>
<td> <tr>
<ButtonToolbar> <td>
<ButtonGroup> <ButtonToolbar>
<Button onClick={ this.selectPage.bind(this, -1) } className={ pageNr == -1 ? 'active' : '' }> <ButtonGroup>
All <Button onClick={ this.selectPage.bind(this, -1) } className={ pageNr == -1 ? 'active' : '' }>
</Button> All
{ pdfPages.map((pdfPage, i) => <Button key={ i } onClick={ this.selectPage.bind(this, i) } className={ pageNr == i ? 'active' : '' }> </Button>
{ i + 1 } { pdfPages.map((pdfPage, i) => <Button key={ i } onClick={ this.selectPage.bind(this, i) } className={ pageNr == i ? 'active' : '' }>
</Button>) } { i + 1 }
</ButtonGroup> </Button>) }
</ButtonToolbar> </ButtonGroup>
</td> </ButtonToolbar>
</tr> </td>
</tbody> </tr>
</table> </tbody>
</table> }
<br/> <br/>
<table> <table>
<caption> <caption>

View File

@ -0,0 +1,20 @@
import React from 'react';
export default class TextPageView extends React.Component {
static propTypes = {
page: React.PropTypes.object.isRequired,
};
render() {
const header = "Page " + (this.props.page.index + 1);
return (
<div>
<h2>{ header }</h2>
<textarea rows="45" cols="150" defaultValue={ this.props.page.text }>
</textarea>
</div>
);
}
}

View File

@ -3,6 +3,8 @@ import { Enum } from 'enumify';
import NoOpTransformation from './transformations/NoOpTransformation.jsx'; import NoOpTransformation from './transformations/NoOpTransformation.jsx';
import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx'; import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx';
import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx'; import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx';
import ToTextPagesTransformation from './transformations/ToTextPagesTransformation.jsx';
import ToSingleTextPageTransformation from './transformations/ToSingleTextPageTransformation.jsx'
// Holds the state of the Application // Holds the state of the Application
export default class AppState { export default class AppState {
@ -12,7 +14,7 @@ export default class AppState {
this.mainView = View.UPLOAD; this.mainView = View.UPLOAD;
this.fileBuffer; this.fileBuffer;
this.pdfPages = []; this.pdfPages = [];
this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation()]; this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation(), new ToTextPagesTransformation(), new ToSingleTextPageTransformation()];
//bind functions //bind functions
this.render = this.render.bind(this); this.render = this.render.bind(this);
@ -34,7 +36,7 @@ export default class AppState {
storePdfPages(pdfPages) { storePdfPages(pdfPages) {
this.pdfPages = pdfPages; this.pdfPages = pdfPages;
this.fileBuffer = null; this.fileBuffer = null;
this.mainView = View.PDF_VIEW; this.mainView = View.DEBUG;
this.render(); this.render();
} }
@ -42,4 +44,4 @@ export default class AppState {
export class View extends Enum { export class View extends Enum {
} }
View.initEnum(['UPLOAD', 'LOADING', 'PDF_VIEW']) View.initEnum(['UPLOAD', 'LOADING', 'DEBUG'])

View File

@ -0,0 +1,5 @@
import { Enum } from 'enumify';
// Enum naming the kind of view used to display transformed pages.
// Its values are referenced as ContentView.PDF and ContentView.TEXT.
export default class ContentView extends Enum {
}
// Declare the enum values; initEnum comes from the enumify `Enum` base class.
ContentView.initEnum(['PDF', 'TEXT'])

View File

@ -0,0 +1,9 @@
// A page which holds TextItems displayable via PdfPageView
export default class TextPage {
constructor(options) {
this.index = options.index;
this.text = options.text;
}
}

View File

@ -1,6 +1,7 @@
import Transformation from './Transformation.jsx'; import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx'; import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
export default class CombineSameYTransformation extends Transformation { export default class CombineSameYTransformation extends Transformation {
@ -8,50 +9,55 @@ export default class CombineSameYTransformation extends Transformation {
super("Combine text on same Y"); super("Combine text on same Y");
} }
transform(pdfPage:PdfPage) { contentView() {
return ContentView.PDF;
}
const newTextItems = []; transform(pages:PdfPage[]) {
var lastTextItem; return pages.map(pdfPage => {
pdfPage.textItems.forEach(textItem => { const newTextItems = [];
if (!lastTextItem) { var lastTextItem;
lastTextItem = textItem; pdfPage.textItems.forEach(textItem => {
} else { if (!lastTextItem) {
if (textItem.y == lastTextItem.y) {
//combine
// console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width);
// console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width);
// console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width));
var combinedText = lastTextItem.text;
//TODO make 5 dependent on text size or biggest gap?
if (textItem.x - lastTextItem.x - lastTextItem.width > 7) {
combinedText += ' ';
}
combinedText += textItem.text;
lastTextItem = new TextItem({
x: lastTextItem.x,
y: lastTextItem.y,
width: textItem.x - lastTextItem.x + textItem.width,
height: lastTextItem.height, //might this cause problems ?
text: combinedText
});
} else {
//rotate
newTextItems.push(lastTextItem);
lastTextItem = textItem; lastTextItem = textItem;
} } else {
} if (textItem.y == lastTextItem.y) {
}); //combine
if (lastTextItem) {
newTextItems.push(lastTextItem);
}
return { // console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width);
...pdfPage, // console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width);
textItems: newTextItems // console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width));
};
var combinedText = lastTextItem.text;
//TODO make 5 dependent on text size or biggest gap?
if (textItem.x - lastTextItem.x - lastTextItem.width > 7) {
combinedText += ' ';
}
combinedText += textItem.text;
lastTextItem = new TextItem({
x: lastTextItem.x,
y: lastTextItem.y,
width: textItem.x - lastTextItem.x + textItem.width,
height: lastTextItem.height, //might this cause problems ?
text: combinedText
});
} else {
//rotate
newTextItems.push(lastTextItem);
lastTextItem = textItem;
}
}
});
if (lastTextItem) {
newTextItems.push(lastTextItem);
}
return {
...pdfPage,
textItems: newTextItems
};
});
} }
} }

View File

@ -1,5 +1,6 @@
import Transformation from './Transformation.jsx'; import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
export default class NoOpTransformation extends Transformation { export default class NoOpTransformation extends Transformation {
@ -7,8 +8,12 @@ export default class NoOpTransformation extends Transformation {
super("Original"); super("Original");
} }
transform(pdfPage:PdfPage) { contentView() {
return pdfPage; return ContentView.PDF;
}
transform(pdfPages:PdfPage[]) {
return pdfPages;
} }
} }

View File

@ -1,5 +1,6 @@
import Transformation from './Transformation.jsx'; import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx'; import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
export default class RoundCoordinatesTransformation extends Transformation { export default class RoundCoordinatesTransformation extends Transformation {
@ -7,19 +8,25 @@ export default class RoundCoordinatesTransformation extends Transformation {
super("Round coordinates"); super("Round coordinates");
} }
transform(pdfPage:PdfPage) { contentView() {
return { return ContentView.PDF;
...pdfPage, }
textItems: pdfPage.textItems.map(textItem => {
return { transform(pdfPages:PdfPage[]) {
...textItem, return pdfPages.map(pdfPage => {
x: Math.round(textItem.x), return {
y: Math.round(textItem.y), ...pdfPage,
width: Math.round(textItem.width), textItems: pdfPage.textItems.map(textItem => {
height: Math.round(textItem.height) return {
} ...textItem,
}) x: Math.round(textItem.x),
}; y: Math.round(textItem.y),
width: Math.round(textItem.width),
height: Math.round(textItem.height)
}
})
};
});
} }
} }

View File

@ -0,0 +1,28 @@
import Transformation from './Transformation.jsx';
import TextPage from '../TextPage.jsx';
import ContentView from '../ContentView.jsx';
export default class ToSingleTextPageTransformation extends Transformation {
constructor() {
super("To Single Text Page");
}
showPageSelection() {
return false;
}
contentView() {
return ContentView.TEXT;
}
transform(pages:TextPage[]) {
var text = '';
pages.forEach(page => text += page.text + '\n');
return [new TextPage({
index: 0,
text: text
})];
}
}

View File

@ -0,0 +1,27 @@
import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx';
import TextPage from '../TextPage.jsx';
import ContentView from '../ContentView.jsx';
export default class ToTextPagesTransformation extends Transformation {
constructor() {
super("To Text Pages");
}
contentView() {
return ContentView.TEXT;
}
transform(pdfPages:PdfPage[]) {
return pdfPages.map(pdfPage => {
var text = '';
pdfPage.textItems.forEach(textItem => text += textItem.text + '\n');
return new TextPage({
index: pdfPage.index,
text: text
});
});
}
}

View File

@ -1,5 +1,3 @@
import PdfPage from '../PdfPage.jsx';
// A transformation from an PdfPage to an PdfPage // A transformation from an PdfPage to an PdfPage
export default class Transformation { export default class Transformation {
@ -13,7 +11,19 @@ export default class Transformation {
this.name = name; this.name = name;
} }
transform(pdfPage:PdfPage) { // eslint-disable-line no-unused-vars showPageSelection() {
return true;
}
// Returns with which type the transformed pages can be viewed
contentView() {
throw new TypeError("Do not call abstract method foo from child."); throw new TypeError("Do not call abstract method foo from child.");
} }
// Transform incoming pages (like PdfPage[]) into different pages (either PdfPages[] or TextPages[])
transform(pages) { // eslint-disable-line no-unused-vars
throw new TypeError("Do not call abstract method foo from child.");
}
} }