Add text view

This commit is contained in:
Johannes Zillmann 2017-01-22 19:04:23 +01:00
parent 5f500ad110
commit e43cf9a6a9
12 changed files with 228 additions and 95 deletions

View File

@ -6,7 +6,7 @@ import TopBar from './TopBar.jsx';
import { View } from '../models/AppState.jsx';
import PdfUploadView from './PdfUploadView.jsx';
import LoadingView from './LoadingView.jsx';
import PdfView from './PdfView.jsx';
import DebugView from './DebugView.jsx';
export default class App extends React.Component {
@ -26,8 +26,8 @@ export default class App extends React.Component {
case View.LOADING:
mainView = <LoadingView fileBuffer={ appState.fileBuffer } storePdfPagesFunction={ appState.storePdfPages } />
break;
case View.PDF_VIEW:
mainView = <PdfView pdfPages={ appState.pdfPages } transformations={ appState.transformations } />
case View.DEBUG:
mainView = <DebugView pdfPages={ appState.pdfPages } transformations={ appState.transformations } />
break;
}

View File

@ -6,10 +6,12 @@ import Button from 'react-bootstrap/lib/Button'
import DropdownButton from 'react-bootstrap/lib/DropdownButton'
import MenuItem from 'react-bootstrap/lib/MenuItem'
import ContentView from '../models/ContentView.jsx';
import PdfPageView from './PdfPageView.jsx';
import TextPageView from './TextPageView.jsx';
// A view which displays the TextItems of multiple PdfPages
export default class PdfView extends React.Component {
// A view which displays the content of the given pages transformed by the given transformations
export default class DebugView extends React.Component {
static propTypes = {
pdfPages: React.PropTypes.array.isRequired,
@ -55,39 +57,51 @@ export default class PdfView extends React.Component {
const currentTransformationName = transformations[currentTransformation].name;
const transformedPdfPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr).map(pdfPage => {
for (var i = 0; i <= currentTransformation; i++) {
pdfPage = transformations[i].transform(pdfPage);
}
return pdfPage;
});
var transformedPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr);
var contentView;
var lastTransformation;
for (var i = 0; i <= currentTransformation; i++) {
transformedPages = transformations[i].transform(transformedPages);
lastTransformation = transformations[i];
contentView = transformations[i].contentView();
}
var pageComponents = transformedPdfPages.map(page => <PdfPageView key={ page.index } pdfPage={ page } />);
var pageComponents;
switch (contentView) {
case ContentView.PDF:
pageComponents = transformedPages.map(page => <PdfPageView key={ page.index } pdfPage={ page } />);
break;
case ContentView.TEXT:
//transformedPages.forEach(p => console.debug(p));
pageComponents = transformedPages.map(page => <TextPageView key={ page.index } page={ page } />);
break;
}
return (
<div>
<div>
<table style={ { width: '100%' } }>
<caption>
Pages
</caption>
<tbody>
<tr>
<td>
<ButtonToolbar>
<ButtonGroup>
<Button onClick={ this.selectPage.bind(this, -1) } className={ pageNr == -1 ? 'active' : '' }>
All
</Button>
{ pdfPages.map((pdfPage, i) => <Button key={ i } onClick={ this.selectPage.bind(this, i) } className={ pageNr == i ? 'active' : '' }>
{ i + 1 }
</Button>) }
</ButtonGroup>
</ButtonToolbar>
</td>
</tr>
</tbody>
</table>
{ lastTransformation.showPageSelection() &&
<table style={ { width: '100%' } }>
<caption>
Pages
</caption>
<tbody>
<tr>
<td>
<ButtonToolbar>
<ButtonGroup>
<Button onClick={ this.selectPage.bind(this, -1) } className={ pageNr == -1 ? 'active' : '' }>
All
</Button>
{ pdfPages.map((pdfPage, i) => <Button key={ i } onClick={ this.selectPage.bind(this, i) } className={ pageNr == i ? 'active' : '' }>
{ i + 1 }
</Button>) }
</ButtonGroup>
</ButtonToolbar>
</td>
</tr>
</tbody>
</table> }
<br/>
<table>
<caption>

View File

@ -0,0 +1,20 @@
import React from 'react';
export default class TextPageView extends React.Component {
static propTypes = {
page: React.PropTypes.object.isRequired,
};
render() {
const header = "Page " + (this.props.page.index + 1);
return (
<div>
<h2>{ header }</h2>
<textarea rows="45" cols="150" defaultValue={ this.props.page.text }>
</textarea>
</div>
);
}
}

View File

@ -3,6 +3,8 @@ import { Enum } from 'enumify';
import NoOpTransformation from './transformations/NoOpTransformation.jsx';
import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx';
import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx';
import ToTextPagesTransformation from './transformations/ToTextPagesTransformation.jsx';
import ToSingleTextPageTransformation from './transformations/ToSingleTextPageTransformation.jsx'
// Holds the state of the Application
export default class AppState {
@ -12,7 +14,7 @@ export default class AppState {
this.mainView = View.UPLOAD;
this.fileBuffer;
this.pdfPages = [];
this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation()];
this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation(), new ToTextPagesTransformation(), new ToSingleTextPageTransformation()];
//bind functions
this.render = this.render.bind(this);
@ -34,7 +36,7 @@ export default class AppState {
storePdfPages(pdfPages) {
this.pdfPages = pdfPages;
this.fileBuffer = null;
this.mainView = View.PDF_VIEW;
this.mainView = View.DEBUG;
this.render();
}
@ -42,4 +44,4 @@ export default class AppState {
export class View extends Enum {
}
View.initEnum(['UPLOAD', 'LOADING', 'PDF_VIEW'])
View.initEnum(['UPLOAD', 'LOADING', 'DEBUG'])

View File

@ -0,0 +1,5 @@
import { Enum } from 'enumify';
// Enumerates the kinds of content a list of pages can be displayed as.
// The values are created by the initEnum() call below.
export default class ContentView extends Enum {
}
// PDF and TEXT are the two available content views
ContentView.initEnum(['PDF', 'TEXT'])

View File

@ -0,0 +1,9 @@
// A page which holds TextItems displayable via PdfPageView
export default class TextPage {
constructor(options) {
this.index = options.index;
this.text = options.text;
}
}

View File

@ -1,6 +1,7 @@
import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
export default class CombineSameYTransformation extends Transformation {
@ -8,50 +9,55 @@ export default class CombineSameYTransformation extends Transformation {
super("Combine text on same Y");
}
transform(pdfPage:PdfPage) {
contentView() {
return ContentView.PDF;
}
const newTextItems = [];
var lastTextItem;
pdfPage.textItems.forEach(textItem => {
if (!lastTextItem) {
lastTextItem = textItem;
} else {
if (textItem.y == lastTextItem.y) {
//combine
// console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width);
// console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width);
// console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width));
var combinedText = lastTextItem.text;
//TODO make 5 dependent on text size or biggest gap?
if (textItem.x - lastTextItem.x - lastTextItem.width > 7) {
combinedText += ' ';
}
combinedText += textItem.text;
lastTextItem = new TextItem({
x: lastTextItem.x,
y: lastTextItem.y,
width: textItem.x - lastTextItem.x + textItem.width,
height: lastTextItem.height, //might this cause problems ?
text: combinedText
});
} else {
//rotate
newTextItems.push(lastTextItem);
transform(pages:PdfPage[]) {
return pages.map(pdfPage => {
const newTextItems = [];
var lastTextItem;
pdfPage.textItems.forEach(textItem => {
if (!lastTextItem) {
lastTextItem = textItem;
}
}
});
if (lastTextItem) {
newTextItems.push(lastTextItem);
}
} else {
if (textItem.y == lastTextItem.y) {
//combine
return {
...pdfPage,
textItems: newTextItems
};
// console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width);
// console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width);
// console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width));
var combinedText = lastTextItem.text;
//TODO make 5 dependent on text size or biggest gap?
if (textItem.x - lastTextItem.x - lastTextItem.width > 7) {
combinedText += ' ';
}
combinedText += textItem.text;
lastTextItem = new TextItem({
x: lastTextItem.x,
y: lastTextItem.y,
width: textItem.x - lastTextItem.x + textItem.width,
height: lastTextItem.height, //might this cause problems ?
text: combinedText
});
} else {
//rotate
newTextItems.push(lastTextItem);
lastTextItem = textItem;
}
}
});
if (lastTextItem) {
newTextItems.push(lastTextItem);
}
return {
...pdfPage,
textItems: newTextItems
};
});
}
}

View File

@ -1,5 +1,6 @@
import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
export default class NoOpTransformation extends Transformation {
@ -7,8 +8,12 @@ export default class NoOpTransformation extends Transformation {
super("Original");
}
transform(pdfPage:PdfPage) {
return pdfPage;
contentView() {
return ContentView.PDF;
}
transform(pdfPages:PdfPage[]) {
return pdfPages;
}
}

View File

@ -1,5 +1,6 @@
import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx';
import ContentView from '../ContentView.jsx';
export default class RoundCoordinatesTransformation extends Transformation {
@ -7,19 +8,25 @@ export default class RoundCoordinatesTransformation extends Transformation {
super("Round coordinates");
}
transform(pdfPage:PdfPage) {
return {
...pdfPage,
textItems: pdfPage.textItems.map(textItem => {
return {
...textItem,
x: Math.round(textItem.x),
y: Math.round(textItem.y),
width: Math.round(textItem.width),
height: Math.round(textItem.height)
}
})
};
contentView() {
return ContentView.PDF;
}
transform(pdfPages:PdfPage[]) {
return pdfPages.map(pdfPage => {
return {
...pdfPage,
textItems: pdfPage.textItems.map(textItem => {
return {
...textItem,
x: Math.round(textItem.x),
y: Math.round(textItem.y),
width: Math.round(textItem.width),
height: Math.round(textItem.height)
}
})
};
});
}
}

View File

@ -0,0 +1,28 @@
import Transformation from './Transformation.jsx';
import TextPage from '../TextPage.jsx';
import ContentView from '../ContentView.jsx';
export default class ToSingleTextPageTransformation extends Transformation {
constructor() {
super("To Single Text Page");
}
showPageSelection() {
return false;
}
contentView() {
return ContentView.TEXT;
}
transform(pages:TextPage[]) {
var text = '';
pages.forEach(page => text += page.text + '\n');
return [new TextPage({
index: 0,
text: text
})];
}
}

View File

@ -0,0 +1,27 @@
import Transformation from './Transformation.jsx';
import PdfPage from '../PdfPage.jsx';
import TextPage from '../TextPage.jsx';
import ContentView from '../ContentView.jsx';
export default class ToTextPagesTransformation extends Transformation {
constructor() {
super("To Text Pages");
}
contentView() {
return ContentView.TEXT;
}
transform(pdfPages:PdfPage[]) {
return pdfPages.map(pdfPage => {
var text = '';
pdfPage.textItems.forEach(textItem => text += textItem.text + '\n');
return new TextPage({
index: pdfPage.index,
text: text
});
});
}
}

View File

@ -1,5 +1,3 @@
import PdfPage from '../PdfPage.jsx';
// A transformation from a list of pages (like PdfPage[]) to a list of pages (either PdfPage[] or TextPage[])
export default class Transformation {
@ -13,7 +11,19 @@ export default class Transformation {
this.name = name;
}
transform(pdfPage:PdfPage) { // eslint-disable-line no-unused-vars
showPageSelection() {
return true;
}
// Returns with which type the transformed pages can be viewed
contentView() {
throw new TypeError("Do not call abstract method foo from child.");
}
// Transform incoming pages (like PdfPage[]) into different pages (either PdfPages[] or TextPages[])
transform(pages) { // eslint-disable-line no-unused-vars
throw new TypeError("Do not call abstract method foo from child.");
}
}