mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-22 15:53:34 +01:00
Add text view
This commit is contained in:
parent
5f500ad110
commit
e43cf9a6a9
@ -6,7 +6,7 @@ import TopBar from './TopBar.jsx';
|
|||||||
import { View } from '../models/AppState.jsx';
|
import { View } from '../models/AppState.jsx';
|
||||||
import PdfUploadView from './PdfUploadView.jsx';
|
import PdfUploadView from './PdfUploadView.jsx';
|
||||||
import LoadingView from './LoadingView.jsx';
|
import LoadingView from './LoadingView.jsx';
|
||||||
import PdfView from './PdfView.jsx';
|
import DebugView from './DebugView.jsx';
|
||||||
|
|
||||||
export default class App extends React.Component {
|
export default class App extends React.Component {
|
||||||
|
|
||||||
@ -26,8 +26,8 @@ export default class App extends React.Component {
|
|||||||
case View.LOADING:
|
case View.LOADING:
|
||||||
mainView = <LoadingView fileBuffer={ appState.fileBuffer } storePdfPagesFunction={ appState.storePdfPages } />
|
mainView = <LoadingView fileBuffer={ appState.fileBuffer } storePdfPagesFunction={ appState.storePdfPages } />
|
||||||
break;
|
break;
|
||||||
case View.PDF_VIEW:
|
case View.DEBUG:
|
||||||
mainView = <PdfView pdfPages={ appState.pdfPages } transformations={ appState.transformations } />
|
mainView = <DebugView pdfPages={ appState.pdfPages } transformations={ appState.transformations } />
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,10 +6,12 @@ import Button from 'react-bootstrap/lib/Button'
|
|||||||
import DropdownButton from 'react-bootstrap/lib/DropdownButton'
|
import DropdownButton from 'react-bootstrap/lib/DropdownButton'
|
||||||
import MenuItem from 'react-bootstrap/lib/MenuItem'
|
import MenuItem from 'react-bootstrap/lib/MenuItem'
|
||||||
|
|
||||||
|
import ContentView from '../models/ContentView.jsx';
|
||||||
import PdfPageView from './PdfPageView.jsx';
|
import PdfPageView from './PdfPageView.jsx';
|
||||||
|
import TextPageView from './TextPageView.jsx';
|
||||||
|
|
||||||
// A view which displays the TextItems of multiple PdfPages
|
// A view which displays the content of the given pages transformed by the given transformations
|
||||||
export default class PdfView extends React.Component {
|
export default class DebugView extends React.Component {
|
||||||
|
|
||||||
static propTypes = {
|
static propTypes = {
|
||||||
pdfPages: React.PropTypes.array.isRequired,
|
pdfPages: React.PropTypes.array.isRequired,
|
||||||
@ -55,18 +57,30 @@ export default class PdfView extends React.Component {
|
|||||||
|
|
||||||
const currentTransformationName = transformations[currentTransformation].name;
|
const currentTransformationName = transformations[currentTransformation].name;
|
||||||
|
|
||||||
const transformedPdfPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr).map(pdfPage => {
|
var transformedPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr);
|
||||||
|
var contentView;
|
||||||
|
var lastTransformation;
|
||||||
for (var i = 0; i <= currentTransformation; i++) {
|
for (var i = 0; i <= currentTransformation; i++) {
|
||||||
pdfPage = transformations[i].transform(pdfPage);
|
transformedPages = transformations[i].transform(transformedPages);
|
||||||
|
lastTransformation = transformations[i];
|
||||||
|
contentView = transformations[i].contentView();
|
||||||
}
|
}
|
||||||
return pdfPage;
|
|
||||||
});
|
|
||||||
|
|
||||||
var pageComponents = transformedPdfPages.map(page => <PdfPageView key={ page.index } pdfPage={ page } />);
|
var pageComponents;
|
||||||
|
switch (contentView) {
|
||||||
|
case ContentView.PDF:
|
||||||
|
pageComponents = transformedPages.map(page => <PdfPageView key={ page.index } pdfPage={ page } />);
|
||||||
|
break;
|
||||||
|
case ContentView.TEXT:
|
||||||
|
//transformedPages.forEach(p => console.debug(p));
|
||||||
|
pageComponents = transformedPages.map(page => <TextPageView key={ page.index } page={ page } />);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div>
|
<div>
|
||||||
<div>
|
<div>
|
||||||
|
{ lastTransformation.showPageSelection() &&
|
||||||
<table style={ { width: '100%' } }>
|
<table style={ { width: '100%' } }>
|
||||||
<caption>
|
<caption>
|
||||||
Pages
|
Pages
|
||||||
@ -87,7 +101,7 @@ export default class PdfView extends React.Component {
|
|||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table> }
|
||||||
<br/>
|
<br/>
|
||||||
<table>
|
<table>
|
||||||
<caption>
|
<caption>
|
20
src/javascript/components/TextPageView.jsx
Normal file
20
src/javascript/components/TextPageView.jsx
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
import React from 'react';
|
||||||
|
|
||||||
|
export default class TextPageView extends React.Component {
|
||||||
|
|
||||||
|
static propTypes = {
|
||||||
|
page: React.PropTypes.object.isRequired,
|
||||||
|
};
|
||||||
|
|
||||||
|
render() {
|
||||||
|
const header = "Page " + (this.props.page.index + 1);
|
||||||
|
return (
|
||||||
|
<div>
|
||||||
|
<h2>{ header }</h2>
|
||||||
|
<textarea rows="45" cols="150" defaultValue={ this.props.page.text }>
|
||||||
|
</textarea>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -3,6 +3,8 @@ import { Enum } from 'enumify';
|
|||||||
import NoOpTransformation from './transformations/NoOpTransformation.jsx';
|
import NoOpTransformation from './transformations/NoOpTransformation.jsx';
|
||||||
import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx';
|
import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx';
|
||||||
import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx';
|
import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx';
|
||||||
|
import ToTextPagesTransformation from './transformations/ToTextPagesTransformation.jsx';
|
||||||
|
import ToSingleTextPageTransformation from './transformations/ToSingleTextPageTransformation.jsx'
|
||||||
|
|
||||||
// Holds the state of the Application
|
// Holds the state of the Application
|
||||||
export default class AppState {
|
export default class AppState {
|
||||||
@ -12,7 +14,7 @@ export default class AppState {
|
|||||||
this.mainView = View.UPLOAD;
|
this.mainView = View.UPLOAD;
|
||||||
this.fileBuffer;
|
this.fileBuffer;
|
||||||
this.pdfPages = [];
|
this.pdfPages = [];
|
||||||
this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation()];
|
this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation(), new ToTextPagesTransformation(), new ToSingleTextPageTransformation()];
|
||||||
|
|
||||||
//bind functions
|
//bind functions
|
||||||
this.render = this.render.bind(this);
|
this.render = this.render.bind(this);
|
||||||
@ -34,7 +36,7 @@ export default class AppState {
|
|||||||
storePdfPages(pdfPages) {
|
storePdfPages(pdfPages) {
|
||||||
this.pdfPages = pdfPages;
|
this.pdfPages = pdfPages;
|
||||||
this.fileBuffer = null;
|
this.fileBuffer = null;
|
||||||
this.mainView = View.PDF_VIEW;
|
this.mainView = View.DEBUG;
|
||||||
this.render();
|
this.render();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -42,4 +44,4 @@ export default class AppState {
|
|||||||
|
|
||||||
export class View extends Enum {
|
export class View extends Enum {
|
||||||
}
|
}
|
||||||
View.initEnum(['UPLOAD', 'LOADING', 'PDF_VIEW'])
|
View.initEnum(['UPLOAD', 'LOADING', 'DEBUG'])
|
5
src/javascript/models/ContentView.jsx
Normal file
5
src/javascript/models/ContentView.jsx
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
import { Enum } from 'enumify';
|
||||||
|
|
||||||
|
// Enumerates the ways the pages produced by a transformation can be displayed:
// PDF (rendered via PdfPageView) or TEXT (rendered via TextPageView).
export default class ContentView extends Enum {
}
ContentView.initEnum(['PDF', 'TEXT']);
|
9
src/javascript/models/TextPage.jsx
Normal file
9
src/javascript/models/TextPage.jsx
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
// A page which holds plain text, displayable via TextPageView
export default class TextPage {

    // Index of the page (0-based; TextPageView renders it as index + 1)
    index;
    // The complete text of the page
    text;

    // options: an object providing the `index` and `text` of the page
    constructor(options) {
        this.index = options.index;
        this.text = options.text;
    }

}
|
@ -1,6 +1,7 @@
|
|||||||
import Transformation from './Transformation.jsx';
|
import Transformation from './Transformation.jsx';
|
||||||
import TextItem from '../TextItem.jsx';
|
import TextItem from '../TextItem.jsx';
|
||||||
import PdfPage from '../PdfPage.jsx';
|
import PdfPage from '../PdfPage.jsx';
|
||||||
|
import ContentView from '../ContentView.jsx';
|
||||||
|
|
||||||
export default class CombineSameYTransformation extends Transformation {
|
export default class CombineSameYTransformation extends Transformation {
|
||||||
|
|
||||||
@ -8,8 +9,12 @@ export default class CombineSameYTransformation extends Transformation {
|
|||||||
super("Combine text on same Y");
|
super("Combine text on same Y");
|
||||||
}
|
}
|
||||||
|
|
||||||
transform(pdfPage:PdfPage) {
|
contentView() {
|
||||||
|
return ContentView.PDF;
|
||||||
|
}
|
||||||
|
|
||||||
|
transform(pages:PdfPage[]) {
|
||||||
|
return pages.map(pdfPage => {
|
||||||
const newTextItems = [];
|
const newTextItems = [];
|
||||||
var lastTextItem;
|
var lastTextItem;
|
||||||
pdfPage.textItems.forEach(textItem => {
|
pdfPage.textItems.forEach(textItem => {
|
||||||
@ -52,6 +57,7 @@ export default class CombineSameYTransformation extends Transformation {
|
|||||||
...pdfPage,
|
...pdfPage,
|
||||||
textItems: newTextItems
|
textItems: newTextItems
|
||||||
};
|
};
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -1,5 +1,6 @@
|
|||||||
import Transformation from './Transformation.jsx';
|
import Transformation from './Transformation.jsx';
|
||||||
import PdfPage from '../PdfPage.jsx';
|
import PdfPage from '../PdfPage.jsx';
|
||||||
|
import ContentView from '../ContentView.jsx';
|
||||||
|
|
||||||
export default class NoOpTransformation extends Transformation {
|
export default class NoOpTransformation extends Transformation {
|
||||||
|
|
||||||
@ -7,8 +8,12 @@ export default class NoOpTransformation extends Transformation {
|
|||||||
super("Original");
|
super("Original");
|
||||||
}
|
}
|
||||||
|
|
||||||
transform(pdfPage:PdfPage) {
|
contentView() {
|
||||||
return pdfPage;
|
return ContentView.PDF;
|
||||||
|
}
|
||||||
|
|
||||||
|
transform(pdfPages:PdfPage[]) {
|
||||||
|
return pdfPages;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -1,5 +1,6 @@
|
|||||||
import Transformation from './Transformation.jsx';
|
import Transformation from './Transformation.jsx';
|
||||||
import PdfPage from '../PdfPage.jsx';
|
import PdfPage from '../PdfPage.jsx';
|
||||||
|
import ContentView from '../ContentView.jsx';
|
||||||
|
|
||||||
export default class RoundCoordinatesTransformation extends Transformation {
|
export default class RoundCoordinatesTransformation extends Transformation {
|
||||||
|
|
||||||
@ -7,7 +8,12 @@ export default class RoundCoordinatesTransformation extends Transformation {
|
|||||||
super("Round coordinates");
|
super("Round coordinates");
|
||||||
}
|
}
|
||||||
|
|
||||||
transform(pdfPage:PdfPage) {
|
contentView() {
|
||||||
|
return ContentView.PDF;
|
||||||
|
}
|
||||||
|
|
||||||
|
transform(pdfPages:PdfPage[]) {
|
||||||
|
return pdfPages.map(pdfPage => {
|
||||||
return {
|
return {
|
||||||
...pdfPage,
|
...pdfPage,
|
||||||
textItems: pdfPage.textItems.map(textItem => {
|
textItems: pdfPage.textItems.map(textItem => {
|
||||||
@ -20,6 +26,7 @@ export default class RoundCoordinatesTransformation extends Transformation {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
};
|
};
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -0,0 +1,28 @@
|
|||||||
|
import Transformation from './Transformation.jsx';
|
||||||
|
import TextPage from '../TextPage.jsx';
|
||||||
|
import ContentView from '../ContentView.jsx';
|
||||||
|
|
||||||
|
export default class ToSingleTextPageTransformation extends Transformation {
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
super("To Single Text Page");
|
||||||
|
}
|
||||||
|
|
||||||
|
showPageSelection() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
contentView() {
|
||||||
|
return ContentView.TEXT;
|
||||||
|
}
|
||||||
|
|
||||||
|
transform(pages:TextPage[]) {
|
||||||
|
var text = '';
|
||||||
|
pages.forEach(page => text += page.text + '\n');
|
||||||
|
return [new TextPage({
|
||||||
|
index: 0,
|
||||||
|
text: text
|
||||||
|
})];
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,27 @@
|
|||||||
|
import Transformation from './Transformation.jsx';
|
||||||
|
import PdfPage from '../PdfPage.jsx';
|
||||||
|
import TextPage from '../TextPage.jsx';
|
||||||
|
import ContentView from '../ContentView.jsx';
|
||||||
|
|
||||||
|
export default class ToTextPagesTransformation extends Transformation {
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
super("To Text Pages");
|
||||||
|
}
|
||||||
|
|
||||||
|
contentView() {
|
||||||
|
return ContentView.TEXT;
|
||||||
|
}
|
||||||
|
|
||||||
|
transform(pdfPages:PdfPage[]) {
|
||||||
|
return pdfPages.map(pdfPage => {
|
||||||
|
var text = '';
|
||||||
|
pdfPage.textItems.forEach(textItem => text += textItem.text + '\n');
|
||||||
|
return new TextPage({
|
||||||
|
index: pdfPage.index,
|
||||||
|
text: text
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,5 +1,3 @@
|
|||||||
import PdfPage from '../PdfPage.jsx';
|
|
||||||
|
|
||||||
// A transformation from an PdfPage to an PdfPage
|
// A transformation from an array of pages (e.g. PdfPage[]) to an array of pages (PdfPage[] or TextPage[])
|
||||||
export default class Transformation {
|
export default class Transformation {
|
||||||
|
|
||||||
@ -13,7 +11,19 @@ export default class Transformation {
|
|||||||
this.name = name;
|
this.name = name;
|
||||||
}
|
}
|
||||||
|
|
||||||
transform(pdfPage:PdfPage) { // eslint-disable-line no-unused-vars
|
showPageSelection() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the ContentView type with which the transformed pages can be displayed
|
||||||
|
contentView() {
|
||||||
throw new TypeError("Do not call abstract method foo from child.");
|
throw new TypeError("Do not call abstract method foo from child.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Transform incoming pages (like PdfPage[]) into different pages (either PdfPage[] or TextPage[])
|
||||||
|
transform(pages) { // eslint-disable-line no-unused-vars
|
||||||
|
throw new TypeError("Do not call abstract method foo from child.");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user