add combine-Y transformation

This commit is contained in:
Johannes Zillmann 2017-01-07 13:20:04 +01:00
parent 92a904a0a6
commit 9ae32e02b5
4 changed files with 69 additions and 21 deletions

View File

@ -15,7 +15,6 @@ export default class PdfView extends React.Component {
constructor(props) { constructor(props) {
super(props); super(props);
this.state = { this.state = {
transformations: this.props.transformations,
currentTransformation: 0, currentTransformation: 0,
pageNr: -1 pageNr: -1
}; };
@ -29,25 +28,21 @@ export default class PdfView extends React.Component {
} }
nextTransformation() { nextTransformation() {
console.debug("nextTransformation");
this.setState({ this.setState({
currentTransformation: this.state.currentTransformation + 1 currentTransformation: this.state.currentTransformation + 1
}); });
console.debug(this.state.currentTransformation);
} }
prevTransformation() { prevTransformation() {
console.debug("prevTransformation");
this.setState({ this.setState({
currentTransformation: this.state.currentTransformation - 1 currentTransformation: this.state.currentTransformation - 1
}); });
console.debug(this.state.currentTransformation);
} }
render() { render() {
const {transformations, currentTransformation, pageNr} = this.state; const {currentTransformation, pageNr} = this.state;
const {pdfPages} = this.props; const {pdfPages, transformations} = this.props;
const header = "Parsed " + pdfPages.length + " pages!" const header = "Parsed " + pdfPages.length + " pages!"
@ -65,21 +60,14 @@ export default class PdfView extends React.Component {
{ '==>' } { '==>' }
</a>; </a>;
const transformedPdfPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr).map(pdfPage => {
//TODO only transform selected page ?
const transformedPdfPages = pdfPages.map(pdfPage => {
for (var i = 0; i <= currentTransformation; i++) { for (var i = 0; i <= currentTransformation; i++) {
pdfPage = transformations[i].transform(pdfPage); pdfPage = transformations[i].transform(pdfPage);
} }
return pdfPage; return pdfPage;
}); });
var pageComponents; var pageComponents = transformedPdfPages.map(page => <PdfPageView key={ page.index } pdfPage={ page } />);
if (pageNr >= 0) {
pageComponents = <PdfPageView key={ pageNr } pdfPage={ transformedPdfPages[pageNr] } />;
} else {
pageComponents = transformedPdfPages.map((page) => <PdfPageView key={ page.index } pdfPage={ page } />);
}
return ( return (
<div> <div>

View File

@ -4,7 +4,8 @@ import { pdfToTextItemsAsync } from '../functions/pdfToTextItems.jsx'
import PdfPage from './PdfPage.jsx'; import PdfPage from './PdfPage.jsx';
import NoOpTransformation from './transformations/NoOpTransformation.jsx'; import NoOpTransformation from './transformations/NoOpTransformation.jsx';
import RoundYTransformation from './transformations/RoundYTransformation.jsx'; import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx';
import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx';
// Holds the state of the Application // Holds the state of the Application
export default class AppState { export default class AppState {
@ -15,7 +16,7 @@ export default class AppState {
this.pagesToUpload = 0; this.pagesToUpload = 0;
this.uploadedPages = 0; this.uploadedPages = 0;
this.pdfPages = []; this.pdfPages = [];
this.transformations = [new NoOpTransformation(), new RoundYTransformation()]; this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation()];
//bind functions //bind functions
this.render = this.render.bind(this); this.render = this.render.bind(this);

View File

@ -0,0 +1,56 @@
import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx';
// Widest horizontal gap between two neighbouring items on the same line that is
// still joined without a separator; a wider gap gets a single space inserted.
// Expressed in the same units as the text items' x/width values.
//TODO make this dependent on text size or biggest gap?
const MAX_GAP_WITHOUT_SPACE = 7;

// Transformation that merges runs of consecutive text items sharing the same y
// coordinate into one text item per run.
export default class CombineSameYTransformation extends Transformation {

    constructor() {
        super("Combine text on same Y");
    }

    // Returns a copy of the given page whose textItems have been merged.
    //
    // Only items that are adjacent in pdfPage.textItems are compared, so two items
    // with the same y that are separated by an item with a different y stay apart.
    // The input page and its text items are not modified.
    // NOTE(review): the width computation assumes items on one line are ordered
    // left to right - confirm against the parser output.
    transform(pdfPage:PdfPage) {
        const newTextItems = [];
        let lastTextItem;
        pdfPage.textItems.forEach(textItem => {
            if (!lastTextItem) {
                // first item of the page, nothing to compare against yet
                lastTextItem = textItem;
                return;
            }
            if (textItem.y !== lastTextItem.y) {
                // line changed: emit the finished line and start a new one
                newTextItems.push(lastTextItem);
                lastTextItem = textItem;
                return;
            }

            // same line: append the text, separated by a space if the gap is wide enough
            const gap = textItem.x - lastTextItem.x - lastTextItem.width;
            const separator = gap > MAX_GAP_WITHOUT_SPACE ? ' ' : '';
            lastTextItem = new TextItem({
                x: lastTextItem.x,
                y: lastTextItem.y,
                // span from the left edge of the line to the right edge of the new item
                width: textItem.x - lastTextItem.x + textItem.width,
                // keeps the height of the first item of the line; might this cause problems ?
                height: lastTextItem.height,
                text: lastTextItem.text + separator + textItem.text
            });
        });
        // flush the line that was still being built when the items ran out
        if (lastTextItem) {
            newTextItems.push(lastTextItem);
        }

        return {
            ...pdfPage,
            textItems: newTextItems
        };
    }

}

View File

@ -1,9 +1,9 @@
import Transformation from './Transformation.jsx'; import Transformation from './Transformation.jsx';
export default class RoundYTransformation extends Transformation { export default class RoundCoordinatesTransformation extends Transformation {
constructor() { constructor() {
super("Round all Y"); super("Round coordinates");
} }
transform(pdfPage:PdfPage) { transform(pdfPage:PdfPage) {
@ -12,7 +12,10 @@ export default class RoundYTransformation extends Transformation {
textItems: pdfPage.textItems.map(textItem => { textItems: pdfPage.textItems.map(textItem => {
return { return {
...textItem, ...textItem,
y: Math.round(textItem.y) x: Math.round(textItem.x),
y: Math.round(textItem.y),
width: Math.round(textItem.width),
height: Math.round(textItem.height)
} }
}) })
}; };