add combine-Y transformation

This commit is contained in:
Johannes Zillmann 2017-01-07 13:20:04 +01:00
parent 92a904a0a6
commit 9ae32e02b5
4 changed files with 69 additions and 21 deletions

View File

@ -15,7 +15,6 @@ export default class PdfView extends React.Component {
constructor(props) {
super(props);
this.state = {
transformations: this.props.transformations,
currentTransformation: 0,
pageNr: -1
};
@ -29,25 +28,21 @@ export default class PdfView extends React.Component {
}
nextTransformation() {
console.debug("nextTransformation");
this.setState({
currentTransformation: this.state.currentTransformation + 1
});
console.debug(this.state.currentTransformation);
}
prevTransformation() {
console.debug("prevTransformation");
this.setState({
currentTransformation: this.state.currentTransformation - 1
});
console.debug(this.state.currentTransformation);
}
render() {
const {transformations, currentTransformation, pageNr} = this.state;
const {pdfPages} = this.props;
const {currentTransformation, pageNr} = this.state;
const {pdfPages, transformations} = this.props;
const header = "Parsed " + pdfPages.length + " pages!"
@ -65,21 +60,14 @@ export default class PdfView extends React.Component {
{ '==>' }
</a>;
//TODO only transform selected page ?
const transformedPdfPages = pdfPages.map(pdfPage => {
const transformedPdfPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr).map(pdfPage => {
for (var i = 0; i <= currentTransformation; i++) {
pdfPage = transformations[i].transform(pdfPage);
}
return pdfPage;
});
var pageComponents;
if (pageNr >= 0) {
pageComponents = <PdfPageView key={ pageNr } pdfPage={ transformedPdfPages[pageNr] } />;
} else {
pageComponents = transformedPdfPages.map((page) => <PdfPageView key={ page.index } pdfPage={ page } />);
}
var pageComponents = transformedPdfPages.map(page => <PdfPageView key={ page.index } pdfPage={ page } />);
return (
<div>

View File

@ -4,7 +4,8 @@ import { pdfToTextItemsAsync } from '../functions/pdfToTextItems.jsx'
import PdfPage from './PdfPage.jsx';
import NoOpTransformation from './transformations/NoOpTransformation.jsx';
import RoundYTransformation from './transformations/RoundYTransformation.jsx';
import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx';
import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx';
// Holds the state of the Application
export default class AppState {
@ -15,7 +16,7 @@ export default class AppState {
this.pagesToUpload = 0;
this.uploadedPages = 0;
this.pdfPages = [];
this.transformations = [new NoOpTransformation(), new RoundYTransformation()];
this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation()];
//bind functions
this.render = this.render.bind(this);

View File

@ -0,0 +1,56 @@
import Transformation from './Transformation.jsx';
import TextItem from '../TextItem.jsx';
// Horizontal gap (in the same units as TextItem.x / TextItem.width) above which
// two fragments on the same line are joined with a single space.
// TODO make the threshold dependent on text size or biggest gap?
const MIN_GAP_FOR_SPACE = 7;

/**
 * Merges consecutive text items that share the same Y coordinate into one item.
 *
 * Only neighbouring items are compared, so items on the same line that are
 * separated by an item with a different Y are NOT merged with each other.
 * Texts are concatenated in input order; a space is inserted when the
 * horizontal gap between the running item and the next one exceeds
 * MIN_GAP_FOR_SPACE.
 */
export default class CombineSameYTransformation extends Transformation {

    constructor() {
        super("Combine text on same Y");
    }

    /**
     * @param pdfPage page whose `textItems` array should be condensed
     * @returns a shallow copy of `pdfPage` with `textItems` replaced by the
     *          merged list; the input page and its array are left untouched
     */
    transform(pdfPage:PdfPage) {
        const newTextItems = [];
        let lastTextItem;

        for (const textItem of pdfPage.textItems) {
            if (!lastTextItem) {
                // First item of the page: nothing to compare against yet.
                lastTextItem = textItem;
                continue;
            }

            if (textItem.y !== lastTextItem.y) {
                // Line changed: flush the finished line and start a new one.
                newTextItems.push(lastTextItem);
                lastTextItem = textItem;
                continue;
            }

            // Same line: append the text, separated by a space for wide gaps.
            const gap = textItem.x - lastTextItem.x - lastTextItem.width;
            const separator = gap > MIN_GAP_FOR_SPACE ? ' ' : '';

            // Use the union of both boxes so the merged item never shrinks or
            // gets a negative width when fragments overlap or arrive
            // right-to-left, and so a taller fragment is not cut off.
            const left = Math.min(lastTextItem.x, textItem.x);
            const right = Math.max(lastTextItem.x + lastTextItem.width, textItem.x + textItem.width);

            lastTextItem = new TextItem({
                x: left,
                y: lastTextItem.y,
                width: right - left,
                height: Math.max(lastTextItem.height, textItem.height),
                text: lastTextItem.text + separator + textItem.text
            });
        }

        // Flush the line that was still being accumulated.
        if (lastTextItem) {
            newTextItems.push(lastTextItem);
        }

        return {
            ...pdfPage,
            textItems: newTextItems
        };
    }

}

View File

@ -1,9 +1,9 @@
import Transformation from './Transformation.jsx';
export default class RoundYTransformation extends Transformation {
export default class RoundCoordinatesTransformation extends Transformation {
constructor() {
super("Round all Y");
super("Round coordinates");
}
transform(pdfPage:PdfPage) {
@ -12,7 +12,10 @@ export default class RoundYTransformation extends Transformation {
textItems: pdfPage.textItems.map(textItem => {
return {
...textItem,
y: Math.round(textItem.y)
x: Math.round(textItem.x),
y: Math.round(textItem.y),
width: Math.round(textItem.width),
height: Math.round(textItem.height)
}
})
};