diff --git a/src/javascript/components/PdfView.jsx b/src/javascript/components/PdfView.jsx
index ab95776..f092857 100644
--- a/src/javascript/components/PdfView.jsx
+++ b/src/javascript/components/PdfView.jsx
@@ -15,7 +15,6 @@ export default class PdfView extends React.Component {
constructor(props) {
super(props);
this.state = {
- transformations: this.props.transformations,
currentTransformation: 0,
pageNr: -1
};
@@ -29,25 +28,21 @@ export default class PdfView extends React.Component {
}
nextTransformation() {
- console.debug("nextTransformation");
this.setState({
currentTransformation: this.state.currentTransformation + 1
});
- console.debug(this.state.currentTransformation);
}
prevTransformation() {
- console.debug("prevTransformation");
this.setState({
currentTransformation: this.state.currentTransformation - 1
});
- console.debug(this.state.currentTransformation);
}
render() {
- const {transformations, currentTransformation, pageNr} = this.state;
- const {pdfPages} = this.props;
+ const {currentTransformation, pageNr} = this.state;
+ const {pdfPages, transformations} = this.props;
const header = "Parsed " + pdfPages.length + " pages!"
@@ -65,21 +60,14 @@ export default class PdfView extends React.Component {
{ '==>' }
;
-
- //TODO only transform selected page ?
- const transformedPdfPages = pdfPages.map(pdfPage => {
+ const transformedPdfPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr).map(pdfPage => {
for (var i = 0; i <= currentTransformation; i++) {
pdfPage = transformations[i].transform(pdfPage);
}
return pdfPage;
});
- var pageComponents;
- if (pageNr >= 0) {
- pageComponents = ;
- } else {
- pageComponents = transformedPdfPages.map((page) => );
- }
+ var pageComponents = transformedPdfPages.map(page => );
return (
diff --git a/src/javascript/models/AppState.jsx b/src/javascript/models/AppState.jsx
index dd86e74..952805a 100644
--- a/src/javascript/models/AppState.jsx
+++ b/src/javascript/models/AppState.jsx
@@ -4,7 +4,8 @@ import { pdfToTextItemsAsync } from '../functions/pdfToTextItems.jsx'
import PdfPage from './PdfPage.jsx';
import NoOpTransformation from './transformations/NoOpTransformation.jsx';
-import RoundYTransformation from './transformations/RoundYTransformation.jsx';
+import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx';
+import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx';
// Holds the state of the Application
export default class AppState {
@@ -15,7 +16,7 @@ export default class AppState {
this.pagesToUpload = 0;
this.uploadedPages = 0;
this.pdfPages = [];
- this.transformations = [new NoOpTransformation(), new RoundYTransformation()];
+ this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation()];
//bind functions
this.render = this.render.bind(this);
diff --git a/src/javascript/models/transformations/CombineSameYTransformation.jsx b/src/javascript/models/transformations/CombineSameYTransformation.jsx
new file mode 100644
index 0000000..aef96a2
--- /dev/null
+++ b/src/javascript/models/transformations/CombineSameYTransformation.jsx
@@ -0,0 +1,56 @@
+import Transformation from './Transformation.jsx';
+import TextItem from '../TextItem.jsx';
+
+/**
+ * Merges consecutive text items that share the same y coordinate into a single item.
+ */
+export default class CombineSameYTransformation extends Transformation {
+    constructor() {
+        super("Combine text on same Y");
+    }
+
+    /**
+     * Returns a shallow copy of pdfPage whose textItems have equal-y neighbours combined.
+     * Only adjacent items are compared, so the result depends on the order of textItems.
+     */
+    transform(pdfPage:PdfPage) {
+        // Horizontal gap above which a space is inserted between two combined texts.
+        // TODO make this dependent on text size or biggest gap?
+        const minGapForSpace = 7;
+        const newTextItems = [];
+        let lastTextItem;
+        pdfPage.textItems.forEach(textItem => {
+            if (!lastTextItem) {
+                lastTextItem = textItem;
+                return;
+            }
+            if (textItem.y !== lastTextItem.y) {
+                // Different y: flush the finished item and start a new run.
+                newTextItems.push(lastTextItem);
+                lastTextItem = textItem;
+                return;
+            }
+            // Same y: measure the gap between the previous item's right edge and this item.
+            const gap = textItem.x - lastTextItem.x - lastTextItem.width;
+            const separator = gap > minGapForSpace ? ' ' : '';
+            lastTextItem = new TextItem({
+                x: lastTextItem.x,
+                y: lastTextItem.y,
+                // Spans from the run's left edge to the right edge of the new item.
+                width: textItem.x - lastTextItem.x + textItem.width,
+                // Keeps the height of the run's first item; later heights are ignored.
+                height: lastTextItem.height,
+                text: lastTextItem.text + separator + textItem.text
+            });
+        });
+        if (lastTextItem) {
+            newTextItems.push(lastTextItem);
+        }
+
+        return {
+            ...pdfPage,
+            textItems: newTextItems
+        };
+    }
+
+}
\ No newline at end of file
diff --git a/src/javascript/models/transformations/RoundYTransformation.jsx b/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx
similarity index 50%
rename from src/javascript/models/transformations/RoundYTransformation.jsx
rename to src/javascript/models/transformations/RoundCoordinatesTransformation.jsx
index a6e6033..3ee4a7f 100644
--- a/src/javascript/models/transformations/RoundYTransformation.jsx
+++ b/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx
@@ -1,9 +1,9 @@
import Transformation from './Transformation.jsx';
-export default class RoundYTransformation extends Transformation {
+export default class RoundCoordinatesTransformation extends Transformation {
constructor() {
- super("Round all Y");
+ super("Round coordinates");
}
transform(pdfPage:PdfPage) {
@@ -12,7 +12,10 @@ export default class RoundYTransformation extends Transformation {
textItems: pdfPage.textItems.map(textItem => {
return {
...textItem,
- y: Math.round(textItem.y)
+ x: Math.round(textItem.x),
+ y: Math.round(textItem.y),
+ width: Math.round(textItem.width),
+ height: Math.round(textItem.height)
}
})
};