diff --git a/src/javascript/components/PdfView.jsx b/src/javascript/components/PdfView.jsx index ab95776..f092857 100644 --- a/src/javascript/components/PdfView.jsx +++ b/src/javascript/components/PdfView.jsx @@ -15,7 +15,6 @@ export default class PdfView extends React.Component { constructor(props) { super(props); this.state = { - transformations: this.props.transformations, currentTransformation: 0, pageNr: -1 }; @@ -29,25 +28,21 @@ export default class PdfView extends React.Component { } nextTransformation() { - console.debug("nextTransformation"); this.setState({ currentTransformation: this.state.currentTransformation + 1 }); - console.debug(this.state.currentTransformation); } prevTransformation() { - console.debug("prevTransformation"); this.setState({ currentTransformation: this.state.currentTransformation - 1 }); - console.debug(this.state.currentTransformation); } render() { - const {transformations, currentTransformation, pageNr} = this.state; - const {pdfPages} = this.props; + const {currentTransformation, pageNr} = this.state; + const {pdfPages, transformations} = this.props; const header = "Parsed " + pdfPages.length + " pages!" @@ -65,21 +60,14 @@ export default class PdfView extends React.Component { { '==>' } ; - - //TODO only transform selected page ? - const transformedPdfPages = pdfPages.map(pdfPage => { + const transformedPdfPages = pdfPages.filter((elem, i) => pageNr == -1 || i == pageNr).map(pdfPage => { for (var i = 0; i <= currentTransformation; i++) { pdfPage = transformations[i].transform(pdfPage); } return pdfPage; }); - var pageComponents; - if (pageNr >= 0) { - pageComponents = ; - } else { - pageComponents = transformedPdfPages.map((page) => ); - } + var pageComponents = transformedPdfPages.map(page => ); return (
diff --git a/src/javascript/models/AppState.jsx b/src/javascript/models/AppState.jsx index dd86e74..952805a 100644 --- a/src/javascript/models/AppState.jsx +++ b/src/javascript/models/AppState.jsx @@ -4,7 +4,8 @@ import { pdfToTextItemsAsync } from '../functions/pdfToTextItems.jsx' import PdfPage from './PdfPage.jsx'; import NoOpTransformation from './transformations/NoOpTransformation.jsx'; -import RoundYTransformation from './transformations/RoundYTransformation.jsx'; +import RoundCoordinatesTransformation from './transformations/RoundCoordinatesTransformation.jsx'; +import CombineSameYTransformation from './transformations/CombineSameYTransformation.jsx'; // Holds the state of the Application export default class AppState { @@ -15,7 +16,7 @@ export default class AppState { this.pagesToUpload = 0; this.uploadedPages = 0; this.pdfPages = []; - this.transformations = [new NoOpTransformation(), new RoundYTransformation()]; + this.transformations = [new NoOpTransformation(), new RoundCoordinatesTransformation(), new CombineSameYTransformation()]; //bind functions this.render = this.render.bind(this); diff --git a/src/javascript/models/transformations/CombineSameYTransformation.jsx b/src/javascript/models/transformations/CombineSameYTransformation.jsx new file mode 100644 index 0000000..aef96a2 --- /dev/null +++ b/src/javascript/models/transformations/CombineSameYTransformation.jsx @@ -0,0 +1,56 @@ +import Transformation from './Transformation.jsx'; +import TextItem from '../TextItem.jsx'; + +export default class CombineSameYTransformation extends Transformation { + + constructor() { + super("Combine text on same Y"); + } + + transform(pdfPage:PdfPage) { + + const newTextItems = []; + var lastTextItem; + pdfPage.textItems.forEach(textItem => { + if (!lastTextItem) { + lastTextItem = textItem; + } else { + if (textItem.y == lastTextItem.y) { + //combine + + console.debug("last=" + lastTextItem.text + ", x=" + lastTextItem.x + ", width=" + lastTextItem.width); + console.debug("new=" + textItem.text + ", x=" + textItem.x + ", width=" + textItem.width); + console.debug("diff=" + (textItem.x - lastTextItem.x - lastTextItem.width)); + + var combinedText = lastTextItem.text; + //TODO make 5 dependent on text size or biggest gap? + if (textItem.x - lastTextItem.x - lastTextItem.width > 7) { + combinedText += ' '; + } + combinedText += textItem.text; + + lastTextItem = new TextItem({ + x: lastTextItem.x, + y: lastTextItem.y, + width: textItem.x - lastTextItem.x + textItem.width, + height: lastTextItem.height, //might this cause problems ? + text: combinedText + }); + } else { + //rotate + newTextItems.push(lastTextItem); + lastTextItem = textItem; + } + } + }); + if (lastTextItem) { + newTextItems.push(lastTextItem); + } + + return { + ...pdfPage, + textItems: newTextItems + }; + } + +} \ No newline at end of file diff --git a/src/javascript/models/transformations/RoundYTransformation.jsx b/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx similarity index 50% rename from src/javascript/models/transformations/RoundYTransformation.jsx rename to src/javascript/models/transformations/RoundCoordinatesTransformation.jsx index a6e6033..3ee4a7f 100644 --- a/src/javascript/models/transformations/RoundYTransformation.jsx +++ b/src/javascript/models/transformations/RoundCoordinatesTransformation.jsx @@ -1,9 +1,9 @@ import Transformation from './Transformation.jsx'; -export default class RoundYTransformation extends Transformation { +export default class RoundCoordinatesTransformation extends Transformation { constructor() { - super("Round all Y"); + super("Round coordinates"); } transform(pdfPage:PdfPage) { @@ -12,7 +12,10 @@ export default class RoundYTransformation extends Transformation { textItems: pdfPage.textItems.map(textItem => { return { ...textItem, - y: Math.round(textItem.y) + x: Math.round(textItem.x), + y: Math.round(textItem.y), + width: Math.round(textItem.width), + height: Math.round(textItem.height) } }) };