diff --git a/src/javascript/components/LoadingView.jsx b/src/javascript/components/LoadingView.jsx index 8cd4fe8..4d1c44a 100644 --- a/src/javascript/components/LoadingView.jsx +++ b/src/javascript/components/LoadingView.jsx @@ -78,10 +78,10 @@ export default class LoadingView extends React.Component { const style = textContent.styles[item.fontName]; return new TextItem({ - x: item.transform[4], - y: item.transform[5], - width: item.width, - height: dividedHeight <= 1 ? item.height : dividedHeight, + x: Math.round(item.transform[4]), + y: Math.round(item.transform[5]), + width: Math.round(item.width), + height: Math.round(dividedHeight <= 1 ? item.height : dividedHeight), text: item.str, font: item.fontName, fontAscent: style.ascent, diff --git a/src/javascript/models/AppState.jsx b/src/javascript/models/AppState.jsx index 165cca9..d42b73d 100644 --- a/src/javascript/models/AppState.jsx +++ b/src/javascript/models/AppState.jsx @@ -1,7 +1,6 @@ import { Enum } from 'enumify'; import CalculateGlobalStats from './transformations/CalculateGlobalStats.jsx'; -import RoundCoordinates from './transformations/RoundCoordinates.jsx'; import DetectFormats from './transformations/DetectFormats.jsx' import CombineSameY from './transformations/CombineSameY.jsx'; import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx' @@ -23,7 +22,6 @@ export default class AppState { this.pdfPages = []; this.transformations = [ new CalculateGlobalStats(), - new RoundCoordinates(), new DetectFormats(), new CombineSameY(), new RemoveWhitespaces(), diff --git a/src/javascript/models/transformations/CalculateGlobalStats.jsx b/src/javascript/models/transformations/CalculateGlobalStats.jsx index 2ddb72c..211c96e 100644 --- a/src/javascript/models/transformations/CalculateGlobalStats.jsx +++ b/src/javascript/models/transformations/CalculateGlobalStats.jsx @@ -17,6 +17,9 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
  • { 'Most-used font: ' + parseResult.globals.mostUsedFont + ' ' }
  • +
  • + { 'Most-used distance: ' + parseResult.globals.mostUsedDistance + ' ' } +
  • { 'Max height: ' + parseResult.globals.maxHeight + ' ' }
  • @@ -30,11 +33,16 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
  • { 'Items per font: ' + JSON.stringify(parseResult.summary.fontToOccurrence) + ' ' }
  • +
  • + { 'Items per distance: ' + JSON.stringify(parseResult.summary.distanceToOccurrence) + ' ' } +
  • ; } transform(parseResult:ParseResult) { + + // Parse heights const heightToOccurrence = {}; const fontToOccurrence = {}; var maxHeight = 0; @@ -51,15 +59,39 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation { }); const mostUsedHeight = parseInt(getMostUsedKey(heightToOccurrence)); const mostUsedFont = getMostUsedKey(fontToOccurrence); + + // Parse line distances + const distanceToOccurrence = {}; + parseResult.content.forEach(page => { + var lastItemOfMostUsedHeight; + page.textItems.forEach(item => { + if (item.height == mostUsedHeight) { + if (lastItemOfMostUsedHeight && item.y != lastItemOfMostUsedHeight.y) { + const distance = lastItemOfMostUsedHeight.y - item.y; + if (distance > 0) { + distanceToOccurrence[distance] = distanceToOccurrence[distance] ? distanceToOccurrence[distance] + 1 : 1; + } + } + lastItemOfMostUsedHeight = item; + } else { + lastItemOfMostUsedHeight = null; + } + }); + }); + const mostUsedDistance = parseInt(getMostUsedKey(distanceToOccurrence)); + + parseResult.globals = { mostUsedHeight: mostUsedHeight, mostUsedFont: mostUsedFont, + mostUsedDistance: mostUsedDistance, maxHeight: maxHeight, maxHeightFont: maxHeightFont } parseResult.summary = { heightToOccurrence: heightToOccurrence, - fontToOccurrence: fontToOccurrence + fontToOccurrence: fontToOccurrence, + distanceToOccurrence: distanceToOccurrence, } return parseResult; } diff --git a/src/javascript/models/transformations/RoundCoordinates.jsx b/src/javascript/models/transformations/RoundCoordinates.jsx deleted file mode 100644 index 8a0515f..0000000 --- a/src/javascript/models/transformations/RoundCoordinates.jsx +++ /dev/null @@ -1,31 +0,0 @@ -import ToPdfViewTransformation from './ToPdfViewTransformation.jsx'; -import ParseResult from '../ParseResult.jsx'; - -export default class RoundCoordinates extends ToPdfViewTransformation { - - constructor() { - super("Round Coordinates"); - } - - transform(parseResult:ParseResult) { - const newContent = parseResult.content.map(pdfPage => { - return { - ...pdfPage, - textItems: pdfPage.textItems.map(textItem => { - return { - ...textItem, - x: Math.round(textItem.x), - y: Math.round(textItem.y), - width: Math.round(textItem.width), - height: Math.round(textItem.height) - } - }) - }; - }); - return new ParseResult({ - ...parseResult, - content: newContent, - }); - } - -} \ No newline at end of file