diff --git a/src/javascript/components/LoadingView.jsx b/src/javascript/components/LoadingView.jsx
index 8cd4fe8..4d1c44a 100644
--- a/src/javascript/components/LoadingView.jsx
+++ b/src/javascript/components/LoadingView.jsx
@@ -78,10 +78,10 @@ export default class LoadingView extends React.Component {
const style = textContent.styles[item.fontName];
return new TextItem({
- x: item.transform[4],
- y: item.transform[5],
- width: item.width,
- height: dividedHeight <= 1 ? item.height : dividedHeight,
+ x: Math.round(item.transform[4]),
+ y: Math.round(item.transform[5]),
+ width: Math.round(item.width),
+ height: Math.round(dividedHeight <= 1 ? item.height : dividedHeight),
text: item.str,
font: item.fontName,
fontAscent: style.ascent,
diff --git a/src/javascript/models/AppState.jsx b/src/javascript/models/AppState.jsx
index 165cca9..d42b73d 100644
--- a/src/javascript/models/AppState.jsx
+++ b/src/javascript/models/AppState.jsx
@@ -1,7 +1,6 @@
import { Enum } from 'enumify';
import CalculateGlobalStats from './transformations/CalculateGlobalStats.jsx';
-import RoundCoordinates from './transformations/RoundCoordinates.jsx';
import DetectFormats from './transformations/DetectFormats.jsx'
import CombineSameY from './transformations/CombineSameY.jsx';
import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx'
@@ -23,7 +22,6 @@ export default class AppState {
this.pdfPages = [];
this.transformations = [
new CalculateGlobalStats(),
- new RoundCoordinates(),
new DetectFormats(),
new CombineSameY(),
new RemoveWhitespaces(),
diff --git a/src/javascript/models/transformations/CalculateGlobalStats.jsx b/src/javascript/models/transformations/CalculateGlobalStats.jsx
index 2ddb72c..211c96e 100644
--- a/src/javascript/models/transformations/CalculateGlobalStats.jsx
+++ b/src/javascript/models/transformations/CalculateGlobalStats.jsx
@@ -17,6 +17,9 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
{ 'Most-used font: ' + parseResult.globals.mostUsedFont + ' ' }
+
+ { 'Most-used distance: ' + parseResult.globals.mostUsedDistance + ' ' }
+
{ 'Max height: ' + parseResult.globals.maxHeight + ' ' }
@@ -30,11 +33,16 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
{ 'Items per font: ' + JSON.stringify(parseResult.summary.fontToOccurrence) + ' ' }
+
+ { 'Items per distance: ' + JSON.stringify(parseResult.summary.distanceToOccurrence) + ' ' }
+
;
}
transform(parseResult:ParseResult) {
+
+ // Parse heights and fonts
const heightToOccurrence = {};
const fontToOccurrence = {};
var maxHeight = 0;
@@ -51,15 +59,39 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
});
const mostUsedHeight = parseInt(getMostUsedKey(heightToOccurrence));
const mostUsedFont = getMostUsedKey(fontToOccurrence);
+
+ // Parse line distances
+ const distanceToOccurrence = {};
+ parseResult.content.forEach(page => {
+ var lastItemOfMostUsedHeight;
+ page.textItems.forEach(item => {
+ if (item.height == mostUsedHeight) {
+ if (lastItemOfMostUsedHeight && item.y != lastItemOfMostUsedHeight.y) {
+ const distance = lastItemOfMostUsedHeight.y - item.y;
+ if (distance > 0) {
+ distanceToOccurrence[distance] = distanceToOccurrence[distance] ? distanceToOccurrence[distance] + 1 : 1;
+ }
+ }
+ lastItemOfMostUsedHeight = item;
+ } else {
+ lastItemOfMostUsedHeight = null;
+ }
+ });
+ });
+ const mostUsedDistance = parseInt(getMostUsedKey(distanceToOccurrence));
+
+
parseResult.globals = {
mostUsedHeight: mostUsedHeight,
mostUsedFont: mostUsedFont,
+ mostUsedDistance: mostUsedDistance,
maxHeight: maxHeight,
maxHeightFont: maxHeightFont
}
parseResult.summary = {
heightToOccurrence: heightToOccurrence,
- fontToOccurrence: fontToOccurrence
+ fontToOccurrence: fontToOccurrence,
+ distanceToOccurrence: distanceToOccurrence,
}
return parseResult;
}
diff --git a/src/javascript/models/transformations/RoundCoordinates.jsx b/src/javascript/models/transformations/RoundCoordinates.jsx
deleted file mode 100644
index 8a0515f..0000000
--- a/src/javascript/models/transformations/RoundCoordinates.jsx
+++ /dev/null
@@ -1,31 +0,0 @@
-import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
-import ParseResult from '../ParseResult.jsx';
-
-export default class RoundCoordinates extends ToPdfViewTransformation {
-
- constructor() {
- super("Round Coordinates");
- }
-
- transform(parseResult:ParseResult) {
- const newContent = parseResult.content.map(pdfPage => {
- return {
- ...pdfPage,
- textItems: pdfPage.textItems.map(textItem => {
- return {
- ...textItem,
- x: Math.round(textItem.x),
- y: Math.round(textItem.y),
- width: Math.round(textItem.width),
- height: Math.round(textItem.height)
- }
- })
- };
- });
- return new ParseResult({
- ...parseResult,
- content: newContent,
- });
- }
-
-}
\ No newline at end of file