Calculate most used distance

* round coordinates on construction
This commit is contained in:
Johannes Zillmann 2017-02-17 09:01:12 +01:00
parent b7393fc806
commit a92e384249
4 changed files with 37 additions and 38 deletions

View File

@ -78,10 +78,10 @@ export default class LoadingView extends React.Component {
const style = textContent.styles[item.fontName];
return new TextItem({
x: item.transform[4],
y: item.transform[5],
width: item.width,
height: dividedHeight <= 1 ? item.height : dividedHeight,
x: Math.round(item.transform[4]),
y: Math.round(item.transform[5]),
width: Math.round(item.width),
height: Math.round(dividedHeight <= 1 ? item.height : dividedHeight),
text: item.str,
font: item.fontName,
fontAscent: style.ascent,

View File

@ -1,7 +1,6 @@
import { Enum } from 'enumify';
import CalculateGlobalStats from './transformations/CalculateGlobalStats.jsx';
import RoundCoordinates from './transformations/RoundCoordinates.jsx';
import DetectFormats from './transformations/DetectFormats.jsx'
import CombineSameY from './transformations/CombineSameY.jsx';
import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx'
@ -23,7 +22,6 @@ export default class AppState {
this.pdfPages = [];
this.transformations = [
new CalculateGlobalStats(),
new RoundCoordinates(),
new DetectFormats(),
new CombineSameY(),
new RemoveWhitespaces(),

View File

@ -17,6 +17,9 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
<li>
{ 'Most-used font: ' + parseResult.globals.mostUsedFont + ' ' }
</li>
<li>
{ 'Most-used distance: ' + parseResult.globals.mostUsedDistance + ' ' }
</li>
<li>
{ 'Max height: ' + parseResult.globals.maxHeight + ' ' }
</li>
@ -30,11 +33,16 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
<li>
{ 'Items per font: ' + JSON.stringify(parseResult.summary.fontToOccurrence) + ' ' }
</li>
<li>
{ 'Items per distance: ' + JSON.stringify(parseResult.summary.distanceToOccurrence) + ' ' }
</li>
</ul>
</div>;
}
transform(parseResult:ParseResult) {
// Parse heights
const heightToOccurrence = {};
const fontToOccurrence = {};
var maxHeight = 0;
@ -51,15 +59,39 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
});
const mostUsedHeight = parseInt(getMostUsedKey(heightToOccurrence));
const mostUsedFont = getMostUsedKey(fontToOccurrence);
// Parse line distances: build a histogram of the vertical gaps between consecutive
// text items whose height equals the most-used height, then pick the most frequent gap.
// Keys of distanceToOccurrence are distances (coerced to strings as object keys),
// values are how often each distance was seen.
const distanceToOccurrence = {};
parseResult.content.forEach(page => {
// Declared per page, so a distance is never measured across a page boundary.
var lastItemOfMostUsedHeight;
page.textItems.forEach(item => {
if (item.height == mostUsedHeight) {
// Items sharing the previous item's y are skipped for measuring (no gap between them),
// but still become the new reference item below.
if (lastItemOfMostUsedHeight && item.y != lastItemOfMostUsedHeight.y) {
const distance = lastItemOfMostUsedHeight.y - item.y;
// Only count gaps where the previous item has the larger y.
// NOTE(review): presumably y decreases while reading down the page — confirm against the parser.
if (distance > 0) {
distanceToOccurrence[distance] = distanceToOccurrence[distance] ? distanceToOccurrence[distance] + 1 : 1;
}
}
lastItemOfMostUsedHeight = item;
} else {
// An item of any other height breaks the run, so the next matching item starts fresh.
lastItemOfMostUsedHeight = null;
}
});
});
// NOTE(review): if no distance was recorded, the result depends on what getMostUsedKey
// returns for an empty map; parseInt of a non-numeric value yields NaN — confirm.
const mostUsedDistance = parseInt(getMostUsedKey(distanceToOccurrence));
parseResult.globals = {
mostUsedHeight: mostUsedHeight,
mostUsedFont: mostUsedFont,
mostUsedDistance: mostUsedDistance,
maxHeight: maxHeight,
maxHeightFont: maxHeightFont
}
parseResult.summary = {
heightToOccurrence: heightToOccurrence,
fontToOccurrence: fontToOccurrence
fontToOccurrence: fontToOccurrence,
distanceToOccurrence: distanceToOccurrence,
}
return parseResult;
}

View File

@ -1,31 +0,0 @@
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import ParseResult from '../ParseResult.jsx';
export default class RoundCoordinates extends ToPdfViewTransformation {
constructor() {
super("Round Coordinates");
}
transform(parseResult:ParseResult) {
const newContent = parseResult.content.map(pdfPage => {
return {
...pdfPage,
textItems: pdfPage.textItems.map(textItem => {
return {
...textItem,
x: Math.round(textItem.x),
y: Math.round(textItem.y),
width: Math.round(textItem.width),
height: Math.round(textItem.height)
}
})
};
});
return new ParseResult({
...parseResult,
content: newContent,
});
}
}