Calculate most used distance

* round coordinates on construction
This commit is contained in:
Johannes Zillmann 2017-02-17 09:01:12 +01:00
parent b7393fc806
commit a92e384249
4 changed files with 37 additions and 38 deletions

View File

@ -78,10 +78,10 @@ export default class LoadingView extends React.Component {
const style = textContent.styles[item.fontName]; const style = textContent.styles[item.fontName];
return new TextItem({ return new TextItem({
x: item.transform[4], x: Math.round(item.transform[4]),
y: item.transform[5], y: Math.round(item.transform[5]),
width: item.width, width: Math.round(item.width),
height: dividedHeight <= 1 ? item.height : dividedHeight, height: Math.round(dividedHeight <= 1 ? item.height : dividedHeight),
text: item.str, text: item.str,
font: item.fontName, font: item.fontName,
fontAscent: style.ascent, fontAscent: style.ascent,

View File

@ -1,7 +1,6 @@
import { Enum } from 'enumify'; import { Enum } from 'enumify';
import CalculateGlobalStats from './transformations/CalculateGlobalStats.jsx'; import CalculateGlobalStats from './transformations/CalculateGlobalStats.jsx';
import RoundCoordinates from './transformations/RoundCoordinates.jsx';
import DetectFormats from './transformations/DetectFormats.jsx' import DetectFormats from './transformations/DetectFormats.jsx'
import CombineSameY from './transformations/CombineSameY.jsx'; import CombineSameY from './transformations/CombineSameY.jsx';
import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx' import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx'
@ -23,7 +22,6 @@ export default class AppState {
this.pdfPages = []; this.pdfPages = [];
this.transformations = [ this.transformations = [
new CalculateGlobalStats(), new CalculateGlobalStats(),
new RoundCoordinates(),
new DetectFormats(), new DetectFormats(),
new CombineSameY(), new CombineSameY(),
new RemoveWhitespaces(), new RemoveWhitespaces(),

View File

@ -17,6 +17,9 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
<li> <li>
{ 'Most-used font: ' + parseResult.globals.mostUsedFont + ' ' } { 'Most-used font: ' + parseResult.globals.mostUsedFont + ' ' }
</li> </li>
<li>
{ 'Most-used distance: ' + parseResult.globals.mostUsedDistance + ' ' }
</li>
<li> <li>
{ 'Max height: ' + parseResult.globals.maxHeight + ' ' } { 'Max height: ' + parseResult.globals.maxHeight + ' ' }
</li> </li>
@ -30,11 +33,16 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
<li> <li>
{ 'Items per font: ' + JSON.stringify(parseResult.summary.fontToOccurrence) + ' ' } { 'Items per font: ' + JSON.stringify(parseResult.summary.fontToOccurrence) + ' ' }
</li> </li>
<li>
{ 'Items per distance: ' + JSON.stringify(parseResult.summary.distanceToOccurrence) + ' ' }
</li>
</ul> </ul>
</div>; </div>;
} }
transform(parseResult:ParseResult) { transform(parseResult:ParseResult) {
// Parse heights
const heightToOccurrence = {}; const heightToOccurrence = {};
const fontToOccurrence = {}; const fontToOccurrence = {};
var maxHeight = 0; var maxHeight = 0;
@ -51,15 +59,39 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
}); });
const mostUsedHeight = parseInt(getMostUsedKey(heightToOccurrence)); const mostUsedHeight = parseInt(getMostUsedKey(heightToOccurrence));
const mostUsedFont = getMostUsedKey(fontToOccurrence); const mostUsedFont = getMostUsedKey(fontToOccurrence);
// Parse line distances
const distanceToOccurrence = {};
parseResult.content.forEach(page => {
var lastItemOfMostUsedHeight;
page.textItems.forEach(item => {
if (item.height == mostUsedHeight) {
if (lastItemOfMostUsedHeight && item.y != lastItemOfMostUsedHeight.y) {
const distance = lastItemOfMostUsedHeight.y - item.y;
if (distance > 0) {
distanceToOccurrence[distance] = distanceToOccurrence[distance] ? distanceToOccurrence[distance] + 1 : 1;
}
}
lastItemOfMostUsedHeight = item;
} else {
lastItemOfMostUsedHeight = null;
}
});
});
const mostUsedDistance = parseInt(getMostUsedKey(distanceToOccurrence));
parseResult.globals = { parseResult.globals = {
mostUsedHeight: mostUsedHeight, mostUsedHeight: mostUsedHeight,
mostUsedFont: mostUsedFont, mostUsedFont: mostUsedFont,
mostUsedDistance: mostUsedDistance,
maxHeight: maxHeight, maxHeight: maxHeight,
maxHeightFont: maxHeightFont maxHeightFont: maxHeightFont
} }
parseResult.summary = { parseResult.summary = {
heightToOccurrence: heightToOccurrence, heightToOccurrence: heightToOccurrence,
fontToOccurrence: fontToOccurrence fontToOccurrence: fontToOccurrence,
distanceToOccurrence: distanceToOccurrence,
} }
return parseResult; return parseResult;
} }

View File

@ -1,31 +0,0 @@
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import ParseResult from '../ParseResult.jsx';
export default class RoundCoordinates extends ToPdfViewTransformation {
constructor() {
super("Round Coordinates");
}
transform(parseResult:ParseResult) {
const newContent = parseResult.content.map(pdfPage => {
return {
...pdfPage,
textItems: pdfPage.textItems.map(textItem => {
return {
...textItem,
x: Math.round(textItem.x),
y: Math.round(textItem.y),
width: Math.round(textItem.width),
height: Math.round(textItem.height)
}
})
};
});
return new ParseResult({
...parseResult,
content: newContent,
});
}
}