Add global statistics

This commit is contained in:
Johannes Zillmann 2017-02-15 07:33:07 +01:00
parent a76dac6428
commit 6441580889
6 changed files with 72 additions and 20 deletions

View File

@ -78,7 +78,7 @@ export default class DebugView extends React.Component {
}
parseResult.content = parseResult.content.filter((elem, i) => pageNr == -1 || i == pageNr);
const summaryComponent = lastTransformation.createSummaryView(parseResult.summary);
const summaryComponent = lastTransformation.createSummaryView(parseResult);
const pageComponents = parseResult.content.map(page => lastTransformation.createPageView(page, this.state.modificationsOnly));
const showModificationCheckbox = lastTransformation.showModificationCheckbox();

View File

@ -1,6 +1,6 @@
import { Enum } from 'enumify';
import NoOp from './transformations/NoOp.jsx';
import CalculateGlobalStats from './transformations/CalculateGlobalStats.jsx';
import RoundCoordinates from './transformations/RoundCoordinates.jsx';
import CombineSameY from './transformations/CombineSameY.jsx';
import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx'
@ -21,7 +21,7 @@ export default class AppState {
this.fileBuffer;
this.pdfPages = [];
this.transformations = [
new NoOp,
new CalculateGlobalStats(),
new RoundCoordinates(),
new CombineSameY(),
new RemoveWhitespaces(),

View File

@ -0,0 +1,66 @@
import React from 'react';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import ParseResult from '../ParseResult.jsx';
export default class CalculateGlobalStats extends ToPdfViewTransformation {
constructor() {
super("Calculate Statistics");
}
createSummaryView(parseResult:ParseResult) {
return <div>
<ul>
<li>
{ 'Most-used height: ' + parseResult.globals.mostUsedHeight + ' ' }
</li>
<li>
{ 'Most-used font: ' + parseResult.globals.mostUsedFont + ' ' }
</li>
<hr/>
<li>
{ 'Items per height: ' + JSON.stringify(parseResult.summary.heightToOccurrence) + ' ' }
</li>
<li>
{ 'Items per font: ' + JSON.stringify(parseResult.summary.fontToOccurrence) + ' ' }
</li>
</ul>
</div>;
}
transform(parseResult:ParseResult) {
const heightToOccurrence = {};
const fontToOccurrence = {};
parseResult.content.forEach(page => {
page.textItems.forEach(item => {
heightToOccurrence[item.height] = heightToOccurrence[item.height] ? heightToOccurrence[item.height] + 1 : 1;
fontToOccurrence[item.font] = fontToOccurrence[item.font] ? fontToOccurrence[item.font] + 1 : 1;
});
});
const mostUsedHeight = parseInt(getMostUsedKey(heightToOccurrence));
const mostUsedFont = getMostUsedKey(fontToOccurrence);
parseResult.globals = {
mostUsedHeight: mostUsedHeight,
mostUsedFont: mostUsedFont
}
parseResult.summary = {
heightToOccurrence: heightToOccurrence,
fontToOccurrence: fontToOccurrence
}
return parseResult;
}
}
/**
 * Returns the key with the highest occurrence count in the given map.
 *
 * On ties the key that comes first in `Object.keys` order wins. Returns
 * `undefined` when the map has no keys.
 *
 * @param keyToOccurrence object mapping a key to its occurrence count
 * @returns the key (always a string, as object keys are) with the largest count
 */
function getMostUsedKey(keyToOccurrence) {
    let maxOccurrence = 0;
    let maxKey;
    for (const key of Object.keys(keyToOccurrence)) {
        // Compare against undefined explicitly: a truthiness check would treat
        // an empty-string key as "nothing selected yet" and let any later key
        // overwrite it regardless of its count.
        if (maxKey === undefined || keyToOccurrence[key] > maxOccurrence) {
            maxOccurrence = keyToOccurrence[key];
            maxKey = key;
        }
    }
    return maxKey;
}

View File

@ -12,10 +12,10 @@ export default class DetectFootnotes extends ToPdfViewTransformation {
super("Detect Footnotes");
}
createSummaryView(summary) {
createSummaryView(parseResult:ParseResult) {
return <div>
Detected
{ ' ' + summary.footnotes + ' ' } footnotes.
{ ' ' + parseResult.summary.footnotes + ' ' } footnotes.
</div>;
}

View File

@ -1,14 +0,0 @@
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import ParseResult from '../ParseResult.jsx';
/**
 * Pass-through transformation: `transform` hands back the parse result it
 * receives without touching it.
 */
export default class NoOp extends ToPdfViewTransformation {
constructor() {
// Passes the label "Original" to the base class (presumably the name shown
// for this transformation step; confirm against ToPdfViewTransformation).
super("Original");
}
// Returns the given parse result unchanged.
transform(parseResult:ParseResult) {
return parseResult;
}
}

View File

@ -21,7 +21,7 @@ export default class Transformation {
return false;
}
createSummaryView(summary) { // eslint-disable-line no-unused-vars
createSummaryView(parseResult:ParseResult) { // eslint-disable-line no-unused-vars
return null;
}