mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-21 23:33:31 +01:00
Add global statistics
This commit is contained in:
parent
a76dac6428
commit
6441580889
@ -78,7 +78,7 @@ export default class DebugView extends React.Component {
|
||||
}
|
||||
|
||||
parseResult.content = parseResult.content.filter((elem, i) => pageNr == -1 || i == pageNr);
|
||||
const summaryComponent = lastTransformation.createSummaryView(parseResult.summary);
|
||||
const summaryComponent = lastTransformation.createSummaryView(parseResult);
|
||||
const pageComponents = parseResult.content.map(page => lastTransformation.createPageView(page, this.state.modificationsOnly));
|
||||
const showModificationCheckbox = lastTransformation.showModificationCheckbox();
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
import { Enum } from 'enumify';
|
||||
|
||||
import NoOp from './transformations/NoOp.jsx';
|
||||
import CalculateGlobalStats from './transformations/CalculateGlobalStats.jsx';
|
||||
import RoundCoordinates from './transformations/RoundCoordinates.jsx';
|
||||
import CombineSameY from './transformations/CombineSameY.jsx';
|
||||
import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx'
|
||||
@ -21,7 +21,7 @@ export default class AppState {
|
||||
this.fileBuffer;
|
||||
this.pdfPages = [];
|
||||
this.transformations = [
|
||||
new NoOp,
|
||||
new CalculateGlobalStats(),
|
||||
new RoundCoordinates(),
|
||||
new CombineSameY(),
|
||||
new RemoveWhitespaces(),
|
||||
|
@ -0,0 +1,66 @@
|
||||
import React from 'react';
|
||||
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
|
||||
export default class CalculateGlobalStats extends ToPdfViewTransformation {
|
||||
|
||||
constructor() {
|
||||
super("Calculate Statistics");
|
||||
}
|
||||
|
||||
createSummaryView(parseResult:ParseResult) {
|
||||
return <div>
|
||||
<ul>
|
||||
<li>
|
||||
{ 'Most-used height: ' + parseResult.globals.mostUsedHeight + ' ' }
|
||||
</li>
|
||||
<li>
|
||||
{ 'Most-used font: ' + parseResult.globals.mostUsedFont + ' ' }
|
||||
</li>
|
||||
<hr/>
|
||||
<li>
|
||||
{ 'Items per height: ' + JSON.stringify(parseResult.summary.heightToOccurrence) + ' ' }
|
||||
</li>
|
||||
<li>
|
||||
{ 'Items per font: ' + JSON.stringify(parseResult.summary.fontToOccurrence) + ' ' }
|
||||
</li>
|
||||
</ul>
|
||||
</div>;
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
const heightToOccurrence = {};
|
||||
const fontToOccurrence = {};
|
||||
parseResult.content.forEach(page => {
|
||||
page.textItems.forEach(item => {
|
||||
heightToOccurrence[item.height] = heightToOccurrence[item.height] ? heightToOccurrence[item.height] + 1 : 1;
|
||||
fontToOccurrence[item.font] = fontToOccurrence[item.font] ? fontToOccurrence[item.font] + 1 : 1;
|
||||
});
|
||||
});
|
||||
const mostUsedHeight = parseInt(getMostUsedKey(heightToOccurrence));
|
||||
const mostUsedFont = getMostUsedKey(fontToOccurrence);
|
||||
parseResult.globals = {
|
||||
mostUsedHeight: mostUsedHeight,
|
||||
mostUsedFont: mostUsedFont
|
||||
}
|
||||
parseResult.summary = {
|
||||
heightToOccurrence: heightToOccurrence,
|
||||
fontToOccurrence: fontToOccurrence
|
||||
}
|
||||
return parseResult;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
 * Finds the key with the highest occurrence count.
 *
 * The first key yielded by Object.keys() is always taken as the initial
 * candidate; a later key replaces it only when its count is strictly greater,
 * so ties are resolved in favour of the earlier key.
 *
 * @param keyToOccurrence object mapping a key to the number of times it occurred
 * @returns the most frequent key (a string, as produced by Object.keys),
 *          or undefined when the object has no own enumerable keys
 */
function getMostUsedKey(keyToOccurrence) {
    let maxOccurrence = 0;
    let maxKey;
    Object.keys(keyToOccurrence).forEach((key) => {
        // Test for undefined explicitly instead of `!maxKey`: an empty-string key is
        // falsy and would otherwise be replaced by any later key, whatever its count.
        if (maxKey === undefined || keyToOccurrence[key] > maxOccurrence) {
            maxOccurrence = keyToOccurrence[key];
            maxKey = key;
        }
    });
    return maxKey;
}
|
@ -12,10 +12,10 @@ export default class DetectFootnotes extends ToPdfViewTransformation {
|
||||
super("Detect Footnotes");
|
||||
}
|
||||
|
||||
createSummaryView(summary) {
|
||||
createSummaryView(parseResult:ParseResult) {
|
||||
return <div>
|
||||
Detected
|
||||
{ ' ' + summary.footnotes + ' ' } footnotes.
|
||||
{ ' ' + parseResult.summary.footnotes + ' ' } footnotes.
|
||||
</div>;
|
||||
}
|
||||
|
||||
|
@ -1,14 +0,0 @@
|
||||
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
|
||||
export default class NoOp extends ToPdfViewTransformation {
|
||||
|
||||
constructor() {
|
||||
super("Original");
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
return parseResult;
|
||||
}
|
||||
|
||||
}
|
@ -21,7 +21,7 @@ export default class Transformation {
|
||||
return false;
|
||||
}
|
||||
|
||||
createSummaryView(summary) { // eslint-disable-line no-unused-vars
|
||||
createSummaryView(parseResult:ParseResult) { // eslint-disable-line no-unused-vars
|
||||
return null;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user