WIP globalize display of globals and summary/messages

This commit is contained in:
Johannes Zillmann 2017-02-21 08:05:00 +01:00
parent 62fd0155ed
commit b7db48af4b
15 changed files with 81 additions and 139 deletions

View File

@ -8,6 +8,8 @@ import Pagination from 'react-bootstrap/lib/Pagination'
import MenuItem from 'react-bootstrap/lib/MenuItem'
import Label from 'react-bootstrap/lib/Label'
import Checkbox from 'react-bootstrap/lib/Checkbox'
import Collapse from 'react-bootstrap/lib/Collapse'
import Panel from 'react-bootstrap/lib/Panel'
import ParseResult from '../models/ParseResult.jsx';
@ -24,7 +26,8 @@ export default class DebugView extends React.Component {
this.state = {
currentTransformation: 0,
pageNr: -1,
modificationsOnly: false
modificationsOnly: false,
showStatistics: false
};
}
@ -58,6 +61,13 @@ export default class DebugView extends React.Component {
});
}
showStatistics() {
this.setState({
showStatistics: !this.state.showStatistics
});
}
render() {
const {currentTransformation, pageNr} = this.state;
@ -78,9 +88,18 @@ export default class DebugView extends React.Component {
}
parseResult.content = parseResult.content.filter((elem, i) => pageNr == -1 || i == pageNr);
const summaryComponent = lastTransformation.createSummaryView(parseResult);
const pageComponents = parseResult.content.map(page => lastTransformation.createPageView(page, this.state.modificationsOnly));
const showModificationCheckbox = lastTransformation.showModificationCheckbox();
const statisticsAsList = Object.keys(parseResult.globals).map((key, i) => {
return <li key={ i }>
{ key + ': ' + parseResult.globals[key] }
</li>
});
const messagesAsList = parseResult.messages.map((message, i) => {
return <li key={ i }>
{ message }
</li>
});
return (
<div>
@ -103,7 +122,7 @@ export default class DebugView extends React.Component {
ellipsis
boundaryLinks
items={ pdfPages.length }
maxButtons={ 18 }
maxButtons={ 17 }
activePage={ this.state.pageNr + 1 }
onSelect={ this.selectPage.bind(this) } />
</div>
@ -141,6 +160,11 @@ export default class DebugView extends React.Component {
Show only modifications
</Checkbox> }
</ButtonGroup>
<ButtonGroup>
<Checkbox onClick={ ::this.showStatistics }>
Show Statistics
</Checkbox>
</ButtonGroup>
</ButtonToolbar>
</td>
<td style={ { padding: '5px' } }>
@ -150,10 +174,24 @@ export default class DebugView extends React.Component {
</Label>
</td>
</tr>
<tr>
<td>
<Collapse in={ this.state.showStatistics }>
<Panel bsStyle="default">
<ul>
{ statisticsAsList }
</ul>
</Panel>
</Collapse>
</td>
</tr>
</tbody>
</table>
<hr/>
{ summaryComponent }
{ !this.state.showStatistics &&
<hr style={ { marginTop: '5px' } } /> }
<ul>
{ messagesAsList }
</ul>
{ pageComponents }
</div>
);

View File

@ -81,7 +81,7 @@ export default class TextItemTable extends React.Component {
)
return (
<Table responsive bordered>
<Table responsive condensed bordered>
{ tableHeader }
<tbody>
{ textItemRows }

View File

@ -34,6 +34,7 @@ export default class AppState {
new DetectTOC(),
new DetectLists(),
new DetectCodeBlocks(),
// new DetectFormats(),
// new CombineSameY(),
// new RemoveWhitespaces(),

View File

@ -3,8 +3,8 @@ export default class ParseResult {
// Copies the fields of the given options object onto the new instance.
constructor(options) {
this.content = options.content; // like PdfPages[]
this.summary = options.summary; // something to show only for the transformation
this.globals = options.globals; // properties accessible for the following transformations
this.globals = options.globals; // properties accessible for all the following transformations in debug mode
this.messages = options.messages; // something to show only for the transformation in debug mode
}
}

View File

@ -1,4 +1,3 @@
import React from 'react';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import ParseResult from '../ParseResult.jsx';
@ -8,38 +7,6 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
super("Calculate Statistics");
}
createSummaryView(parseResult:ParseResult) {
return <div>
<ul>
<li>
{ 'Most-used height: ' + parseResult.globals.mostUsedHeight + ' ' }
</li>
<li>
{ 'Most-used font: ' + parseResult.globals.mostUsedFont + ' ' }
</li>
<li>
{ 'Most-used distance: ' + parseResult.globals.mostUsedDistance + ' ' }
</li>
<li>
{ 'Max height: ' + parseResult.globals.maxHeight + ' ' }
</li>
<li>
{ 'Max height font: ' + parseResult.globals.maxHeightFont + ' ' }
</li>
<hr/>
<li>
{ 'Items per height: ' + JSON.stringify(parseResult.summary.heightToOccurrence) + ' ' }
</li>
<li>
{ 'Items per font: ' + JSON.stringify(parseResult.summary.fontToOccurrence) + ' ' }
</li>
<li>
{ 'Items per distance: ' + JSON.stringify(parseResult.summary.distanceToOccurrence) + ' ' }
</li>
</ul>
</div>;
}
transform(parseResult:ParseResult) {
// Parse heights
@ -102,11 +69,11 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation {
maxHeight: maxHeight,
maxHeightFont: maxHeightFont,
},
summary: {
heightToOccurrence: heightToOccurrence,
fontToOccurrence: fontToOccurrence,
distanceToOccurrence: distanceToOccurrence,
}
messages: [
'Items per height: ' + JSON.stringify(heightToOccurrence),
'Items per font: ' + JSON.stringify(fontToOccurrence),
'Items per distance: ' + JSON.stringify(distanceToOccurrence)
]
});
}

View File

@ -1,4 +1,3 @@
import React from 'react';
import ToPdfBlockViewTransformation from './ToPdfBlockViewTransformation.jsx';
import ParseResult from '../ParseResult.jsx';
import PdfBlock from '../PdfBlock.jsx';
@ -14,13 +13,6 @@ export default class DetectCodeBlocks extends ToPdfBlockViewTransformation {
super("Detect Code/Quotes");
}
createSummaryView(parseResult:ParseResult) {
return <div>
Detected
{ ' ' + parseResult.summary.foundBlocks + ' ' } code/quote blocks.
</div>;
}
transform(parseResult:ParseResult) {
const {mostUsedHeight, mostUsedDistance} = parseResult.globals;
@ -83,9 +75,7 @@ export default class DetectCodeBlocks extends ToPdfBlockViewTransformation {
return new ParseResult({
...parseResult,
summary: {
foundBlocks: foundBlocks
}
messages: ['Detected ' + foundBlocks + ' code/quote blocks.']
});
}

View File

@ -1,4 +1,3 @@
import React from 'react';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import TextItem from '../TextItem.jsx';
import ParseResult from '../ParseResult.jsx';
@ -12,14 +11,6 @@ export default class DetectFootnotes extends ToPdfViewTransformation {
super("Detect Footnotes");
}
createSummaryView(parseResult:ParseResult) {
return <div>
Detected
{ ' ' + parseResult.summary.footnotes + ' ' } footnotes.
</div>;
}
transform(parseResult:ParseResult) {
var nextFooterNumber = 1;
@ -60,12 +51,11 @@ export default class DetectFootnotes extends ToPdfViewTransformation {
textItems: newTextItems
};
});
return new ParseResult({
...parseResult,
content: newContent,
summary: {
footnotes: foundFootnotes
}
messages: ['Detected ' + foundFootnotes + ' footnotes']
});
}

View File

@ -1,4 +1,3 @@
import React from 'react';
import ToPdfBlockViewTransformation from './ToPdfBlockViewTransformation.jsx';
import ParseResult from '../ParseResult.jsx';
import TextItem from '../TextItem.jsx';
@ -15,13 +14,6 @@ export default class DetectLists extends ToPdfBlockViewTransformation {
super("Detect Lists");
}
createSummaryView(parseResult:ParseResult) {
return <div>
Detected
{ ' ' + parseResult.summary.foundBlocks + ' ' } list blocks.
</div>;
}
transform(parseResult:ParseResult) {
const {mostUsedDistance} = parseResult.globals;
var foundBlocks = 0;
@ -111,9 +103,7 @@ export default class DetectLists extends ToPdfBlockViewTransformation {
return new ParseResult({
...parseResult,
summary: {
foundBlocks: foundBlocks
}
messages: ['Detected ' + foundBlocks + ' list blocks.']
});
}

View File

@ -1,4 +1,3 @@
import React from 'react';
import ToPdfBlockViewTransformation from './ToPdfBlockViewTransformation.jsx';
import ParseResult from '../ParseResult.jsx';
import PdfBlockPage from '../PdfBlockPage.jsx';
@ -11,13 +10,6 @@ export default class DetectPdfBlocks extends ToPdfBlockViewTransformation {
super("Detect Blocks");
}
createSummaryView(parseResult:ParseResult) {
return <div>
Splitted into
{ ' ' + parseResult.summary.createdBlocks + ' ' } blocks.
</div>;
}
transform(parseResult:ParseResult) {
const {mostUsedDistance} = parseResult.globals;
var createdBlocks = 0;
@ -53,12 +45,11 @@ export default class DetectPdfBlocks extends ToPdfBlockViewTransformation {
});
});
return new ParseResult({
...parseResult,
content: newContent,
summary: {
createdBlocks: createdBlocks
}
messages: ['Splitted into ' + createdBlocks + ' blocks']
});
}

View File

@ -1,4 +1,3 @@
import React from 'react';
import ToPdfBlockViewTransformation from './ToPdfBlockViewTransformation.jsx';
import ParseResult from '../ParseResult.jsx';
import TextItem from '../TextItem.jsx';
@ -15,14 +14,6 @@ export default class DetectTOC extends ToPdfBlockViewTransformation {
super("Detect Table of Contents");
}
createSummaryView(parseResult:ParseResult) {
return <div>
Detected
{ ' ' + parseResult.summary.foundTocPages + ' ' } table of content pages.
</div>;
}
transform(parseResult:ParseResult) {
const {mostUsedDistance} = parseResult.globals;
var foundTocPages = 0;
@ -82,9 +73,7 @@ export default class DetectTOC extends ToPdfBlockViewTransformation {
return new ParseResult({
...parseResult,
summary: {
foundTocPages: foundTocPages
}
messages: ['Detected ' + foundTocPages + ' table of content pages']
});
}

View File

@ -1,4 +1,3 @@
import React from 'react';
import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
import ParseResult from '../ParseResult.jsx';
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
@ -27,19 +26,6 @@ export default class RemoveRepetitiveElements extends ToPdfViewTransformation {
super("Remove Repetitive Elements");
}
createSummaryView(parseResult:ParseResult) {
return <div>
<ul>
<li>
{ 'Removed Header: ' + parseResult.summary.removedHeader + ' ' }
</li>
<li>
{ 'Removed Footers: ' + parseResult.summary.removedFooter + ' ' }
</li>
</ul>
</div>;
}
// The idea is the following:
// - For each page, collect all items of the first, and all items of the last line
// - Calculate how often these items occur across all pages (hash ignoring numbers, whitespace, upper/lowercase)
@ -104,18 +90,11 @@ export default class RemoveRepetitiveElements extends ToPdfViewTransformation {
return new ParseResult({
...parseResult,
summary: {
removedHeader: removedHeader,
removedFooter: removedFooter,
}
messages: [
'Removed Header: ' + removedHeader,
'Removed Footers: ' + removedFooter
]
});
}
completeTransform(parseResult:ParseResult) {
parseResult.content.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
});
return parseResult;
}
}

View File

@ -33,6 +33,7 @@ export default class ToPdfBlockViewTransformation extends Transformation {
completeTransform(parseResult:ParseResult) {
// The usual cleanup
parseResult.messages = [];
parseResult.content.forEach(page => {
page.blocks = page.blocks.filter(block => !block.annotation || block.annotation !== REMOVED_ANNOTATION);
page.blocks.forEach(block => block.annotation = null);

View File

@ -1,6 +1,8 @@
import React from 'react';
import Transformation from './Transformation.jsx';
import ParseResult from '../ParseResult.jsx';
import PdfPageView from '../../components/debug/PdfPageView.jsx';
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
// Abstract class for transformations producing a PdfPage to be shown in the PdfView
export default class ToPdfViewTransformation extends Transformation {
@ -29,4 +31,15 @@ export default class ToPdfViewTransformation extends Transformation {
showWhitespaces={ this.showWhitespaces } />;
}
completeTransform(parseResult:ParseResult) {
// The usual cleanup
parseResult.messages = [];
parseResult.content.forEach(page => {
page.textItems = page.textItems.filter(item => !item.annotation || item.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(block => block.annotation = null);
});
return parseResult;
}
}

View File

@ -21,10 +21,6 @@ export default class Transformation {
return false;
}
createSummaryView(parseResult:ParseResult) { // eslint-disable-line no-unused-vars
return null;
}
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
throw new TypeError("Do not call abstract method foo from child.");
}
@ -36,6 +32,7 @@ export default class Transformation {
// Sometimes transform() only visualizes a change. This method then does the actual change.
// This base version only resets parseResult.messages to an empty array and returns the same
// (mutated) parseResult object.
completeTransform(parseResult: ParseResult) { // eslint-disable-line no-unused-vars
parseResult.messages = [];
return parseResult;
}

View File

@ -11,6 +11,7 @@ export default class VerticalToHorizontal extends ToPdfViewTransformation {
}
transform(parseResult:ParseResult) {
var foundVerticals = 0;
const newContent = parseResult.content.map(page => {
const newTextItems = [];
// var oneCharacterItems = [];
@ -65,6 +66,7 @@ export default class VerticalToHorizontal extends ToPdfViewTransformation {
text: combinedText,
annotation: ADDED_ANNOTATION
}));
foundVerticals++;
} else {
oneCharacterItems.forEach(oneCharacterItem => newTextItems.push(oneCharacterItem));
}
@ -87,15 +89,9 @@ export default class VerticalToHorizontal extends ToPdfViewTransformation {
return new ParseResult({
...parseResult,
content: newContent,
messages: ["Converted " + foundVerticals + " verticals"]
});
}
completeTransform(parseResult:ParseResult) {
parseResult.content.forEach(page => {
page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
page.textItems.forEach(textItem => textItem.annotation = null)
});
return parseResult;
}
}