From b7db48af4b1d83172a271c5e39ecf8d73c1f35aa Mon Sep 17 00:00:00 2001 From: Johannes Zillmann Date: Tue, 21 Feb 2017 08:05:00 +0100 Subject: [PATCH] WIP globalize display of globals and summary/messages --- src/javascript/components/DebugView.jsx | 48 +++++++++++++++++-- .../components/debug/TextItemTable.jsx | 2 +- src/javascript/models/AppState.jsx | 1 + src/javascript/models/ParseResult.jsx | 4 +- .../transformations/CalculateGlobalStats.jsx | 43 ++--------------- .../transformations/DetectCodeBlocks.jsx | 12 +---- .../transformations/DetectFootnotes.jsx | 14 +----- .../models/transformations/DetectLists.jsx | 12 +---- .../transformations/DetectPdfBlocks.jsx | 13 +---- .../models/transformations/DetectTOC.jsx | 13 +---- .../RemoveRepetitiveElements.jsx | 29 ++--------- .../ToPdfBlockViewTransformation.jsx | 1 + .../ToPdfViewTransformation.jsx | 13 +++++ .../models/transformations/Transformation.jsx | 5 +- .../transformations/VerticalToHorizontal.jsx | 10 ++-- 15 files changed, 81 insertions(+), 139 deletions(-) diff --git a/src/javascript/components/DebugView.jsx b/src/javascript/components/DebugView.jsx index da87f9f..5aaa42e 100644 --- a/src/javascript/components/DebugView.jsx +++ b/src/javascript/components/DebugView.jsx @@ -8,6 +8,8 @@ import Pagination from 'react-bootstrap/lib/Pagination' import MenuItem from 'react-bootstrap/lib/MenuItem' import Label from 'react-bootstrap/lib/Label' import Checkbox from 'react-bootstrap/lib/Checkbox' +import Collapse from 'react-bootstrap/lib/Collapse' +import Panel from 'react-bootstrap/lib/Panel' import ParseResult from '../models/ParseResult.jsx'; @@ -24,7 +26,8 @@ export default class DebugView extends React.Component { this.state = { currentTransformation: 0, pageNr: -1, - modificationsOnly: false + modificationsOnly: false, + showStatistics: false }; } @@ -58,6 +61,13 @@ export default class DebugView extends React.Component { }); } + showStatistics() { + this.setState({ + showStatistics: 
!this.state.showStatistics + }); + + } + render() { const {currentTransformation, pageNr} = this.state; @@ -78,9 +88,18 @@ export default class DebugView extends React.Component { } parseResult.content = parseResult.content.filter((elem, i) => pageNr == -1 || i == pageNr); - const summaryComponent = lastTransformation.createSummaryView(parseResult); const pageComponents = parseResult.content.map(page => lastTransformation.createPageView(page, this.state.modificationsOnly)); const showModificationCheckbox = lastTransformation.showModificationCheckbox(); + const statisticsAsList = Object.keys(parseResult.globals).map((key, i) => { + return
  • + { key + ': ' + parseResult.globals[key] } +
  • + }); + const messagesAsList = parseResult.messages.map((message, i) => { + return
  • + { message } +
  • + }); return (
    @@ -103,7 +122,7 @@ export default class DebugView extends React.Component { ellipsis boundaryLinks items={ pdfPages.length } - maxButtons={ 18 } + maxButtons={ 17 } activePage={ this.state.pageNr + 1 } onSelect={ this.selectPage.bind(this) } />
    @@ -141,6 +160,11 @@ export default class DebugView extends React.Component { Show only modifications } + + + Show Statistics + + @@ -150,10 +174,24 @@ export default class DebugView extends React.Component { + + + + +
      + { statisticsAsList } +
    +
    +
    + + -
    - { summaryComponent } + { !this.state.showStatistics && +
    } + { pageComponents } ); diff --git a/src/javascript/components/debug/TextItemTable.jsx b/src/javascript/components/debug/TextItemTable.jsx index fe4683b..7ac6e60 100644 --- a/src/javascript/components/debug/TextItemTable.jsx +++ b/src/javascript/components/debug/TextItemTable.jsx @@ -81,7 +81,7 @@ export default class TextItemTable extends React.Component { ) return ( - +
    { tableHeader } { textItemRows } diff --git a/src/javascript/models/AppState.jsx b/src/javascript/models/AppState.jsx index 646c1b8..a29513e 100644 --- a/src/javascript/models/AppState.jsx +++ b/src/javascript/models/AppState.jsx @@ -34,6 +34,7 @@ export default class AppState { new DetectTOC(), new DetectLists(), new DetectCodeBlocks(), + // new DetectFormats(), // new CombineSameY(), // new RemoveWhitespaces(), diff --git a/src/javascript/models/ParseResult.jsx b/src/javascript/models/ParseResult.jsx index 5881f09..709e1a6 100644 --- a/src/javascript/models/ParseResult.jsx +++ b/src/javascript/models/ParseResult.jsx @@ -3,8 +3,8 @@ export default class ParseResult { constructor(options) { this.content = options.content; // like PdfPages[] - this.summary = options.summary; // something to show only for the transformation - this.globals = options.globals; // properties accasable for the following transformations + this.globals = options.globals; // properties accessible for all the following transformations in debug mode + this.messages = options.messages; // something to show only for the transformation in debug mode } } diff --git a/src/javascript/models/transformations/CalculateGlobalStats.jsx b/src/javascript/models/transformations/CalculateGlobalStats.jsx index 1c8765d..4d6c8a3 100644 --- a/src/javascript/models/transformations/CalculateGlobalStats.jsx +++ b/src/javascript/models/transformations/CalculateGlobalStats.jsx @@ -1,4 +1,3 @@ -import React from 'react'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx'; import ParseResult from '../ParseResult.jsx'; @@ -8,38 +7,6 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation { super("Calculate Statistics"); } - createSummaryView(parseResult:ParseResult) { - return
    - -
    ; - } - transform(parseResult:ParseResult) { // Parse heights @@ -102,11 +69,11 @@ export default class CalculateGlobalStats extends ToPdfViewTransformation { maxHeight: maxHeight, maxHeightFont: maxHeightFont, }, - summary: { - heightToOccurrence: heightToOccurrence, - fontToOccurrence: fontToOccurrence, - distanceToOccurrence: distanceToOccurrence, - } + messages: [ + 'Items per height: ' + JSON.stringify(heightToOccurrence), + 'Items per font: ' + JSON.stringify(fontToOccurrence), + 'Items per distance: ' + JSON.stringify(distanceToOccurrence) + ] }); } diff --git a/src/javascript/models/transformations/DetectCodeBlocks.jsx b/src/javascript/models/transformations/DetectCodeBlocks.jsx index 585ae3f..ff8c4b2 100644 --- a/src/javascript/models/transformations/DetectCodeBlocks.jsx +++ b/src/javascript/models/transformations/DetectCodeBlocks.jsx @@ -1,4 +1,3 @@ -import React from 'react'; import ToPdfBlockViewTransformation from './ToPdfBlockViewTransformation.jsx'; import ParseResult from '../ParseResult.jsx'; import PdfBlock from '../PdfBlock.jsx'; @@ -14,13 +13,6 @@ export default class DetectCodeBlocks extends ToPdfBlockViewTransformation { super("Detect Code/Quotes"); } - createSummaryView(parseResult:ParseResult) { - return
    - Detected - { ' ' + parseResult.summary.foundBlocks + ' ' } code/quote blocks. -
    ; - } - transform(parseResult:ParseResult) { const {mostUsedHeight, mostUsedDistance} = parseResult.globals; @@ -83,9 +75,7 @@ export default class DetectCodeBlocks extends ToPdfBlockViewTransformation { return new ParseResult({ ...parseResult, - summary: { - foundBlocks: foundBlocks - } + messages: ['Detected ' + foundBlocks + ' code/quote blocks.'] }); } diff --git a/src/javascript/models/transformations/DetectFootnotes.jsx b/src/javascript/models/transformations/DetectFootnotes.jsx index 5b6d609..a447c34 100644 --- a/src/javascript/models/transformations/DetectFootnotes.jsx +++ b/src/javascript/models/transformations/DetectFootnotes.jsx @@ -1,4 +1,3 @@ -import React from 'react'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx'; import TextItem from '../TextItem.jsx'; import ParseResult from '../ParseResult.jsx'; @@ -12,14 +11,6 @@ export default class DetectFootnotes extends ToPdfViewTransformation { super("Detect Footnotes"); } - createSummaryView(parseResult:ParseResult) { - return
    - Detected - { ' ' + parseResult.summary.footnotes + ' ' } footnotes. -
    ; - } - - transform(parseResult:ParseResult) { var nextFooterNumber = 1; @@ -60,12 +51,11 @@ export default class DetectFootnotes extends ToPdfViewTransformation { textItems: newTextItems }; }); + return new ParseResult({ ...parseResult, content: newContent, - summary: { - footnotes: foundFootnotes - } + messages: ['Detected ' + foundFootnotes + ' footnotes'] }); } diff --git a/src/javascript/models/transformations/DetectLists.jsx b/src/javascript/models/transformations/DetectLists.jsx index 3558fa8..9859e5e 100644 --- a/src/javascript/models/transformations/DetectLists.jsx +++ b/src/javascript/models/transformations/DetectLists.jsx @@ -1,4 +1,3 @@ -import React from 'react'; import ToPdfBlockViewTransformation from './ToPdfBlockViewTransformation.jsx'; import ParseResult from '../ParseResult.jsx'; import TextItem from '../TextItem.jsx'; @@ -15,13 +14,6 @@ export default class DetectLists extends ToPdfBlockViewTransformation { super("Detect Lists"); } - createSummaryView(parseResult:ParseResult) { - return
    - Detected - { ' ' + parseResult.summary.foundBlocks + ' ' } list blocks. -
    ; - } - transform(parseResult:ParseResult) { const {mostUsedDistance} = parseResult.globals; var foundBlocks = 0; @@ -111,9 +103,7 @@ export default class DetectLists extends ToPdfBlockViewTransformation { return new ParseResult({ ...parseResult, - summary: { - foundBlocks: foundBlocks - } + messages: ['Detected ' + foundBlocks + ' list blocks.'] }); } diff --git a/src/javascript/models/transformations/DetectPdfBlocks.jsx b/src/javascript/models/transformations/DetectPdfBlocks.jsx index b362cce..34e9d12 100644 --- a/src/javascript/models/transformations/DetectPdfBlocks.jsx +++ b/src/javascript/models/transformations/DetectPdfBlocks.jsx @@ -1,4 +1,3 @@ -import React from 'react'; import ToPdfBlockViewTransformation from './ToPdfBlockViewTransformation.jsx'; import ParseResult from '../ParseResult.jsx'; import PdfBlockPage from '../PdfBlockPage.jsx'; @@ -11,13 +10,6 @@ export default class DetectPdfBlocks extends ToPdfBlockViewTransformation { super("Detect Blocks"); } - createSummaryView(parseResult:ParseResult) { - return
    - Splitted into - { ' ' + parseResult.summary.createdBlocks + ' ' } blocks. -
    ; - } - transform(parseResult:ParseResult) { const {mostUsedDistance} = parseResult.globals; var createdBlocks = 0; @@ -53,12 +45,11 @@ export default class DetectPdfBlocks extends ToPdfBlockViewTransformation { }); }); + return new ParseResult({ ...parseResult, content: newContent, - summary: { - createdBlocks: createdBlocks - } + messages: ['Splitted into ' + createdBlocks + ' blocks'] }); } diff --git a/src/javascript/models/transformations/DetectTOC.jsx b/src/javascript/models/transformations/DetectTOC.jsx index 70d3b49..933ec31 100644 --- a/src/javascript/models/transformations/DetectTOC.jsx +++ b/src/javascript/models/transformations/DetectTOC.jsx @@ -1,4 +1,3 @@ -import React from 'react'; import ToPdfBlockViewTransformation from './ToPdfBlockViewTransformation.jsx'; import ParseResult from '../ParseResult.jsx'; import TextItem from '../TextItem.jsx'; @@ -15,14 +14,6 @@ export default class DetectTOC extends ToPdfBlockViewTransformation { super("Detect Table of Contents"); } - createSummaryView(parseResult:ParseResult) { - return
    - Detected - { ' ' + parseResult.summary.foundTocPages + ' ' } table of content pages. -
    ; - } - - transform(parseResult:ParseResult) { const {mostUsedDistance} = parseResult.globals; var foundTocPages = 0; @@ -82,9 +73,7 @@ export default class DetectTOC extends ToPdfBlockViewTransformation { return new ParseResult({ ...parseResult, - summary: { - foundTocPages: foundTocPages - } + messages: ['Detected ' + foundTocPages + ' table of content pages'] }); } diff --git a/src/javascript/models/transformations/RemoveRepetitiveElements.jsx b/src/javascript/models/transformations/RemoveRepetitiveElements.jsx index 7dc95cf..bcb258c 100644 --- a/src/javascript/models/transformations/RemoveRepetitiveElements.jsx +++ b/src/javascript/models/transformations/RemoveRepetitiveElements.jsx @@ -1,4 +1,3 @@ -import React from 'react'; import ToPdfViewTransformation from './ToPdfViewTransformation.jsx'; import ParseResult from '../ParseResult.jsx'; import { REMOVED_ANNOTATION } from '../Annotation.jsx'; @@ -27,19 +26,6 @@ export default class RemoveRepetitiveElements extends ToPdfViewTransformation { super("Remove Repetitive Elements"); } - createSummaryView(parseResult:ParseResult) { - return
    - -
    ; - } - // The idea is the following: // - For each page, collect all items of the first, and all items of the last line // - Calculate how often these items occur accros all pages (hash ignoring numbers, whitespace, upper/lowercase) @@ -104,18 +90,11 @@ export default class RemoveRepetitiveElements extends ToPdfViewTransformation { return new ParseResult({ ...parseResult, - summary: { - removedHeader: removedHeader, - removedFooter: removedFooter, - } + messages: [ + 'Removed Header: ' + removedHeader, + 'Removed Footers: ' + removedFooter + ] }); } - completeTransform(parseResult:ParseResult) { - parseResult.content.forEach(page => { - page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION); - }); - return parseResult; - } - } \ No newline at end of file diff --git a/src/javascript/models/transformations/ToPdfBlockViewTransformation.jsx b/src/javascript/models/transformations/ToPdfBlockViewTransformation.jsx index fadaa2e..54db116 100644 --- a/src/javascript/models/transformations/ToPdfBlockViewTransformation.jsx +++ b/src/javascript/models/transformations/ToPdfBlockViewTransformation.jsx @@ -33,6 +33,7 @@ export default class ToPdfBlockViewTransformation extends Transformation { completeTransform(parseResult:ParseResult) { // The usual cleanup + parseResult.messages = []; parseResult.content.forEach(page => { page.blocks = page.blocks.filter(block => !block.annotation || block.annotation !== REMOVED_ANNOTATION); page.blocks.forEach(block => block.annotation = null); diff --git a/src/javascript/models/transformations/ToPdfViewTransformation.jsx b/src/javascript/models/transformations/ToPdfViewTransformation.jsx index e207c3f..2bbd699 100644 --- a/src/javascript/models/transformations/ToPdfViewTransformation.jsx +++ b/src/javascript/models/transformations/ToPdfViewTransformation.jsx @@ -1,6 +1,8 @@ import React from 'react'; import Transformation from './Transformation.jsx'; +import ParseResult 
from '../ParseResult.jsx'; import PdfPageView from '../../components/debug/PdfPageView.jsx'; +import { REMOVED_ANNOTATION } from '../Annotation.jsx'; // Abstract class for transformations producing a PdfPage to be shown in the PdfView export default class ToPdfViewTransformation extends Transformation { @@ -29,4 +31,15 @@ export default class ToPdfViewTransformation extends Transformation { showWhitespaces={ this.showWhitespaces } />; } + completeTransform(parseResult:ParseResult) { + // The usual cleanup + parseResult.messages = []; + parseResult.content.forEach(page => { + page.textItems = page.textItems.filter(item => !item.annotation || item.annotation !== REMOVED_ANNOTATION); + page.textItems.forEach(block => block.annotation = null); + }); + return parseResult; + } + + } \ No newline at end of file diff --git a/src/javascript/models/transformations/Transformation.jsx b/src/javascript/models/transformations/Transformation.jsx index a126c7a..58f2fb8 100644 --- a/src/javascript/models/transformations/Transformation.jsx +++ b/src/javascript/models/transformations/Transformation.jsx @@ -21,10 +21,6 @@ export default class Transformation { return false; } - createSummaryView(parseResult:ParseResult) { // eslint-disable-line no-unused-vars - return null; - } - createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars throw new TypeError("Do not call abstract method foo from child."); } @@ -36,6 +32,7 @@ export default class Transformation { // Sometimes the transform() does only visualize a change. This methods then does the actual change. 
completeTransform(parseResult: ParseResult) { // eslint-disable-line no-unused-vars + parseResult.messages = []; return parseResult; } diff --git a/src/javascript/models/transformations/VerticalToHorizontal.jsx b/src/javascript/models/transformations/VerticalToHorizontal.jsx index 78075eb..5ab8155 100644 --- a/src/javascript/models/transformations/VerticalToHorizontal.jsx +++ b/src/javascript/models/transformations/VerticalToHorizontal.jsx @@ -11,6 +11,7 @@ export default class VerticalToHorizontal extends ToPdfViewTransformation { } transform(parseResult:ParseResult) { + var foundVerticals = 0; const newContent = parseResult.content.map(page => { const newTextItems = []; // var oneCharacterItems = []; @@ -65,6 +66,7 @@ export default class VerticalToHorizontal extends ToPdfViewTransformation { text: combinedText, annotation: ADDED_ANNOTATION })); + foundVerticals++; } else { oneCharacterItems.forEach(oneCharacterItem => newTextItems.push(oneCharacterItem)); } @@ -87,15 +89,9 @@ export default class VerticalToHorizontal extends ToPdfViewTransformation { return new ParseResult({ ...parseResult, content: newContent, + messages: ["Converted " + foundVerticals + " verticals"] }); } - completeTransform(parseResult:ParseResult) { - parseResult.content.forEach(page => { - page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION); - page.textItems.forEach(textItem => textItem.annotation = null) - }); - return parseResult; - } }