mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-21 23:33:31 +01:00
[WIP] move different typed transformations to different folders
This commit is contained in:
parent
739d20d83b
commit
93f15a38b5
@ -101,6 +101,18 @@ export default class DebugView extends React.Component {
|
|||||||
</li>
|
</li>
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const transformationMenuItems = [];
|
||||||
|
var lastItemType;
|
||||||
|
transformations.forEach((transformation, i) => {
|
||||||
|
if (lastItemType && transformation.itemType !== lastItemType) {
|
||||||
|
transformationMenuItems.push(<MenuItem key={ i + '-divider' } divider />);
|
||||||
|
}
|
||||||
|
transformationMenuItems.push(<MenuItem key={ i } eventKey={ i } onSelect={ this.selectTransformation.bind(this, i) }>
|
||||||
|
{ transformation.name }
|
||||||
|
</MenuItem>);
|
||||||
|
lastItemType = transformation.itemType;
|
||||||
|
});
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div>
|
<div>
|
||||||
<table>
|
<table>
|
||||||
@ -149,9 +161,7 @@ export default class DebugView extends React.Component {
|
|||||||
</ButtonGroup>
|
</ButtonGroup>
|
||||||
<ButtonGroup>
|
<ButtonGroup>
|
||||||
<DropdownButton title={ currentTransformationName } id="dropdown-size-medium">
|
<DropdownButton title={ currentTransformationName } id="dropdown-size-medium">
|
||||||
{ transformations.map((transformation, i) => <MenuItem key={ i } eventKey={ i } onSelect={ this.selectTransformation.bind(this, i) }>
|
{ transformationMenuItems }
|
||||||
{ transformation.name }
|
|
||||||
</MenuItem>) }
|
|
||||||
</DropdownButton>
|
</DropdownButton>
|
||||||
</ButtonGroup>
|
</ButtonGroup>
|
||||||
<ButtonGroup>
|
<ButtonGroup>
|
||||||
|
@ -1,16 +1,17 @@
|
|||||||
import { Enum } from 'enumify';
|
import { Enum } from 'enumify';
|
||||||
|
|
||||||
import CalculateGlobalStats from './transformations/CalculateGlobalStats.jsx';
|
import CalculateGlobalStats from './transformations/textitem/CalculateGlobalStats.jsx';
|
||||||
import CompactLines from './transformations/CompactLines.jsx';
|
import CompactLines from './transformations/textitem/CompactLines.jsx';
|
||||||
import RemoveRepetitiveElements from './transformations/RemoveRepetitiveElements.jsx'
|
import RemoveRepetitiveElements from './transformations/textitem/RemoveRepetitiveElements.jsx'
|
||||||
import VerticalToHorizontal from './transformations/VerticalToHorizontal.jsx';
|
import VerticalToHorizontal from './transformations/textitem/VerticalToHorizontal.jsx';
|
||||||
import DetectTOC from './transformations/DetectTOC.jsx'
|
import DetectTOC from './transformations/textitem/DetectTOC.jsx'
|
||||||
import DetectListItems from './transformations/DetectListItems.jsx'
|
import DetectListItems from './transformations/textitem/DetectListItems.jsx'
|
||||||
|
// import DetectHeaders from './transformations/textitem/DetectHeaders.jsx'
|
||||||
|
|
||||||
import GatherBlocks from './transformations/GatherBlocks.jsx'
|
import GatherBlocks from './transformations/textitemblock/GatherBlocks.jsx'
|
||||||
import DetectCodeQuoteBlocks from './transformations/DetectCodeQuoteBlocks.jsx'
|
import DetectCodeQuoteBlocks from './transformations/textitemblock/DetectCodeQuoteBlocks.jsx'
|
||||||
import DetectListLevels from './transformations/DetectListLevels.jsx'
|
import DetectListLevels from './transformations/textitemblock/DetectListLevels.jsx'
|
||||||
import DetectHeadlines from './transformations/DetectHeadlines.jsx'
|
// import DetectHeadlines from './transformations/textitemblock/DetectHeadlines.jsx'
|
||||||
// import DetectFormats from './transformations/DetectFormats.jsx'
|
// import DetectFormats from './transformations/DetectFormats.jsx'
|
||||||
// import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx'
|
// import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx'
|
||||||
// import DetectLinks from './transformations/DetectLinks.jsx'
|
// import DetectLinks from './transformations/DetectLinks.jsx'
|
||||||
@ -34,11 +35,12 @@ export default class AppState {
|
|||||||
new VerticalToHorizontal(),
|
new VerticalToHorizontal(),
|
||||||
new DetectTOC(),
|
new DetectTOC(),
|
||||||
new DetectListItems(),
|
new DetectListItems(),
|
||||||
|
new DetectHeaders(),
|
||||||
|
|
||||||
new GatherBlocks(),
|
new GatherBlocks(),
|
||||||
new DetectCodeQuoteBlocks(),
|
new DetectCodeQuoteBlocks(),
|
||||||
new DetectListLevels(),
|
new DetectListLevels(),
|
||||||
new DetectHeadlines(),
|
// new DetectHeadlines(),
|
||||||
|
|
||||||
// new DetectFormats(),
|
// new DetectFormats(),
|
||||||
// new RemoveWhitespaces(),
|
// new RemoveWhitespaces(),
|
||||||
|
@ -6,7 +6,7 @@ import ParseResult from '../ParseResult.jsx';
|
|||||||
export default class ToMarkdown extends Transformation {
|
export default class ToMarkdown extends Transformation {
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
super("To Markdown");
|
super("To Markdown", "String");
|
||||||
}
|
}
|
||||||
|
|
||||||
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
|
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
|
||||||
|
@ -7,7 +7,7 @@ import { blockToText } from '../ElementType.jsx';
|
|||||||
export default class ToTextBlocks extends Transformation {
|
export default class ToTextBlocks extends Transformation {
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
super("To Text Blocks");
|
super("To Text Blocks", "TextBlock");
|
||||||
}
|
}
|
||||||
|
|
||||||
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
|
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import React from 'react';
|
import React from 'react';
|
||||||
import Transformation from './Transformation.jsx';
|
import Transformation from './Transformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../ParseResult.jsx';
|
||||||
|
import TextItemBlock from '../TextItemBlock.jsx';
|
||||||
import TextItemBlockPageView from '../../components/debug/TextItemBlockPageView.jsx';
|
import TextItemBlockPageView from '../../components/debug/TextItemBlockPageView.jsx';
|
||||||
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
||||||
|
|
||||||
@ -8,7 +9,7 @@ import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
|||||||
export default class ToTextItemBlockTransformation extends Transformation {
|
export default class ToTextItemBlockTransformation extends Transformation {
|
||||||
|
|
||||||
constructor(name) {
|
constructor(name) {
|
||||||
super(name);
|
super(name, TextItemBlock.name);
|
||||||
if (this.constructor === ToTextItemBlockTransformation) {
|
if (this.constructor === ToTextItemBlockTransformation) {
|
||||||
throw new TypeError("Can not construct abstract class.");
|
throw new TypeError("Can not construct abstract class.");
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import React from 'react';
|
import React from 'react';
|
||||||
import Transformation from './Transformation.jsx';
|
import Transformation from './Transformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../ParseResult.jsx';
|
||||||
|
import TextItem from '../TextItem.jsx';
|
||||||
import TextItemPageView from '../../components/debug/TextItemPageView.jsx';
|
import TextItemPageView from '../../components/debug/TextItemPageView.jsx';
|
||||||
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
||||||
|
|
||||||
@ -8,7 +9,7 @@ import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
|||||||
export default class ToTextItemTransformation extends Transformation {
|
export default class ToTextItemTransformation extends Transformation {
|
||||||
|
|
||||||
constructor(name) {
|
constructor(name) {
|
||||||
super(name);
|
super(name, TextItem.name);
|
||||||
if (this.constructor === ToTextItemTransformation) {
|
if (this.constructor === ToTextItemTransformation) {
|
||||||
throw new TypeError("Can not construct abstract class.");
|
throw new TypeError("Can not construct abstract class.");
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@ import ParseResult from '../ParseResult.jsx';
|
|||||||
// A transformation from an PdfPage to an PdfPage
|
// A transformation from an PdfPage to an PdfPage
|
||||||
export default class Transformation {
|
export default class Transformation {
|
||||||
|
|
||||||
constructor(name) {
|
constructor(name, itemType) {
|
||||||
if (this.constructor === Transformation) {
|
if (this.constructor === Transformation) {
|
||||||
throw new TypeError("Can not construct abstract class.");
|
throw new TypeError("Can not construct abstract class.");
|
||||||
}
|
}
|
||||||
@ -11,6 +11,7 @@ export default class Transformation {
|
|||||||
throw new TypeError("Please implement abstract method 'transform()'.");
|
throw new TypeError("Please implement abstract method 'transform()'.");
|
||||||
}
|
}
|
||||||
this.name = name;
|
this.name = name;
|
||||||
|
this.itemType = itemType;
|
||||||
}
|
}
|
||||||
|
|
||||||
showPageSelection() {
|
showPageSelection() {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../../ParseResult.jsx';
|
||||||
|
|
||||||
export default class CalculateGlobalStats extends ToTextItemTransformation {
|
export default class CalculateGlobalStats extends ToTextItemTransformation {
|
||||||
|
|
@ -1,11 +1,11 @@
|
|||||||
import React from 'react';
|
import React from 'react';
|
||||||
|
|
||||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../../ParseResult.jsx';
|
||||||
import TextItemLineGrouper from '../TextItemLineGrouper.jsx';
|
import TextItemLineGrouper from '../../TextItemLineGrouper.jsx';
|
||||||
import TextItemLineCompactor from '../TextItemLineCompactor.jsx';
|
import TextItemLineCompactor from '../../TextItemLineCompactor.jsx';
|
||||||
import ElementType from '../ElementType.jsx';
|
import ElementType from '../../ElementType.jsx';
|
||||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
|
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../../Annotation.jsx';
|
||||||
|
|
||||||
|
|
||||||
// gathers text items on the same y line to one text item
|
// gathers text items on the same y line to one text item
|
@ -1,9 +1,9 @@
|
|||||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../../ParseResult.jsx';
|
||||||
import TextItem from '../TextItem.jsx';
|
import TextItem from '../../TextItem.jsx';
|
||||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION, DETECTED_ANNOTATION } from '../Annotation.jsx';
|
import { REMOVED_ANNOTATION, ADDED_ANNOTATION, DETECTED_ANNOTATION } from '../../Annotation.jsx';
|
||||||
import ElementType from '../ElementType.jsx';
|
import ElementType from '../../ElementType.jsx';
|
||||||
import { isListItem, isNumberedListItem, removeLeadingWhitespaces } from '../../functions.jsx';
|
import { isListItem, isNumberedListItem, removeLeadingWhitespaces } from '../../../functions.jsx';
|
||||||
|
|
||||||
//Detect items starting with -, •, etc...
|
//Detect items starting with -, •, etc...
|
||||||
export default class DetectListItems extends ToTextItemTransformation {
|
export default class DetectListItems extends ToTextItemTransformation {
|
@ -1,11 +1,11 @@
|
|||||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../../ParseResult.jsx';
|
||||||
import TextItem from '../TextItem.jsx';
|
import TextItem from '../../TextItem.jsx';
|
||||||
import HeadlineFinder from '../HeadlineFinder.jsx';
|
import HeadlineFinder from '../../HeadlineFinder.jsx';
|
||||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
|
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../../Annotation.jsx';
|
||||||
import ElementType from '../ElementType.jsx';
|
import ElementType from '../../ElementType.jsx';
|
||||||
import { headlineByLevel } from '../ElementType.jsx';
|
import { headlineByLevel } from '../../ElementType.jsx';
|
||||||
import { isDigit } from '../../functions.jsx'
|
import { isDigit } from '../../../functions.jsx'
|
||||||
|
|
||||||
//Detect table of contents pages
|
//Detect table of contents pages
|
||||||
export default class DetectTOC extends ToTextItemTransformation {
|
export default class DetectTOC extends ToTextItemTransformation {
|
@ -1,8 +1,8 @@
|
|||||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../../ParseResult.jsx';
|
||||||
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
import { REMOVED_ANNOTATION } from '../../Annotation.jsx';
|
||||||
|
|
||||||
import { isDigit } from '../../functions.jsx'
|
import { isDigit } from '../../../functions.jsx'
|
||||||
|
|
||||||
|
|
||||||
function hashCodeIgnoringSpacesAndNumbers(string) {
|
function hashCodeIgnoringSpacesAndNumbers(string) {
|
@ -1,7 +1,7 @@
|
|||||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../../ParseResult.jsx';
|
||||||
import TextItem from '../TextItem.jsx';
|
import TextItem from '../../TextItem.jsx';
|
||||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
|
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../../Annotation.jsx';
|
||||||
|
|
||||||
// Converts vertical text to horizontal
|
// Converts vertical text to horizontal
|
||||||
export default class VerticalToHorizontal extends ToTextItemTransformation {
|
export default class VerticalToHorizontal extends ToTextItemTransformation {
|
@ -1,8 +1,8 @@
|
|||||||
import ToTextItemBlockTransformation from './ToTextItemBlockTransformation.jsx';
|
import ToTextItemBlockTransformation from '..//ToTextItemBlockTransformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../../ParseResult.jsx';
|
||||||
import { DETECTED_ANNOTATION } from '../Annotation.jsx';
|
import { DETECTED_ANNOTATION } from '../../Annotation.jsx';
|
||||||
import ElementType from '../ElementType.jsx';
|
import ElementType from '../../ElementType.jsx';
|
||||||
import { minXFromBlocks } from '../../textItemFunctions.jsx';
|
import { minXFromBlocks } from '../../../textItemFunctions.jsx';
|
||||||
|
|
||||||
//Detect items which are code/quote blocks
|
//Detect items which are code/quote blocks
|
||||||
export default class DetectCodeQuoteBlocks extends ToTextItemBlockTransformation {
|
export default class DetectCodeQuoteBlocks extends ToTextItemBlockTransformation {
|
@ -1,7 +1,7 @@
|
|||||||
import ToTextItemBlockTransformation from './ToTextItemBlockTransformation.jsx';
|
import ToTextItemBlockTransformation from '..//ToTextItemBlockTransformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../../ParseResult.jsx';
|
||||||
import { MODIFIED_ANNOTATION, UNCHANGED_ANNOTATION } from '../Annotation.jsx';
|
import { MODIFIED_ANNOTATION, UNCHANGED_ANNOTATION } from '../../Annotation.jsx';
|
||||||
import ElementType from '../ElementType.jsx';
|
import ElementType from '../../ElementType.jsx';
|
||||||
|
|
||||||
// Cares for proper sub-item spacing/leveling
|
// Cares for proper sub-item spacing/leveling
|
||||||
export default class DetectListLevels extends ToTextItemBlockTransformation {
|
export default class DetectListLevels extends ToTextItemBlockTransformation {
|
@ -1,8 +1,8 @@
|
|||||||
import ToTextItemBlockTransformation from './ToTextItemBlockTransformation.jsx';
|
import ToTextItemBlockTransformation from '../ToTextItemBlockTransformation.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../../ParseResult.jsx';
|
||||||
import TextItemBlock from '../TextItemBlock.jsx';
|
import TextItemBlock from '../../TextItemBlock.jsx';
|
||||||
import { DETECTED_ANNOTATION } from '../Annotation.jsx';
|
import { DETECTED_ANNOTATION } from '../../Annotation.jsx';
|
||||||
import { minXFromTextItems } from '../../textItemFunctions.jsx';
|
import { minXFromTextItems } from '../../../textItemFunctions.jsx';
|
||||||
|
|
||||||
// Gathers lines to blocks
|
// Gathers lines to blocks
|
||||||
export default class GatherBlocks extends ToTextItemBlockTransformation {
|
export default class GatherBlocks extends ToTextItemBlockTransformation {
|
Loading…
Reference in New Issue
Block a user