mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-21 15:23:26 +01:00
[WIP] move different typed transformations to different folders
This commit is contained in:
parent
739d20d83b
commit
93f15a38b5
@ -101,6 +101,18 @@ export default class DebugView extends React.Component {
|
||||
</li>
|
||||
});
|
||||
|
||||
const transformationMenuItems = [];
|
||||
var lastItemType;
|
||||
transformations.forEach((transformation, i) => {
|
||||
if (lastItemType && transformation.itemType !== lastItemType) {
|
||||
transformationMenuItems.push(<MenuItem key={ i + '-divider' } divider />);
|
||||
}
|
||||
transformationMenuItems.push(<MenuItem key={ i } eventKey={ i } onSelect={ this.selectTransformation.bind(this, i) }>
|
||||
{ transformation.name }
|
||||
</MenuItem>);
|
||||
lastItemType = transformation.itemType;
|
||||
});
|
||||
|
||||
return (
|
||||
<div>
|
||||
<table>
|
||||
@ -149,9 +161,7 @@ export default class DebugView extends React.Component {
|
||||
</ButtonGroup>
|
||||
<ButtonGroup>
|
||||
<DropdownButton title={ currentTransformationName } id="dropdown-size-medium">
|
||||
{ transformations.map((transformation, i) => <MenuItem key={ i } eventKey={ i } onSelect={ this.selectTransformation.bind(this, i) }>
|
||||
{ transformation.name }
|
||||
</MenuItem>) }
|
||||
{ transformationMenuItems }
|
||||
</DropdownButton>
|
||||
</ButtonGroup>
|
||||
<ButtonGroup>
|
||||
|
@ -1,16 +1,17 @@
|
||||
import { Enum } from 'enumify';
|
||||
|
||||
import CalculateGlobalStats from './transformations/CalculateGlobalStats.jsx';
|
||||
import CompactLines from './transformations/CompactLines.jsx';
|
||||
import RemoveRepetitiveElements from './transformations/RemoveRepetitiveElements.jsx'
|
||||
import VerticalToHorizontal from './transformations/VerticalToHorizontal.jsx';
|
||||
import DetectTOC from './transformations/DetectTOC.jsx'
|
||||
import DetectListItems from './transformations/DetectListItems.jsx'
|
||||
import CalculateGlobalStats from './transformations/textitem/CalculateGlobalStats.jsx';
|
||||
import CompactLines from './transformations/textitem/CompactLines.jsx';
|
||||
import RemoveRepetitiveElements from './transformations/textitem/RemoveRepetitiveElements.jsx'
|
||||
import VerticalToHorizontal from './transformations/textitem/VerticalToHorizontal.jsx';
|
||||
import DetectTOC from './transformations/textitem/DetectTOC.jsx'
|
||||
import DetectListItems from './transformations/textitem/DetectListItems.jsx'
|
||||
// import DetectHeaders from './transformations/textitem/DetectHeaders.jsx'
|
||||
|
||||
import GatherBlocks from './transformations/GatherBlocks.jsx'
|
||||
import DetectCodeQuoteBlocks from './transformations/DetectCodeQuoteBlocks.jsx'
|
||||
import DetectListLevels from './transformations/DetectListLevels.jsx'
|
||||
import DetectHeadlines from './transformations/DetectHeadlines.jsx'
|
||||
import GatherBlocks from './transformations/textitemblock/GatherBlocks.jsx'
|
||||
import DetectCodeQuoteBlocks from './transformations/textitemblock/DetectCodeQuoteBlocks.jsx'
|
||||
import DetectListLevels from './transformations/textitemblock/DetectListLevels.jsx'
|
||||
// import DetectHeadlines from './transformations/textitemblock/DetectHeadlines.jsx'
|
||||
// import DetectFormats from './transformations/DetectFormats.jsx'
|
||||
// import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx'
|
||||
// import DetectLinks from './transformations/DetectLinks.jsx'
|
||||
@ -34,11 +35,12 @@ export default class AppState {
|
||||
new VerticalToHorizontal(),
|
||||
new DetectTOC(),
|
||||
new DetectListItems(),
|
||||
new DetectHeaders(),
|
||||
|
||||
new GatherBlocks(),
|
||||
new DetectCodeQuoteBlocks(),
|
||||
new DetectListLevels(),
|
||||
new DetectHeadlines(),
|
||||
// new DetectHeadlines(),
|
||||
|
||||
// new DetectFormats(),
|
||||
// new RemoveWhitespaces(),
|
||||
|
@ -6,7 +6,7 @@ import ParseResult from '../ParseResult.jsx';
|
||||
export default class ToMarkdown extends Transformation {
|
||||
|
||||
constructor() {
|
||||
super("To Markdown");
|
||||
super("To Markdown", "String");
|
||||
}
|
||||
|
||||
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
|
||||
|
@ -7,7 +7,7 @@ import { blockToText } from '../ElementType.jsx';
|
||||
export default class ToTextBlocks extends Transformation {
|
||||
|
||||
constructor() {
|
||||
super("To Text Blocks");
|
||||
super("To Text Blocks", "TextBlock");
|
||||
}
|
||||
|
||||
createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
|
||||
|
@ -1,6 +1,7 @@
|
||||
import React from 'react';
|
||||
import Transformation from './Transformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import TextItemBlock from '../TextItemBlock.jsx';
|
||||
import TextItemBlockPageView from '../../components/debug/TextItemBlockPageView.jsx';
|
||||
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
||||
|
||||
@ -8,7 +9,7 @@ import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
||||
export default class ToTextItemBlockTransformation extends Transformation {
|
||||
|
||||
constructor(name) {
|
||||
super(name);
|
||||
super(name, TextItemBlock.name);
|
||||
if (this.constructor === ToTextItemBlockTransformation) {
|
||||
throw new TypeError("Can not construct abstract class.");
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
import React from 'react';
|
||||
import Transformation from './Transformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import TextItem from '../TextItem.jsx';
|
||||
import TextItemPageView from '../../components/debug/TextItemPageView.jsx';
|
||||
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
||||
|
||||
@ -8,7 +9,7 @@ import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
||||
export default class ToTextItemTransformation extends Transformation {
|
||||
|
||||
constructor(name) {
|
||||
super(name);
|
||||
super(name, TextItem.name);
|
||||
if (this.constructor === ToTextItemTransformation) {
|
||||
throw new TypeError("Can not construct abstract class.");
|
||||
}
|
||||
|
@ -3,7 +3,7 @@ import ParseResult from '../ParseResult.jsx';
|
||||
// A transformation from a PdfPage to a PdfPage
|
||||
export default class Transformation {
|
||||
|
||||
constructor(name) {
|
||||
constructor(name, itemType) {
|
||||
if (this.constructor === Transformation) {
|
||||
throw new TypeError("Can not construct abstract class.");
|
||||
}
|
||||
@ -11,6 +11,7 @@ export default class Transformation {
|
||||
throw new TypeError("Please implement abstract method 'transform()'.");
|
||||
}
|
||||
this.name = name;
|
||||
this.itemType = itemType;
|
||||
}
|
||||
|
||||
showPageSelection() {
|
||||
|
@ -1,5 +1,5 @@
|
||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../../ParseResult.jsx';
|
||||
|
||||
export default class CalculateGlobalStats extends ToTextItemTransformation {
|
||||
|
@ -1,11 +1,11 @@
|
||||
import React from 'react';
|
||||
|
||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import TextItemLineGrouper from '../TextItemLineGrouper.jsx';
|
||||
import TextItemLineCompactor from '../TextItemLineCompactor.jsx';
|
||||
import ElementType from '../ElementType.jsx';
|
||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
|
||||
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../../ParseResult.jsx';
|
||||
import TextItemLineGrouper from '../../TextItemLineGrouper.jsx';
|
||||
import TextItemLineCompactor from '../../TextItemLineCompactor.jsx';
|
||||
import ElementType from '../../ElementType.jsx';
|
||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../../Annotation.jsx';
|
||||
|
||||
|
||||
// gathers text items on the same y line to one text item
|
@ -1,9 +1,9 @@
|
||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import TextItem from '../TextItem.jsx';
|
||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION, DETECTED_ANNOTATION } from '../Annotation.jsx';
|
||||
import ElementType from '../ElementType.jsx';
|
||||
import { isListItem, isNumberedListItem, removeLeadingWhitespaces } from '../../functions.jsx';
|
||||
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../../ParseResult.jsx';
|
||||
import TextItem from '../../TextItem.jsx';
|
||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION, DETECTED_ANNOTATION } from '../../Annotation.jsx';
|
||||
import ElementType from '../../ElementType.jsx';
|
||||
import { isListItem, isNumberedListItem, removeLeadingWhitespaces } from '../../../functions.jsx';
|
||||
|
||||
//Detect items starting with -, •, etc...
|
||||
export default class DetectListItems extends ToTextItemTransformation {
|
@ -1,11 +1,11 @@
|
||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import TextItem from '../TextItem.jsx';
|
||||
import HeadlineFinder from '../HeadlineFinder.jsx';
|
||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
|
||||
import ElementType from '../ElementType.jsx';
|
||||
import { headlineByLevel } from '../ElementType.jsx';
|
||||
import { isDigit } from '../../functions.jsx'
|
||||
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../../ParseResult.jsx';
|
||||
import TextItem from '../../TextItem.jsx';
|
||||
import HeadlineFinder from '../../HeadlineFinder.jsx';
|
||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../../Annotation.jsx';
|
||||
import ElementType from '../../ElementType.jsx';
|
||||
import { headlineByLevel } from '../../ElementType.jsx';
|
||||
import { isDigit } from '../../../functions.jsx'
|
||||
|
||||
//Detect table of contents pages
|
||||
export default class DetectTOC extends ToTextItemTransformation {
|
@ -1,8 +1,8 @@
|
||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import { REMOVED_ANNOTATION } from '../Annotation.jsx';
|
||||
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../../ParseResult.jsx';
|
||||
import { REMOVED_ANNOTATION } from '../../Annotation.jsx';
|
||||
|
||||
import { isDigit } from '../../functions.jsx'
|
||||
import { isDigit } from '../../../functions.jsx'
|
||||
|
||||
|
||||
function hashCodeIgnoringSpacesAndNumbers(string) {
|
@ -1,7 +1,7 @@
|
||||
import ToTextItemTransformation from './ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import TextItem from '../TextItem.jsx';
|
||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
|
||||
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../../ParseResult.jsx';
|
||||
import TextItem from '../../TextItem.jsx';
|
||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../../Annotation.jsx';
|
||||
|
||||
// Converts vertical text to horizontal
|
||||
export default class VerticalToHorizontal extends ToTextItemTransformation {
|
@ -1,8 +1,8 @@
|
||||
import ToTextItemBlockTransformation from './ToTextItemBlockTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import { DETECTED_ANNOTATION } from '../Annotation.jsx';
|
||||
import ElementType from '../ElementType.jsx';
|
||||
import { minXFromBlocks } from '../../textItemFunctions.jsx';
|
||||
import ToTextItemBlockTransformation from '..//ToTextItemBlockTransformation.jsx';
|
||||
import ParseResult from '../../ParseResult.jsx';
|
||||
import { DETECTED_ANNOTATION } from '../../Annotation.jsx';
|
||||
import ElementType from '../../ElementType.jsx';
|
||||
import { minXFromBlocks } from '../../../textItemFunctions.jsx';
|
||||
|
||||
//Detect items which are code/quote blocks
|
||||
export default class DetectCodeQuoteBlocks extends ToTextItemBlockTransformation {
|
@ -1,7 +1,7 @@
|
||||
import ToTextItemBlockTransformation from './ToTextItemBlockTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import { MODIFIED_ANNOTATION, UNCHANGED_ANNOTATION } from '../Annotation.jsx';
|
||||
import ElementType from '../ElementType.jsx';
|
||||
import ToTextItemBlockTransformation from '..//ToTextItemBlockTransformation.jsx';
|
||||
import ParseResult from '../../ParseResult.jsx';
|
||||
import { MODIFIED_ANNOTATION, UNCHANGED_ANNOTATION } from '../../Annotation.jsx';
|
||||
import ElementType from '../../ElementType.jsx';
|
||||
|
||||
// Takes care of proper sub-item spacing/leveling
|
||||
export default class DetectListLevels extends ToTextItemBlockTransformation {
|
@ -1,8 +1,8 @@
|
||||
import ToTextItemBlockTransformation from './ToTextItemBlockTransformation.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import TextItemBlock from '../TextItemBlock.jsx';
|
||||
import { DETECTED_ANNOTATION } from '../Annotation.jsx';
|
||||
import { minXFromTextItems } from '../../textItemFunctions.jsx';
|
||||
import ToTextItemBlockTransformation from '../ToTextItemBlockTransformation.jsx';
|
||||
import ParseResult from '../../ParseResult.jsx';
|
||||
import TextItemBlock from '../../TextItemBlock.jsx';
|
||||
import { DETECTED_ANNOTATION } from '../../Annotation.jsx';
|
||||
import { minXFromTextItems } from '../../../textItemFunctions.jsx';
|
||||
|
||||
// Gathers lines to blocks
|
||||
export default class GatherBlocks extends ToTextItemBlockTransformation {
|
Loading…
Reference in New Issue
Block a user