[WIP] remove MarkdownElements in favor of the ElementType enum

This commit is contained in:
Johannes Zillmann 2017-03-10 12:39:42 +01:00
parent 15c5946073
commit f8fecc4c1d
7 changed files with 70 additions and 95 deletions

View File

@ -1,37 +1,86 @@
import { Enum } from 'enumify';
import TextItem from './TextItem.jsx';
import TextItemBlock from './TextItemBlock.jsx';
/**
 * Enum class for the element types a text block can be tagged with
 * (for example `ElementType.CODE` or `ElementType.H1`).
 *
 * The class body is intentionally empty: the enum values and their
 * per-value properties are registered separately through
 * `ElementType.initEnum(...)`.
 */
export default class ElementType extends Enum {
}
/**
 * Builds the definition of a headline enum value.
 *
 * @param level number of '#' characters that prefix the rendered headline
 * @returns an object whose `toText(block)` renders the block's text items
 *          behind that markdown headline prefix
 */
function headlineDefinition(level: number) {
    return {
        toText(block: TextItemBlock) {
            return '#'.repeat(level) + ' ' + concatTextItems(block.textItems);
        }
    };
}

// Registers the enum values. Every value provides `toText(block)`, which
// `blockToText` calls to render a block of that type as markdown text.
ElementType.initEnum({
    H1: headlineDefinition(1),
    H2: headlineDefinition(2),
    H3: headlineDefinition(3),
    H4: headlineDefinition(4),
    H5: headlineDefinition(5),
    H6: headlineDefinition(6),
    TOC: {
        // Read by shouldFlushBlock: an item whose type sets this flag does
        // not force the currently stashed block to be flushed.
        mergeToBlock: true,
        toText(block: TextItemBlock) {
            return concatTextItems(block.textItems);
        }
    },
    FOOTNOTES: {
        mergeToBlock: true,
        // NOTE(review): the consumer of this flag is not visible here;
        // presumably following untyped items are merged into the footnote
        // block — confirm against the block-gathering transformation.
        mergeFollowingNonTypedItems: true,
        toText(block: TextItemBlock) {
            return concatTextItems(block.textItems);
        }
    },
    CODE: {
        // Wraps the text in a fenced code block. concatTextItems already
        // terminates the last item with '\n', so the closing fence starts
        // on its own line.
        toText(block: TextItemBlock) {
            return '```\n' + concatTextItems(block.textItems) + '```';
        }
    },
    LIST: {
        toText(block: TextItemBlock) {
            return concatTextItems(block.textItems);
        }
    },
    PARAGRAPH: {
        toText(block: TextItemBlock) {
            return concatTextItems(block.textItems);
        }
    }
});
//export default ElementType
/**
 * Renders a block as text.
 *
 * A block with a type delegates to that type's `toText`; a block without
 * a type is rendered as the plain concatenation of its text items.
 *
 * @param block the block to render
 * @returns the text representation of the block
 */
export function blockToText(block: TextItemBlock) {
    const elementType = block.type;
    if (elementType) {
        // Debug output of the resolved type, kept as in the original.
        console.debug(elementType);
        return elementType.toText(block);
    }
    return concatTextItems(block.textItems);
}
/**
 * Joins the `text` of every item into a single string, terminating each
 * item (including the last one) with a newline.
 *
 * @param textItems the items to join
 * @returns the joined text; the empty string for an empty list
 */
function concatTextItems(textItems: TextItem[]) {
    return textItems.map(entry => entry.text + '\n').join('');
}
export function headlineByLevel(level) {
if (level == 1) {

View File

@ -1,75 +0,0 @@
import TextItemBlock from './TextItemBlock.jsx';
import TextItemCombiner from './TextItemCombiner.jsx';
import TextItem from './TextItem.jsx';
// Display names of the markdown element types. They are used as the
// `type` tag of a block and compared against in blockToText.

// Headline levels 1-6, looked up by number through headlineByLevel.
export const HEADLINE1 = 'Headline 1';
export const HEADLINE2 = 'Headline 2';
export const HEADLINE3 = 'Headline 3';
export const HEADLINE4 = 'Headline 4';
export const HEADLINE5 = 'Headline 5';
export const HEADLINE6 = 'Headline 6';

// Non-headline block types.
export const PARAGRAPH = 'Paragraph';
export const LIST_BLOCK = 'List';
export const CODE_BLOCK = 'Code/Quote';
export const TOC_BLOCK = 'TOC';
export const FOOTNOTE_BLOCK = 'Footnotes';
/**
 * Maps a headline level to the matching headline type constant.
 *
 * @param level headline level, 1 to 6 (compared with loose equality, so
 *              a value such as '2' matches as well)
 * @returns HEADLINE1 ... HEADLINE6 for levels 1 ... 6
 * @throws a string message when the level matches none of 1 to 6
 */
export function headlineByLevel(level) {
    const headlines = [HEADLINE1, HEADLINE2, HEADLINE3, HEADLINE4, HEADLINE5, HEADLINE6];
    for (let index = 0; index < headlines.length; index++) {
        // Loose comparison on purpose: mirrors the original `level == n` chain.
        if (level == index + 1) {
            return headlines[index];
        }
    }
    throw "Unsupported headline level: " + level;
}
/**
 * Renders a block as markdown text, depending on its type.
 *
 * - no type: the text items are first combined with a TextItemCombiner,
 *   then concatenated
 * - CODE_BLOCK: the concatenated items wrapped in a ``` fence
 * - HEADLINE1 ... HEADLINE6: the concatenated items behind 1 ... 6 '#'
 * - any other type (including TOC_BLOCK): the plain concatenated items
 *
 * @param block the block to render
 * @returns the text representation of the block
 */
export function blockToText(block: TextItemBlock) {
    const kind = block.type;

    if (!kind) {
        //TODO mostUsedDistance
        const combined = new TextItemCombiner({}).combine(block.textItems).textItems;
        return concatTextItems(combined);
    }

    if (kind === CODE_BLOCK) {
        return '```\n' + concatTextItems(block.textItems) + '```';
    }

    // Map lookup matches the original switch: keys are distinct strings.
    const headlinePrefixes = new Map([
        [HEADLINE1, '# '],
        [HEADLINE2, '## '],
        [HEADLINE3, '### '],
        [HEADLINE4, '#### '],
        [HEADLINE5, '##### '],
        [HEADLINE6, '###### ']
    ]);

    // TOC_BLOCK and all remaining types are rendered without a prefix.
    //TODO real links
    //TODO de-duplicate with DetectLists ?
    const prefix = headlinePrefixes.get(kind) || '';
    return prefix + concatTextItems(block.textItems);
}
/**
 * Concatenates the `text` of all items, appending a newline after every
 * item (the last one included).
 *
 * @param textItems the items whose text is concatenated
 * @returns the concatenated text; '' when there are no items
 */
function concatTextItems(textItems: TextItem[]) {
    return textItems.reduce((joined, item) => joined + (item.text + '\n'), '');
}

View File

@ -3,7 +3,7 @@ import ParseResult from '../ParseResult.jsx';
import TextItemBlock from '../TextItemBlock.jsx';
import TextItemCombiner from '../TextItemCombiner.jsx';
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
import { CODE_BLOCK } from '../MarkdownElements.jsx';
import ElementType from '../ElementType.jsx';
import { minXFromBlocks } from '../../textItemFunctions.jsx';
//Detect quotes, code, etc., which are transformed to markdown code syntax
@ -55,7 +55,7 @@ export default class DetectCodeBlocks extends ToTextItemBlockTransformation {
preceedingCodeBlock.parsedElements.add(combineResult.parsedElements);
} else {
preceedingCodeBlock = new TextItemBlock({
type: CODE_BLOCK,
type: ElementType.CODE,
annotation: ADDED_ANNOTATION,
textItems: combineResult.textItems,
parsedElements: combineResult.parsedElements

View File

@ -3,7 +3,8 @@ import ParseResult from '../ParseResult.jsx';
import TextItemBlock from '../TextItemBlock.jsx';
import TextItemCombiner from '../TextItemCombiner.jsx';
import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
import { HEADLINE1, HEADLINE2, headlineByLevel } from '../MarkdownElements.jsx';
import ElementType from '../ElementType.jsx';
import { headlineByLevel } from '../ElementType.jsx';
//Detect headlines
export default class DetectHeadlines extends ToTextItemBlockTransformation {
@ -157,9 +158,9 @@ function convertMaxHeaders(pages, maxHeight, mostUsedHeight, textCombiner) {
block.annotation = REMOVED_ANNOTATION;
const combineResult = textCombiner.combine(block.textItems);
if (height == maxHeight) {
addNewBlock(newBlocks, combineResult, HEADLINE1);
addNewBlock(newBlocks, combineResult, ElementType.H1);
} else if (combineResult.textItems.length == 1) {
addNewBlock(newBlocks, combineResult, HEADLINE2);
addNewBlock(newBlocks, combineResult, ElementType.H2);
}
}
});

View File

@ -4,7 +4,7 @@ import TextItem from '../TextItem.jsx';
import TextItemBlock from '../TextItemBlock.jsx';
import TextItemCombiner from '../TextItemCombiner.jsx';
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
import { PARAGRAPH, LIST_BLOCK } from '../MarkdownElements.jsx';
import ElementType from '../ElementType.jsx';
import { minXFromBlocks } from '../../textItemFunctions.jsx';
//Detect lists and wrap their items into blocks of type LIST
@ -83,14 +83,14 @@ export default class DetectLists extends ToTextItemBlockTransformation {
if (itemsBeforeFirstLineItem.length > 0) {
newBlocks.push(new TextItemBlock({
textItems: itemsBeforeFirstLineItem,
type: PARAGRAPH,
type: ElementType.PARAGRAPH,
annotation: ADDED_ANNOTATION
}));
}
//TODO display with whitespace pre support
newBlocks.push(new TextItemBlock({
textItems: listBlockItems,
type: LIST_BLOCK,
type: ElementType.LIST,
annotation: ADDED_ANNOTATION,
parsedElements: combineResult.parsedElements
}));

View File

@ -60,7 +60,6 @@ function shouldFlushBlock(stashedBlock, item, minX, mostUsedDistance) {
if (item.type) {
return !item.type.mergeToBlock;
} else {
console.debug(item);
const lastItem = stashedBlock.textItems[stashedBlock.textItems.length - 1];
return shouldSplit(lastItem, item, minX, mostUsedDistance);
}

View File

@ -2,7 +2,7 @@ import React from 'react';
import Transformation from './Transformation.jsx';
import TextPageView from '../../components/debug/TextPageView.jsx';
import ParseResult from '../ParseResult.jsx';
import { blockToText } from '../MarkdownElements.jsx';
import { blockToText } from '../ElementType.jsx';
export default class ToTextBlocks extends Transformation {
@ -18,7 +18,8 @@ export default class ToTextBlocks extends Transformation {
parseResult.pages.forEach(page => {
const textItems = [];
page.items.forEach(block => {
const category = block.type ? block.type : 'Unknown';
//TODO category to type (before have no unknowns, have paragraph)
const category = block.type ? block.type.name : 'Unknown';
textItems.push({
category: category,
text: blockToText(block)