diff --git a/src/javascript/models/MarkdownElements.jsx b/src/javascript/models/MarkdownElements.jsx new file mode 100644 index 0000000..3da3a5b --- /dev/null +++ b/src/javascript/models/MarkdownElements.jsx @@ -0,0 +1,25 @@ +import PdfBlock from './BlockPage.jsx'; + +export const CODE_BLOCK = "Code/Quote"; +export const HEADLINE1 = "Headline 1"; + +export function blockToText(block: PdfBlock) { + const text = concatTextItems(block); + switch (block.type) { + case CODE_BLOCK: + return '```\n' + text + '```' + case HEADLINE1: + return '#' + text; + default: + return text; + } +} + + +function concatTextItems(block: PdfBlock) { + var text = ''; + block.textItems.forEach(item => { + text += item.text + '\n'; + }); + return text; +} \ No newline at end of file diff --git a/src/javascript/models/transformations/DetectCodeBlocks.jsx b/src/javascript/models/transformations/DetectCodeBlocks.jsx index c8d22e1..84f488b 100644 --- a/src/javascript/models/transformations/DetectCodeBlocks.jsx +++ b/src/javascript/models/transformations/DetectCodeBlocks.jsx @@ -4,6 +4,7 @@ import ParseResult from '../ParseResult.jsx'; import PdfBlock from '../PdfBlock.jsx'; import TextItemCombiner from '../TextItemCombiner.jsx'; import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx'; +import { CODE_BLOCK } from '../MarkdownElements.jsx'; //Detect quotes, code etc.. which is transformed to markdown code syntax export default class DetectCodeBlocks extends ToPdfBlockViewTransformation { @@ -56,7 +57,7 @@ export default class DetectCodeBlocks extends ToPdfBlockViewTransformation { block.annotation = REMOVED_ANNOTATION; newBlocks.push(block); newBlocks.push(new PdfBlock({ - type: 'Code/Quote', + type: CODE_BLOCK, annotation: ADDED_ANNOTATION, textItems: textCombiner.combine(block.textItems) })); diff --git a/src/javascript/models/transformations/ToTextBlocks.jsx b/src/javascript/models/transformations/ToTextBlocks.jsx index cf01f34..315deaf 100644 --- a/src/javascript/models/transformations/ToTextBlocks.jsx +++ b/src/javascript/models/transformations/ToTextBlocks.jsx @@ -3,6 +3,7 @@ import Transformation from './Transformation.jsx'; import BlockPageView from '../../components/debug/BlockPageView.jsx'; import ParseResult from '../ParseResult.jsx'; import BlockPage from '../BlockPage.jsx'; +import { blockToText } from '../MarkdownElements.jsx'; export default class ToTextBlocks extends Transformation { @@ -18,17 +19,10 @@ export default class ToTextBlocks extends Transformation { const blocks = []; parseResult.content.forEach(page => { page.blocks.forEach(block => { - var text = ''; - block.textItems.forEach(item => { - // if (item.markdownElement) { - // text = item.markdownElement.transformText(item.text); - // } - text += '\n' + item.text; - }); const category = block.type ? block.type : 'Unknown'; blocks.push({ category: category, - text: text + text: blockToText(block) }); });