mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-21 23:33:31 +01:00
WIP markdown formatting for code/quote
This commit is contained in:
parent
f93d1e4aa1
commit
e7ff939351
25
src/javascript/models/MarkdownElements.jsx
Normal file
25
src/javascript/models/MarkdownElements.jsx
Normal file
@ -0,0 +1,25 @@
|
||||
import PdfBlock from './BlockPage.jsx';
|
||||
|
||||
// Block type label for code/quote blocks; blockToText wraps these in a fenced code block.
export const CODE_BLOCK = "Code/Quote";
// Block type label for first-level headlines; blockToText renders these as a markdown heading.
export const HEADLINE1 = "Headline 1";
|
||||
|
||||
export function blockToText(block: PdfBlock) {
|
||||
const text = concatTextItems(block);
|
||||
switch (block.type) {
|
||||
case CODE_BLOCK:
|
||||
return '```\n' + text + '```'
|
||||
case HEADLINE1:
|
||||
return '#' + text;
|
||||
default:
|
||||
return text;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function concatTextItems(block: PdfBlock) {
|
||||
var text = '';
|
||||
block.textItems.forEach(item => {
|
||||
text += item.text + '\n';
|
||||
});
|
||||
return text;
|
||||
}
|
@ -4,6 +4,7 @@ import ParseResult from '../ParseResult.jsx';
|
||||
import PdfBlock from '../PdfBlock.jsx';
|
||||
import TextItemCombiner from '../TextItemCombiner.jsx';
|
||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
|
||||
import { CODE_BLOCK } from '../MarkdownElements.jsx';
|
||||
|
||||
// Detect quotes, code, etc., which are transformed to markdown code syntax
|
||||
export default class DetectCodeBlocks extends ToPdfBlockViewTransformation {
|
||||
@ -56,7 +57,7 @@ export default class DetectCodeBlocks extends ToPdfBlockViewTransformation {
|
||||
block.annotation = REMOVED_ANNOTATION;
|
||||
newBlocks.push(block);
|
||||
newBlocks.push(new PdfBlock({
|
||||
type: 'Code/Quote',
|
||||
type: CODE_BLOCK,
|
||||
annotation: ADDED_ANNOTATION,
|
||||
textItems: textCombiner.combine(block.textItems)
|
||||
}));
|
||||
|
@ -3,6 +3,7 @@ import Transformation from './Transformation.jsx';
|
||||
import BlockPageView from '../../components/debug/BlockPageView.jsx';
|
||||
import ParseResult from '../ParseResult.jsx';
|
||||
import BlockPage from '../BlockPage.jsx';
|
||||
import { blockToText } from '../MarkdownElements.jsx';
|
||||
|
||||
export default class ToTextBlocks extends Transformation {
|
||||
|
||||
@ -18,17 +19,10 @@ export default class ToTextBlocks extends Transformation {
|
||||
const blocks = [];
|
||||
parseResult.content.forEach(page => {
|
||||
page.blocks.forEach(block => {
|
||||
var text = '';
|
||||
block.textItems.forEach(item => {
|
||||
// if (item.markdownElement) {
|
||||
// text = item.markdownElement.transformText(item.text);
|
||||
// }
|
||||
text += '\n' + item.text;
|
||||
});
|
||||
const category = block.type ? block.type : 'Unknown';
|
||||
blocks.push({
|
||||
category: category,
|
||||
text: text
|
||||
text: blockToText(block)
|
||||
});
|
||||
});
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user