mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-25 09:13:49 +01:00
WIP markdown formatting for code/quote
This commit is contained in:
parent
f93d1e4aa1
commit
e7ff939351
25
src/javascript/models/MarkdownElements.jsx
Normal file
25
src/javascript/models/MarkdownElements.jsx
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
import PdfBlock from './BlockPage.jsx';
|
||||||
|
|
||||||
|
export const CODE_BLOCK = "Code/Quote";
|
||||||
|
export const HEADLINE1 = "Headline 1";
|
||||||
|
|
||||||
|
export function blockToText(block: PdfBlock) {
|
||||||
|
const text = concatTextItems(block);
|
||||||
|
switch (block.type) {
|
||||||
|
case CODE_BLOCK:
|
||||||
|
return '```\n' + text + '```'
|
||||||
|
case HEADLINE1:
|
||||||
|
return '#' + text;
|
||||||
|
default:
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
function concatTextItems(block: PdfBlock) {
|
||||||
|
var text = '';
|
||||||
|
block.textItems.forEach(item => {
|
||||||
|
text += item.text + '\n';
|
||||||
|
});
|
||||||
|
return text;
|
||||||
|
}
|
@ -4,6 +4,7 @@ import ParseResult from '../ParseResult.jsx';
|
|||||||
import PdfBlock from '../PdfBlock.jsx';
|
import PdfBlock from '../PdfBlock.jsx';
|
||||||
import TextItemCombiner from '../TextItemCombiner.jsx';
|
import TextItemCombiner from '../TextItemCombiner.jsx';
|
||||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
|
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
|
||||||
|
import { CODE_BLOCK } from '../MarkdownElements.jsx';
|
||||||
|
|
||||||
//Detect quotes, code, etc., which are transformed to markdown code syntax
|
//Detect quotes, code, etc., which are transformed to markdown code syntax
|
||||||
export default class DetectCodeBlocks extends ToPdfBlockViewTransformation {
|
export default class DetectCodeBlocks extends ToPdfBlockViewTransformation {
|
||||||
@ -56,7 +57,7 @@ export default class DetectCodeBlocks extends ToPdfBlockViewTransformation {
|
|||||||
block.annotation = REMOVED_ANNOTATION;
|
block.annotation = REMOVED_ANNOTATION;
|
||||||
newBlocks.push(block);
|
newBlocks.push(block);
|
||||||
newBlocks.push(new PdfBlock({
|
newBlocks.push(new PdfBlock({
|
||||||
type: 'Code/Quote',
|
type: CODE_BLOCK,
|
||||||
annotation: ADDED_ANNOTATION,
|
annotation: ADDED_ANNOTATION,
|
||||||
textItems: textCombiner.combine(block.textItems)
|
textItems: textCombiner.combine(block.textItems)
|
||||||
}));
|
}));
|
||||||
|
@ -3,6 +3,7 @@ import Transformation from './Transformation.jsx';
|
|||||||
import BlockPageView from '../../components/debug/BlockPageView.jsx';
|
import BlockPageView from '../../components/debug/BlockPageView.jsx';
|
||||||
import ParseResult from '../ParseResult.jsx';
|
import ParseResult from '../ParseResult.jsx';
|
||||||
import BlockPage from '../BlockPage.jsx';
|
import BlockPage from '../BlockPage.jsx';
|
||||||
|
import { blockToText } from '../MarkdownElements.jsx';
|
||||||
|
|
||||||
export default class ToTextBlocks extends Transformation {
|
export default class ToTextBlocks extends Transformation {
|
||||||
|
|
||||||
@ -18,17 +19,10 @@ export default class ToTextBlocks extends Transformation {
|
|||||||
const blocks = [];
|
const blocks = [];
|
||||||
parseResult.content.forEach(page => {
|
parseResult.content.forEach(page => {
|
||||||
page.blocks.forEach(block => {
|
page.blocks.forEach(block => {
|
||||||
var text = '';
|
|
||||||
block.textItems.forEach(item => {
|
|
||||||
// if (item.markdownElement) {
|
|
||||||
// text = item.markdownElement.transformText(item.text);
|
|
||||||
// }
|
|
||||||
text += '\n' + item.text;
|
|
||||||
});
|
|
||||||
const category = block.type ? block.type : 'Unknown';
|
const category = block.type ? block.type : 'Unknown';
|
||||||
blocks.push({
|
blocks.push({
|
||||||
category: category,
|
category: category,
|
||||||
text: text
|
text: blockToText(block)
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user