mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-22 07:43:46 +01:00
WIP merge successive code blocks
This commit is contained in:
parent
e7ff939351
commit
bed3fd357b
@ -23,7 +23,7 @@ export default class DetectCodeBlocks extends ToPdfBlockViewTransformation {
|
|||||||
// TODO ==> combine quotes follow each other
|
// TODO ==> combine quotes follow each other
|
||||||
|
|
||||||
transform(parseResult:ParseResult) {
|
transform(parseResult:ParseResult) {
|
||||||
const {mostUsedHeight} = parseResult.globals;
|
const {mostUsedHeight, mostUsedDistance} = parseResult.globals;
|
||||||
|
|
||||||
var foundBlocks = 0;
|
var foundBlocks = 0;
|
||||||
const textCombiner = new TextItemCombiner({});
|
const textCombiner = new TextItemCombiner({});
|
||||||
@ -49,20 +49,32 @@ export default class DetectCodeBlocks extends ToPdfBlockViewTransformation {
|
|||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
const newBlocks = [];
|
const newBlocks = [];
|
||||||
|
var preceedingCodeBlock;
|
||||||
page.blocks.forEach(block => {
|
page.blocks.forEach(block => {
|
||||||
if (block.type) {
|
if (block.type) {
|
||||||
newBlocks.push(block);
|
newBlocks.push(block);
|
||||||
|
preceedingCodeBlock = null;
|
||||||
} else {
|
} else {
|
||||||
if (itemAreSuitable(block.textItems)) {
|
if (itemAreSuitable(block.textItems)) {
|
||||||
|
const mergeWithPreceedingCodeBlock = preceedingCodeBlock && preceedingCodeBlock.textItems[preceedingCodeBlock.textItems.length - 1].y - block.textItems[0].y < mostUsedDistance * 2;
|
||||||
|
if (mergeWithPreceedingCodeBlock) {
|
||||||
|
newBlocks.pop();
|
||||||
|
}
|
||||||
block.annotation = REMOVED_ANNOTATION;
|
block.annotation = REMOVED_ANNOTATION;
|
||||||
newBlocks.push(block);
|
newBlocks.push(block);
|
||||||
newBlocks.push(new PdfBlock({
|
if (mergeWithPreceedingCodeBlock) {
|
||||||
|
preceedingCodeBlock.textItems = preceedingCodeBlock.textItems.concat(textCombiner.combine(block.textItems));
|
||||||
|
} else {
|
||||||
|
preceedingCodeBlock = new PdfBlock({
|
||||||
type: CODE_BLOCK,
|
type: CODE_BLOCK,
|
||||||
annotation: ADDED_ANNOTATION,
|
annotation: ADDED_ANNOTATION,
|
||||||
textItems: textCombiner.combine(block.textItems)
|
textItems: textCombiner.combine(block.textItems)
|
||||||
}));
|
});
|
||||||
|
}
|
||||||
|
newBlocks.push(preceedingCodeBlock);
|
||||||
} else {
|
} else {
|
||||||
newBlocks.push(block);
|
newBlocks.push(block);
|
||||||
|
preceedingCodeBlock = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -10,10 +10,6 @@ export default class DetectPdfBlocks extends ToPdfBlockViewTransformation {
|
|||||||
super("Detect Blocks");
|
super("Detect Blocks");
|
||||||
}
|
}
|
||||||
|
|
||||||
showModificationCheckbox() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
createSummaryView(parseResult:ParseResult) {
|
createSummaryView(parseResult:ParseResult) {
|
||||||
return <div>
|
return <div>
|
||||||
Splitted into
|
Splitted into
|
||||||
|
Loading…
Reference in New Issue
Block a user