mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-06-25 03:51:33 +02:00
Fix duplicate headline bug
This commit is contained in:
parent
491e8c549a
commit
c952409c0e
@ -37,6 +37,7 @@ export default class DetectHeaders extends ItemTransformer {
|
|||||||
const itemsByLine = groupByLine(inputItems);
|
const itemsByLine = groupByLine(inputItems);
|
||||||
const itemToLevel: Map<string, HeadlineType> = new Map();
|
const itemToLevel: Map<string, HeadlineType> = new Map();
|
||||||
|
|
||||||
|
// TODO move the separate parts to different transformations (easier to debug and test)
|
||||||
|
|
||||||
// Handle title pages: Title pages often have multiple lines of extraordinary height.
|
// Handle title pages: Title pages often have multiple lines of extraordinary height.
|
||||||
// Starting the leveling here would already consume most of the available headline levels.
|
// Starting the leveling here would already consume most of the available headline levels.
|
||||||
@ -145,7 +146,10 @@ export default class DetectHeaders extends ItemTransformer {
|
|||||||
items: inputItems.map((item) => {
|
items: inputItems.map((item) => {
|
||||||
const headerType = itemToLevel.get(item.uuid);
|
const headerType = itemToLevel.get(item.uuid);
|
||||||
if (headerType) {
|
if (headerType) {
|
||||||
return itemWithType(item, headerType);
|
const hasAlreadyHeadline = item.data['types'] || [].find((t) => isHeadline(t));
|
||||||
|
if (!hasAlreadyHeadline) {
|
||||||
|
return itemWithType(item, headerType);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return item;
|
return item;
|
||||||
}),
|
}),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user