mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-24 16:54:12 +01:00
[WIP] don’t make paragraph bolds to headline
This commit is contained in:
parent
e19294f35f
commit
81518a857b
@ -91,16 +91,19 @@ export default class DetectHeaders extends ToTextItemTransformation {
|
||||
// Extra pass that tags untyped, all-uppercase items as headlines.
// It only runs while smallesHeadlineLevel is below 6; the type assigned is
// whatever headlineByLevel returns for smallesHeadlineLevel + 1.
if (smallesHeadlineLevel < 6) {
    const nextHeadlineType = headlineByLevel(smallesHeadlineLevel + 1);

    // Decides whether `item` should be tagged, given the item visited just
    // before it on the same page (undefined for the first item of a page).
    // The checks run in a fixed order and stop at the first failure, so
    // `item.text` is only touched once all earlier checks have passed.
    const qualifiesAsHeadline = (item, previous) => {
        // Items that already carry a type are left alone.
        if (item.type) {
            return false;
        }
        // Loose comparison kept on purpose: the operand types are not visible
        // here - TODO confirm both are numbers before switching to strict.
        if (item.height != mostUsedHeight) {
            return false;
        }
        // Only text set in something other than the most used font qualifies.
        if (item.font === mostUsedFont) {
            return false;
        }
        // The item must not directly continue the previous one: there is no
        // previous item, the previous y is smaller, the previous item has a
        // headline type, or the y gap exceeds twice mostUsedDistance.
        // NOTE(review): presumably y grows upwards on the page - confirm.
        const detachedFromPrevious = !previous
            || previous.y < item.y
            || (previous.type && previous.type.headline)
            || (previous.y - item.y > mostUsedDistance * 2);
        if (!detachedFromPrevious) {
            return false;
        }
        // Text that is unchanged by upper-casing (this includes text without
        // any letters) passes the final check.
        return item.text === item.text.toUpperCase();
    };

    parseResult.pages.forEach(page => {
        // Reset per page so the first item of each page has no predecessor.
        let previousItem;
        page.items.forEach(textItem => {
            if (qualifiesAsHeadline(textItem, previousItem)) {
                detectedHeaders++;
                textItem.annotation = DETECTED_ANNOTATION;
                textItem.type = nextHeadlineType;
            }
            previousItem = textItem;
        });
    });
}
|
||||
|
Loading…
Reference in New Issue
Block a user