mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-21 23:33:31 +01:00
[WIP] Don’t turn bold paragraph text into headlines
This commit is contained in:
parent
e19294f35f
commit
81518a857b
@ -91,16 +91,19 @@ export default class DetectHeaders extends ToTextItemTransformation {
|
|||||||
if (smallesHeadlineLevel < 6) {
|
if (smallesHeadlineLevel < 6) {
|
||||||
const nextHeadlineType = headlineByLevel(smallesHeadlineLevel + 1);
|
const nextHeadlineType = headlineByLevel(smallesHeadlineLevel + 1);
|
||||||
parseResult.pages.forEach(page => {
|
parseResult.pages.forEach(page => {
|
||||||
|
var lastItem;
|
||||||
page.items.forEach(textItem => {
|
page.items.forEach(textItem => {
|
||||||
if (!textItem.type
|
if (!textItem.type
|
||||||
&& textItem.height == mostUsedHeight
|
&& textItem.height == mostUsedHeight
|
||||||
&& textItem.font !== mostUsedFont
|
&& textItem.font !== mostUsedFont
|
||||||
|
&& (!lastItem || lastItem.y < textItem.y || (lastItem.type && lastItem.type.headline) || (lastItem.y - textItem.y > mostUsedDistance * 2))
|
||||||
&& textItem.text === textItem.text.toUpperCase()
|
&& textItem.text === textItem.text.toUpperCase()
|
||||||
) {
|
) {
|
||||||
detectedHeaders++;
|
detectedHeaders++;
|
||||||
textItem.annotation = DETECTED_ANNOTATION;
|
textItem.annotation = DETECTED_ANNOTATION;
|
||||||
textItem.type = nextHeadlineType;
|
textItem.type = nextHeadlineType;
|
||||||
}
|
}
|
||||||
|
lastItem = textItem;
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user