mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-01-03 20:28:54 +01:00
Enable Font calculations
This commit is contained in:
parent
1b530c6c29
commit
2d14de5167
7
core/src/FontType.ts
Normal file
7
core/src/FontType.ts
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
/**
 * Style classification that can be attached to a font.
 *
 * Every member is a string enum whose value equals its own name, so a member
 * compares equal to (and serializes as) the plain string, e.g. `'BOLD'`.
 */
enum FontType {
  /** Bold style. */
  BOLD = 'BOLD',
  /** Oblique (slanted) style. */
  OBLIQUE = 'OBLIQUE',
  /** Bold and oblique combined. */
  BOLD_OBLIQUE = 'BOLD_OBLIQUE',
}

export default FontType;
|
@ -2,6 +2,7 @@ import Item from '../Item';
|
|||||||
import ItemResult from '../ItemResult';
|
import ItemResult from '../ItemResult';
|
||||||
import ItemTransformer from './ItemTransformer';
|
import ItemTransformer from './ItemTransformer';
|
||||||
import TransformContext from './TransformContext';
|
import TransformContext from './TransformContext';
|
||||||
|
import FontType from '../FontType';
|
||||||
|
|
||||||
export default class CalculateStatistics extends ItemTransformer {
|
export default class CalculateStatistics extends ItemTransformer {
|
||||||
constructor() {
|
constructor() {
|
||||||
@ -13,12 +14,13 @@ export default class CalculateStatistics extends ItemTransformer {
|
|||||||
'mostUsedDistance',
|
'mostUsedDistance',
|
||||||
'maxHeight',
|
'maxHeight',
|
||||||
'maxHeightFont',
|
'maxHeightFont',
|
||||||
// 'fontToFormats',
|
'fontToFormats',
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
transform(_: TransformContext, items: Item[]): ItemResult {
|
transform(context: TransformContext, items: Item[]): ItemResult {
|
||||||
|
// const heightToOccurrence: { [key: string]: number } = {};
|
||||||
const heightToOccurrence = {};
|
const heightToOccurrence = {};
|
||||||
const fontToOccurrence = {};
|
const fontToOccurrence = {};
|
||||||
let maxHeight = 0;
|
let maxHeight = 0;
|
||||||
@ -34,6 +36,7 @@ export default class CalculateStatistics extends ItemTransformer {
|
|||||||
maxHeightFont = itemFont;
|
maxHeightFont = itemFont;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
// TODO: is parseInt really needed here?
|
||||||
const mostUsedHeight = parseInt(getMostUsedKey(heightToOccurrence));
|
const mostUsedHeight = parseInt(getMostUsedKey(heightToOccurrence));
|
||||||
const mostUsedFont = getMostUsedKey(fontToOccurrence);
|
const mostUsedFont = getMostUsedKey(fontToOccurrence);
|
||||||
|
|
||||||
@ -48,8 +51,6 @@ export default class CalculateStatistics extends ItemTransformer {
|
|||||||
const itemText = item.data['str'];
|
const itemText = item.data['str'];
|
||||||
const itemY = item.data['y'];
|
const itemY = item.data['y'];
|
||||||
if (itemHeight == mostUsedHeight && itemText.trim().length > 0) {
|
if (itemHeight == mostUsedHeight && itemText.trim().length > 0) {
|
||||||
console.log('__', itemY, lastItemOfMostUsedHeight);
|
|
||||||
|
|
||||||
if (lastItemOfMostUsedHeight && itemY != lastItemOfMostUsedHeight.data['y']) {
|
if (lastItemOfMostUsedHeight && itemY != lastItemOfMostUsedHeight.data['y']) {
|
||||||
const distance = lastItemOfMostUsedHeight.data['y'] - itemY;
|
const distance = lastItemOfMostUsedHeight.data['y'] - itemY;
|
||||||
if (distance > 0) {
|
if (distance > 0) {
|
||||||
@ -64,28 +65,17 @@ export default class CalculateStatistics extends ItemTransformer {
|
|||||||
});
|
});
|
||||||
const mostUsedDistance = parseInt(getMostUsedKey(distanceToOccurrence));
|
const mostUsedDistance = parseInt(getMostUsedKey(distanceToOccurrence));
|
||||||
|
|
||||||
// const fontIdToName = [];
|
const fontIdToName: string[] = [];
|
||||||
// const fontToFormats = new Map();
|
const fontToType = new Map();
|
||||||
// this.fontMap.forEach(function (value, key) {
|
context.fontMap.forEach(function (value, key) {
|
||||||
// fontIdToName.push(key + ' = ' + value.name);
|
const fontName = value['name'];
|
||||||
// const fontName = value.name.toLowerCase();
|
fontIdToName.push(`${key} = ${fontName}`);
|
||||||
// var format;
|
const formatType = getFormatType(key, fontName, mostUsedFont, maxHeightFont);
|
||||||
// if (key == mostUsedFont) {
|
if (formatType) {
|
||||||
// format = null;
|
fontToType.set(key, formatType);
|
||||||
// } else if (fontName.includes('bold') && (fontName.includes('oblique') || fontName.includes('italic'))) {
|
}
|
||||||
// format = WordFormat.BOLD_OBLIQUE;
|
});
|
||||||
// } else if (fontName.includes('bold')) {
|
fontIdToName.sort();
|
||||||
// format = WordFormat.BOLD;
|
|
||||||
// } else if (fontName.includes('oblique') || fontName.includes('italic')) {
|
|
||||||
// format = WordFormat.OBLIQUE;
|
|
||||||
// } else if (fontName === maxHeightFont) {
|
|
||||||
// format = WordFormat.BOLD;
|
|
||||||
// }
|
|
||||||
// if (format) {
|
|
||||||
// fontToFormats.set(key, format.name);
|
|
||||||
// }
|
|
||||||
// });
|
|
||||||
// fontIdToName.sort();
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
items: items,
|
items: items,
|
||||||
@ -95,21 +85,21 @@ export default class CalculateStatistics extends ItemTransformer {
|
|||||||
mostUsedDistance: mostUsedDistance,
|
mostUsedDistance: mostUsedDistance,
|
||||||
maxHeight: maxHeight,
|
maxHeight: maxHeight,
|
||||||
maxHeightFont: maxHeightFont,
|
maxHeightFont: maxHeightFont,
|
||||||
// fontToFormats: fontToFormats,
|
fontToFormats: fontToType,
|
||||||
},
|
},
|
||||||
messages: [
|
messages: [
|
||||||
'Items per height: ' + JSON.stringify(heightToOccurrence),
|
'Items per height: ' + JSON.stringify(heightToOccurrence),
|
||||||
'Items per font: ' + JSON.stringify(fontToOccurrence),
|
'Items per font: ' + JSON.stringify(fontToOccurrence),
|
||||||
'Items per distance: ' + JSON.stringify(distanceToOccurrence),
|
'Items per distance: ' + JSON.stringify(distanceToOccurrence),
|
||||||
// 'Fonts:' + JSON.stringify(fontIdToName),
|
'Fonts:' + JSON.stringify(fontIdToName),
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function getMostUsedKey(keyToOccurrence) {
|
function getMostUsedKey(keyToOccurrence): any {
|
||||||
var maxOccurence = 0;
|
var maxOccurence = 0;
|
||||||
var maxKey;
|
var maxKey: string | undefined;
|
||||||
Object.keys(keyToOccurrence).map((element) => {
|
Object.keys(keyToOccurrence).map((element) => {
|
||||||
if (!maxKey || keyToOccurrence[element] > maxOccurence) {
|
if (!maxKey || keyToOccurrence[element] > maxOccurence) {
|
||||||
maxOccurence = keyToOccurrence[element];
|
maxOccurence = keyToOccurrence[element];
|
||||||
@ -118,3 +108,27 @@ function getMostUsedKey(keyToOccurrence) {
|
|||||||
});
|
});
|
||||||
return maxKey;
|
return maxKey;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Derives the style classification of a font from its id and name.
 *
 * Rules, checked in this order:
 * 1. The most used font never gets a type, whatever its name says.
 * 2. A name containing "bold" together with "oblique" or "italic" is
 *    `BOLD_OBLIQUE`.
 * 3. A name containing "bold" is `BOLD`.
 * 4. A name containing "oblique" or "italic" is `OBLIQUE`.
 * 5. A font whose name gives no hint is `BOLD` when it is the max-height font.
 *
 * Name matching is a case-insensitive substring search.
 *
 * @param fontId - Id of the font being classified; compared against
 *   `mostUsedFont` and `maxHeightFont`.
 * @param fontName - Name of the font, searched for style keywords.
 * @param mostUsedFont - Id of the most used font, or `undefined` if unknown.
 * @param maxHeightFont - Id of the font with the maximum height, or
 *   `undefined` if unknown.
 * @returns The matching `FontType`, or `undefined` when no rule applies.
 */
function getFormatType(
  fontId: string,
  fontName: string,
  mostUsedFont: string | undefined,
  maxHeightFont: string | undefined,
): FontType | undefined {
  const fontNameLowerCase = fontName.toLowerCase();
  const isBold = fontNameLowerCase.includes('bold');
  const isOblique = fontNameLowerCase.includes('oblique') || fontNameLowerCase.includes('italic');

  // The most used font is left unclassified, even if its name carries a style keyword.
  if (fontId === mostUsedFont) {
    return undefined;
  }
  if (isBold && isOblique) {
    return FontType.BOLD_OBLIQUE;
  }
  if (isBold) {
    return FontType.BOLD;
  }
  if (isOblique) {
    return FontType.OBLIQUE;
  }
  if (fontId === maxHeightFont) {
    // TODO: this was the wrong comparison in the old app, so it probably never returned bold there.
    return FontType.BOLD;
  }
  return undefined;
}
|
||||||
|
Loading…
Reference in New Issue
Block a user