mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-01-19 12:14:26 +01:00
[WIP] use fontMap to map fonts to formats
This commit is contained in:
parent
b5bb56b647
commit
d927b45087
@ -92,7 +92,7 @@ export default class DebugView extends React.Component {
|
||||
const showModificationCheckbox = lastTransformation.showModificationCheckbox();
|
||||
const statisticsAsList = Object.keys(parseResult.globals).map((key, i) => {
|
||||
return <li key={ i }>
|
||||
{ key + ': ' + parseResult.globals[key] }
|
||||
{ key + ': ' + JSON.stringify(parseResult.globals[key]) }
|
||||
</li>
|
||||
});
|
||||
const messagesAsList = parseResult.messages.map((message, i) => {
|
||||
|
@ -25,26 +25,8 @@ export default class AppState {
|
||||
this.mainView = View.UPLOAD;
|
||||
this.fileBuffer;
|
||||
this.metadata;
|
||||
this.fontMap;
|
||||
this.pages = [];
|
||||
this.transformations = [
|
||||
new CalculateGlobalStats(),
|
||||
new CompactLines(),
|
||||
new RemoveRepetitiveElements(),
|
||||
new VerticalToHorizontal(),
|
||||
new PostprocessLines(),
|
||||
new DetectTOC(),
|
||||
new DetectListItems(),
|
||||
new DetectHeaders(),
|
||||
|
||||
new GatherBlocks(),
|
||||
new DetectCodeQuoteBlocks(),
|
||||
new DetectListLevels(),
|
||||
|
||||
// new DetectFormats(),
|
||||
// new HeadlineToUppercase(),
|
||||
new ToTextBlocks(),
|
||||
new ToMarkdown()];
|
||||
this.transformations ;
|
||||
|
||||
//bind functions
|
||||
this.render = this.render.bind(this);
|
||||
@ -66,11 +48,31 @@ export default class AppState {
|
||||
|
||||
storePdfPages(metadata, fontMap, pages) {
|
||||
this.metadata = metadata;
|
||||
this.fontMap = fontMap;
|
||||
this.pages = pages;
|
||||
this.fileBuffer = null;
|
||||
this.mainView = View.RESULT;
|
||||
|
||||
this.transformations = [
|
||||
new CalculateGlobalStats(fontMap),
|
||||
new CompactLines(),
|
||||
new RemoveRepetitiveElements(),
|
||||
new VerticalToHorizontal(),
|
||||
new PostprocessLines(),
|
||||
new DetectTOC(),
|
||||
new DetectListItems(),
|
||||
new DetectHeaders(),
|
||||
|
||||
new GatherBlocks(),
|
||||
new DetectCodeQuoteBlocks(),
|
||||
new DetectListLevels(),
|
||||
|
||||
// new DetectFormats(),
|
||||
// new HeadlineToUppercase(),
|
||||
new ToTextBlocks(),
|
||||
new ToMarkdown()];
|
||||
|
||||
this.render();
|
||||
|
||||
}
|
||||
|
||||
switchMainView(view) {
|
||||
|
@ -2,6 +2,7 @@ import { Enum } from 'enumify';
|
||||
import TextItem from './TextItem.jsx';
|
||||
import TextItemBlock from './TextItemBlock.jsx';
|
||||
|
||||
// A Markdown element
|
||||
export default class ElementType extends Enum {
|
||||
}
|
||||
|
||||
|
6
src/javascript/models/StringFormat.jsx
Normal file
6
src/javascript/models/StringFormat.jsx
Normal file
@ -0,0 +1,6 @@
|
||||
import { Enum } from 'enumify';
|
||||
|
||||
/**
 * Enum of the formats a font can be mapped to.
 *
 * The members (StringFormat.STANDARD, StringFormat.BOLD, ...) are created by
 * the initEnum() call below.
 */
export default class StringFormat extends Enum {}

// Names of the enum members, in declaration order.
const STRING_FORMAT_NAMES = ['STANDARD', 'BOLD', 'OBLIQUE', 'BOLD_OBLIQUE'];

StringFormat.initEnum(STRING_FORMAT_NAMES);
|
@ -1,14 +1,15 @@
|
||||
import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
|
||||
import ParseResult from '../../ParseResult.jsx';
|
||||
import StringFormat from '../../StringFormat.jsx';
|
||||
|
||||
export default class CalculateGlobalStats extends ToTextItemTransformation {
|
||||
|
||||
constructor() {
|
||||
constructor(fontMap) {
|
||||
super("Calculate Statistics");
|
||||
this.fontMap = fontMap;
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
|
||||
// Parse heights
|
||||
const heightToOccurrence = {};
|
||||
const fontToOccurrence = {};
|
||||
@ -48,6 +49,31 @@ export default class CalculateGlobalStats extends ToTextItemTransformation {
|
||||
const mostUsedDistance = parseInt(getMostUsedKey(distanceToOccurrence));
|
||||
|
||||
|
||||
const fontIdToName = [];
|
||||
const fontToFormats = new Map();
|
||||
// Walk over all fonts of the document and fill two collections:
//  - fontIdToName:  "<id> = <name>" strings, listed in the 'Fonts:' message
//  - fontToFormats: font id -> StringFormat, guessed from the font name
this.fontMap.forEach(function(value, key) {
    fontIdToName.push(key + " = " + value.name);
    // Font names are matched case-insensitively.
    const fontName = value.name.toLowerCase();
    let format;
    if (key == mostUsedFont) {
        // The most used font is always treated as standard, whatever its name says.
        format = StringFormat.STANDARD;
    } else if (fontName.includes('bold') && fontName.includes('oblique')) {
        // Fixed: this used to test 'bold' twice, which classified every bold
        // font as BOLD_OBLIQUE and made the plain BOLD branch unreachable.
        // The combined check has to come before the single-attribute checks.
        format = StringFormat.BOLD_OBLIQUE;
    } else if (fontName.includes('bold')) {
        format = StringFormat.BOLD;
    } else if (fontName.includes('oblique')) {
        format = StringFormat.OBLIQUE;
    } else if (fontName === maxHeightFont) {
        // NOTE(review): fontName is the lower-cased font *name*, whereas
        // mostUsedFont above is compared against the map *key*. Confirm that
        // maxHeightFont really holds a (lower-case) name; if it holds a font
        // id this branch can never match and should compare against key.
        format = StringFormat.BOLD;
    } else {
        format = StringFormat.STANDARD;
    }
    fontToFormats.set(key, format);
});
|
||||
fontIdToName.sort();
|
||||
|
||||
|
||||
|
||||
// Make a copy of the originals so that the following transformations don't modify them
|
||||
const newPages = parseResult.pages.map(page => {
|
||||
return {
|
||||
@ -68,11 +94,13 @@ export default class CalculateGlobalStats extends ToTextItemTransformation {
|
||||
mostUsedDistance: mostUsedDistance,
|
||||
maxHeight: maxHeight,
|
||||
maxHeightFont: maxHeightFont,
|
||||
fontToFormats: fontToFormats
|
||||
},
|
||||
messages: [
|
||||
'Items per height: ' + JSON.stringify(heightToOccurrence),
|
||||
'Items per font: ' + JSON.stringify(fontToOccurrence),
|
||||
'Items per distance: ' + JSON.stringify(distanceToOccurrence)
|
||||
'Items per distance: ' + JSON.stringify(distanceToOccurrence),
|
||||
'Fonts:' + JSON.stringify(fontIdToName)
|
||||
]
|
||||
});
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user