[WIP] use fontMap to map fonts to formats

This commit is contained in:
Johannes Zillmann 2017-03-22 20:08:34 +01:00
parent b5bb56b647
commit d927b45087
5 changed files with 61 additions and 24 deletions

View File

@ -92,7 +92,7 @@ export default class DebugView extends React.Component {
const showModificationCheckbox = lastTransformation.showModificationCheckbox(); const showModificationCheckbox = lastTransformation.showModificationCheckbox();
const statisticsAsList = Object.keys(parseResult.globals).map((key, i) => { const statisticsAsList = Object.keys(parseResult.globals).map((key, i) => {
return <li key={ i }> return <li key={ i }>
{ key + ': ' + parseResult.globals[key] } { key + ': ' + JSON.stringify(parseResult.globals[key]) }
</li> </li>
}); });
const messagesAsList = parseResult.messages.map((message, i) => { const messagesAsList = parseResult.messages.map((message, i) => {

View File

@ -25,26 +25,8 @@ export default class AppState {
this.mainView = View.UPLOAD; this.mainView = View.UPLOAD;
this.fileBuffer; this.fileBuffer;
this.metadata; this.metadata;
this.fontMap;
this.pages = []; this.pages = [];
this.transformations = [ this.transformations ;
new CalculateGlobalStats(),
new CompactLines(),
new RemoveRepetitiveElements(),
new VerticalToHorizontal(),
new PostprocessLines(),
new DetectTOC(),
new DetectListItems(),
new DetectHeaders(),
new GatherBlocks(),
new DetectCodeQuoteBlocks(),
new DetectListLevels(),
// new DetectFormats(),
// new HeadlineToUppercase(),
new ToTextBlocks(),
new ToMarkdown()];
//bind functions //bind functions
this.render = this.render.bind(this); this.render = this.render.bind(this);
@ -66,11 +48,31 @@ export default class AppState {
storePdfPages(metadata, fontMap, pages) { storePdfPages(metadata, fontMap, pages) {
this.metadata = metadata; this.metadata = metadata;
this.fontMap = fontMap;
this.pages = pages; this.pages = pages;
this.fileBuffer = null; this.fileBuffer = null;
this.mainView = View.RESULT; this.mainView = View.RESULT;
this.transformations = [
new CalculateGlobalStats(fontMap),
new CompactLines(),
new RemoveRepetitiveElements(),
new VerticalToHorizontal(),
new PostprocessLines(),
new DetectTOC(),
new DetectListItems(),
new DetectHeaders(),
new GatherBlocks(),
new DetectCodeQuoteBlocks(),
new DetectListLevels(),
// new DetectFormats(),
// new HeadlineToUppercase(),
new ToTextBlocks(),
new ToMarkdown()];
this.render(); this.render();
} }
switchMainView(view) { switchMainView(view) {

View File

@ -2,6 +2,7 @@ import { Enum } from 'enumify';
import TextItem from './TextItem.jsx'; import TextItem from './TextItem.jsx';
import TextItemBlock from './TextItemBlock.jsx'; import TextItemBlock from './TextItemBlock.jsx';
// A Markdown element
export default class ElementType extends Enum { export default class ElementType extends Enum {
} }

View File

@ -0,0 +1,6 @@
import { Enum } from 'enumify';
/**
 * Enumeration of the text formats a string can have.
 *
 * Built on enumify's Enum base class: the class body is empty and the
 * enum values are registered by the initEnum() call that follows.
 */
export default class StringFormat extends Enum {
}
// Registers the four enum values: STANDARD, BOLD, OBLIQUE and BOLD_OBLIQUE.
StringFormat.initEnum(['STANDARD', 'BOLD', 'OBLIQUE', 'BOLD_OBLIQUE'])

View File

@ -1,14 +1,15 @@
import ToTextItemTransformation from '../ToTextItemTransformation.jsx'; import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
import ParseResult from '../../ParseResult.jsx'; import ParseResult from '../../ParseResult.jsx';
import StringFormat from '../../StringFormat.jsx';
export default class CalculateGlobalStats extends ToTextItemTransformation { export default class CalculateGlobalStats extends ToTextItemTransformation {
constructor() { constructor(fontMap) {
super("Calculate Statistics"); super("Calculate Statistics");
this.fontMap = fontMap;
} }
transform(parseResult:ParseResult) { transform(parseResult:ParseResult) {
// Parse heights // Parse heights
const heightToOccurrence = {}; const heightToOccurrence = {};
const fontToOccurrence = {}; const fontToOccurrence = {};
@ -48,6 +49,31 @@ export default class CalculateGlobalStats extends ToTextItemTransformation {
const mostUsedDistance = parseInt(getMostUsedKey(distanceToOccurrence)); const mostUsedDistance = parseInt(getMostUsedKey(distanceToOccurrence));
// Derive a StringFormat for every font in this.fontMap from its name.
//   fontIdToName  - "<fontId> = <fontName>" strings, sorted, for debug output
//   fontToFormats - Map from font id to the StringFormat chosen for that font
const fontIdToName = [];
const fontToFormats = new Map();
this.fontMap.forEach((value, key) => {
    fontIdToName.push(key + " = " + value.name);
    // Compare case-insensitively: match 'Bold' / 'BOLD' / 'bold' alike.
    const fontName = value.name.toLowerCase();
    const isBold = fontName.includes('bold');
    const isOblique = fontName.includes('oblique');
    // NOTE(review): names containing 'italic' are not recognized here and fall
    // through to the checks below - confirm whether they should count as OBLIQUE.

    let format;
    if (key == mostUsedFont) {
        // The most used font is the body text, whatever its name says.
        // Loose equality kept from the original; the type of mostUsedFont is
        // not visible here - presumably a font id, verify before tightening.
        format = StringFormat.STANDARD;
    } else if (isBold && isOblique) {
        // Fixed: this branch used to test 'bold' twice, which classified every
        // bold font as BOLD_OBLIQUE and made the BOLD branch unreachable.
        format = StringFormat.BOLD_OBLIQUE;
    } else if (isBold) {
        format = StringFormat.BOLD;
    } else if (isOblique) {
        format = StringFormat.OBLIQUE;
    } else if (fontName === maxHeightFont) {
        // NOTE(review): this compares the lower-cased font *name* with
        // maxHeightFont. If maxHeightFont holds a font id (like mostUsedFont
        // appears to), this should probably be `key == maxHeightFont` - confirm.
        format = StringFormat.BOLD;
    } else {
        format = StringFormat.STANDARD;
    }
    fontToFormats.set(key, format);
});
fontIdToName.sort();
//Make a copy of the originals so all following transformation don't modify them //Make a copy of the originals so all following transformation don't modify them
const newPages = parseResult.pages.map(page => { const newPages = parseResult.pages.map(page => {
return { return {
@ -68,11 +94,13 @@ export default class CalculateGlobalStats extends ToTextItemTransformation {
mostUsedDistance: mostUsedDistance, mostUsedDistance: mostUsedDistance,
maxHeight: maxHeight, maxHeight: maxHeight,
maxHeightFont: maxHeightFont, maxHeightFont: maxHeightFont,
fontToFormats: fontToFormats
}, },
messages: [ messages: [
'Items per height: ' + JSON.stringify(heightToOccurrence), 'Items per height: ' + JSON.stringify(heightToOccurrence),
'Items per font: ' + JSON.stringify(fontToOccurrence), 'Items per font: ' + JSON.stringify(fontToOccurrence),
'Items per distance: ' + JSON.stringify(distanceToOccurrence) 'Items per distance: ' + JSON.stringify(distanceToOccurrence),
'Fonts:' + JSON.stringify(fontIdToName)
] ]
}); });
} }