mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-21 23:33:31 +01:00
[WIP] store font-map in appState
This commit is contained in:
parent
a35ecd28b6
commit
94c2561717
@ -1,4 +1,5 @@
|
||||
import React from 'react';
|
||||
import FaCheck from 'react-icons/lib/fa/check'
|
||||
|
||||
import pdfjs from 'pdfjs-dist'; // eslint-disable-line no-unused-vars
|
||||
import { Line } from 'rc-progress';
|
||||
@ -6,6 +7,7 @@ import { Line } from 'rc-progress';
|
||||
import Page from '../models/Page.jsx';
|
||||
import TextItem from '../models/TextItem.jsx';
|
||||
|
||||
// Parses the PDF pages and displays progress
|
||||
export default class LoadingView extends React.Component {
|
||||
|
||||
static propTypes = {
|
||||
@ -16,42 +18,78 @@ export default class LoadingView extends React.Component {
|
||||
constructor(props) {
|
||||
super(props);
|
||||
this.state = {
|
||||
parsedPages: 0,
|
||||
pages: []
|
||||
pages: [],
|
||||
fontIds: new Set(),
|
||||
fontMap: new Map(),
|
||||
progress: new Progress({
|
||||
stages: [
|
||||
new ProgressStage('Parsing PDF Pages'),
|
||||
new ProgressStage('Parsing Fonts')
|
||||
]
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
anounceInitialParse(pages) {
|
||||
this.setState({
|
||||
pages: pages
|
||||
});
|
||||
}
|
||||
announceInitialParse(document) {
|
||||
const pageStage = this.state.progress.stages[0];
|
||||
const numPages = document.numPages;
|
||||
pageStage.steps = numPages;
|
||||
pageStage.stepsDone;
|
||||
|
||||
anouncePageParsed(index, textItems) {
|
||||
//TODO might make problems.. concat unordered and order at the end ?
|
||||
this.state.pages[index].items = textItems; // eslint-disable-line react/no-direct-mutation-state
|
||||
this.setState({
|
||||
parsedPages: this.state.parsedPages + 1
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
componentWillMount() {
|
||||
const anounceInitialParseFunction = this.anounceInitialParse.bind(this);
|
||||
const anouncePageParsedFunction = this.anouncePageParsed.bind(this);
|
||||
PDFJS.getDocument(this.props.fileBuffer).then(function(pdfDocument) { // eslint-disable-line no-undef
|
||||
// console.log('Number of pages: ' + pdfDocument.numPages);
|
||||
// console.debug(pdfDocument);
|
||||
const numPages = pdfDocument.numPages;
|
||||
// const numPages = 4; // hack
|
||||
var pages = [];
|
||||
for (var i = 0; i < numPages; i++) {
|
||||
pages.push(new Page({
|
||||
index: i
|
||||
}));
|
||||
}
|
||||
anounceInitialParseFunction(pages);
|
||||
for (var j = 1; j <= numPages; j++) {
|
||||
|
||||
this.setState({
|
||||
document: document,
|
||||
pages: pages,
|
||||
});
|
||||
}
|
||||
|
||||
announcePageParsed(index, textItems) {
|
||||
const pageStage = this.state.progress.stages[0];
|
||||
const fontStage = this.state.progress.stages[1];
|
||||
textItems.forEach(item => {
|
||||
const fontId = item.font;
|
||||
if (!this.state.fontIds.has(fontId)) {
|
||||
const announceFontFunction = this.announceFontParsed.bind(this);
|
||||
this.state.document.transport.commonObjs.get(fontId, function(font) {
|
||||
announceFontFunction(fontId, font);
|
||||
});
|
||||
this.state.fontIds.add(fontId);
|
||||
fontStage.steps = this.state.fontIds.size;
|
||||
}
|
||||
});
|
||||
|
||||
pageStage.stepsDone = pageStage.stepsDone + 1;
|
||||
this.state.pages[index].items = textItems; // eslint-disable-line react/no-direct-mutation-state
|
||||
this.setState({
|
||||
progress: this.state.progress
|
||||
});
|
||||
}
|
||||
|
||||
announceFontParsed(fontId, font) {
|
||||
const fontStage = this.state.progress.stages[1];
|
||||
this.state.fontMap.set(fontId, font); // eslint-disable-line react/no-direct-mutation-state
|
||||
fontStage.stepsDone = fontStage.stepsDone + 1;
|
||||
if (this.state.progress.currentStage == 1) {
|
||||
this.setState({ //force rendering
|
||||
fontMap: this.state.fontMap,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
componentWillMount() {
|
||||
const announceInitialParseFunction = this.announceInitialParse.bind(this);
|
||||
const announcePageParsedFunction = this.announcePageParsed.bind(this);
|
||||
|
||||
PDFJS.getDocument(this.props.fileBuffer).then(function(pdfDocument) { // eslint-disable-line no-undef
|
||||
// console.debug(pdfDocument);
|
||||
announceInitialParseFunction(pdfDocument);
|
||||
for (var j = 1; j <= pdfDocument.numPages; j++) {
|
||||
pdfDocument.getPage(j).then(function(page) {
|
||||
var scale = 1.0;
|
||||
var viewport = page.getViewport(scale);
|
||||
@ -59,14 +97,8 @@ export default class LoadingView extends React.Component {
|
||||
// pdfDocument.getMetadata().then(function(data) {
|
||||
// console.debug(data);
|
||||
// });
|
||||
// page.getOperatorList().then(function(opList) {
|
||||
// console.debug(opList);
|
||||
// // var svgGfx = new svgLib.SVGGraphics(page.commonObjs, page.objs);
|
||||
// // return svgGfx.getSVG(opList, viewport).then(function (svg) {
|
||||
// // container.appendChild(svg);
|
||||
// // });
|
||||
// });
|
||||
page.getTextContent().then(function(textContent) {
|
||||
// console.debug(textContent);
|
||||
const textItems = textContent.items.map(function(item) {
|
||||
const tx = PDFJS.Util.transform( // eslint-disable-line no-undef
|
||||
viewport.transform,
|
||||
@ -75,20 +107,19 @@ export default class LoadingView extends React.Component {
|
||||
|
||||
const fontHeight = Math.sqrt((tx[2] * tx[2]) + (tx[3] * tx[3]));
|
||||
const dividedHeight = item.height / fontHeight;
|
||||
|
||||
const style = textContent.styles[item.fontName];
|
||||
return new TextItem({
|
||||
x: Math.round(item.transform[4]),
|
||||
y: Math.round(item.transform[5]),
|
||||
width: Math.round(item.width),
|
||||
height: Math.round(dividedHeight <= 1 ? item.height : dividedHeight),
|
||||
text: item.str,
|
||||
font: item.fontName,
|
||||
fontAscent: style.ascent,
|
||||
fontDescent: style.descent
|
||||
font: item.fontName
|
||||
});
|
||||
});
|
||||
anouncePageParsedFunction(page.pageIndex, textItems);
|
||||
announcePageParsedFunction(page.pageIndex, textItems);
|
||||
});
|
||||
page.getOperatorList().then(function() {
|
||||
// do nothing... this is only for triggering the font retrieval
|
||||
});
|
||||
});
|
||||
}
|
||||
@ -96,16 +127,20 @@ export default class LoadingView extends React.Component {
|
||||
}
|
||||
|
||||
render() {
|
||||
const {parsedPages, pages} = this.state;
|
||||
var percentDone = 0;
|
||||
var details = '';
|
||||
if (pages.length > 0) {
|
||||
percentDone = parsedPages / pages.length * 100;
|
||||
details = parsedPages + ' / ' + pages.length
|
||||
if (parsedPages == pages.length) {
|
||||
this.props.storePdfPagesFunction(this.state.pages);
|
||||
}
|
||||
const {pages, progress} = this.state;
|
||||
const percentDone = getPercentDone(progress);
|
||||
if (percentDone == 100) {
|
||||
this.props.storePdfPagesFunction(pages, this.state.fontMap);
|
||||
}
|
||||
const stageItems = progress.stages.filter((elem, i) => i <= progress.currentStage).map((stage, i) => {
|
||||
const progressDetails = stage.steps ? stage.stepsDone + ' / ' + stage.steps : '';
|
||||
const checkmark = stage.isComplete() ? <FaCheck color={ 'green' } /> : '';
|
||||
return <div key={ i }>
|
||||
{ stage.name }
|
||||
{ ' ' + progressDetails + ' ' }
|
||||
{ checkmark }
|
||||
</div>
|
||||
});
|
||||
return (
|
||||
<div style={ { textAlign: 'center' } }>
|
||||
<br/>
|
||||
@ -115,10 +150,64 @@ export default class LoadingView extends React.Component {
|
||||
<br/>
|
||||
<br/>
|
||||
<div>
|
||||
Uploading and parsing PDF...
|
||||
<br/>
|
||||
{ ' ' + details }
|
||||
{ stageItems }
|
||||
</div>
|
||||
</div>);
|
||||
}
|
||||
}
|
||||
|
||||
function getPercentDone(progress) {
|
||||
const activeStage = progress.activeStage();
|
||||
const percentDone = activeStage.percentDone();
|
||||
|
||||
if (percentDone == 100) {
|
||||
progress.completeStage();
|
||||
if (!progress.isComplete()) {
|
||||
return getPercentDone(progress, 0);
|
||||
}
|
||||
}
|
||||
|
||||
return percentDone;
|
||||
}
|
||||
|
||||
class Progress {
|
||||
|
||||
constructor(options) {
|
||||
this.stages = options.stages;
|
||||
this.currentStage = 0;
|
||||
}
|
||||
|
||||
completeStage() {
|
||||
this.currentStage++;
|
||||
}
|
||||
|
||||
isComplete() {
|
||||
return this.currentStage == this.stages.length;
|
||||
}
|
||||
|
||||
activeStage() {
|
||||
return this.stages[this.currentStage];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class ProgressStage {
|
||||
|
||||
constructor(name) {
|
||||
this.name = name;
|
||||
this.stepsDone = 0;
|
||||
this.steps;
|
||||
}
|
||||
|
||||
isComplete() {
|
||||
return this.stepsDone == this.steps;
|
||||
}
|
||||
|
||||
percentDone() {
|
||||
if (!this.steps) {
|
||||
return 0;
|
||||
}
|
||||
return this.stepsDone / this.steps * 100;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,6 @@ export default class TextItemTable extends React.Component {
|
||||
</th>
|
||||
<th>
|
||||
Font
|
||||
<br/>(asc/desc)
|
||||
</th>
|
||||
</tr>
|
||||
</thead>
|
||||
@ -81,8 +80,6 @@ export default class TextItemTable extends React.Component {
|
||||
</td>
|
||||
<td>
|
||||
{ textItem.font }
|
||||
<br/>
|
||||
{ textItem.fontAscent + ' / ' + textItem.fontDescent }
|
||||
</td>
|
||||
</tr>
|
||||
)
|
||||
|
@ -25,6 +25,7 @@ export default class AppState {
|
||||
this.mainView = View.UPLOAD;
|
||||
this.fileBuffer;
|
||||
this.pages = [];
|
||||
this.fontMap;
|
||||
this.transformations = [
|
||||
new CalculateGlobalStats(),
|
||||
new CompactLines(),
|
||||
@ -62,8 +63,9 @@ export default class AppState {
|
||||
this.render()
|
||||
}
|
||||
|
||||
storePdfPages(pages) {
|
||||
storePdfPages(pages, fontMap) {
|
||||
this.pages = pages;
|
||||
this.fontMap = fontMap;
|
||||
this.fileBuffer = null;
|
||||
this.mainView = View.RESULT;
|
||||
this.render();
|
||||
|
Loading…
Reference in New Issue
Block a user