mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-21 23:33:31 +01:00
[WIP] store font-map in appState
This commit is contained in:
parent
a35ecd28b6
commit
94c2561717
@ -1,4 +1,5 @@
|
|||||||
import React from 'react';
|
import React from 'react';
|
||||||
|
import FaCheck from 'react-icons/lib/fa/check'
|
||||||
|
|
||||||
import pdfjs from 'pdfjs-dist'; // eslint-disable-line no-unused-vars
|
import pdfjs from 'pdfjs-dist'; // eslint-disable-line no-unused-vars
|
||||||
import { Line } from 'rc-progress';
|
import { Line } from 'rc-progress';
|
||||||
@ -6,6 +7,7 @@ import { Line } from 'rc-progress';
|
|||||||
import Page from '../models/Page.jsx';
|
import Page from '../models/Page.jsx';
|
||||||
import TextItem from '../models/TextItem.jsx';
|
import TextItem from '../models/TextItem.jsx';
|
||||||
|
|
||||||
|
// Parses the PDF pages and displays progress
|
||||||
export default class LoadingView extends React.Component {
|
export default class LoadingView extends React.Component {
|
||||||
|
|
||||||
static propTypes = {
|
static propTypes = {
|
||||||
@ -16,42 +18,78 @@ export default class LoadingView extends React.Component {
|
|||||||
constructor(props) {
|
constructor(props) {
|
||||||
super(props);
|
super(props);
|
||||||
this.state = {
|
this.state = {
|
||||||
parsedPages: 0,
|
pages: [],
|
||||||
pages: []
|
fontIds: new Set(),
|
||||||
|
fontMap: new Map(),
|
||||||
|
progress: new Progress({
|
||||||
|
stages: [
|
||||||
|
new ProgressStage('Parsing PDF Pages'),
|
||||||
|
new ProgressStage('Parsing Fonts')
|
||||||
|
]
|
||||||
|
}),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
anounceInitialParse(pages) {
|
announceInitialParse(document) {
|
||||||
|
const pageStage = this.state.progress.stages[0];
|
||||||
|
const numPages = document.numPages;
|
||||||
|
pageStage.steps = numPages;
|
||||||
|
pageStage.stepsDone;
|
||||||
|
|
||||||
|
var pages = [];
|
||||||
|
for (var i = 0; i < numPages; i++) {
|
||||||
|
pages.push(new Page({
|
||||||
|
index: i
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
this.setState({
|
this.setState({
|
||||||
pages: pages
|
document: document,
|
||||||
|
pages: pages,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
anouncePageParsed(index, textItems) {
|
announcePageParsed(index, textItems) {
|
||||||
//TODO might make problems.. concat unordered and order at the end ?
|
const pageStage = this.state.progress.stages[0];
|
||||||
|
const fontStage = this.state.progress.stages[1];
|
||||||
|
textItems.forEach(item => {
|
||||||
|
const fontId = item.font;
|
||||||
|
if (!this.state.fontIds.has(fontId)) {
|
||||||
|
const announceFontFunction = this.announceFontParsed.bind(this);
|
||||||
|
this.state.document.transport.commonObjs.get(fontId, function(font) {
|
||||||
|
announceFontFunction(fontId, font);
|
||||||
|
});
|
||||||
|
this.state.fontIds.add(fontId);
|
||||||
|
fontStage.steps = this.state.fontIds.size;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
pageStage.stepsDone = pageStage.stepsDone + 1;
|
||||||
this.state.pages[index].items = textItems; // eslint-disable-line react/no-direct-mutation-state
|
this.state.pages[index].items = textItems; // eslint-disable-line react/no-direct-mutation-state
|
||||||
this.setState({
|
this.setState({
|
||||||
parsedPages: this.state.parsedPages + 1
|
progress: this.state.progress
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
announceFontParsed(fontId, font) {
|
||||||
|
const fontStage = this.state.progress.stages[1];
|
||||||
|
this.state.fontMap.set(fontId, font); // eslint-disable-line react/no-direct-mutation-state
|
||||||
|
fontStage.stepsDone = fontStage.stepsDone + 1;
|
||||||
|
if (this.state.progress.currentStage == 1) {
|
||||||
|
this.setState({ //force rendering
|
||||||
|
fontMap: this.state.fontMap,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
componentWillMount() {
|
componentWillMount() {
|
||||||
const anounceInitialParseFunction = this.anounceInitialParse.bind(this);
|
const announceInitialParseFunction = this.announceInitialParse.bind(this);
|
||||||
const anouncePageParsedFunction = this.anouncePageParsed.bind(this);
|
const announcePageParsedFunction = this.announcePageParsed.bind(this);
|
||||||
|
|
||||||
PDFJS.getDocument(this.props.fileBuffer).then(function(pdfDocument) { // eslint-disable-line no-undef
|
PDFJS.getDocument(this.props.fileBuffer).then(function(pdfDocument) { // eslint-disable-line no-undef
|
||||||
// console.log('Number of pages: ' + pdfDocument.numPages);
|
|
||||||
// console.debug(pdfDocument);
|
// console.debug(pdfDocument);
|
||||||
const numPages = pdfDocument.numPages;
|
announceInitialParseFunction(pdfDocument);
|
||||||
// const numPages = 4; // hack
|
for (var j = 1; j <= pdfDocument.numPages; j++) {
|
||||||
var pages = [];
|
|
||||||
for (var i = 0; i < numPages; i++) {
|
|
||||||
pages.push(new Page({
|
|
||||||
index: i
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
anounceInitialParseFunction(pages);
|
|
||||||
for (var j = 1; j <= numPages; j++) {
|
|
||||||
pdfDocument.getPage(j).then(function(page) {
|
pdfDocument.getPage(j).then(function(page) {
|
||||||
var scale = 1.0;
|
var scale = 1.0;
|
||||||
var viewport = page.getViewport(scale);
|
var viewport = page.getViewport(scale);
|
||||||
@ -59,14 +97,8 @@ export default class LoadingView extends React.Component {
|
|||||||
// pdfDocument.getMetadata().then(function(data) {
|
// pdfDocument.getMetadata().then(function(data) {
|
||||||
// console.debug(data);
|
// console.debug(data);
|
||||||
// });
|
// });
|
||||||
// page.getOperatorList().then(function(opList) {
|
|
||||||
// console.debug(opList);
|
|
||||||
// // var svgGfx = new svgLib.SVGGraphics(page.commonObjs, page.objs);
|
|
||||||
// // return svgGfx.getSVG(opList, viewport).then(function (svg) {
|
|
||||||
// // container.appendChild(svg);
|
|
||||||
// // });
|
|
||||||
// });
|
|
||||||
page.getTextContent().then(function(textContent) {
|
page.getTextContent().then(function(textContent) {
|
||||||
|
// console.debug(textContent);
|
||||||
const textItems = textContent.items.map(function(item) {
|
const textItems = textContent.items.map(function(item) {
|
||||||
const tx = PDFJS.Util.transform( // eslint-disable-line no-undef
|
const tx = PDFJS.Util.transform( // eslint-disable-line no-undef
|
||||||
viewport.transform,
|
viewport.transform,
|
||||||
@ -75,20 +107,19 @@ export default class LoadingView extends React.Component {
|
|||||||
|
|
||||||
const fontHeight = Math.sqrt((tx[2] * tx[2]) + (tx[3] * tx[3]));
|
const fontHeight = Math.sqrt((tx[2] * tx[2]) + (tx[3] * tx[3]));
|
||||||
const dividedHeight = item.height / fontHeight;
|
const dividedHeight = item.height / fontHeight;
|
||||||
|
|
||||||
const style = textContent.styles[item.fontName];
|
|
||||||
return new TextItem({
|
return new TextItem({
|
||||||
x: Math.round(item.transform[4]),
|
x: Math.round(item.transform[4]),
|
||||||
y: Math.round(item.transform[5]),
|
y: Math.round(item.transform[5]),
|
||||||
width: Math.round(item.width),
|
width: Math.round(item.width),
|
||||||
height: Math.round(dividedHeight <= 1 ? item.height : dividedHeight),
|
height: Math.round(dividedHeight <= 1 ? item.height : dividedHeight),
|
||||||
text: item.str,
|
text: item.str,
|
||||||
font: item.fontName,
|
font: item.fontName
|
||||||
fontAscent: style.ascent,
|
|
||||||
fontDescent: style.descent
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
anouncePageParsedFunction(page.pageIndex, textItems);
|
announcePageParsedFunction(page.pageIndex, textItems);
|
||||||
|
});
|
||||||
|
page.getOperatorList().then(function() {
|
||||||
|
// do nothing... this is only for triggering the font retrieval
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -96,16 +127,20 @@ export default class LoadingView extends React.Component {
|
|||||||
}
|
}
|
||||||
|
|
||||||
render() {
|
render() {
|
||||||
const {parsedPages, pages} = this.state;
|
const {pages, progress} = this.state;
|
||||||
var percentDone = 0;
|
const percentDone = getPercentDone(progress);
|
||||||
var details = '';
|
if (percentDone == 100) {
|
||||||
if (pages.length > 0) {
|
this.props.storePdfPagesFunction(pages, this.state.fontMap);
|
||||||
percentDone = parsedPages / pages.length * 100;
|
|
||||||
details = parsedPages + ' / ' + pages.length
|
|
||||||
if (parsedPages == pages.length) {
|
|
||||||
this.props.storePdfPagesFunction(this.state.pages);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
const stageItems = progress.stages.filter((elem, i) => i <= progress.currentStage).map((stage, i) => {
|
||||||
|
const progressDetails = stage.steps ? stage.stepsDone + ' / ' + stage.steps : '';
|
||||||
|
const checkmark = stage.isComplete() ? <FaCheck color={ 'green' } /> : '';
|
||||||
|
return <div key={ i }>
|
||||||
|
{ stage.name }
|
||||||
|
{ ' ' + progressDetails + ' ' }
|
||||||
|
{ checkmark }
|
||||||
|
</div>
|
||||||
|
});
|
||||||
return (
|
return (
|
||||||
<div style={ { textAlign: 'center' } }>
|
<div style={ { textAlign: 'center' } }>
|
||||||
<br/>
|
<br/>
|
||||||
@ -115,10 +150,64 @@ export default class LoadingView extends React.Component {
|
|||||||
<br/>
|
<br/>
|
||||||
<br/>
|
<br/>
|
||||||
<div>
|
<div>
|
||||||
Uploading and parsing PDF...
|
{ stageItems }
|
||||||
<br/>
|
|
||||||
{ ' ' + details }
|
|
||||||
</div>
|
</div>
|
||||||
</div>);
|
</div>);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function getPercentDone(progress) {
|
||||||
|
const activeStage = progress.activeStage();
|
||||||
|
const percentDone = activeStage.percentDone();
|
||||||
|
|
||||||
|
if (percentDone == 100) {
|
||||||
|
progress.completeStage();
|
||||||
|
if (!progress.isComplete()) {
|
||||||
|
return getPercentDone(progress, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return percentDone;
|
||||||
|
}
|
||||||
|
|
||||||
|
class Progress {
|
||||||
|
|
||||||
|
constructor(options) {
|
||||||
|
this.stages = options.stages;
|
||||||
|
this.currentStage = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
completeStage() {
|
||||||
|
this.currentStage++;
|
||||||
|
}
|
||||||
|
|
||||||
|
isComplete() {
|
||||||
|
return this.currentStage == this.stages.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
activeStage() {
|
||||||
|
return this.stages[this.currentStage];
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class ProgressStage {
|
||||||
|
|
||||||
|
constructor(name) {
|
||||||
|
this.name = name;
|
||||||
|
this.stepsDone = 0;
|
||||||
|
this.steps;
|
||||||
|
}
|
||||||
|
|
||||||
|
isComplete() {
|
||||||
|
return this.stepsDone == this.steps;
|
||||||
|
}
|
||||||
|
|
||||||
|
percentDone() {
|
||||||
|
if (!this.steps) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return this.stepsDone / this.steps * 100;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -34,7 +34,6 @@ export default class TextItemTable extends React.Component {
|
|||||||
</th>
|
</th>
|
||||||
<th>
|
<th>
|
||||||
Font
|
Font
|
||||||
<br/>(asc/desc)
|
|
||||||
</th>
|
</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
@ -81,8 +80,6 @@ export default class TextItemTable extends React.Component {
|
|||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
{ textItem.font }
|
{ textItem.font }
|
||||||
<br/>
|
|
||||||
{ textItem.fontAscent + ' / ' + textItem.fontDescent }
|
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
)
|
)
|
||||||
|
@ -25,6 +25,7 @@ export default class AppState {
|
|||||||
this.mainView = View.UPLOAD;
|
this.mainView = View.UPLOAD;
|
||||||
this.fileBuffer;
|
this.fileBuffer;
|
||||||
this.pages = [];
|
this.pages = [];
|
||||||
|
this.fontMap;
|
||||||
this.transformations = [
|
this.transformations = [
|
||||||
new CalculateGlobalStats(),
|
new CalculateGlobalStats(),
|
||||||
new CompactLines(),
|
new CompactLines(),
|
||||||
@ -62,8 +63,9 @@ export default class AppState {
|
|||||||
this.render()
|
this.render()
|
||||||
}
|
}
|
||||||
|
|
||||||
storePdfPages(pages) {
|
storePdfPages(pages, fontMap) {
|
||||||
this.pages = pages;
|
this.pages = pages;
|
||||||
|
this.fontMap = fontMap;
|
||||||
this.fileBuffer = null;
|
this.fileBuffer = null;
|
||||||
this.mainView = View.RESULT;
|
this.mainView = View.RESULT;
|
||||||
this.render();
|
this.render();
|
||||||
|
Loading…
Reference in New Issue
Block a user