[WIP] parse metadata & display title

This commit is contained in:
Johannes Zillmann 2017-03-22 07:19:21 +01:00
parent 94c2561717
commit b5bb56b647
5 changed files with 88 additions and 35 deletions

View File

@ -37,9 +37,10 @@ export default class App extends React.Component {
throw `View ${this.props.appState.mainView} not supported!`; throw `View ${this.props.appState.mainView} not supported!`;
} }
const title = appState.metadata ? appState.metadata.title : '';
return ( return (
<div> <div>
<TopBar mainView={ appState.mainView } switchMainViewFunction={ appState.switchMainView } /> <TopBar mainView={ appState.mainView } switchMainViewFunction={ appState.switchMainView } title={ title } />
<Grid> <Grid>
<div> <div>
{ mainView } { mainView }

View File

@ -4,6 +4,7 @@ import FaCheck from 'react-icons/lib/fa/check'
import pdfjs from 'pdfjs-dist'; // eslint-disable-line no-unused-vars import pdfjs from 'pdfjs-dist'; // eslint-disable-line no-unused-vars
import { Line } from 'rc-progress'; import { Line } from 'rc-progress';
import Metadata from '../models/Metadata.jsx';
import Page from '../models/Page.jsx'; import Page from '../models/Page.jsx';
import TextItem from '../models/TextItem.jsx'; import TextItem from '../models/TextItem.jsx';
@ -17,21 +18,38 @@ export default class LoadingView extends React.Component {
constructor(props) { constructor(props) {
super(props); super(props);
const progress = new Progress({
stages: [
new ProgressStage('Parsing Metadata', 2),
new ProgressStage('Parsing Pages'),
new ProgressStage('Parsing Fonts')
]
});
Progress.prototype.metadataStage = () => {
return progress.stages[0]
};
Progress.prototype.pageStage = () => {
return progress.stages[1]
};
Progress.prototype.fontStage = () => {
return progress.stages[2]
};
this.state = { this.state = {
document: null,
metadata: null,
pages: [], pages: [],
fontIds: new Set(), fontIds: new Set(),
fontMap: new Map(), fontMap: new Map(),
progress: new Progress({ progress: progress,
stages: [
new ProgressStage('Parsing PDF Pages'),
new ProgressStage('Parsing Fonts')
]
}),
}; };
} }
announceInitialParse(document) { documentParsed(document) {
const pageStage = this.state.progress.stages[0]; const metadataStage = this.state.progress.metadataStage();
const pageStage = this.state.progress.pageStage();
metadataStage.stepsDone++;
const numPages = document.numPages; const numPages = document.numPages;
pageStage.steps = numPages; pageStage.steps = numPages;
pageStage.stepsDone; pageStage.stepsDone;
@ -49,15 +67,24 @@ export default class LoadingView extends React.Component {
}); });
} }
announcePageParsed(index, textItems) { metadataParsed(metadata) {
const pageStage = this.state.progress.stages[0]; const metadataStage = this.state.progress.metadataStage();
const fontStage = this.state.progress.stages[1]; metadataStage.stepsDone++;
// console.debug(new Metadata(metadata));
this.setState({
metadata: new Metadata(metadata),
});
}
pageParsed(index, textItems) {
const pageStage = this.state.progress.pageStage();
const fontStage = this.state.progress.fontStage();
const self = this;
textItems.forEach(item => { textItems.forEach(item => {
const fontId = item.font; const fontId = item.font;
if (!this.state.fontIds.has(fontId)) { if (!this.state.fontIds.has(fontId)) {
const announceFontFunction = this.announceFontParsed.bind(this);
this.state.document.transport.commonObjs.get(fontId, function(font) { this.state.document.transport.commonObjs.get(fontId, function(font) {
announceFontFunction(fontId, font); self.fontParsed(fontId, font);
}); });
this.state.fontIds.add(fontId); this.state.fontIds.add(fontId);
fontStage.steps = this.state.fontIds.size; fontStage.steps = this.state.fontIds.size;
@ -71,11 +98,11 @@ export default class LoadingView extends React.Component {
}); });
} }
announceFontParsed(fontId, font) { fontParsed(fontId, font) {
const fontStage = this.state.progress.stages[1]; const fontStage = this.state.progress.fontStage();
this.state.fontMap.set(fontId, font); // eslint-disable-line react/no-direct-mutation-state this.state.fontMap.set(fontId, font); // eslint-disable-line react/no-direct-mutation-state
fontStage.stepsDone = fontStage.stepsDone + 1; fontStage.stepsDone++;
if (this.state.progress.currentStage == 1) { if (this.state.progress.activeStage() === fontStage) {
this.setState({ //force rendering this.setState({ //force rendering
fontMap: this.state.fontMap, fontMap: this.state.fontMap,
}); });
@ -83,20 +110,20 @@ export default class LoadingView extends React.Component {
} }
componentWillMount() { componentWillMount() {
const announceInitialParseFunction = this.announceInitialParse.bind(this); const self = this;
const announcePageParsedFunction = this.announcePageParsed.bind(this);
PDFJS.getDocument(this.props.fileBuffer).then(function(pdfDocument) { // eslint-disable-line no-undef PDFJS.getDocument(this.props.fileBuffer).then(function(pdfDocument) { // eslint-disable-line no-undef
// console.debug(pdfDocument); // console.debug(pdfDocument);
announceInitialParseFunction(pdfDocument); pdfDocument.getMetadata().then(function(metadata) {
// console.debug(metadata);
self.metadataParsed(metadata);
});
self.documentParsed(pdfDocument);
for (var j = 1; j <= pdfDocument.numPages; j++) { for (var j = 1; j <= pdfDocument.numPages; j++) {
pdfDocument.getPage(j).then(function(page) { pdfDocument.getPage(j).then(function(page) {
// console.debug(page);
var scale = 1.0; var scale = 1.0;
var viewport = page.getViewport(scale); var viewport = page.getViewport(scale);
// pdfDocument.getMetadata().then(function(data) {
// console.debug(data);
// });
page.getTextContent().then(function(textContent) { page.getTextContent().then(function(textContent) {
// console.debug(textContent); // console.debug(textContent);
const textItems = textContent.items.map(function(item) { const textItems = textContent.items.map(function(item) {
@ -116,7 +143,7 @@ export default class LoadingView extends React.Component {
font: item.fontName font: item.fontName
}); });
}); });
announcePageParsedFunction(page.pageIndex, textItems); self.pageParsed(page.pageIndex, textItems);
}); });
page.getOperatorList().then(function() { page.getOperatorList().then(function() {
// do nothing... this is only for triggering the font retrieval // do nothing... this is only for triggering the font retrieval
@ -127,10 +154,10 @@ export default class LoadingView extends React.Component {
} }
render() { render() {
const {pages, progress} = this.state; const {pages, fontMap, metadata, progress} = this.state;
const percentDone = getPercentDone(progress); const percentDone = getPercentDone(progress);
if (percentDone == 100) { if (percentDone == 100) {
this.props.storePdfPagesFunction(pages, this.state.fontMap); this.props.storePdfPagesFunction(metadata, fontMap, pages);
} }
const stageItems = progress.stages.filter((elem, i) => i <= progress.currentStage).map((stage, i) => { const stageItems = progress.stages.filter((elem, i) => i <= progress.currentStage).map((stage, i) => {
const progressDetails = stage.steps ? stage.stepsDone + ' / ' + stage.steps : ''; const progressDetails = stage.steps ? stage.stepsDone + ' / ' + stage.steps : '';
@ -193,10 +220,10 @@ class Progress {
class ProgressStage { class ProgressStage {
constructor(name) { constructor(name, steps) {
this.name = name; this.name = name;
this.steps = steps ;
this.stepsDone = 0; this.stepsDone = 0;
this.steps;
} }
isComplete() { isComplete() {

View File

@ -16,10 +16,11 @@ export default class TopBar extends React.Component {
static propTypes = { static propTypes = {
mainView: React.PropTypes.object.isRequired, mainView: React.PropTypes.object.isRequired,
switchMainViewFunction: React.PropTypes.func.isRequired, switchMainViewFunction: React.PropTypes.func.isRequired,
title: React.PropTypes.string.isRequired,
}; };
render() { render() {
const {mainView, switchMainViewFunction} = this.props; const {mainView, switchMainViewFunction, title} = this.props;
const aboutPopover = ( const aboutPopover = (
<Popover id="popover-trigger-click-root-close" title={ `About PDF to Markdown Converter - ${ process.env.version }` }> <Popover id="popover-trigger-click-root-close" title={ `About PDF to Markdown Converter - ${ process.env.version }` }>
<p> <p>
@ -61,6 +62,11 @@ export default class TopBar extends React.Component {
</NavItem> </NavItem>
</Nav> } </Nav> }
</Navbar.Header> </Navbar.Header>
<Navbar.Collapse>
<Navbar.Text pullRight={ true }>
{ title }
</Navbar.Text>
</Navbar.Collapse>
</Navbar> </Navbar>
); );
} }

View File

@ -24,8 +24,9 @@ export default class AppState {
this.renderFunction = options.renderFunction; this.renderFunction = options.renderFunction;
this.mainView = View.UPLOAD; this.mainView = View.UPLOAD;
this.fileBuffer; this.fileBuffer;
this.pages = []; this.metadata;
this.fontMap; this.fontMap;
this.pages = [];
this.transformations = [ this.transformations = [
new CalculateGlobalStats(), new CalculateGlobalStats(),
new CompactLines(), new CompactLines(),
@ -63,9 +64,10 @@ export default class AppState {
this.render() this.render()
} }
storePdfPages(pages, fontMap) { storePdfPages(metadata, fontMap, pages) {
this.pages = pages; this.metadata = metadata;
this.fontMap = fontMap; this.fontMap = fontMap;
this.pages = pages;
this.fileBuffer = null; this.fileBuffer = null;
this.mainView = View.RESULT; this.mainView = View.RESULT;
this.render(); this.render();

View File

@ -0,0 +1,17 @@
/**
 * Metadata of the PDF document.
 *
 * Wraps the raw object handed over by the loader (the value that
 * `pdfDocument.getMetadata()` resolves with) and exposes the fields of
 * interest as plain properties: `title`, `author`, `creator` and `producer`.
 */
export default class Metadata {

    /**
     * @param {Object} originalMetadata - raw metadata object.
     * @param {Object} [originalMetadata.metadata] - keyed store read through
     *        `get(name)`; its values are preferred when present.
     * @param {Object} [originalMetadata.info] - plain object carrying `Title`,
     *        `Author`, `Creator` and `Producer`; used as the fallback source.
     */
    constructor(originalMetadata) {
        const keyedMetadata = originalMetadata.metadata;
        // Tolerate a missing info dictionary instead of throwing on property access.
        const info = originalMetadata.info || {};

        // Prefer the keyed entry, but fall back to the info value when the keyed
        // store is absent or has no entry for that key, so a document whose keyed
        // metadata is only partially filled still gets e.g. its title.
        const pick = (key, infoValue) => {
            const keyedValue = keyedMetadata ? keyedMetadata.get(key) : null;
            return keyedValue != null ? keyedValue : infoValue;
        };

        this.title = pick('dc:title', info.Title);
        // No keyed lookup is used for the author; it is always read from the
        // info object so that it is populated in both cases.
        this.author = info.Author;
        this.creator = pick('xap:creatortool', info.Creator);
        this.producer = pick('pdf:producer', info.Producer);
    }
}