mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-21 23:33:31 +01:00
[WIP] parse metadata & display title
This commit is contained in:
parent
94c2561717
commit
b5bb56b647
@ -37,9 +37,10 @@ export default class App extends React.Component {
|
|||||||
throw `View ${this.props.appState.mainView} not supported!`;
|
throw `View ${this.props.appState.mainView} not supported!`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const title = appState.metadata ? appState.metadata.title : '';
|
||||||
return (
|
return (
|
||||||
<div>
|
<div>
|
||||||
<TopBar mainView={ appState.mainView } switchMainViewFunction={ appState.switchMainView } />
|
<TopBar mainView={ appState.mainView } switchMainViewFunction={ appState.switchMainView } title={ title } />
|
||||||
<Grid>
|
<Grid>
|
||||||
<div>
|
<div>
|
||||||
{ mainView }
|
{ mainView }
|
||||||
|
@ -4,6 +4,7 @@ import FaCheck from 'react-icons/lib/fa/check'
|
|||||||
import pdfjs from 'pdfjs-dist'; // eslint-disable-line no-unused-vars
|
import pdfjs from 'pdfjs-dist'; // eslint-disable-line no-unused-vars
|
||||||
import { Line } from 'rc-progress';
|
import { Line } from 'rc-progress';
|
||||||
|
|
||||||
|
import Metadata from '../models/Metadata.jsx';
|
||||||
import Page from '../models/Page.jsx';
|
import Page from '../models/Page.jsx';
|
||||||
import TextItem from '../models/TextItem.jsx';
|
import TextItem from '../models/TextItem.jsx';
|
||||||
|
|
||||||
@ -17,21 +18,38 @@ export default class LoadingView extends React.Component {
|
|||||||
|
|
||||||
constructor(props) {
|
constructor(props) {
|
||||||
super(props);
|
super(props);
|
||||||
|
|
||||||
|
const progress = new Progress({
|
||||||
|
stages: [
|
||||||
|
new ProgressStage('Parsing Metadata', 2),
|
||||||
|
new ProgressStage('Parsing Pages'),
|
||||||
|
new ProgressStage('Parsing Fonts')
|
||||||
|
]
|
||||||
|
});
|
||||||
|
Progress.prototype.metadataStage = () => {
|
||||||
|
return progress.stages[0]
|
||||||
|
};
|
||||||
|
Progress.prototype.pageStage = () => {
|
||||||
|
return progress.stages[1]
|
||||||
|
};
|
||||||
|
Progress.prototype.fontStage = () => {
|
||||||
|
return progress.stages[2]
|
||||||
|
};
|
||||||
this.state = {
|
this.state = {
|
||||||
|
document: null,
|
||||||
|
metadata: null,
|
||||||
pages: [],
|
pages: [],
|
||||||
fontIds: new Set(),
|
fontIds: new Set(),
|
||||||
fontMap: new Map(),
|
fontMap: new Map(),
|
||||||
progress: new Progress({
|
progress: progress,
|
||||||
stages: [
|
|
||||||
new ProgressStage('Parsing PDF Pages'),
|
|
||||||
new ProgressStage('Parsing Fonts')
|
|
||||||
]
|
|
||||||
}),
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
announceInitialParse(document) {
|
documentParsed(document) {
|
||||||
const pageStage = this.state.progress.stages[0];
|
const metadataStage = this.state.progress.metadataStage();
|
||||||
|
const pageStage = this.state.progress.pageStage();
|
||||||
|
metadataStage.stepsDone++;
|
||||||
|
|
||||||
const numPages = document.numPages;
|
const numPages = document.numPages;
|
||||||
pageStage.steps = numPages;
|
pageStage.steps = numPages;
|
||||||
pageStage.stepsDone;
|
pageStage.stepsDone;
|
||||||
@ -49,15 +67,24 @@ export default class LoadingView extends React.Component {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
announcePageParsed(index, textItems) {
|
metadataParsed(metadata) {
|
||||||
const pageStage = this.state.progress.stages[0];
|
const metadataStage = this.state.progress.metadataStage();
|
||||||
const fontStage = this.state.progress.stages[1];
|
metadataStage.stepsDone++;
|
||||||
|
// console.debug(new Metadata(metadata));
|
||||||
|
this.setState({
|
||||||
|
metadata: new Metadata(metadata),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
pageParsed(index, textItems) {
|
||||||
|
const pageStage = this.state.progress.pageStage();
|
||||||
|
const fontStage = this.state.progress.fontStage();
|
||||||
|
const self = this;
|
||||||
textItems.forEach(item => {
|
textItems.forEach(item => {
|
||||||
const fontId = item.font;
|
const fontId = item.font;
|
||||||
if (!this.state.fontIds.has(fontId)) {
|
if (!this.state.fontIds.has(fontId)) {
|
||||||
const announceFontFunction = this.announceFontParsed.bind(this);
|
|
||||||
this.state.document.transport.commonObjs.get(fontId, function(font) {
|
this.state.document.transport.commonObjs.get(fontId, function(font) {
|
||||||
announceFontFunction(fontId, font);
|
self.fontParsed(fontId, font);
|
||||||
});
|
});
|
||||||
this.state.fontIds.add(fontId);
|
this.state.fontIds.add(fontId);
|
||||||
fontStage.steps = this.state.fontIds.size;
|
fontStage.steps = this.state.fontIds.size;
|
||||||
@ -71,11 +98,11 @@ export default class LoadingView extends React.Component {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
announceFontParsed(fontId, font) {
|
fontParsed(fontId, font) {
|
||||||
const fontStage = this.state.progress.stages[1];
|
const fontStage = this.state.progress.fontStage();
|
||||||
this.state.fontMap.set(fontId, font); // eslint-disable-line react/no-direct-mutation-state
|
this.state.fontMap.set(fontId, font); // eslint-disable-line react/no-direct-mutation-state
|
||||||
fontStage.stepsDone = fontStage.stepsDone + 1;
|
fontStage.stepsDone++;
|
||||||
if (this.state.progress.currentStage == 1) {
|
if (this.state.progress.activeStage() === fontStage) {
|
||||||
this.setState({ //force rendering
|
this.setState({ //force rendering
|
||||||
fontMap: this.state.fontMap,
|
fontMap: this.state.fontMap,
|
||||||
});
|
});
|
||||||
@ -83,20 +110,20 @@ export default class LoadingView extends React.Component {
|
|||||||
}
|
}
|
||||||
|
|
||||||
componentWillMount() {
|
componentWillMount() {
|
||||||
const announceInitialParseFunction = this.announceInitialParse.bind(this);
|
const self = this;
|
||||||
const announcePageParsedFunction = this.announcePageParsed.bind(this);
|
|
||||||
|
|
||||||
PDFJS.getDocument(this.props.fileBuffer).then(function(pdfDocument) { // eslint-disable-line no-undef
|
PDFJS.getDocument(this.props.fileBuffer).then(function(pdfDocument) { // eslint-disable-line no-undef
|
||||||
// console.debug(pdfDocument);
|
// console.debug(pdfDocument);
|
||||||
announceInitialParseFunction(pdfDocument);
|
pdfDocument.getMetadata().then(function(metadata) {
|
||||||
|
// console.debug(metadata);
|
||||||
|
self.metadataParsed(metadata);
|
||||||
|
});
|
||||||
|
self.documentParsed(pdfDocument);
|
||||||
for (var j = 1; j <= pdfDocument.numPages; j++) {
|
for (var j = 1; j <= pdfDocument.numPages; j++) {
|
||||||
pdfDocument.getPage(j).then(function(page) {
|
pdfDocument.getPage(j).then(function(page) {
|
||||||
|
// console.debug(page);
|
||||||
var scale = 1.0;
|
var scale = 1.0;
|
||||||
var viewport = page.getViewport(scale);
|
var viewport = page.getViewport(scale);
|
||||||
|
|
||||||
// pdfDocument.getMetadata().then(function(data) {
|
|
||||||
// console.debug(data);
|
|
||||||
// });
|
|
||||||
page.getTextContent().then(function(textContent) {
|
page.getTextContent().then(function(textContent) {
|
||||||
// console.debug(textContent);
|
// console.debug(textContent);
|
||||||
const textItems = textContent.items.map(function(item) {
|
const textItems = textContent.items.map(function(item) {
|
||||||
@ -116,7 +143,7 @@ export default class LoadingView extends React.Component {
|
|||||||
font: item.fontName
|
font: item.fontName
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
announcePageParsedFunction(page.pageIndex, textItems);
|
self.pageParsed(page.pageIndex, textItems);
|
||||||
});
|
});
|
||||||
page.getOperatorList().then(function() {
|
page.getOperatorList().then(function() {
|
||||||
// do nothing... this is only for triggering the font retrieval
|
// do nothing... this is only for triggering the font retrieval
|
||||||
@ -127,10 +154,10 @@ export default class LoadingView extends React.Component {
|
|||||||
}
|
}
|
||||||
|
|
||||||
render() {
|
render() {
|
||||||
const {pages, progress} = this.state;
|
const {pages, fontMap, metadata, progress} = this.state;
|
||||||
const percentDone = getPercentDone(progress);
|
const percentDone = getPercentDone(progress);
|
||||||
if (percentDone == 100) {
|
if (percentDone == 100) {
|
||||||
this.props.storePdfPagesFunction(pages, this.state.fontMap);
|
this.props.storePdfPagesFunction(metadata, fontMap, pages);
|
||||||
}
|
}
|
||||||
const stageItems = progress.stages.filter((elem, i) => i <= progress.currentStage).map((stage, i) => {
|
const stageItems = progress.stages.filter((elem, i) => i <= progress.currentStage).map((stage, i) => {
|
||||||
const progressDetails = stage.steps ? stage.stepsDone + ' / ' + stage.steps : '';
|
const progressDetails = stage.steps ? stage.stepsDone + ' / ' + stage.steps : '';
|
||||||
@ -193,10 +220,10 @@ class Progress {
|
|||||||
|
|
||||||
class ProgressStage {
|
class ProgressStage {
|
||||||
|
|
||||||
constructor(name) {
|
constructor(name, steps) {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
|
this.steps = steps ;
|
||||||
this.stepsDone = 0;
|
this.stepsDone = 0;
|
||||||
this.steps;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
isComplete() {
|
isComplete() {
|
||||||
|
@ -16,10 +16,11 @@ export default class TopBar extends React.Component {
|
|||||||
static propTypes = {
|
static propTypes = {
|
||||||
mainView: React.PropTypes.object.isRequired,
|
mainView: React.PropTypes.object.isRequired,
|
||||||
switchMainViewFunction: React.PropTypes.func.isRequired,
|
switchMainViewFunction: React.PropTypes.func.isRequired,
|
||||||
|
title: React.PropTypes.string.isRequired,
|
||||||
};
|
};
|
||||||
|
|
||||||
render() {
|
render() {
|
||||||
const {mainView, switchMainViewFunction} = this.props;
|
const {mainView, switchMainViewFunction, title} = this.props;
|
||||||
const aboutPopover = (
|
const aboutPopover = (
|
||||||
<Popover id="popover-trigger-click-root-close" title={ `About PDF to Markdown Converter - ${ process.env.version }` }>
|
<Popover id="popover-trigger-click-root-close" title={ `About PDF to Markdown Converter - ${ process.env.version }` }>
|
||||||
<p>
|
<p>
|
||||||
@ -61,6 +62,11 @@ export default class TopBar extends React.Component {
|
|||||||
</NavItem>
|
</NavItem>
|
||||||
</Nav> }
|
</Nav> }
|
||||||
</Navbar.Header>
|
</Navbar.Header>
|
||||||
|
<Navbar.Collapse>
|
||||||
|
<Navbar.Text pullRight={ true }>
|
||||||
|
{ title }
|
||||||
|
</Navbar.Text>
|
||||||
|
</Navbar.Collapse>
|
||||||
</Navbar>
|
</Navbar>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -24,8 +24,9 @@ export default class AppState {
|
|||||||
this.renderFunction = options.renderFunction;
|
this.renderFunction = options.renderFunction;
|
||||||
this.mainView = View.UPLOAD;
|
this.mainView = View.UPLOAD;
|
||||||
this.fileBuffer;
|
this.fileBuffer;
|
||||||
this.pages = [];
|
this.metadata;
|
||||||
this.fontMap;
|
this.fontMap;
|
||||||
|
this.pages = [];
|
||||||
this.transformations = [
|
this.transformations = [
|
||||||
new CalculateGlobalStats(),
|
new CalculateGlobalStats(),
|
||||||
new CompactLines(),
|
new CompactLines(),
|
||||||
@ -63,9 +64,10 @@ export default class AppState {
|
|||||||
this.render()
|
this.render()
|
||||||
}
|
}
|
||||||
|
|
||||||
storePdfPages(pages, fontMap) {
|
storePdfPages(metadata, fontMap, pages) {
|
||||||
this.pages = pages;
|
this.metadata = metadata;
|
||||||
this.fontMap = fontMap;
|
this.fontMap = fontMap;
|
||||||
|
this.pages = pages;
|
||||||
this.fileBuffer = null;
|
this.fileBuffer = null;
|
||||||
this.mainView = View.RESULT;
|
this.mainView = View.RESULT;
|
||||||
this.render();
|
this.render();
|
||||||
|
17
src/javascript/models/Metadata.jsx
Normal file
17
src/javascript/models/Metadata.jsx
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
// Metadata of the PDF document
|
||||||
|
export default class Metadata {
|
||||||
|
|
||||||
|
constructor(originalMetadata) {
|
||||||
|
if (originalMetadata.metadata) {
|
||||||
|
this.title = originalMetadata.metadata.get('dc:title');
|
||||||
|
this.creator = originalMetadata.metadata.get('xap:creatortool')
|
||||||
|
this.producer = originalMetadata.metadata.get('pdf:producer')
|
||||||
|
} else {
|
||||||
|
this.title = originalMetadata.info.Title;
|
||||||
|
this.author = originalMetadata.info.Author;
|
||||||
|
this.creator = originalMetadata.info.Creator;
|
||||||
|
this.producer = originalMetadata.info.Producer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user