diff --git a/.gitignore b/.gitignore index 2f2e27e..23b8d3d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ node_modules/ -dist/ -npm-debug.log \ No newline at end of file +build/ +npm-debug.log diff --git a/package.json b/package.json index 1d81d88..84f8f61 100644 --- a/package.json +++ b/package.json @@ -1,11 +1,12 @@ { "name": "pdf-to-markdown", "version": "0.0.1", - "description": "A PDF to Markdown converter", + "description": "A PDF to Markdown Converter", "main": "main.js", "scripts": { "watch": "webpack -d --watch", - "build": "webpack" + "build": "webpack", + "lint": "eslint . --ext .js --ext .jsx --cache" }, "keywords": [ "PDF", @@ -13,15 +14,20 @@ "Converter" ], "author": "Johannes Zillmann", - "license": "ISC", + "license": "Apache-2.0", "repository": { "type": "git", "url": "https://github.com/jzillmann/pdf-to-markdown" }, "dependencies": { + "bootstrap": "^3.3.7", + "enumify": "^1.0.4", "pdfjs-dist": "^1.6.317", - "vue": "^2.0.5", - "vue-material": "^0.3.3" + "react": "^15.3.2", + "react-bootstrap": "^0.30.3", + "react-dom": "^15.3.2", + "react-dropzone": "^3.6.0", + "react-icons": "^2.2.1" }, "devDependencies": { "babel-core": "^6.18.2", @@ -29,13 +35,18 @@ "babel-loader": "^6.2.7", "babel-plugin-transform-runtime": "^6.15.0", "babel-preset-es2015": "^6.18.0", + "babel-preset-react": "^6.16.0", "babel-preset-stage-0": "^6.16.0", + "copy-webpack-plugin": "^4.0.1", "css-loader": "^0.25.0", + "esformatter-jsx": "^7.0.1", + "eslint": "^3.7.0", + "eslint-plugin-react": "^6.3.0", + "extract-text-webpack-plugin": "^1.0.1", "file-loader": "^0.9.0", "html-webpack-plugin": "^2.24.1", - "sass-loader": "^4.0.2", + "style-loader": "^0.13.1", "url-loader": "^0.5.7", - "vue-loader": "^9.8.1", "webpack": "^1.13.3" } } diff --git a/src/App.vue b/src/App.vue deleted file mode 100644 index 936a0f1..0000000 --- a/src/App.vue +++ /dev/null @@ -1,47 +0,0 @@ - - - - - diff --git a/src/assets/logo.png b/src/assets/logo.png deleted file mode 100644 index 
f3d2503..0000000 Binary files a/src/assets/logo.png and /dev/null differ diff --git a/src/components/Dropzone.vue b/src/components/Dropzone.vue deleted file mode 100644 index ccd5f62..0000000 --- a/src/components/Dropzone.vue +++ /dev/null @@ -1,174 +0,0 @@ - - - - - - diff --git a/src/components/Hello.vue b/src/components/Hello.vue deleted file mode 100644 index d0b7cbb..0000000 --- a/src/components/Hello.vue +++ /dev/null @@ -1,47 +0,0 @@ - - - - - - diff --git a/src/index.html b/src/index.html index bf272c8..dbf002d 100644 --- a/src/index.html +++ b/src/index.html @@ -1,11 +1,12 @@ - - - PDF to Markdown - - -
- - - + + + PDF to Markdown + + + + +
+ + \ No newline at end of file diff --git a/src/javascript/components/App.jsx b/src/javascript/components/App.jsx new file mode 100644 index 0000000..2eaa49a --- /dev/null +++ b/src/javascript/components/App.jsx @@ -0,0 +1,46 @@ +import React from 'react'; + +import Grid from 'react-bootstrap/lib/Grid' + +import TopBar from './TopBar.jsx'; +import { View } from '../models/AppState.jsx'; +import PdfUploadView from './PdfUploadView.jsx'; +import LoadingView from './LoadingView.jsx'; +import PdfView from './PdfView.jsx'; + +export default class App extends React.Component { + + static propTypes = { + appState: React.PropTypes.object.isRequired, + }; + + render() { + console.debug(this.props.appState); + + var mainView; + switch (this.props.appState.mainView) { + case View.UPLOAD: + mainView = + break; + case View.LOADING: + mainView = + break; + case View.PDF_VIEW: + mainView = + break; + } + + return ( +
+ + +
+ { mainView } +
+
+
+ ); + } +} + + diff --git a/src/javascript/components/AppLogo.jsx b/src/javascript/components/AppLogo.jsx new file mode 100644 index 0000000..3b7aa34 --- /dev/null +++ b/src/javascript/components/AppLogo.jsx @@ -0,0 +1,28 @@ +import React, { Component } from 'react'; +import FaFilePdfO from 'react-icons/lib/fa/file-pdf-o' + +export default class AppLogo extends Component { + + static propTypes = { + onClick: React.PropTypes.func, + }; + + constructor(props, context) { + super(props, context); + this.handleClick = this.handleClick.bind(this); + } + + handleClick(e) { + e.preventDefault(); + this.props.onClick(e); + } + + + + render() { + return ( + + PDF To Markdown Converter + ); + } +} diff --git a/src/javascript/components/LoadingView.jsx b/src/javascript/components/LoadingView.jsx new file mode 100644 index 0000000..7dd442e --- /dev/null +++ b/src/javascript/components/LoadingView.jsx @@ -0,0 +1,22 @@ +import React from 'react'; + +import Spinner from './lib/Spinner.jsx'; + +export default class LoadingView extends React.Component { + + render() { + return ( +
+
+
+
+
+ +
+
+
+ Uploading and parsing PDF... +
+
); + } +} \ No newline at end of file diff --git a/src/javascript/components/PdfPageView.jsx b/src/javascript/components/PdfPageView.jsx new file mode 100644 index 0000000..fa6e20d --- /dev/null +++ b/src/javascript/components/PdfPageView.jsx @@ -0,0 +1,66 @@ +import React from 'react'; + +import Table from 'react-bootstrap/lib/Table' + +export default class PdfPageView extends React.Component { + + static propTypes = { + pdfPage: React.PropTypes.object.isRequired, + }; + + render() { + const header = "Page " + this.props.pdfPage.index; + return ( +
+

{ header }

+ + + + + + + + + + + + + { this.props.pdfPage.textItems.map((textItem, i) => + + + + + + + + ) } + +
+ # + + Text + + X + + Y + + Width + + Height +
+ { i } + + { textItem.text } + + { textItem.x } + + { textItem.y } + + { textItem.width } + + { textItem.height } +
+
+ ); + } +} \ No newline at end of file diff --git a/src/javascript/components/PdfUploadView.jsx b/src/javascript/components/PdfUploadView.jsx new file mode 100644 index 0000000..451272e --- /dev/null +++ b/src/javascript/components/PdfUploadView.jsx @@ -0,0 +1,47 @@ +import React from 'react'; + +import Dropzone from 'react-dropzone' +import FaCloudUpload from 'react-icons/lib/fa/cloud-upload' + +export default class PdfUploadView extends React.Component { + + static propTypes = { + uploadPdfFunction: React.PropTypes.func.isRequired, + }; + + constructor(props) { + super(props); + this.state = { + uploadPdfFunction: props.uploadPdfFunction, + }; + } + + onDrop(files) { + console.debug(files.length); + if (files.length > 1) { + alert(`Maximum one file allowed to upload, but not ${files.length}!`) + return + } + const reader = new FileReader(); + const uploadFunction = this.state.uploadPdfFunction; + reader.onload = (evt) => { + const fileBuffer = evt.target.result; + uploadFunction(fileBuffer); + }; + reader.readAsArrayBuffer(files[0]); + } + + render() { + return ( +
+ +
+

Drop your PDF file here!

+
+

+
+
+ ); + } + +} \ No newline at end of file diff --git a/src/javascript/components/PdfView.jsx b/src/javascript/components/PdfView.jsx new file mode 100644 index 0000000..f416d16 --- /dev/null +++ b/src/javascript/components/PdfView.jsx @@ -0,0 +1,25 @@ +import React from 'react'; + +import PdfPageView from './PdfPageView.jsx'; + +// A view which displays the TextItems of multiple PdfPages +export default class PdfView extends React.Component { + + static propTypes = { + pdfPages: React.PropTypes.array.isRequired, + }; + + render() { + console.debug(this.props.pdfPages); + const header = "Parsed " + this.props.pdfPages.length + " pages!" + return ( +
+
+ { header } +
+
+ { this.props.pdfPages.map((page) => ) } +
+ ); + } +} \ No newline at end of file diff --git a/src/javascript/components/TopBar.jsx b/src/javascript/components/TopBar.jsx new file mode 100644 index 0000000..0320a12 --- /dev/null +++ b/src/javascript/components/TopBar.jsx @@ -0,0 +1,49 @@ +import React from 'react'; + +import Navbar from 'react-bootstrap/lib/Navbar' +import MenuItem from 'react-bootstrap/lib/MenuItem' +import Dropdown from 'react-bootstrap/lib/Dropdown' +import Popover from 'react-bootstrap/lib/Popover' +import OverlayTrigger from 'react-bootstrap/lib/OverlayTrigger' + +import AppLogo from './AppLogo.jsx'; + +export default class TopBar extends React.Component { + + render() { + + const aboutPopover = ( + +

+ PDF to Markdown Converter will convert your uploaded PDF to Markdown format. +

+
+        );
+
+        return (
+
+
+
+
+
+
+
+            Github
+
+
+            About
+
+
+
+
+
+
+
+        );
+    }
+
+}
\ No newline at end of file
diff --git a/src/javascript/components/lib/Spinner.jsx b/src/javascript/components/lib/Spinner.jsx
new file mode 100644
index 0000000..23d6298
--- /dev/null
+++ b/src/javascript/components/lib/Spinner.jsx
@@ -0,0 +1,209 @@
+import React from 'react';
+
+// Spinner like loading indicator
+export default class Spinner extends React.Component {
+    render() {
+        return (
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+        );
+    }
+}
diff --git a/src/javascript/functions/pdfToTextItems.jsx b/src/javascript/functions/pdfToTextItems.jsx
new file mode 100644
index 0000000..81ab8fc
--- /dev/null
+++ b/src/javascript/functions/pdfToTextItems.jsx
@@ -0,0 +1,37 @@
+import pdfjs from 'pdfjs-dist';
+
+import AppState from '../models/AppState.jsx';
+import TextItem from '../models/TextItem.jsx';
+
+// Parses the PDF in fileBuffer with pdf.js and reports the result to appState:
+// setPageCount(numPages) is called once the document is loaded, then
+// setPdfPage(pageIndex, textItems) once per page as its text content arrives.
+// Returns nothing; all results are delivered through appState.
+export function pdfToTextItemsAsync(fileBuffer:ArrayBuffer, appState:AppState) {
+    // NOTE(review): PDFJS is assumed to be the global registered by the pdfjs-dist import above - confirm.
+    PDFJS.getDocument(fileBuffer).then(function(pdfDocument) {
+        const numPages = pdfDocument.numPages;
+        console.log('Number of pages: ' + numPages);
+        appState.setPageCount(numPages);
+        // getPage() takes 1-based page numbers (1..numPages); asking for page 0
+        // only produces a rejected promise.
+        for (let pageNumber = 1; pageNumber <= numPages; pageNumber++) {
+            pdfDocument.getPage(pageNumber).then(function(page) {
+                page.getTextContent().then(function(textContent) {
+                    const textItems = textContent.items.map(function(item) {
+                        const transform = item.transform;
+                        return new TextItem({
+                            x: transform[4],
+                            y: transform[5],
+                            width: item.width,
+                            height: item.height,
+                            text: item.str
+                        });
+                    });
+                    // NOTE(review): page.pageIndex is presumably 0-based, matching the slots created by setPageCount - confirm.
+                    appState.setPdfPage(page.pageIndex, textItems);
+                });
+            });
+        }
+    });
+}
\ No newline at end of file
diff --git a/src/javascript/index.jsx b/src/javascript/index.jsx
new file mode 100644
index 0000000..10d02c7
--- /dev/null
+++ b/src/javascript/index.jsx
@@ -0,0 +1,17 @@
+import React from 'react';
+import ReactDOM from 'react-dom';
+
+import 'bootstrap/dist/css/bootstrap.css';
+
+import App from './components/App.jsx';
+import AppState from './models/AppState.jsx';
+
+function render(appState) {
+    ReactDOM.render(, document.getElementById('main'));
+}
+
+const appState = new AppState({
+    renderFunction: render,
+});
+
+appState.render()
diff --git a/src/javascript/models/AppState.jsx b/src/javascript/models/AppState.jsx
new file mode 100644
index 0000000..15a9c9b
--- /dev/null
+++ b/src/javascript/models/AppState.jsx
@@ -0,0 +1,69 @@
+import { Enum } from 'enumify';
+
+import { pdfToTextItemsAsync } from '../functions/pdfToTextItems.jsx'
+import PdfPage from './PdfPage.jsx';
+
+// Holds the state of the Application and triggers a re-render through the
+// injected renderFunction whenever the main view changes.
+export default class AppState {
+
+    // options.renderFunction: function invoked with this AppState by render().
+    constructor(options) {
+        this.renderFunction = options.renderFunction;
+        this.mainView = View.UPLOAD;
+        this.pagesToUpload = 0;
+        this.uploadedPages = 0;
+        this.pdfPages = [];
+
+        // bind functions to this instance
+        this.render = this.render.bind(this);
+        this.uploadPdf = this.uploadPdf.bind(this);
+        this.setPageCount = this.setPageCount.bind(this);
+        this.setPdfPage = this.setPdfPage.bind(this);
+    }
+
+    // Hands the current state to the render function.
+    render() {
+        this.renderFunction(this);
+    }
+
+    // Starts parsing the given PDF buffer and switches to the loading view.
+    uploadPdf(fileBuffer:ArrayBuffer) {
+        pdfToTextItemsAsync(fileBuffer, this);
+        this.mainView = View.LOADING;
+        this.render();
+    }
+
+    // Announces how many pages are about to arrive and creates one empty PdfPage
+    // per page. Progress left over from a previous upload is discarded, so pages
+    // no longer pile up and the completion check in setPdfPage stays reachable.
+    setPageCount(numPages) {
+        this.pagesToUpload = numPages;
+        this.uploadedPages = 0;
+        this.pdfPages = [];
+        for (let i = 0; i < numPages; i++) {
+            this.pdfPages.push(new PdfPage({
+                index: i
+            }));
+        }
+    }
+
+    // Stores the text items of one page; once every announced page has arrived
+    // the PDF view is shown.
+    setPdfPage(pageIndex, textItems) {
+        console.debug("Upload " + pageIndex);
+        this.pdfPages[pageIndex].textItems = textItems;
+        this.uploadedPages++;
+        if (this.uploadedPages === this.pagesToUpload) {
+            console.debug("Fin");
+            this.mainView = View.PDF_VIEW;
+            this.render();
+        }
+    }
+
+}
+
+// The main views the application can show.
+export class View extends Enum {
+}
+View.initEnum(['UPLOAD', 'LOADING', 'PDF_VIEW'])
\ No newline at end of file
diff --git a/src/models/Page.js b/src/javascript/models/PdfPage.jsx
similarity index 52%
rename from src/models/Page.js rename to src/javascript/models/PdfPage.jsx index 9a8cdcf..36a163b 100644 --- a/src/models/Page.js +++ b/src/javascript/models/PdfPage.jsx @@ -1,4 +1,5 @@ -export default class Page { +// A page which holds TextItems displayable via PdfPageView +export default class PdfPage { constructor(options) { this.index = options.index; diff --git a/src/models/TextItem.js b/src/javascript/models/TextItem.jsx similarity index 100% rename from src/models/TextItem.js rename to src/javascript/models/TextItem.jsx diff --git a/src/main.js b/src/main.js deleted file mode 100644 index 6578802..0000000 --- a/src/main.js +++ /dev/null @@ -1,11 +0,0 @@ -import Vue from 'vue' -import App from './App' - -/* eslint-disable no-new */ -new Vue({ - el: '#app', - template: '', - components: { - App - } -}) diff --git a/webpack.config.js b/webpack.config.js index 460a35c..3f19b74 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -1,15 +1,18 @@ -var path = require('path') -var sourceDir = path.resolve(__dirname, 'src'); - +var path = require('path'); +var webpack = require('webpack'); var HtmlWebpackPlugin = require('html-webpack-plugin'); +var CopyWebpackPlugin = require('copy-webpack-plugin'); + +var SOURCE_DIR = path.resolve(__dirname, 'src'); +var BUILD_DIR = path.resolve(__dirname, 'build'); +var NODEMODULES_DIR = path.resolve(__dirname, 'node_modules'); +var JAVASCRIPT_DIR = SOURCE_DIR + '/javascript'; module.exports = { - entry: './src/main.js', + entry: JAVASCRIPT_DIR + '/index.jsx', output: { - // To the `dist` folder - path: './dist', - // With the filename `build.js` so it's dist/build.js - filename: 'build.js' + path: BUILD_DIR, + filename: 'bundle.js' }, resolve: { extensions: ['', '.js', '.vue'], @@ -29,34 +32,55 @@ module.exports = { loaders: [ { // Ask webpack to check: If this file ends with .js, then apply some transforms - test: /\.js$/, + test: /\.jsx?$/, // Transform it with babel loader: 'babel', // don't transform node_modules folder 
(which don't need to be compiled) - exclude: /node_modules/ + include: [JAVASCRIPT_DIR], + query: { + plugins: ['transform-runtime'], + presets: ['es2015', 'stage-0', 'react'], + } }, { - test: /\.vue$/, - loader: 'vue' - }, - { - test: /\.scss$/, - loaders: ["style", "css", "sass"] + test: /\.css$/, + loader: "style-loader!css-loader" }, { test: /\.png$/, loader: "url-loader?limit=100000" }, + { + test: /\.jpg$/, + loader: "file-loader" + }, + { + test: /\.(woff|woff2)(\?v=\d+\.\d+\.\d+)?$/, + loader: 'url?limit=10000&mimetype=application/font-woff' + }, + { + test: /\.ttf(\?v=\d+\.\d+\.\d+)?$/, + loader: 'url?limit=10000&mimetype=application/octet-stream' + }, + { + test: /\.eot(\?v=\d+\.\d+\.\d+)?$/, + loader: 'file' + }, + { + test: /\.svg(\?v=\d+\.\d+\.\d+)?$/, + loader: 'url?limit=10000&mimetype=image/svg+xml' + } ] }, - vue: { - loaders: { - js: 'babel' - } - }, plugins: [ new HtmlWebpackPlugin({ - template: sourceDir + '/index.html' - }) + template: SOURCE_DIR + '/index.html' + }), + new CopyWebpackPlugin([ + { + from: NODEMODULES_DIR + '/pdfjs-dist/build/pdf.worker.js', + to: 'bundle.worker.js' + }, + ]) ] } \ No newline at end of file