Load Example PDF

This commit is contained in:
Johannes Zillmann 2021-01-06 22:40:09 +01:00
parent 66c6e7577f
commit 42f54e6b38
7 changed files with 60 additions and 26 deletions

View File

@ -10,13 +10,25 @@ export default class PdfParser {
this.pdfjs = pdfjs; this.pdfjs = pdfjs;
} }
async parse(data: Uint8Array): Promise<ParseResult> { async parseBytes(data: Uint8Array): Promise<ParseResult> {
return this.pdfjs return this.parse(this.params({ data }));
.getDocument({ }
data,
/**
 * Parses the PDF found at `url`.
 *
 * The `{ url }` data source is combined with the shared default parameters by
 * `params`, and the combined object is handed to `parse`.
 *
 * @param url - Location of the PDF document to load.
 * @returns The promise produced by `parse` for that document.
 */
async parseUrl(url: string): Promise<ParseResult> {
  const request = this.params({ url });
  return this.parse(request);
}
private params(dataSourceParams: object): object {
const defaultParams = {
cMapUrl: 'cmaps/', cMapUrl: 'cmaps/',
cMapPacked: true, cMapPacked: true,
}) };
return { ...defaultParams, ...dataSourceParams };
}
async parse(parameter: object): Promise<ParseResult> {
return this.pdfjs
.getDocument(parameter)
.promise.then((pdfDocument) => { .promise.then((pdfDocument) => {
return Promise.all([pdfDocument.getMetadata(), this.extractPagesSequentially(pdfDocument)]); return Promise.all([pdfDocument.getMetadata(), this.extractPagesSequentially(pdfDocument)]);
}) })

View File

@ -6,7 +6,7 @@ const parser = new PdfParser(pdfjs);
test('testIt', async () => { test('testIt', async () => {
const data = fs.readFileSync('../examples/ExamplePdf.pdf', null); const data = fs.readFileSync('../examples/ExamplePdf.pdf', null);
const result = await parser.parse(data); const result = await parser.parseBytes(data);
expect(result.metadata.title()).toEqual('ExamplePdf'); expect(result.metadata.title()).toEqual('ExamplePdf');
expect(result.metadata.author()).toEqual('Johannes Zillmann'); expect(result.metadata.author()).toEqual('Johannes Zillmann');
expect(result.pages.length).toBe(7); expect(result.pages.length).toBe(7);

BIN
ui/public/ExamplePdf.pdf Normal file

Binary file not shown.

View File

@ -1,5 +1,5 @@
<script> <script>
import type ParsedPageItem from 'pdf-to-markdown-core/lib/src/ParsedPageItem'; import type ParsedPageItem from '@core/ParsedPageItem';
const headers = ['ID', 'Text', 'Font', 'Direction', 'Width', 'Height', 'Transform']; const headers = ['ID', 'Text', 'Font', 'Direction', 'Width', 'Height', 'Transform'];
export let items: ParsedPageItem[]; export let items: ParsedPageItem[];

View File

@ -1,19 +1,24 @@
<script> <script>
import Dropzone from 'svelte-file-dropzone'; import Dropzone from 'svelte-file-dropzone';
import { Download } from 'svelte-hero-icons'; import { Download } from 'svelte-hero-icons';
import { processUpload } from './store'; import { processUpload, loadExample } from './store';
let specifiedFile: File; let specifiedFileName: string;
let dragover = false; let dragover = false;
let upload: Promise<any>; let upload: Promise<any>;
let rejectionError; let rejectionError: string;
/**
 * Click handler for the "Load Example" option.
 *
 * Clears any previous rejection message, records the example's file name so the
 * parsing/failure status lines can display it, and stores the promise returned
 * by `loadExample()` in `upload` for the `{#await upload}` block.
 */
function handleExampleLoad() {
  const exampleFileName = 'ExamplePdf.pdf';
  rejectionError = undefined;
  specifiedFileName = exampleFileName;
  upload = loadExample();
}
function handleFilesSelect(e) { function handleFilesSelect(e) {
rejectionError = undefined; rejectionError = undefined;
const { acceptedFiles, fileRejections } = e.detail; const { acceptedFiles, fileRejections } = e.detail;
console.log(e.detail);
if (acceptedFiles.length === 1) { if (acceptedFiles.length === 1) {
specifiedFile = acceptedFiles[0]; const specifiedFile = acceptedFiles[0];
specifiedFileName = specifiedFile.name;
upload = processUpload(specifiedFile); upload = processUpload(specifiedFile);
} }
if (fileRejections.length > 1) { if (fileRejections.length > 1) {
@ -23,6 +28,15 @@
} }
</script> </script>
<!-- Options -->
<div class="mb-0.5 flex flex-row-reverse space-x-2 space-x-reverse text-sm items-center">
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
Load Example
</div>
<div class="py-0.5 px-1 border-2 border-gray-50 hover:border-blue-600 cursor-pointer">Debug</div>
</div>
<!-- Upload Box -->
<div class="pb-5 border-2 border-dashed border-gray-400 hover:border-blue-800" class:dragover> <div class="pb-5 border-2 border-dashed border-gray-400 hover:border-blue-800" class:dragover>
<Dropzone <Dropzone
on:drop={handleFilesSelect} on:drop={handleFilesSelect}
@ -52,9 +66,9 @@
<div class="mt-5 text-center font-bold"> <div class="mt-5 text-center font-bold">
{#await upload} {#await upload}
<div>Parsing {specifiedFile.name}...</div> <div>Parsing {specifiedFileName}...</div>
{:catch error} {:catch error}
<div class="text-red-700">Failed to parse '{specifiedFile.name}': {error.message}</div> <div class="text-red-700">Failed to parse '{specifiedFileName}': {error.message}</div>
{/await} {/await}
{#if rejectionError} {#if rejectionError}
<div class="text-red-700">{rejectionError}</div> <div class="text-red-700">{rejectionError}</div>

View File

@ -12,7 +12,11 @@ pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker;
const parser = pdfParser(pdfjs); const parser = pdfParser(pdfjs);
export function processUpload(file: File): Promise<ParseResult> { export async function loadExample(): Promise<ParseResult> {
return parsePdf(parser.parseUrl('/ExamplePdf.pdf'));
}
export async function processUpload(file: File): Promise<ParseResult> {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const reader = new FileReader(); const reader = new FileReader();
reader.onerror = reject; reader.onerror = reject;
@ -20,12 +24,14 @@ export function processUpload(file: File): Promise<ParseResult> {
resolve(reader.result as ArrayBuffer); resolve(reader.result as ArrayBuffer);
}; };
reader.readAsArrayBuffer(file); reader.readAsArrayBuffer(file);
}) }).then((buffer) => {
.then((buffer) => { const data = new Uint8Array(buffer as ArrayBuffer);
const uintArray = new Uint8Array(buffer as ArrayBuffer); return parsePdf(parser.parseBytes(data));
return parser.parse(uintArray); });
}) }
.then((result) => {
async function parsePdf(parsePromise: Promise<ParseResult>): Promise<ParseResult> {
return parsePromise.then((result) => {
parseResult.set(result); parseResult.set(result);
return result; return result;
}); });

View File

@ -9,6 +9,8 @@ module.exports = {
theme: { theme: {
extend: {}, extend: {},
}, },
variants: {}, variants: {
extend: {},
},
plugins: [], plugins: [],
}; };