Load Example PDF

This commit is contained in:
Johannes Zillmann 2021-01-06 22:40:09 +01:00
parent 66c6e7577f
commit 42f54e6b38
7 changed files with 60 additions and 26 deletions

View File

@ -10,13 +10,25 @@ export default class PdfParser {
this.pdfjs = pdfjs;
}
async parse(data: Uint8Array): Promise<ParseResult> {
return this.pdfjs
.getDocument({
data,
/**
 * Parses a PDF supplied as raw bytes.
 *
 * The bytes are wrapped as `{ data }`, merged with the default parameters by
 * `params`, and the resulting object is passed on to `parse`.
 *
 * @param data - the PDF content as a byte array
 * @returns the promise produced by `parse`
 */
async parseBytes(data: Uint8Array): Promise<ParseResult> {
  const documentParams = this.params({ data });
  return this.parse(documentParams);
}
/**
 * Parses a PDF identified by a URL.
 *
 * The URL is wrapped as `{ url }`, merged with the default parameters by
 * `params`, and the resulting object is passed on to `parse`.
 *
 * @param url - location of the PDF to load
 * @returns the promise produced by `parse`
 */
async parseUrl(url: string): Promise<ParseResult> {
  const documentParams = this.params({ url });
  return this.parse(documentParams);
}
/**
 * Merges the default cMap settings with the given data-source parameters
 * and returns the combined object.
 *
 * @param dataSourceParams - the caller's parameters (e.g. `{ data }` or `{ url }`)
 * @returns a new object containing the defaults plus `dataSourceParams`
 */
private params(dataSourceParams: object): object {
const defaultParams = {
cMapUrl: 'cmaps/',
cMapPacked: true,
})
};
// Spread order matters: keys in dataSourceParams override the defaults.
return { ...defaultParams, ...dataSourceParams };
}
async parse(parameter: object): Promise<ParseResult> {
return this.pdfjs
.getDocument(parameter)
.promise.then((pdfDocument) => {
return Promise.all([pdfDocument.getMetadata(), this.extractPagesSequentially(pdfDocument)]);
})

View File

@ -6,7 +6,7 @@ const parser = new PdfParser(pdfjs);
test('testIt', async () => {
const data = fs.readFileSync('../examples/ExamplePdf.pdf', null);
const result = await parser.parse(data);
const result = await parser.parseBytes(data);
expect(result.metadata.title()).toEqual('ExamplePdf');
expect(result.metadata.author()).toEqual('Johannes Zillmann');
expect(result.pages.length).toBe(7);

BIN
ui/public/ExamplePdf.pdf Normal file

Binary file not shown.

View File

@ -1,5 +1,5 @@
<script>
import type ParsedPageItem from 'pdf-to-markdown-core/lib/src/ParsedPageItem';
import type ParsedPageItem from '@core/ParsedPageItem';
const headers = ['ID', 'Text', 'Font', 'Direction', 'Width', 'Height', 'Transform'];
export let items: ParsedPageItem[];

View File

@ -1,19 +1,24 @@
<script>
import Dropzone from 'svelte-file-dropzone';
import { Download } from 'svelte-hero-icons';
import { processUpload } from './store';
import { processUpload, loadExample } from './store';
let specifiedFile: File;
let specifiedFileName: string;
let dragover = false;
let upload: Promise<any>;
let rejectionError;
let rejectionError: string;
// Click handler for the "Load Example" option: clears any previous rejection
// message, records the example's file name for the status text, and stores
// the promise returned by loadExample() in `upload`.
function handleExampleLoad() {
rejectionError = undefined;
specifiedFileName = 'ExamplePdf.pdf';
upload = loadExample();
}
function handleFilesSelect(e) {
rejectionError = undefined;
const { acceptedFiles, fileRejections } = e.detail;
console.log(e.detail);
if (acceptedFiles.length === 1) {
specifiedFile = acceptedFiles[0];
const specifiedFile = acceptedFiles[0];
specifiedFileName = specifiedFile.name;
upload = processUpload(specifiedFile);
}
if (fileRejections.length > 1) {
@ -23,6 +28,15 @@
}
</script>
<!-- Options -->
<div class="mb-0.5 flex flex-row-reverse space-x-2 space-x-reverse text-sm items-center">
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
Load Example
</div>
<div class="py-0.5 px-1 border-2 border-gray-50 hover:border-blue-600 cursor-pointer">Debug</div>
</div>
<!-- Upload Box -->
<div class="pb-5 border-2 border-dashed border-gray-400 hover:border-blue-800" class:dragover>
<Dropzone
on:drop={handleFilesSelect}
@ -52,9 +66,9 @@
<div class="mt-5 text-center font-bold">
{#await upload}
<div>Parsing {specifiedFile.name}...</div>
<div>Parsing {specifiedFileName}...</div>
{:catch error}
<div class="text-red-700">Failed to parse '{specifiedFile.name}': {error.message}</div>
<div class="text-red-700">Failed to parse '{specifiedFileName}': {error.message}</div>
{/await}
{#if rejectionError}
<div class="text-red-700">{rejectionError}</div>

View File

@ -12,7 +12,11 @@ pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker;
const parser = pdfParser(pdfjs);
export function processUpload(file: File): Promise<ParseResult> {
/**
 * Loads the bundled example document.
 *
 * Asks the parser to parse `/ExamplePdf.pdf` by URL and hands the pending
 * parse promise to `parsePdf`.
 *
 * @returns the promise returned by `parsePdf`
 */
export async function loadExample(): Promise<ParseResult> {
  const exampleParse = parser.parseUrl('/ExamplePdf.pdf');
  return parsePdf(exampleParse);
}
export async function processUpload(file: File): Promise<ParseResult> {
return new Promise((resolve, reject) => {
const reader = new FileReader();
reader.onerror = reject;
@ -20,12 +24,14 @@ export function processUpload(file: File): Promise<ParseResult> {
resolve(reader.result as ArrayBuffer);
};
reader.readAsArrayBuffer(file);
})
.then((buffer) => {
const uintArray = new Uint8Array(buffer as ArrayBuffer);
return parser.parse(uintArray);
})
.then((result) => {
}).then((buffer) => {
const data = new Uint8Array(buffer as ArrayBuffer);
return parsePdf(parser.parseBytes(data));
});
}
async function parsePdf(parsePromise: Promise<ParseResult>): Promise<ParseResult> {
return parsePromise.then((result) => {
parseResult.set(result);
return result;
});

View File

@ -9,6 +9,8 @@ module.exports = {
theme: {
extend: {},
},
variants: {},
variants: {
extend: {},
},
plugins: [],
};