mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-01-01 03:09:01 +01:00
Load Example PDF
This commit is contained in:
parent
66c6e7577f
commit
42f54e6b38
@ -10,13 +10,25 @@ export default class PdfParser {
|
||||
this.pdfjs = pdfjs;
|
||||
}
|
||||
|
||||
async parse(data: Uint8Array): Promise<ParseResult> {
|
||||
async parseBytes(data: Uint8Array): Promise<ParseResult> {
|
||||
return this.parse(this.params({ data }));
|
||||
}
|
||||
|
||||
async parseUrl(url: string): Promise<ParseResult> {
|
||||
return this.parse(this.params({ url }));
|
||||
}
|
||||
|
||||
private params(dataSourceParams: object): object {
|
||||
const defaultParams = {
|
||||
cMapUrl: 'cmaps/',
|
||||
cMapPacked: true,
|
||||
};
|
||||
return { ...defaultParams, ...dataSourceParams };
|
||||
}
|
||||
|
||||
async parse(parameter: object): Promise<ParseResult> {
|
||||
return this.pdfjs
|
||||
.getDocument({
|
||||
data,
|
||||
cMapUrl: 'cmaps/',
|
||||
cMapPacked: true,
|
||||
})
|
||||
.getDocument(parameter)
|
||||
.promise.then((pdfDocument) => {
|
||||
return Promise.all([pdfDocument.getMetadata(), this.extractPagesSequentially(pdfDocument)]);
|
||||
})
|
||||
|
@ -6,7 +6,7 @@ const parser = new PdfParser(pdfjs);
|
||||
|
||||
test('testIt', async () => {
|
||||
const data = fs.readFileSync('../examples/ExamplePdf.pdf', null);
|
||||
const result = await parser.parse(data);
|
||||
const result = await parser.parseBytes(data);
|
||||
expect(result.metadata.title()).toEqual('ExamplePdf');
|
||||
expect(result.metadata.author()).toEqual('Johannes Zillmann');
|
||||
expect(result.pages.length).toBe(7);
|
||||
|
BIN
ui/public/ExamplePdf.pdf
Normal file
BIN
ui/public/ExamplePdf.pdf
Normal file
Binary file not shown.
@ -1,5 +1,5 @@
|
||||
<script>
|
||||
import type ParsedPageItem from 'pdf-to-markdown-core/lib/src/ParsedPageItem';
|
||||
import type ParsedPageItem from '@core/ParsedPageItem';
|
||||
|
||||
const headers = ['ID', 'Text', 'Font', 'Direction', 'Width', 'Height', 'Transform'];
|
||||
export let items: ParsedPageItem[];
|
||||
|
@ -1,19 +1,24 @@
|
||||
<script>
|
||||
import Dropzone from 'svelte-file-dropzone';
|
||||
import { Download } from 'svelte-hero-icons';
|
||||
import { processUpload } from './store';
|
||||
import { processUpload, loadExample } from './store';
|
||||
|
||||
let specifiedFile: File;
|
||||
let specifiedFileName: string;
|
||||
let dragover = false;
|
||||
let upload: Promise<any>;
|
||||
let rejectionError;
|
||||
let rejectionError: string;
|
||||
|
||||
function handleExampleLoad() {
|
||||
rejectionError = undefined;
|
||||
specifiedFileName = 'ExamplePdf.pdf';
|
||||
upload = loadExample();
|
||||
}
|
||||
function handleFilesSelect(e) {
|
||||
rejectionError = undefined;
|
||||
const { acceptedFiles, fileRejections } = e.detail;
|
||||
console.log(e.detail);
|
||||
if (acceptedFiles.length === 1) {
|
||||
specifiedFile = acceptedFiles[0];
|
||||
const specifiedFile = acceptedFiles[0];
|
||||
specifiedFileName = specifiedFile.name;
|
||||
upload = processUpload(specifiedFile);
|
||||
}
|
||||
if (fileRejections.length > 1) {
|
||||
@ -23,6 +28,15 @@
|
||||
}
|
||||
</script>
|
||||
|
||||
<!-- Options -->
|
||||
<div class="mb-0.5 flex flex-row-reverse space-x-2 space-x-reverse text-sm items-center">
|
||||
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
|
||||
Load Example
|
||||
</div>
|
||||
<div class="py-0.5 px-1 border-2 border-gray-50 hover:border-blue-600 cursor-pointer">Debug</div>
|
||||
</div>
|
||||
|
||||
<!-- Upload Box -->
|
||||
<div class="pb-5 border-2 border-dashed border-gray-400 hover:border-blue-800" class:dragover>
|
||||
<Dropzone
|
||||
on:drop={handleFilesSelect}
|
||||
@ -52,9 +66,9 @@
|
||||
|
||||
<div class="mt-5 text-center font-bold">
|
||||
{#await upload}
|
||||
<div>Parsing {specifiedFile.name}...</div>
|
||||
<div>Parsing {specifiedFileName}...</div>
|
||||
{:catch error}
|
||||
<div class="text-red-700">Failed to parse '{specifiedFile.name}': {error.message}</div>
|
||||
<div class="text-red-700">Failed to parse '{specifiedFileName}': {error.message}</div>
|
||||
{/await}
|
||||
{#if rejectionError}
|
||||
<div class="text-red-700">{rejectionError}</div>
|
||||
|
@ -12,7 +12,11 @@ pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker;
|
||||
|
||||
const parser = pdfParser(pdfjs);
|
||||
|
||||
export function processUpload(file: File): Promise<ParseResult> {
|
||||
export async function loadExample(): Promise<ParseResult> {
|
||||
return parsePdf(parser.parseUrl('/ExamplePdf.pdf'));
|
||||
}
|
||||
|
||||
export async function processUpload(file: File): Promise<ParseResult> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.onerror = reject;
|
||||
@ -20,13 +24,15 @@ export function processUpload(file: File): Promise<ParseResult> {
|
||||
resolve(reader.result as ArrayBuffer);
|
||||
};
|
||||
reader.readAsArrayBuffer(file);
|
||||
})
|
||||
.then((buffer) => {
|
||||
const uintArray = new Uint8Array(buffer as ArrayBuffer);
|
||||
return parser.parse(uintArray);
|
||||
})
|
||||
.then((result) => {
|
||||
parseResult.set(result);
|
||||
return result;
|
||||
});
|
||||
}).then((buffer) => {
|
||||
const data = new Uint8Array(buffer as ArrayBuffer);
|
||||
return parsePdf(parser.parseBytes(data));
|
||||
});
|
||||
}
|
||||
|
||||
async function parsePdf(parsePromise: Promise<ParseResult>): Promise<ParseResult> {
|
||||
return parsePromise.then((result) => {
|
||||
parseResult.set(result);
|
||||
return result;
|
||||
});
|
||||
}
|
||||
|
@ -9,6 +9,8 @@ module.exports = {
|
||||
theme: {
|
||||
extend: {},
|
||||
},
|
||||
variants: {},
|
||||
variants: {
|
||||
extend: {},
|
||||
},
|
||||
plugins: [],
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user