mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-06-25 12:01:45 +02:00
Load Example PDF
This commit is contained in:
parent
66c6e7577f
commit
42f54e6b38
@ -10,13 +10,25 @@ export default class PdfParser {
|
|||||||
this.pdfjs = pdfjs;
|
this.pdfjs = pdfjs;
|
||||||
}
|
}
|
||||||
|
|
||||||
async parse(data: Uint8Array): Promise<ParseResult> {
|
async parseBytes(data: Uint8Array): Promise<ParseResult> {
|
||||||
|
return this.parse(this.params({ data }));
|
||||||
|
}
|
||||||
|
|
||||||
|
async parseUrl(url: string): Promise<ParseResult> {
|
||||||
|
return this.parse(this.params({ url }));
|
||||||
|
}
|
||||||
|
|
||||||
|
private params(dataSourceParams: object): object {
|
||||||
|
const defaultParams = {
|
||||||
|
cMapUrl: 'cmaps/',
|
||||||
|
cMapPacked: true,
|
||||||
|
};
|
||||||
|
return { ...defaultParams, ...dataSourceParams };
|
||||||
|
}
|
||||||
|
|
||||||
|
async parse(parameter: object): Promise<ParseResult> {
|
||||||
return this.pdfjs
|
return this.pdfjs
|
||||||
.getDocument({
|
.getDocument(parameter)
|
||||||
data,
|
|
||||||
cMapUrl: 'cmaps/',
|
|
||||||
cMapPacked: true,
|
|
||||||
})
|
|
||||||
.promise.then((pdfDocument) => {
|
.promise.then((pdfDocument) => {
|
||||||
return Promise.all([pdfDocument.getMetadata(), this.extractPagesSequentially(pdfDocument)]);
|
return Promise.all([pdfDocument.getMetadata(), this.extractPagesSequentially(pdfDocument)]);
|
||||||
})
|
})
|
||||||
|
@ -6,7 +6,7 @@ const parser = new PdfParser(pdfjs);
|
|||||||
|
|
||||||
test('testIt', async () => {
|
test('testIt', async () => {
|
||||||
const data = fs.readFileSync('../examples/ExamplePdf.pdf', null);
|
const data = fs.readFileSync('../examples/ExamplePdf.pdf', null);
|
||||||
const result = await parser.parse(data);
|
const result = await parser.parseBytes(data);
|
||||||
expect(result.metadata.title()).toEqual('ExamplePdf');
|
expect(result.metadata.title()).toEqual('ExamplePdf');
|
||||||
expect(result.metadata.author()).toEqual('Johannes Zillmann');
|
expect(result.metadata.author()).toEqual('Johannes Zillmann');
|
||||||
expect(result.pages.length).toBe(7);
|
expect(result.pages.length).toBe(7);
|
||||||
|
BIN
ui/public/ExamplePdf.pdf
Normal file
BIN
ui/public/ExamplePdf.pdf
Normal file
Binary file not shown.
@ -1,5 +1,5 @@
|
|||||||
<script>
|
<script>
|
||||||
import type ParsedPageItem from 'pdf-to-markdown-core/lib/src/ParsedPageItem';
|
import type ParsedPageItem from '@core/ParsedPageItem';
|
||||||
|
|
||||||
const headers = ['ID', 'Text', 'Font', 'Direction', 'Width', 'Height', 'Transform'];
|
const headers = ['ID', 'Text', 'Font', 'Direction', 'Width', 'Height', 'Transform'];
|
||||||
export let items: ParsedPageItem[];
|
export let items: ParsedPageItem[];
|
||||||
|
@ -1,19 +1,24 @@
|
|||||||
<script>
|
<script>
|
||||||
import Dropzone from 'svelte-file-dropzone';
|
import Dropzone from 'svelte-file-dropzone';
|
||||||
import { Download } from 'svelte-hero-icons';
|
import { Download } from 'svelte-hero-icons';
|
||||||
import { processUpload } from './store';
|
import { processUpload, loadExample } from './store';
|
||||||
|
|
||||||
let specifiedFile: File;
|
let specifiedFileName: string;
|
||||||
let dragover = false;
|
let dragover = false;
|
||||||
let upload: Promise<any>;
|
let upload: Promise<any>;
|
||||||
let rejectionError;
|
let rejectionError: string;
|
||||||
|
|
||||||
|
function handleExampleLoad() {
|
||||||
|
rejectionError = undefined;
|
||||||
|
specifiedFileName = 'ExamplePdf.pdf';
|
||||||
|
upload = loadExample();
|
||||||
|
}
|
||||||
function handleFilesSelect(e) {
|
function handleFilesSelect(e) {
|
||||||
rejectionError = undefined;
|
rejectionError = undefined;
|
||||||
const { acceptedFiles, fileRejections } = e.detail;
|
const { acceptedFiles, fileRejections } = e.detail;
|
||||||
console.log(e.detail);
|
|
||||||
if (acceptedFiles.length === 1) {
|
if (acceptedFiles.length === 1) {
|
||||||
specifiedFile = acceptedFiles[0];
|
const specifiedFile = acceptedFiles[0];
|
||||||
|
specifiedFileName = specifiedFile.name;
|
||||||
upload = processUpload(specifiedFile);
|
upload = processUpload(specifiedFile);
|
||||||
}
|
}
|
||||||
if (fileRejections.length > 1) {
|
if (fileRejections.length > 1) {
|
||||||
@ -23,6 +28,15 @@
|
|||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
<!-- Options -->
|
||||||
|
<div class="mb-0.5 flex flex-row-reverse space-x-2 space-x-reverse text-sm items-center">
|
||||||
|
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
|
||||||
|
Load Example
|
||||||
|
</div>
|
||||||
|
<div class="py-0.5 px-1 border-2 border-gray-50 hover:border-blue-600 cursor-pointer">Debug</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Upload Box -->
|
||||||
<div class="pb-5 border-2 border-dashed border-gray-400 hover:border-blue-800" class:dragover>
|
<div class="pb-5 border-2 border-dashed border-gray-400 hover:border-blue-800" class:dragover>
|
||||||
<Dropzone
|
<Dropzone
|
||||||
on:drop={handleFilesSelect}
|
on:drop={handleFilesSelect}
|
||||||
@ -52,9 +66,9 @@
|
|||||||
|
|
||||||
<div class="mt-5 text-center font-bold">
|
<div class="mt-5 text-center font-bold">
|
||||||
{#await upload}
|
{#await upload}
|
||||||
<div>Parsing {specifiedFile.name}...</div>
|
<div>Parsing {specifiedFileName}...</div>
|
||||||
{:catch error}
|
{:catch error}
|
||||||
<div class="text-red-700">Failed to parse '{specifiedFile.name}': {error.message}</div>
|
<div class="text-red-700">Failed to parse '{specifiedFileName}': {error.message}</div>
|
||||||
{/await}
|
{/await}
|
||||||
{#if rejectionError}
|
{#if rejectionError}
|
||||||
<div class="text-red-700">{rejectionError}</div>
|
<div class="text-red-700">{rejectionError}</div>
|
||||||
|
@ -12,7 +12,11 @@ pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker;
|
|||||||
|
|
||||||
const parser = pdfParser(pdfjs);
|
const parser = pdfParser(pdfjs);
|
||||||
|
|
||||||
export function processUpload(file: File): Promise<ParseResult> {
|
export async function loadExample(): Promise<ParseResult> {
|
||||||
|
return parsePdf(parser.parseUrl('/ExamplePdf.pdf'));
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function processUpload(file: File): Promise<ParseResult> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
const reader = new FileReader();
|
const reader = new FileReader();
|
||||||
reader.onerror = reject;
|
reader.onerror = reject;
|
||||||
@ -20,13 +24,15 @@ export function processUpload(file: File): Promise<ParseResult> {
|
|||||||
resolve(reader.result as ArrayBuffer);
|
resolve(reader.result as ArrayBuffer);
|
||||||
};
|
};
|
||||||
reader.readAsArrayBuffer(file);
|
reader.readAsArrayBuffer(file);
|
||||||
})
|
}).then((buffer) => {
|
||||||
.then((buffer) => {
|
const data = new Uint8Array(buffer as ArrayBuffer);
|
||||||
const uintArray = new Uint8Array(buffer as ArrayBuffer);
|
return parsePdf(parser.parseBytes(data));
|
||||||
return parser.parse(uintArray);
|
});
|
||||||
})
|
}
|
||||||
.then((result) => {
|
|
||||||
parseResult.set(result);
|
async function parsePdf(parsePromise: Promise<ParseResult>): Promise<ParseResult> {
|
||||||
return result;
|
return parsePromise.then((result) => {
|
||||||
});
|
parseResult.set(result);
|
||||||
|
return result;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,8 @@ module.exports = {
|
|||||||
theme: {
|
theme: {
|
||||||
extend: {},
|
extend: {},
|
||||||
},
|
},
|
||||||
variants: {},
|
variants: {
|
||||||
|
extend: {},
|
||||||
|
},
|
||||||
plugins: [],
|
plugins: [],
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user