mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-06-25 03:51:33 +02:00
Progress Infrastructure
This commit is contained in:
parent
21ecfd35bd
commit
ee7d686ba6
36
core/src/ParseProgressReporter.ts
Normal file
36
core/src/ParseProgressReporter.ts
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
import type ParseReporter from './ParseReporter';
|
||||||
|
import type ProgressListenFunction from './ProgressListenFunction';
|
||||||
|
import Progress from './Progress';
|
||||||
|
|
||||||
|
/**
 * ParseReporter implementation that tracks parsing as a four-stage Progress
 * ('Document Header', 'Metadata', 'Pages', 'Fonts') and passes that Progress
 * object to the given listener function after every reported event.
 *
 * Note that the very same (mutated) Progress instance is handed to the
 * listener each time; listeners that want a snapshot have to copy it.
 */
export default class ParseProgressReporter implements ParseReporter {
  /** Stage state. The 'Pages' stage carries most of the weight (0.7). */
  progress = new Progress(['Document Header', 'Metadata', 'Pages', 'Fonts'], [0.1, 0.1, 0.7, 0.1]);

  /** Total number of pages, set by parsedDocumentHeader(). 0 until then. */
  pagesToParse = 0;

  /** Listener that is invoked with the current progress on every update. */
  progressListenFunction: ProgressListenFunction;

  /**
   * @param progressListenFunction callback that receives the progress after each update
   */
  constructor(progressListenFunction: ProgressListenFunction) {
    this.progressListenFunction = progressListenFunction;
  }

  /**
   * Marks the 'Document Header' stage as complete, remembers the page count
   * and initializes the detail text of the 'Pages' stage to `0 / <numberOfPages>`.
   *
   * @param numberOfPages total number of pages of the document
   */
  parsedDocumentHeader(numberOfPages: number): void {
    this.pagesToParse = numberOfPages;
    this.progress.stageProgress[0] = 1;
    this.progress.stageDetails[2] = `0 / ${numberOfPages}`;
    this.progressListenFunction(this.progress);
  }

  /** Marks the 'Metadata' stage as complete. */
  parsedMetadata(): void {
    this.progress.stageProgress[1] = 1;
    this.progressListenFunction(this.progress);
  }

  /**
   * Updates the 'Pages' stage after a page has been parsed.
   *
   * @param index zero-based index of the page that was just parsed
   */
  parsedPage(index: number): void {
    const pagesParsed = index + 1;
    // Without a known (positive) page total a plain division would produce
    // Infinity/NaN. Progress values must stay within 0..1, so treat that case
    // as "done" and clamp everything else into the valid range.
    const ratio = this.pagesToParse > 0 ? pagesParsed / this.pagesToParse : 1;
    this.progress.stageProgress[2] = Math.min(1, Math.max(0, ratio));
    this.progress.stageDetails[2] = `${pagesParsed} / ${this.pagesToParse}`;
    this.progressListenFunction(this.progress);
  }

  /** Marks the 'Fonts' stage as complete. */
  parsedFonts(): void {
    this.progress.stageProgress[3] = 1;
    // Notify like every other stage does, otherwise the listener never sees the final state.
    this.progressListenFunction(this.progress);
  }
}
|
9
core/src/ParseReporter.ts
Normal file
9
core/src/ParseReporter.ts
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
/**
 * Progress listener for PdfParser.
 *
 * PdfParser calls these methods while it works through a document so that an
 * implementation can track how far parsing has come.
 */
export default interface ParseReporter {
  /**
   * Called once the document has been opened.
   *
   * @param numberOfPages number of pages of the opened document
   */
  parsedDocumentHeader(numberOfPages: number): void;

  /** Called once the document metadata has been retrieved. */
  parsedMetadata(): void;

  /**
   * Called after the text content of a page has been retrieved.
   *
   * @param index zero-based index of the parsed page
   */
  parsedPage(index: number): void;

  /** Called when fonts have been parsed (NOTE(review): no caller visible here — confirm when this fires). */
  parsedFonts(): void;
}
|
@ -1,21 +1,25 @@
|
|||||||
import Metadata from './Metadata';
|
import Metadata from './Metadata';
|
||||||
import ParsedPage from './ParsedPage';
|
import ParsedPage from './ParsedPage';
|
||||||
|
import type ParseReporter from './ParseReporter';
|
||||||
import ParseResult from './ParseResult';
|
import ParseResult from './ParseResult';
|
||||||
import TextDirection from './TextDirection';
|
import TextDirection from './TextDirection';
|
||||||
import type TextItem from './TextItem';
|
import type TextItem from './TextItem';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses a PDF via PDFJS and returns a ParseResult which contains more or less the original data from PDFJS.
|
||||||
|
*/
|
||||||
export default class PdfParser {
|
export default class PdfParser {
|
||||||
pdfjs: any;
|
pdfjs: any;
|
||||||
constructor(pdfjs: any) {
|
constructor(pdfjs: any) {
|
||||||
this.pdfjs = pdfjs;
|
this.pdfjs = pdfjs;
|
||||||
}
|
}
|
||||||
|
|
||||||
async parseBytes(data: Uint8Array): Promise<ParseResult> {
|
async parseBytes(data: Uint8Array, reporter: ParseReporter): Promise<ParseResult> {
|
||||||
return this.parse(this.params({ data }));
|
return this.parse(this.params({ data }), reporter);
|
||||||
}
|
}
|
||||||
|
|
||||||
async parseUrl(url: string): Promise<ParseResult> {
|
async parseUrl(url: string, reporter: ParseReporter): Promise<ParseResult> {
|
||||||
return this.parse(this.params({ url }));
|
return this.parse(this.params({ url }), reporter);
|
||||||
}
|
}
|
||||||
|
|
||||||
private params(dataSourceParams: object): object {
|
private params(dataSourceParams: object): object {
|
||||||
@ -26,27 +30,37 @@ export default class PdfParser {
|
|||||||
return { ...defaultParams, ...dataSourceParams };
|
return { ...defaultParams, ...dataSourceParams };
|
||||||
}
|
}
|
||||||
|
|
||||||
async parse(parameter: object): Promise<ParseResult> {
|
async parse(parameter: object, reporter: ParseReporter): Promise<ParseResult> {
|
||||||
return this.pdfjs
|
return this.pdfjs
|
||||||
.getDocument(parameter)
|
.getDocument(parameter)
|
||||||
.promise.then((pdfDocument) => {
|
.promise.then((pdfDocument) => {
|
||||||
return Promise.all([pdfDocument.getMetadata(), this.extractPagesSequentially(pdfDocument)]);
|
reporter.parsedDocumentHeader(pdfDocument.numPages);
|
||||||
|
return Promise.all([
|
||||||
|
pdfDocument.getMetadata().then((metadata) => {
|
||||||
|
reporter.parsedMetadata();
|
||||||
|
return metadata;
|
||||||
|
}),
|
||||||
|
this.extractPagesSequentially(pdfDocument, reporter),
|
||||||
|
]);
|
||||||
})
|
})
|
||||||
.then(([metadata, pages]) => new ParseResult(new Metadata(metadata), pages));
|
.then(([metadata, pages]) => new ParseResult(new Metadata(metadata), pages));
|
||||||
}
|
}
|
||||||
|
|
||||||
private extractPagesSequentially(pdfDocument: any): Promise<ParsedPage> {
|
private extractPagesSequentially(pdfDocument: any, reporter: ParseReporter): Promise<ParsedPage> {
|
||||||
return [...Array(pdfDocument.numPages)].reduce((accumulatorPromise, _, index) => {
|
return [...Array(pdfDocument.numPages)].reduce((accumulatorPromise, _, index) => {
|
||||||
return accumulatorPromise.then((accumulatedResults) => {
|
return accumulatorPromise.then((accumulatedResults) => {
|
||||||
return pdfDocument.getPage(index + 1).then((page) => {
|
return pdfDocument.getPage(index + 1).then((page) => {
|
||||||
const viewport = page.getViewport({ scale: 1.0 });
|
const viewport = page.getViewport({ scale: 1.0 });
|
||||||
return this.triggerFontRetrieval(page).then(() =>
|
return this.triggerFontRetrieval(page).then(() =>
|
||||||
page
|
page
|
||||||
.getTextContent()
|
.getTextContent({
|
||||||
.then((textContent) => [
|
normalizeWhitespace: false,
|
||||||
...accumulatedResults,
|
disableCombineTextItems: true,
|
||||||
new ParsedPage(index, viewport.transform, textContent.items),
|
})
|
||||||
]),
|
.then((textContent) => {
|
||||||
|
reporter.parsedPage(index);
|
||||||
|
return [...accumulatedResults, new ParsedPage(index, viewport.transform, textContent.items)];
|
||||||
|
}),
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
45
core/src/Progress.ts
Normal file
45
core/src/Progress.ts
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
/**
 * Multi-stage progress. Progress is expressed in a number between 0 and 1.
 *
 * Every stage has a name, an optional detail text, its own progress (0..1)
 * and a weight that determines how much the stage contributes to the total
 * progress. All weights together sum up to 1.
 */
export default class Progress {
  /** Names of the stages, in the order they are expected to run. */
  stages: string[];
  /** Optional free-text detail per stage (entries stay unset until assigned). */
  stageDetails: string[];
  /** Progress per stage, each between 0 and 1. Starts at 0. */
  stageProgress: number[];
  /** Share of each stage in the total progress. */
  stageWeights: number[];

  /**
   * @param stages names of the stages
   * @param weights weight per stage; when omitted (or empty) all stages are weighted equally
   * @throws Error if the number of weights differs from the number of stages,
   *         if a weight is negative or not finite, or if the weights do not sum up to 1
   */
  constructor(stages: string[], weights: number[] = []) {
    const stageCount = stages.length;
    this.stages = stages;
    this.stageDetails = new Array<string>(stageCount);
    this.stageProgress = new Array<number>(stageCount).fill(0);

    if (weights.length === 0) {
      this.stageWeights = new Array<number>(stageCount).fill(1 / stageCount);
      return;
    }

    if (weights.length !== stageCount) {
      // "only" is just correct when too few weights were handed in.
      const qualifier = weights.length < stageCount ? 'only ' : '';
      throw new Error(
        `Provided ${qualifier}${weights.length} weights but expected ${stageCount} for ${stageCount} stages`,
      );
    }

    // A negative weight could still sum up to 1 with the others, but would push the total out of 0..1.
    if (weights.some((weight) => weight < 0)) {
      throw new Error(`Weights [${weights.join(', ')}] should not be negative`);
    }

    // Round after each addition so that float noise (e.g. 0.1 + 0.2) does not fail the check.
    const weightsSummed = weights.reduce((sum, weight) => +(sum + weight).toFixed(12), 0);
    if (weightsSummed !== 1) {
      throw new Error(`Weights [${weights.join(', ')}] should sum up to 1, but did to ${weightsSummed}`);
    }

    // Copy, so that later changes to the caller's array cannot bypass the validation above.
    this.stageWeights = [...weights];
  }

  /**
   * @param stageIndex index of the stage
   * @returns true if the progress of the stage is exactly 1
   */
  isComplete(stageIndex: number): boolean {
    return this.stageProgress[stageIndex] === 1;
  }

  /**
   * A stage counts as progressing when its predecessor (if any) is complete
   * and the stage itself is not yet complete.
   *
   * @param stageIndex index of the stage
   * @returns true if the stage is the one currently being worked on
   */
  isProgressing(stageIndex: number): boolean {
    const previousComplete = stageIndex === 0 || this.isComplete(stageIndex - 1);
    return previousComplete && this.stageProgress[stageIndex] < 1;
  }

  /**
   * @returns the weighted sum of all stage progresses, between 0 and 1
   *          (0 if there are no stages)
   */
  totalProgress(): number {
    const weightedSum = this.stageProgress.reduce(
      (sum, stageProgress, index) => sum + stageProgress * (this.stageWeights[index] ?? 0),
      0,
    );
    // Same precision as the weight check in the constructor: strips float noise
    // so that a fully completed progress reports exactly 1.
    return +weightedSum.toFixed(12);
  }
}
|
5
core/src/ProgressListenFunction.ts
Normal file
5
core/src/ProgressListenFunction.ts
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
import type Progress from './Progress';
|
||||||
|
|
||||||
|
/** Callback that receives the current Progress whenever a progress update is reported. */
type ProgressListenFunction = (progressUpdate: Progress) => void;
|
||||||
|
|
||||||
|
export default ProgressListenFunction;
|
@ -1,6 +1,11 @@
|
|||||||
import ParseResult from './ParseResult';
|
import type ProgressListenFunction from './ProgressListenFunction';
|
||||||
|
import ParseProgressReporter from './ParseProgressReporter';
|
||||||
import PdfParser from './PdfParser';
|
import PdfParser from './PdfParser';
|
||||||
|
|
||||||
export function pdfParser(pdfJs: any) {
|
export function pdfParser(pdfJs: any) {
|
||||||
return new PdfParser(pdfJs);
|
return new PdfParser(pdfJs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function parseReporter(progressListener: ProgressListenFunction) {
|
||||||
|
return new ParseProgressReporter(progressListener);
|
||||||
|
}
|
||||||
|
@ -1,15 +1,22 @@
|
|||||||
import PdfParser from 'src/PdfParser';
|
import PdfParser from 'src/PdfParser';
|
||||||
import * as pdfjs from 'pdfjs-dist/es5/build/pdf';
|
import * as pdfjs from 'pdfjs-dist/es5/build/pdf';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
|
import ParseProgressReporter from 'src/ParseProgressReporter';
|
||||||
|
import Progress from 'src/Progress';
|
||||||
|
|
||||||
const parser = new PdfParser(pdfjs);
|
const parser = new PdfParser(pdfjs);
|
||||||
|
|
||||||
test('testIt', async () => {
|
test('basic example PDF parse', async () => {
|
||||||
|
const progressUpdates: Progress[] = [];
|
||||||
const data = fs.readFileSync('../examples/ExamplePdf.pdf', null);
|
const data = fs.readFileSync('../examples/ExamplePdf.pdf', null);
|
||||||
const result = await parser.parseBytes(data);
|
const result = await parser.parseBytes(
|
||||||
|
data,
|
||||||
|
new ParseProgressReporter((progress) => progressUpdates.push(JSON.parse(JSON.stringify(progress)) as Progress)),
|
||||||
|
);
|
||||||
|
const expectedPages = 7;
|
||||||
expect(result.metadata.title()).toEqual('ExamplePdf');
|
expect(result.metadata.title()).toEqual('ExamplePdf');
|
||||||
expect(result.metadata.author()).toEqual('Johannes Zillmann');
|
expect(result.metadata.author()).toEqual('Johannes Zillmann');
|
||||||
expect(result.pages.length).toBe(7);
|
expect(result.pages.length).toBe(expectedPages);
|
||||||
expect(result.pages[0].index).toBe(0);
|
expect(result.pages[0].index).toBe(0);
|
||||||
expect(result.pages[0].viewPortTransform).toEqual([1, 0, 0, -1, 0, 841.8898]);
|
expect(result.pages[0].viewPortTransform).toEqual([1, 0, 0, -1, 0, 841.8898]);
|
||||||
expect(result.pages[0].items).toEqual([
|
expect(result.pages[0].items).toEqual([
|
||||||
@ -118,4 +125,30 @@ test('testIt', async () => {
|
|||||||
fontName: 'g_d0_f2',
|
fontName: 'g_d0_f2',
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
expect(progressUpdates.length).toBe(expectedPages + 2);
|
||||||
|
progressUpdates.forEach((update) => expect(update.stages).toEqual(['Document Header', 'Metadata', 'Pages', 'Fonts']));
|
||||||
|
expect(progressUpdates[0].stageProgress).toEqual([1, 0, 0, 0]);
|
||||||
|
expect(progressUpdates[0].stageDetails).toEqual([null, null, `0 / ${expectedPages}`, null]);
|
||||||
|
|
||||||
|
expect(progressUpdates[1].stageProgress).toEqual([1, 1, 0, 0]);
|
||||||
|
expect(progressUpdates[1].stageDetails).toEqual([null, null, `0 / ${expectedPages}`, null]);
|
||||||
|
|
||||||
|
expect(progressUpdates[2].stageProgress).toEqual([1, 1, 1 / expectedPages, 0]);
|
||||||
|
expect(progressUpdates[2].stageDetails).toEqual([null, null, `1 / ${expectedPages}`, null]);
|
||||||
|
expect(progressUpdates[3].stageProgress).toEqual([1, 1, 2 / expectedPages, 0]);
|
||||||
|
expect(progressUpdates[3].stageDetails).toEqual([null, null, `2 / ${expectedPages}`, null]);
|
||||||
|
expect(progressUpdates[4].stageProgress).toEqual([1, 1, 3 / expectedPages, 0]);
|
||||||
|
expect(progressUpdates[4].stageDetails).toEqual([null, null, `3 / ${expectedPages}`, null]);
|
||||||
|
expect(progressUpdates[5].stageProgress).toEqual([1, 1, 4 / expectedPages, 0]);
|
||||||
|
expect(progressUpdates[5].stageDetails).toEqual([null, null, `4 / ${expectedPages}`, null]);
|
||||||
|
expect(progressUpdates[6].stageProgress).toEqual([1, 1, 5 / expectedPages, 0]);
|
||||||
|
expect(progressUpdates[6].stageDetails).toEqual([null, null, `5 / ${expectedPages}`, null]);
|
||||||
|
expect(progressUpdates[7].stageProgress).toEqual([1, 1, 6 / expectedPages, 0]);
|
||||||
|
expect(progressUpdates[7].stageDetails).toEqual([null, null, `6 / ${expectedPages}`, null]);
|
||||||
|
expect(progressUpdates[8].stageProgress).toEqual([1, 1, 7 / expectedPages, 0]);
|
||||||
|
expect(progressUpdates[8].stageDetails).toEqual([null, null, `7 / ${expectedPages}`, null]);
|
||||||
|
|
||||||
|
// expect(progressUpdates[9].stagePercents).toEqual([1, 1, 1, 0]);
|
||||||
|
// expect(progressUpdates[9].stageDetails).toEqual([null, null, `${expectedPages} / ${expectedPages}`, null]);
|
||||||
});
|
});
|
||||||
|
107
core/test/Progress.test.ts
Normal file
107
core/test/Progress.test.ts
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
import Progress from 'src/Progress';
|
||||||
|
|
||||||
|
// Walks an equally weighted three-stage progress through all stages and checks
// the total (in percent) and which stage counts as "in progress" after each step.
test('basic progress', () => {
  const progress = new Progress(['Stage0', 'Stage1', 'Stage2']);

  // nothing yet
  expectTotalProgress(progress, 0);
  expectStageInProgress(progress, 0);

  // stage 0 progress
  progress.stageProgress[0] = 0.3;
  expectTotalProgress(progress, 10);
  expectStageInProgress(progress, 0);

  // stage 0 completed
  progress.stageProgress[0] = 1;
  expectTotalProgress(progress, 33);
  expectStageInProgress(progress, 1);

  // stage 1 progress
  progress.stageProgress[1] = 0.3;
  expectTotalProgress(progress, 43);
  expectStageInProgress(progress, 1);

  // stage 1 completed
  progress.stageProgress[1] = 1;
  expectTotalProgress(progress, 67);
  expectStageInProgress(progress, 2);

  // stage 2 completed: index 3 is past the last stage, i.e. every stage must be complete
  progress.stageProgress[2] = 1;
  expectTotalProgress(progress, 100);
  expectStageInProgress(progress, 3);
});
|
||||||
|
|
||||||
|
// Handing in fewer weights than stages must be rejected by the constructor.
// toThrow() fails the test by itself if nothing is thrown, so no try/fail/catch is needed
// (in which the catch block would also have swallowed the fail()).
test('number of stage weights must match the number of stages', () => {
  expect(() => new Progress(['Stage0', 'Stage1', 'Stage2'], [0.5, 0.5])).toThrow(
    new Error('Provided only 2 weights but expected 3 for 3 stages'),
  );
});
|
||||||
|
|
||||||
|
// Weights which do not sum up to 1 must be rejected by the constructor.
// toThrow() fails the test by itself if nothing is thrown, so no try/fail/catch is needed.
test('stage weights must sum up', () => {
  expect(() => new Progress(['Stage0', 'Stage1', 'Stage2'], [0.5, 0.5, 0.5])).toThrow(
    new Error('Weights [0.5, 0.5, 0.5] should sum up to 1, but did to 1.5'),
  );
});
|
||||||
|
|
||||||
|
// Checks the total (in percent) of a progress with explicit weights:
// stage 0 contributes nothing, stage 1 70% and stage 2 30%.
test('weighted progress', () => {
  const progress = new Progress(['Stage0', 'Stage1', 'Stage2'], [0, 0.7, 0.3]);

  // nothing yet
  expectTotalProgress(progress, 0);

  // stage 0 progress (weight 0, so the total does not move)
  progress.stageProgress[0] = 0.9;
  expectTotalProgress(progress, 0);

  // stage 0 completed
  progress.stageProgress[0] = 1;
  expectTotalProgress(progress, 0);

  // stage 1 progress
  progress.stageProgress[1] = 0.3;
  expectTotalProgress(progress, 21);

  // stage 1 more progress
  progress.stageProgress[1] = 0.6;
  expectTotalProgress(progress, 42);

  // stage 1 completed
  progress.stageProgress[1] = 1;
  expectTotalProgress(progress, 70);

  // stage 2 progress
  progress.stageProgress[2] = 0.3;
  expectTotalProgress(progress, 79);

  // stage 2 completed
  progress.stageProgress[2] = 1;
  expectTotalProgress(progress, 100);
});
|
||||||
|
|
||||||
|
/**
 * Asserts the total progress, compared as a whole-number percentage.
 *
 * @param progress the progress under test
 * @param expected expected total in percent, rounded to the nearest integer
 */
function expectTotalProgress(progress: Progress, expected: number) {
  const totalInPercent = Math.round(progress.totalProgress() * 100);
  expect(totalInPercent).toBe(expected);
}
|
||||||
|
|
||||||
|
/**
 * Asserts that exactly the given stage is in progress: all stages before it
 * have to be complete, the stage itself progressing (and not complete) and
 * all stages after it neither progressing nor complete.
 *
 * @param progress the progress under test
 * @param stageIndex index of the stage expected to be in progress; an index
 *        past the last stage means that every stage has to be complete
 */
function expectStageInProgress(progress: Progress, stageIndex: number) {
  for (let index = 0; index < progress.stageProgress.length; index++) {
    expect(progress.isProgressing(index)).toBe(index === stageIndex);
    expect(progress.isComplete(index)).toBe(index < stageIndex);
  }
}
|
@ -31,5 +31,6 @@ module.exports = {
|
|||||||
},
|
},
|
||||||
alias: {
|
alias: {
|
||||||
'@core': '../core/src/index.js',
|
'@core': '../core/src/index.js',
|
||||||
|
'@core/*': '../core/src/*',
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
35
ui/src/ProgressRing.svelte
Normal file
35
ui/src/ProgressRing.svelte
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
<script>
|
||||||
|
// Ring-shaped progress indicator. The displayed value is tweened, so that
// progress jumps are animated instead of being applied instantly.
import { tweened } from 'svelte/motion';
import { cubicOut } from 'svelte/easing';

// Outer radius; the svg is rendered with a width and height of radius * 2.
export let radius: number;
// Stroke width of the ring.
export let stroke: number;
// Progress to display, in percent (0 - 100).
export let progress: number;

const progressTweened = tweened(0, {
  duration: 400,
  easing: cubicOut,
});

// Derived from props, hence declared reactively: plain constants would only be
// computed once and go stale if radius or stroke are changed later on.
$: normalizedRadius = radius - stroke * 2;
$: circumference = normalizedRadius * 2 * Math.PI;

// Animate towards each new progress value.
$: progressTweened.set(progress);
// The dash offset hides the part of the circle which is not yet reached.
$: strokeDashoffset = circumference - ($progressTweened / 100) * circumference;
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<svg
|
||||||
|
height={radius * 2}
|
||||||
|
width={radius * 2}
|
||||||
|
class="text-green-600 stroke-current"
|
||||||
|
style="filter: brightness({$progressTweened / 100 / 2 + 0.5}) sepia({0.5 - $progressTweened / 100 / 2}) blur({0.6 - $progressTweened / 100 / 3}px)">
|
||||||
|
<circle
|
||||||
|
fill="transparent"
|
||||||
|
stroke-width={stroke}
|
||||||
|
stroke-dasharray={circumference + ' ' + circumference}
|
||||||
|
stroke-dashoffset={strokeDashoffset}
|
||||||
|
r={normalizedRadius}
|
||||||
|
cx={radius}
|
||||||
|
cy={radius} />
|
||||||
|
<text x="50%" y="53%" text-anchor="middle" class="text-gray-800 fill-current" stroke-width="1px" dy=".2em">
|
||||||
|
{Math.round($progressTweened)}%
|
||||||
|
</text>
|
||||||
|
</svg>
|
@ -1,31 +1,44 @@
|
|||||||
<script>
|
<script>
|
||||||
|
import { blur, slide } from 'svelte/transition';
|
||||||
import Dropzone from 'svelte-file-dropzone';
|
import Dropzone from 'svelte-file-dropzone';
|
||||||
import { Download } from 'svelte-hero-icons';
|
import { Download, Check } from 'svelte-hero-icons';
|
||||||
import { processUpload, loadExample } from './store';
|
import { processUpload, loadExample } from './store';
|
||||||
|
import type Progress from '@core/Progress';
|
||||||
|
import ProgressRing from './ProgressRing.svelte';
|
||||||
|
|
||||||
let specifiedFileName: string;
|
let specifiedFileName: string;
|
||||||
let dragover = false;
|
let dragover = false;
|
||||||
let upload: Promise<any>;
|
let upload: Promise<any>;
|
||||||
let rejectionError: string;
|
let rejectionError: string;
|
||||||
|
let parseProgress: Progress;
|
||||||
|
|
||||||
function handleExampleLoad() {
|
function handleExampleLoad() {
|
||||||
rejectionError = undefined;
|
dragover = true;
|
||||||
specifiedFileName = 'ExamplePdf.pdf';
|
specifiedFileName = 'ExamplePdf.pdf';
|
||||||
upload = loadExample();
|
|
||||||
}
|
|
||||||
function handleFilesSelect(e) {
|
|
||||||
rejectionError = undefined;
|
rejectionError = undefined;
|
||||||
|
parseProgress = undefined;
|
||||||
|
upload = loadExample(handleProgress);
|
||||||
|
}
|
||||||
|
|
||||||
|
function handleFilesSelect(e) {
|
||||||
|
specifiedFileName = undefined;
|
||||||
|
rejectionError = undefined;
|
||||||
|
parseProgress = undefined;
|
||||||
const { acceptedFiles, fileRejections } = e.detail;
|
const { acceptedFiles, fileRejections } = e.detail;
|
||||||
if (acceptedFiles.length === 1) {
|
if (acceptedFiles.length === 1) {
|
||||||
const specifiedFile = acceptedFiles[0];
|
const specifiedFile = acceptedFiles[0];
|
||||||
specifiedFileName = specifiedFile.name;
|
specifiedFileName = specifiedFile.name;
|
||||||
upload = processUpload(specifiedFile);
|
upload = processUpload(specifiedFile, handleProgress);
|
||||||
}
|
}
|
||||||
if (fileRejections.length > 1) {
|
if (fileRejections.length > 1) {
|
||||||
const fileNames = fileRejections.map((r) => r.file.name);
|
const fileNames = fileRejections.map((r) => r.file.name);
|
||||||
rejectionError = `Only one file at a time allowed! Rejected ${fileRejections.length} files: '${fileNames}'.`;
|
rejectionError = `Only one file at a time allowed! Rejected ${fileRejections.length} files: '${fileNames}'.`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function handleProgress(progress: Progress) {
|
||||||
|
parseProgress = progress;
|
||||||
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<!-- Options -->
|
<!-- Options -->
|
||||||
@ -33,11 +46,11 @@
|
|||||||
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
|
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
|
||||||
Load Example
|
Load Example
|
||||||
</div>
|
</div>
|
||||||
<div class="py-0.5 px-1 border-2 border-gray-50 hover:border-blue-600 cursor-pointer">Debug</div>
|
<div class="py-0.5 px-1 border-2 border-gray-50 hover:border-green-600 cursor-pointer">Debug</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Upload Box -->
|
<!-- Upload Box -->
|
||||||
<div class="pb-5 border-2 border-dashed border-gray-400 hover:border-blue-800" class:dragover>
|
<div class="mb-5 border-2 border-dashed border-gray-400 hover:border-green-800" class:dragover>
|
||||||
<Dropzone
|
<Dropzone
|
||||||
on:drop={handleFilesSelect}
|
on:drop={handleFilesSelect}
|
||||||
on:dragenter={() => (dragover = true)}
|
on:dragenter={() => (dragover = true)}
|
||||||
@ -49,7 +62,7 @@
|
|||||||
<span class:dragoverItem={dragover}>
|
<span class:dragoverItem={dragover}>
|
||||||
<Download size="21x" />
|
<Download size="21x" />
|
||||||
</span>
|
</span>
|
||||||
<div class="px-5">
|
<div class="px-5 mb-5">
|
||||||
<div class="text-5xl font-bold my-4">Drop your PDF file here...</div>
|
<div class="text-5xl font-bold my-4">Drop your PDF file here...</div>
|
||||||
<div class="text-2xl font-bold">Or click the box to select one...</div>
|
<div class="text-2xl font-bold">Or click the box to select one...</div>
|
||||||
<div class="mt-14"><strong>Note:</strong> Your data stays locally in your browser.</div>
|
<div class="mt-14"><strong>Note:</strong> Your data stays locally in your browser.</div>
|
||||||
@ -64,22 +77,55 @@
|
|||||||
</Dropzone>
|
</Dropzone>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="mt-5 text-center font-bold">
|
<!-- Progress Info -->
|
||||||
{#await upload}
|
<div class="mt-5 text-xl font-bold">
|
||||||
<div>Parsing {specifiedFileName}...</div>
|
<div style="min-width: 70%;">
|
||||||
{:catch error}
|
{#if specifiedFileName}
|
||||||
<div class="text-red-700">Failed to parse '{specifiedFileName}': {error.message}</div>
|
<div in:blur class="text-2xl mb-2">Parsing {specifiedFileName} ...</div>
|
||||||
{/await}
|
{/if}
|
||||||
{#if rejectionError}
|
{#if parseProgress}
|
||||||
<div class="text-red-700">{rejectionError}</div>
|
<div in:blur class="flex space-x-4">
|
||||||
{/if}
|
<ProgressRing radius={50} stroke={7} progress={parseProgress?.totalProgress() * 100} />
|
||||||
|
<div>
|
||||||
|
{#each parseProgress.stages as stage, index}
|
||||||
|
{#if parseProgress.isProgressing(index)}
|
||||||
|
<div class="flex space-x-2 items-center">
|
||||||
|
<div>
|
||||||
|
Parsing
|
||||||
|
{stage}
|
||||||
|
{parseProgress.stageDetails[index] ? parseProgress.stageDetails[index] : ''}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{:else if parseProgress.isComplete(index)}
|
||||||
|
<div class="flex space-x-2 items-center ">
|
||||||
|
<div>
|
||||||
|
Parsing
|
||||||
|
{stage}
|
||||||
|
{parseProgress.stageDetails[index] ? parseProgress.stageDetails[index] : ''}
|
||||||
|
</div>
|
||||||
|
<Check size="1.5x" class="text-green-700" />
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{#if rejectionError}
|
||||||
|
<div in:slide class="text-red-700">{rejectionError}</div>
|
||||||
|
{/if}
|
||||||
|
{#await upload}
|
||||||
|
<!-- -->
|
||||||
|
{:catch error}
|
||||||
|
<div class="text-red-700">Failed to parse '{specifiedFileName}': {error?.message}</div>
|
||||||
|
{/await}
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
.dragover {
|
.dragover {
|
||||||
@apply border-purple-600;
|
@apply border-green-600;
|
||||||
}
|
}
|
||||||
.dragoverItem {
|
.dragoverItem {
|
||||||
@apply text-purple-600;
|
@apply text-green-600;
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import { pdfParser } from '@core';
|
import { pdfParser, parseReporter } from '@core';
|
||||||
|
import type ProgressListenFunction from '@core/ProgressListenFunction';
|
||||||
import type ParseResult from '@core/ParseResult';
|
import type ParseResult from '@core/ParseResult';
|
||||||
import * as pdfjs from 'pdfjs-dist/es5/build/pdf';
|
import * as pdfjs from 'pdfjs-dist/es5/build/pdf';
|
||||||
|
|
||||||
@ -10,11 +11,11 @@ pdfjs.GlobalWorkerOptions.workerSrc = 'worker/pdf.worker.min.js';
|
|||||||
|
|
||||||
const parser = pdfParser(pdfjs);
|
const parser = pdfParser(pdfjs);
|
||||||
|
|
||||||
export async function loadExample(): Promise<ParseResult> {
|
export async function loadExample(progressListener: ProgressListenFunction): Promise<ParseResult> {
|
||||||
return parsePdf(parser.parseUrl('/ExamplePdf.pdf'));
|
return parsePdf(parser.parseUrl('/ExamplePdf.pdf', parseReporter(progressListener)));
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function processUpload(file: File): Promise<ParseResult> {
|
export async function processUpload(file: File, progressListener: ProgressListenFunction): Promise<ParseResult> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
const reader = new FileReader();
|
const reader = new FileReader();
|
||||||
reader.onerror = reject;
|
reader.onerror = reject;
|
||||||
@ -24,7 +25,7 @@ export async function processUpload(file: File): Promise<ParseResult> {
|
|||||||
reader.readAsArrayBuffer(file);
|
reader.readAsArrayBuffer(file);
|
||||||
}).then((buffer) => {
|
}).then((buffer) => {
|
||||||
const data = new Uint8Array(buffer as ArrayBuffer);
|
const data = new Uint8Array(buffer as ArrayBuffer);
|
||||||
return parsePdf(parser.parseBytes(data));
|
return parsePdf(parser.parseBytes(data, parseReporter(progressListener)));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user