Show parse results

This commit is contained in:
Johannes Zillmann 2021-01-03 20:09:35 +01:00
parent 26e5b785cd
commit e6ce7132ce
7 changed files with 122 additions and 28 deletions

View File

@ -1,10 +1,17 @@
<script> <script>
import Upload from './Upload.svelte'; import Upload from './Upload.svelte';
import { parseResult } from './store';
import Result from './Result.svelte';
</script> </script>
<div class="text-2xl font-semibold font-serif text-center bg-gray-400">PDF to Markdown Converter</div> <div class="text-2xl font-semibold font-serif text-center bg-gray-400">PDF to Markdown Converter</div>
<main class="container mx-auto mt-5 h-full"> <main class="container mx-auto mt-5 h-full">
{#if $parseResult}
<Result parseResult={$parseResult} />
{:else}
<Upload /> <Upload />
{/if}
</main> </main>
<style> <style>

15
ui/src/Result.svelte Normal file
View File

@ -0,0 +1,15 @@
<script>
    import type ParseResult from 'pdf-to-markdown-core/lib/src/ParseResult';
    import Table from './Table.svelte';

    /** Parse result to render; passed in by the parent component. */
    export let parseResult: ParseResult;
</script>

<!-- Document-level metadata, read through the metadata accessors. -->
<div>Title: {parseResult.metadata.title()}</div>
<div>Author: {parseResult.metadata.author()}</div>

<!-- One heading plus one item table per page. -->
{#each parseResult.pages as page}
    <div class="text-2xl font-semibold my-5">Page {page.index}</div>
    <Table items={page.items} />
{/each}

52
ui/src/Table.svelte Normal file
View File

@ -0,0 +1,52 @@
<script>
    import type ParsedPageItem from 'pdf-to-markdown-core/lib/src/ParsedPageItem';

    /** Items to list; each item becomes one row of the grid. */
    export let items: ParsedPageItem[];

    // Column captions, in the same order as the cells emitted per row below.
    const headers = ['ID', 'Text', 'Font', 'Direction', 'Width', 'Height', 'Transform'];

    // One auto-sized grid column per caption.
    const columnTemplate = `grid-template-columns:repeat(${headers.length}, auto)`;
</script>

<div class="static">
    <div class="grid" style={columnTemplate}>
        <!-- Caption row -->
        {#each headers as caption}
            <div class="header">{caption}</div>
        {/each}

        <!-- Data rows: the "row" wrapper also carries the "contents" class, so its cells are laid out by the grid -->
        {#each items as entry, position}
            <div class="row contents">
                <div class="cell">{position + 1}</div>
                <div class="cell">{entry.str}</div>
                <div class="cell">{entry.fontName}</div>
                <div class="cell">{entry.dir}</div>
                <div class="cell">{entry.width}</div>
                <div class="cell">{entry.height}</div>
                <div class="cell">{entry.transform.join(', ')}</div>
            </div>
        {/each}
    </div>
</div>

<style>
    /* Scrollable grid container; height is capped at the viewport. */
    .grid {
        width: 100%;
        max-height: 100vh;
        display: grid;
        grid-auto-rows: min-content;
        overflow-y: auto;
        border: 1px solid #e3e4e4;
        border-left: none;
    }

    /* Caption cells stay pinned to the top while the grid scrolls. */
    .header {
        @apply bg-gray-300;
        position: sticky;
        top: 0;
        padding: 5px;
        border-bottom: 1px solid #e3e4e4;
    }

    /* Highlight every cell of the hovered row. */
    .row:hover > div {
        @apply bg-gray-200;
    }

    /* Each cell draws its own left border; the container's left border is disabled above. */
    .cell {
        @apply pl-1;
        border-left: 1px solid #e3e4e4;
    }
</style>

View File

@ -32,7 +32,9 @@
noClick={false} noClick={false}
disableDefaultStyles={true}> disableDefaultStyles={true}>
<div class="grid grid-cols-1 md:grid-cols-2 justify-items-center"> <div class="grid grid-cols-1 md:grid-cols-2 justify-items-center">
<span class:dragoverItem={dragover}>
<Download size="21x" /> <Download size="21x" />
</span>
<div class="mt-4"> <div class="mt-4">
<div class="text-5xl font-bold my-4">Drop your PDF file here...</div> <div class="text-5xl font-bold my-4">Drop your PDF file here...</div>
<div class="text-2xl font-bold">Or click the box to select one...</div> <div class="text-2xl font-bold">Or click the box to select one...</div>
@ -63,4 +65,7 @@
.dragover { .dragover {
@apply border-purple-600; @apply border-purple-600;
} }
.dragoverItem {
@apply text-purple-600;
}
</style> </style>

View File

@ -2,6 +2,10 @@ import { pdfParser } from 'pdf-to-markdown-core';
import type ParseResult from 'pdf-to-markdown-core/lib/src/ParseResult'; import type ParseResult from 'pdf-to-markdown-core/lib/src/ParseResult';
import * as pdfjs from 'pdfjs-dist/es5/build/pdf'; import * as pdfjs from 'pdfjs-dist/es5/build/pdf';
import { Writable, writable } from 'svelte/store';
export let parseResult: Writable<ParseResult> = writable(undefined);
// TODO this will setup fake worker cause getMainThreadWorkerMessageHandler isn't null // TODO this will setup fake worker cause getMainThreadWorkerMessageHandler isn't null
import pdfjsWorker from 'pdfjs-dist//es5/build/pdf.worker.entry'; import pdfjsWorker from 'pdfjs-dist//es5/build/pdf.worker.entry';
pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker; pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker;
@ -16,8 +20,13 @@ export function processUpload(file: File): Promise<ParseResult> {
resolve(reader.result as ArrayBuffer); resolve(reader.result as ArrayBuffer);
}; };
reader.readAsArrayBuffer(file); reader.readAsArrayBuffer(file);
}).then((buffer) => { })
.then((buffer) => {
const uintArray = new Uint8Array(buffer as ArrayBuffer); const uintArray = new Uint8Array(buffer as ArrayBuffer);
return parser.parse(uintArray); return parser.parse(uintArray);
})
.then((result) => {
parseResult.set(result);
return result;
}); });
} }

View File

@ -13,6 +13,12 @@
/* Additional Options */ /* Additional Options */
"strict": true, "strict": true,
"noImplicitAny": false, "noImplicitAny": false,
// "noImplicitThis": false,
// "alwaysStrict": false,
// "strictBindCallApply": false,
"strictNullChecks": false,
// "strictFunctionTypes": false,
// "strictPropertyInitialization": false,
"skipLibCheck": true, "skipLibCheck": true,
"forceConsistentCasingInFileNames": true, "forceConsistentCasingInFileNames": true,
"resolveJsonModule": true, "resolveJsonModule": true,