mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-01-03 20:28:54 +01:00
Show parse results
This commit is contained in:
parent
26e5b785cd
commit
e6ce7132ce
@ -2,7 +2,7 @@
|
||||
"name": "pdf-to-markdown",
|
||||
"version": "0.2.0",
|
||||
"description": "A PDF to Markdown Converter",
|
||||
"keywords": [
|
||||
"keywords": [
|
||||
"PDF",
|
||||
"Markdown",
|
||||
"Converter"
|
||||
|
@ -1,10 +1,17 @@
|
||||
<script>
|
||||
import Upload from './Upload.svelte';
|
||||
|
||||
import { parseResult } from './store';
|
||||
import Result from './Result.svelte';
|
||||
</script>
|
||||
|
||||
<div class="text-2xl font-semibold font-serif text-center bg-gray-400">PDF to Markdown Converter</div>
|
||||
<main class="container mx-auto mt-5 h-full">
|
||||
<Upload />
|
||||
{#if $parseResult}
|
||||
<Result parseResult={$parseResult} />
|
||||
{:else}
|
||||
<Upload />
|
||||
{/if}
|
||||
</main>
|
||||
|
||||
<style>
|
||||
|
15
ui/src/Result.svelte
Normal file
15
ui/src/Result.svelte
Normal file
@ -0,0 +1,15 @@
|
||||
<script>
|
||||
import type ParseResult from 'pdf-to-markdown-core/lib/src/ParseResult';
|
||||
import Table from './Table.svelte';
|
||||
|
||||
export let parseResult: ParseResult;
|
||||
console.log(parseResult.metadata);
|
||||
</script>
|
||||
|
||||
<div>Title: {parseResult.metadata.title()}</div>
|
||||
<div>Author: {parseResult.metadata.author()}</div>
|
||||
|
||||
{#each parseResult.pages as page}
|
||||
<div class="text-2xl font-semibold my-5">Page {page.index}</div>
|
||||
<Table items={page.items} />
|
||||
{/each}
|
52
ui/src/Table.svelte
Normal file
52
ui/src/Table.svelte
Normal file
@ -0,0 +1,52 @@
|
||||
<script>
|
||||
import type ParsedPageItem from 'pdf-to-markdown-core/lib/src/ParsedPageItem';
|
||||
|
||||
const headers = ['ID', 'Text', 'Font', 'Direction', 'Width', 'Height', 'Transform'];
|
||||
export let items: ParsedPageItem[];
|
||||
</script>
|
||||
|
||||
<div class="static">
|
||||
<div class="grid" style="grid-template-columns:repeat({headers.length}, auto)">
|
||||
{#each headers as header}
|
||||
<div class="header">{header}</div>
|
||||
{/each}
|
||||
{#each items as item, i}
|
||||
<div class="row contents">
|
||||
<div class="cell">{i + 1}</div>
|
||||
<div class="cell">{item.str}</div>
|
||||
<div class="cell">{item.fontName}</div>
|
||||
<div class="cell">{item.dir}</div>
|
||||
<div class="cell">{item.width}</div>
|
||||
<div class="cell">{item.height}</div>
|
||||
<div class="cell">{item.transform.join(', ')}</div>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.grid {
|
||||
width: 100%;
|
||||
max-height: 100vh;
|
||||
display: grid;
|
||||
grid-auto-rows: min-content;
|
||||
overflow-y: auto;
|
||||
border: 1px solid #e3e4e4;
|
||||
border-left: none;
|
||||
}
|
||||
.header {
|
||||
@apply bg-gray-300;
|
||||
position: sticky;
|
||||
top: 0;
|
||||
padding: 5px;
|
||||
border-bottom: 1px solid #e3e4e4;
|
||||
}
|
||||
.row:hover > div {
|
||||
@apply bg-gray-200;
|
||||
}
|
||||
|
||||
.cell {
|
||||
@apply pl-1;
|
||||
border-left: 1px solid #e3e4e4;
|
||||
}
|
||||
</style>
|
@ -32,7 +32,9 @@
|
||||
noClick={false}
|
||||
disableDefaultStyles={true}>
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 justify-items-center">
|
||||
<Download size="21x" />
|
||||
<span class:dragoverItem={dragover}>
|
||||
<Download size="21x" />
|
||||
</span>
|
||||
<div class="mt-4">
|
||||
<div class="text-5xl font-bold my-4">Drop your PDF file here...</div>
|
||||
<div class="text-2xl font-bold">Or click the box to select one...</div>
|
||||
@ -63,4 +65,7 @@
|
||||
.dragover {
|
||||
@apply border-purple-600;
|
||||
}
|
||||
.dragoverItem {
|
||||
@apply text-purple-600;
|
||||
}
|
||||
</style>
|
||||
|
@ -2,6 +2,10 @@ import { pdfParser } from 'pdf-to-markdown-core';
|
||||
import type ParseResult from 'pdf-to-markdown-core/lib/src/ParseResult';
|
||||
import * as pdfjs from 'pdfjs-dist/es5/build/pdf';
|
||||
|
||||
import { Writable, writable } from 'svelte/store';
|
||||
|
||||
export let parseResult: Writable<ParseResult> = writable(undefined);
|
||||
|
||||
// TODO this will setup fake worker cause getMainThreadWorkerMessageHandler isn't null
|
||||
import pdfjsWorker from 'pdfjs-dist//es5/build/pdf.worker.entry';
|
||||
pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker;
|
||||
@ -16,8 +20,13 @@ export function processUpload(file: File): Promise<ParseResult> {
|
||||
resolve(reader.result as ArrayBuffer);
|
||||
};
|
||||
reader.readAsArrayBuffer(file);
|
||||
}).then((buffer) => {
|
||||
const uintArray = new Uint8Array(buffer as ArrayBuffer);
|
||||
return parser.parse(uintArray);
|
||||
});
|
||||
})
|
||||
.then((buffer) => {
|
||||
const uintArray = new Uint8Array(buffer as ArrayBuffer);
|
||||
return parser.parse(uintArray);
|
||||
})
|
||||
.then((result) => {
|
||||
parseResult.set(result);
|
||||
return result;
|
||||
});
|
||||
}
|
||||
|
@ -1,23 +1,29 @@
|
||||
{
|
||||
"include": ["src", "types"],
|
||||
"compilerOptions": {
|
||||
"module": "esnext",
|
||||
"target": "esnext",
|
||||
"moduleResolution": "node",
|
||||
"jsx": "preserve",
|
||||
"baseUrl": "./",
|
||||
/* paths - If you configure Snowpack import aliases, add them here. */
|
||||
"paths": {},
|
||||
/* noEmit - Snowpack builds (emits) files, not tsc. */
|
||||
"noEmit": true,
|
||||
/* Additional Options */
|
||||
"strict": true,
|
||||
"noImplicitAny": false,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true,
|
||||
"useDefineForClassFields": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"importsNotUsedAsValues": "error"
|
||||
}
|
||||
"include": ["src", "types"],
|
||||
"compilerOptions": {
|
||||
"module": "esnext",
|
||||
"target": "esnext",
|
||||
"moduleResolution": "node",
|
||||
"jsx": "preserve",
|
||||
"baseUrl": "./",
|
||||
/* paths - If you configure Snowpack import aliases, add them here. */
|
||||
"paths": {},
|
||||
/* noEmit - Snowpack builds (emits) files, not tsc. */
|
||||
"noEmit": true,
|
||||
/* Additional Options */
|
||||
"strict": true,
|
||||
"noImplicitAny": false,
|
||||
// "noImplicitThis": false,
|
||||
// "alwaysStrict": false,
|
||||
// "strictBindCallApply": false,
|
||||
"strictNullChecks": false,
|
||||
// "strictFunctionTypes": false,
|
||||
// "strictPropertyInitialization": false,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true,
|
||||
"useDefineForClassFields": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"importsNotUsedAsValues": "error"
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user