mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-01-05 21:28:58 +01:00
Show parse results
This commit is contained in:
parent
26e5b785cd
commit
e6ce7132ce
@ -2,7 +2,7 @@
|
|||||||
"name": "pdf-to-markdown",
|
"name": "pdf-to-markdown",
|
||||||
"version": "0.2.0",
|
"version": "0.2.0",
|
||||||
"description": "A PDF to Markdown Converter",
|
"description": "A PDF to Markdown Converter",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"PDF",
|
"PDF",
|
||||||
"Markdown",
|
"Markdown",
|
||||||
"Converter"
|
"Converter"
|
||||||
|
@ -1,10 +1,17 @@
|
|||||||
<script>
|
<script>
|
||||||
import Upload from './Upload.svelte';
|
import Upload from './Upload.svelte';
|
||||||
|
|
||||||
|
import { parseResult } from './store';
|
||||||
|
import Result from './Result.svelte';
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="text-2xl font-semibold font-serif text-center bg-gray-400">PDF to Markdown Converter</div>
|
<div class="text-2xl font-semibold font-serif text-center bg-gray-400">PDF to Markdown Converter</div>
|
||||||
<main class="container mx-auto mt-5 h-full">
|
<main class="container mx-auto mt-5 h-full">
|
||||||
<Upload />
|
{#if $parseResult}
|
||||||
|
<Result parseResult={$parseResult} />
|
||||||
|
{:else}
|
||||||
|
<Upload />
|
||||||
|
{/if}
|
||||||
</main>
|
</main>
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
|
15
ui/src/Result.svelte
Normal file
15
ui/src/Result.svelte
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
<script>
|
||||||
|
import type ParseResult from 'pdf-to-markdown-core/lib/src/ParseResult';
|
||||||
|
import Table from './Table.svelte';
|
||||||
|
|
||||||
|
export let parseResult: ParseResult;
|
||||||
|
console.log(parseResult.metadata);
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div>Title: {parseResult.metadata.title()}</div>
|
||||||
|
<div>Author: {parseResult.metadata.author()}</div>
|
||||||
|
|
||||||
|
{#each parseResult.pages as page}
|
||||||
|
<div class="text-2xl font-semibold my-5">Page {page.index}</div>
|
||||||
|
<Table items={page.items} />
|
||||||
|
{/each}
|
52
ui/src/Table.svelte
Normal file
52
ui/src/Table.svelte
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
<script>
|
||||||
|
import type ParsedPageItem from 'pdf-to-markdown-core/lib/src/ParsedPageItem';
|
||||||
|
|
||||||
|
const headers = ['ID', 'Text', 'Font', 'Direction', 'Width', 'Height', 'Transform'];
|
||||||
|
export let items: ParsedPageItem[];
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="static">
|
||||||
|
<div class="grid" style="grid-template-columns:repeat({headers.length}, auto)">
|
||||||
|
{#each headers as header}
|
||||||
|
<div class="header">{header}</div>
|
||||||
|
{/each}
|
||||||
|
{#each items as item, i}
|
||||||
|
<div class="row contents">
|
||||||
|
<div class="cell">{i + 1}</div>
|
||||||
|
<div class="cell">{item.str}</div>
|
||||||
|
<div class="cell">{item.fontName}</div>
|
||||||
|
<div class="cell">{item.dir}</div>
|
||||||
|
<div class="cell">{item.width}</div>
|
||||||
|
<div class="cell">{item.height}</div>
|
||||||
|
<div class="cell">{item.transform.join(', ')}</div>
|
||||||
|
</div>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.grid {
|
||||||
|
width: 100%;
|
||||||
|
max-height: 100vh;
|
||||||
|
display: grid;
|
||||||
|
grid-auto-rows: min-content;
|
||||||
|
overflow-y: auto;
|
||||||
|
border: 1px solid #e3e4e4;
|
||||||
|
border-left: none;
|
||||||
|
}
|
||||||
|
.header {
|
||||||
|
@apply bg-gray-300;
|
||||||
|
position: sticky;
|
||||||
|
top: 0;
|
||||||
|
padding: 5px;
|
||||||
|
border-bottom: 1px solid #e3e4e4;
|
||||||
|
}
|
||||||
|
.row:hover > div {
|
||||||
|
@apply bg-gray-200;
|
||||||
|
}
|
||||||
|
|
||||||
|
.cell {
|
||||||
|
@apply pl-1;
|
||||||
|
border-left: 1px solid #e3e4e4;
|
||||||
|
}
|
||||||
|
</style>
|
@ -32,7 +32,9 @@
|
|||||||
noClick={false}
|
noClick={false}
|
||||||
disableDefaultStyles={true}>
|
disableDefaultStyles={true}>
|
||||||
<div class="grid grid-cols-1 md:grid-cols-2 justify-items-center">
|
<div class="grid grid-cols-1 md:grid-cols-2 justify-items-center">
|
||||||
<Download size="21x" />
|
<span class:dragoverItem={dragover}>
|
||||||
|
<Download size="21x" />
|
||||||
|
</span>
|
||||||
<div class="mt-4">
|
<div class="mt-4">
|
||||||
<div class="text-5xl font-bold my-4">Drop your PDF file here...</div>
|
<div class="text-5xl font-bold my-4">Drop your PDF file here...</div>
|
||||||
<div class="text-2xl font-bold">Or click the box to select one...</div>
|
<div class="text-2xl font-bold">Or click the box to select one...</div>
|
||||||
@ -63,4 +65,7 @@
|
|||||||
.dragover {
|
.dragover {
|
||||||
@apply border-purple-600;
|
@apply border-purple-600;
|
||||||
}
|
}
|
||||||
|
.dragoverItem {
|
||||||
|
@apply text-purple-600;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
@ -2,6 +2,10 @@ import { pdfParser } from 'pdf-to-markdown-core';
|
|||||||
import type ParseResult from 'pdf-to-markdown-core/lib/src/ParseResult';
|
import type ParseResult from 'pdf-to-markdown-core/lib/src/ParseResult';
|
||||||
import * as pdfjs from 'pdfjs-dist/es5/build/pdf';
|
import * as pdfjs from 'pdfjs-dist/es5/build/pdf';
|
||||||
|
|
||||||
|
import { Writable, writable } from 'svelte/store';
|
||||||
|
|
||||||
|
export let parseResult: Writable<ParseResult> = writable(undefined);
|
||||||
|
|
||||||
// TODO this will setup fake worker cause getMainThreadWorkerMessageHandler isn't null
|
// TODO this will setup fake worker cause getMainThreadWorkerMessageHandler isn't null
|
||||||
import pdfjsWorker from 'pdfjs-dist//es5/build/pdf.worker.entry';
|
import pdfjsWorker from 'pdfjs-dist//es5/build/pdf.worker.entry';
|
||||||
pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker;
|
pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker;
|
||||||
@ -16,8 +20,13 @@ export function processUpload(file: File): Promise<ParseResult> {
|
|||||||
resolve(reader.result as ArrayBuffer);
|
resolve(reader.result as ArrayBuffer);
|
||||||
};
|
};
|
||||||
reader.readAsArrayBuffer(file);
|
reader.readAsArrayBuffer(file);
|
||||||
}).then((buffer) => {
|
})
|
||||||
const uintArray = new Uint8Array(buffer as ArrayBuffer);
|
.then((buffer) => {
|
||||||
return parser.parse(uintArray);
|
const uintArray = new Uint8Array(buffer as ArrayBuffer);
|
||||||
});
|
return parser.parse(uintArray);
|
||||||
|
})
|
||||||
|
.then((result) => {
|
||||||
|
parseResult.set(result);
|
||||||
|
return result;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
@ -1,23 +1,29 @@
|
|||||||
{
|
{
|
||||||
"include": ["src", "types"],
|
"include": ["src", "types"],
|
||||||
"compilerOptions": {
|
"compilerOptions": {
|
||||||
"module": "esnext",
|
"module": "esnext",
|
||||||
"target": "esnext",
|
"target": "esnext",
|
||||||
"moduleResolution": "node",
|
"moduleResolution": "node",
|
||||||
"jsx": "preserve",
|
"jsx": "preserve",
|
||||||
"baseUrl": "./",
|
"baseUrl": "./",
|
||||||
/* paths - If you configure Snowpack import aliases, add them here. */
|
/* paths - If you configure Snowpack import aliases, add them here. */
|
||||||
"paths": {},
|
"paths": {},
|
||||||
/* noEmit - Snowpack builds (emits) files, not tsc. */
|
/* noEmit - Snowpack builds (emits) files, not tsc. */
|
||||||
"noEmit": true,
|
"noEmit": true,
|
||||||
/* Additional Options */
|
/* Additional Options */
|
||||||
"strict": true,
|
"strict": true,
|
||||||
"noImplicitAny": false,
|
"noImplicitAny": false,
|
||||||
"skipLibCheck": true,
|
// "noImplicitThis": false,
|
||||||
"forceConsistentCasingInFileNames": true,
|
// "alwaysStrict": false,
|
||||||
"resolveJsonModule": true,
|
// "strictBindCallApply": false,
|
||||||
"useDefineForClassFields": true,
|
"strictNullChecks": false,
|
||||||
"allowSyntheticDefaultImports": true,
|
// "strictFunctionTypes": false,
|
||||||
"importsNotUsedAsValues": "error"
|
// "strictPropertyInitialization": false,
|
||||||
}
|
"skipLibCheck": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"useDefineForClassFields": true,
|
||||||
|
"allowSyntheticDefaultImports": true,
|
||||||
|
"importsNotUsedAsValues": "error"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user