mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-24 00:33:48 +01:00
Format values
This commit is contained in:
parent
93d067b346
commit
95a7e3e93b
@ -11,7 +11,8 @@ import type TextItem from './TextItem';
|
||||
*/
|
||||
export default class PdfParser {
|
||||
pdfjs: any;
|
||||
columns = ['str', 'dir', 'width', 'height', 'transfom', 'fontName'];
|
||||
columns = ['str', 'dir', 'width', 'height', 'transform', 'fontName'];
|
||||
|
||||
constructor(pdfjs: any) {
|
||||
this.pdfjs = pdfjs;
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
</script>
|
||||
|
||||
<div class="text-2xl font-semibold font-serif text-center bg-gray-400">PDF to Markdown Converter</div>
|
||||
<main class="container mx-auto mt-5 h-full">
|
||||
<main class="mt-5 h-full">
|
||||
{#if $parseResult}
|
||||
<Result parseResult={$parseResult} />
|
||||
{:else}
|
||||
|
@ -5,10 +5,12 @@
|
||||
export let parseResult: ParseResult;
|
||||
</script>
|
||||
|
||||
<div class="mb-4">
|
||||
<div>Parsed {parseResult.pageCount()} pages with {parseResult.items.length} items</div>
|
||||
<div>Title: {parseResult.metadata.title()}</div>
|
||||
<div>Author: {parseResult.metadata.author()}</div>
|
||||
</div>
|
||||
<div class="mx-4">
|
||||
<div class="mb-4">
|
||||
<div>Parsed {parseResult.pageCount()} pages with {parseResult.items.length} items</div>
|
||||
<div>Title: {parseResult.metadata.title()}</div>
|
||||
<div>Author: {parseResult.metadata.author()}</div>
|
||||
</div>
|
||||
|
||||
<Table columns={parseResult.columns} items={parseResult.items} />
|
||||
<Table columns={parseResult.columns} items={parseResult.items} />
|
||||
</div>
|
||||
|
@ -27,10 +27,27 @@
|
||||
openedPageIndex = false;
|
||||
focusedPage = undefined;
|
||||
}
|
||||
|
||||
/**
 * Produces the display form of a raw cell value.
 *
 * - A number is rendered with exactly two fraction digits (e.g. `1.5` -> `'1.50'`).
 * - An array is rendered as `[a, b, c]`; numeric elements get the same
 *   two-digit formatting, other elements are joined as-is.
 * - Any other value (string, `null`, `undefined`, plain object, ...) is
 *   returned unchanged.
 *
 * @param value - the raw value to render.
 * @returns a formatted string for numbers and arrays, otherwise `value` itself.
 */
function format(value: unknown): unknown {
    if (typeof value === 'number') {
        return value.toFixed(2);
    }
    // Call Array.isArray directly: `typeof Array.isArray(value)` is the string
    // 'boolean', which is always truthy, so the previous check sent `null` and
    // plain objects into the array branch where `.length` / `.join` throw.
    if (Array.isArray(value)) {
        // Format per element so a non-numeric entry in a mostly numeric array
        // cannot trigger a `toFixed is not a function` error.
        const parts = value.map((element: unknown) =>
            typeof element === 'number' ? element.toFixed(2) : element
        );
        return '[' + parts.join(', ') + ']';
    }
    return value;
}
|
||||
</script>
|
||||
|
||||
<!-- Sticky Controls -->
|
||||
<div class="controls pb-3">
|
||||
<div class="controls py-2">
|
||||
<div class="flex items-center space-x-2">
|
||||
<span>
|
||||
<span on:click={() => (openedPageIndex = !openedPageIndex)}>
|
||||
@ -91,11 +108,11 @@
|
||||
<div class="absolute flex">
|
||||
{#if !focused}
|
||||
<span on:click={() => focusOnPage(pageNumber)}>
|
||||
<Support size="1x" class="hover:text-green-700 cursor-pointer" />
|
||||
<Support size="1x" class="hover:text-green-700 cursor-pointer opacity-75" />
|
||||
</span>
|
||||
{:else}
|
||||
<span on:click={showAllPages}>
|
||||
<Collection size="1x" class="hover:text-green-700 cursor-pointer" />
|
||||
<Collection size="1x" class="hover:text-green-700 cursor-pointer opacity-75" />
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
@ -103,9 +120,9 @@
|
||||
{:else}
|
||||
<td />
|
||||
{/if}
|
||||
<td class="">{itemIdx}</td>
|
||||
<td>{itemIdx}</td>
|
||||
{#each columns as column}
|
||||
<td class="borde2r">{item.data[column]}</td>
|
||||
<td>{format(item.data[column])}</td>
|
||||
{/each}
|
||||
</tr>
|
||||
{/each}
|
||||
@ -129,7 +146,7 @@
|
||||
@apply whitespace-nowrap;
|
||||
position: -webkit-sticky;
|
||||
position: sticky;
|
||||
top: 1.7em;
|
||||
top: 2em;
|
||||
z-index: 2;
|
||||
}
|
||||
|
||||
@ -137,7 +154,7 @@
|
||||
@apply px-1;
|
||||
position: -webkit-sticky;
|
||||
position: sticky;
|
||||
top: 2.1em;
|
||||
top: 2.4em;
|
||||
z-index: 2;
|
||||
}
|
||||
th:not(:first-child) {
|
||||
|
@ -41,83 +41,85 @@
|
||||
}
|
||||
</script>
|
||||
|
||||
<!-- Options -->
|
||||
<div class="mb-0.5 flex flex-row-reverse space-x-2 space-x-reverse text-sm items-center">
|
||||
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
|
||||
Load Example
|
||||
</div>
|
||||
<div class="py-0.5 px-1 border-2 border-gray-50 hover:border-green-600 cursor-pointer">Debug</div>
|
||||
</div>
|
||||
|
||||
<!-- Upload Box -->
|
||||
<div class="mb-5 border-2 border-dashed border-gray-400 hover:border-green-800" class:dragover>
|
||||
<Dropzone
|
||||
on:drop={handleFilesSelect}
|
||||
on:dragenter={() => (dragover = true)}
|
||||
on:dragleave={() => (dragover = false)}
|
||||
multiple={false}
|
||||
noClick={false}
|
||||
disableDefaultStyles={true}>
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 justify-items-center">
|
||||
<span class:dragoverItem={dragover}>
|
||||
<Download size="21x" />
|
||||
</span>
|
||||
<div class="px-5 mb-5">
|
||||
<div class="text-5xl font-bold my-4">Drop your PDF file here...</div>
|
||||
<div class="text-2xl font-bold">Or click the box to select one...</div>
|
||||
<div class="mt-14"><strong>Note:</strong> Your data stays locally in your browser.</div>
|
||||
<div class="mt-5 text-sm italic font-serif">
|
||||
This tool converts a PDF file into a Markdown text format! Simply drag & drop your PDF file on the
|
||||
upload area and go from there. Don't expect wonders, there are a lot of variances in generated PDF's
|
||||
from different tools and different ages. No matter how good the parser works for your PDF, you will
|
||||
have to invest a good amount of manuell work to complete it.
|
||||
</div>
|
||||
</div>
|
||||
<div class="container mx-auto">
|
||||
<!-- Options -->
|
||||
<div class="mb-0.5 flex flex-row-reverse space-x-2 space-x-reverse text-sm items-center">
|
||||
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
|
||||
Load Example
|
||||
</div>
|
||||
</Dropzone>
|
||||
</div>
|
||||
<div class="py-0.5 px-1 border-2 border-gray-50 hover:border-green-600 cursor-pointer">Debug</div>
|
||||
</div>
|
||||
|
||||
<!-- Progress Info -->
|
||||
<div class="mt-5 text-xl font-bold">
|
||||
<div style="min-width: 70%;">
|
||||
{#if specifiedFileName}
|
||||
<div in:blur class="text-2xl mb-2">Parsing {specifiedFileName} ...</div>
|
||||
{/if}
|
||||
{#if parseProgress}
|
||||
<div in:blur class="flex space-x-4">
|
||||
<ProgressRing radius={50} stroke={7} progress={parseProgress?.totalProgress() * 100} />
|
||||
<div>
|
||||
{#each parseProgress.stages as stage, index}
|
||||
{#if parseProgress.isProgressing(index)}
|
||||
<div class="flex space-x-2 items-center">
|
||||
<div>
|
||||
Parsing
|
||||
{stage}
|
||||
{parseProgress.stageDetails[index] ? parseProgress.stageDetails[index] : ''}
|
||||
</div>
|
||||
</div>
|
||||
{:else if parseProgress.isComplete(index)}
|
||||
<div class="flex space-x-2 items-center ">
|
||||
<div>
|
||||
Parsing
|
||||
{stage}
|
||||
{parseProgress.stageDetails[index] ? parseProgress.stageDetails[index] : ''}
|
||||
</div>
|
||||
<Check size="1.5x" class="text-green-700" />
|
||||
</div>
|
||||
{/if}
|
||||
{/each}
|
||||
<!-- Upload Box -->
|
||||
<div class="mb-5 border-2 border-dashed border-gray-400 hover:border-green-800" class:dragover>
|
||||
<Dropzone
|
||||
on:drop={handleFilesSelect}
|
||||
on:dragenter={() => (dragover = true)}
|
||||
on:dragleave={() => (dragover = false)}
|
||||
multiple={false}
|
||||
noClick={false}
|
||||
disableDefaultStyles={true}>
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 justify-items-center">
|
||||
<span class:dragoverItem={dragover}>
|
||||
<Download size="21x" />
|
||||
</span>
|
||||
<div class="px-5 mb-5">
|
||||
<div class="text-5xl font-bold my-4">Drop your PDF file here...</div>
|
||||
<div class="text-2xl font-bold">Or click the box to select one...</div>
|
||||
<div class="mt-14"><strong>Note:</strong> Your data stays locally in your browser.</div>
|
||||
<div class="mt-5 text-sm italic font-serif">
|
||||
This tool converts a PDF file into a Markdown text format! Simply drag & drop your PDF file on
|
||||
the upload area and go from there. Don't expect wonders, there are a lot of variances in
|
||||
generated PDF's from different tools and different ages. No matter how good the parser works for
|
||||
your PDF, you will have to invest a good amount of manuell work to complete it.
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
{#if rejectionError}
|
||||
<div in:slide class="text-red-700">{rejectionError}</div>
|
||||
{/if}
|
||||
{#await upload}
|
||||
<!-- -->
|
||||
{:catch error}
|
||||
<div class="text-red-700">Failed to parse '{specifiedFileName}': {error?.message}</div>
|
||||
{/await}
|
||||
</Dropzone>
|
||||
</div>
|
||||
|
||||
<!-- Progress Info -->
|
||||
<div class="mt-5 text-xl font-bold">
|
||||
<div style="min-width: 70%;">
|
||||
{#if specifiedFileName}
|
||||
<div in:blur class="text-2xl mb-2">Parsing {specifiedFileName} ...</div>
|
||||
{/if}
|
||||
{#if parseProgress}
|
||||
<div in:blur class="flex space-x-4">
|
||||
<ProgressRing radius={50} stroke={7} progress={parseProgress?.totalProgress() * 100} />
|
||||
<div>
|
||||
{#each parseProgress.stages as stage, index}
|
||||
{#if parseProgress.isProgressing(index)}
|
||||
<div class="flex space-x-2 items-center">
|
||||
<div>
|
||||
Parsing
|
||||
{stage}
|
||||
{parseProgress.stageDetails[index] ? parseProgress.stageDetails[index] : ''}
|
||||
</div>
|
||||
</div>
|
||||
{:else if parseProgress.isComplete(index)}
|
||||
<div class="flex space-x-2 items-center ">
|
||||
<div>
|
||||
Parsing
|
||||
{stage}
|
||||
{parseProgress.stageDetails[index] ? parseProgress.stageDetails[index] : ''}
|
||||
</div>
|
||||
<Check size="1.5x" class="text-green-700" />
|
||||
</div>
|
||||
{/if}
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
{#if rejectionError}
|
||||
<div in:slide class="text-red-700">{rejectionError}</div>
|
||||
{/if}
|
||||
{#await upload}
|
||||
<!-- -->
|
||||
{:catch error}
|
||||
<div class="text-red-700">Failed to parse '{specifiedFileName}': {error?.message}</div>
|
||||
{/await}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user