Format values

This commit is contained in:
Johannes Zillmann 2021-01-29 14:13:50 +01:00
parent 93d067b346
commit 95a7e3e93b
5 changed files with 110 additions and 88 deletions

View File

@ -11,7 +11,8 @@ import type TextItem from './TextItem';
*/
export default class PdfParser {
pdfjs: any;
columns = ['str', 'dir', 'width', 'height', 'transfom', 'fontName'];
columns = ['str', 'dir', 'width', 'height', 'transform', 'fontName'];
constructor(pdfjs: any) {
this.pdfjs = pdfjs;
}

View File

@ -6,7 +6,7 @@
</script>
<div class="text-2xl font-semibold font-serif text-center bg-gray-400">PDF to Markdown Converter</div>
<main class="container mx-auto mt-5 h-full">
<main class="mt-5 h-full">
{#if $parseResult}
<Result parseResult={$parseResult} />
{:else}

View File

@ -5,10 +5,12 @@
export let parseResult: ParseResult;
</script>
<div class="mb-4">
<div>Parsed {parseResult.pageCount()} pages with {parseResult.items.length} items</div>
<div>Title: {parseResult.metadata.title()}</div>
<div>Author: {parseResult.metadata.author()}</div>
</div>
<div class="mx-4">
<div class="mb-4">
<div>Parsed {parseResult.pageCount()} pages with {parseResult.items.length} items</div>
<div>Title: {parseResult.metadata.title()}</div>
<div>Author: {parseResult.metadata.author()}</div>
</div>
<Table columns={parseResult.columns} items={parseResult.items} />
<Table columns={parseResult.columns} items={parseResult.items} />
</div>

View File

@ -27,10 +27,27 @@
openedPageIndex = false;
focusedPage = undefined;
}
/**
 * Formats a single table cell value for display.
 *
 * - Numbers are rendered with two fraction digits.
 * - Arrays are rendered as `[a, b, c]`; every numeric element is rendered
 *   with two fraction digits, other elements are joined as-is.
 * - Anything else (strings, `null`, `undefined`, plain objects, ...) is
 *   returned unchanged so the caller decides how to render it.
 *
 * @param value - The raw cell value.
 * @returns The formatted string for numbers and arrays, otherwise `value` itself.
 */
function format(value: unknown) {
    if (typeof value === 'number') {
        return value.toFixed(2);
    }
    // Array.isArray must be used directly: `typeof Array.isArray(x)` is the
    // non-empty string 'boolean' and therefore always truthy, which let
    // `null` and plain objects through and crashed on `.length` / `.join`.
    if (Array.isArray(value)) {
        // Check each element on its own so mixed arrays do not throw on `toFixed`.
        const parts = value.map((element: unknown) =>
            typeof element === 'number' ? element.toFixed(2) : element
        );
        return '[' + parts.join(', ') + ']';
    }
    return value;
}
</script>
<!-- Sticky Controls -->
<div class="controls pb-3">
<div class="controls py-2">
<div class="flex items-center space-x-2">
<span>
<span on:click={() => (openedPageIndex = !openedPageIndex)}>
@ -91,11 +108,11 @@
<div class="absolute flex">
{#if !focused}
<span on:click={() => focusOnPage(pageNumber)}>
<Support size="1x" class="hover:text-green-700 cursor-pointer" />
<Support size="1x" class="hover:text-green-700 cursor-pointer opacity-75" />
</span>
{:else}
<span on:click={showAllPages}>
<Collection size="1x" class="hover:text-green-700 cursor-pointer" />
<Collection size="1x" class="hover:text-green-700 cursor-pointer opacity-75" />
</span>
{/if}
</div>
@ -103,9 +120,9 @@
{:else}
<td />
{/if}
<td class="">{itemIdx}</td>
<td>{itemIdx}</td>
{#each columns as column}
<td class="borde2r">{item.data[column]}</td>
<td>{format(item.data[column])}</td>
{/each}
</tr>
{/each}
@ -129,7 +146,7 @@
@apply whitespace-nowrap;
position: -webkit-sticky;
position: sticky;
top: 1.7em;
top: 2em;
z-index: 2;
}
@ -137,7 +154,7 @@
@apply px-1;
position: -webkit-sticky;
position: sticky;
top: 2.1em;
top: 2.4em;
z-index: 2;
}
th:not(:first-child) {

View File

@ -41,83 +41,85 @@
}
</script>
<!-- Options -->
<div class="mb-0.5 flex flex-row-reverse space-x-2 space-x-reverse text-sm items-center">
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
Load Example
</div>
<div class="py-0.5 px-1 border-2 border-gray-50 hover:border-green-600 cursor-pointer">Debug</div>
</div>
<!-- Upload Box -->
<div class="mb-5 border-2 border-dashed border-gray-400 hover:border-green-800" class:dragover>
<Dropzone
on:drop={handleFilesSelect}
on:dragenter={() => (dragover = true)}
on:dragleave={() => (dragover = false)}
multiple={false}
noClick={false}
disableDefaultStyles={true}>
<div class="grid grid-cols-1 md:grid-cols-2 justify-items-center">
<span class:dragoverItem={dragover}>
<Download size="21x" />
</span>
<div class="px-5 mb-5">
<div class="text-5xl font-bold my-4">Drop your PDF file here...</div>
<div class="text-2xl font-bold">Or click the box to select one...</div>
<div class="mt-14"><strong>Note:</strong> Your data stays locally in your browser.</div>
<div class="mt-5 text-sm italic font-serif">
This tool converts a PDF file into a Markdown text format! Simply drag & drop your PDF file on the
upload area and go from there. Don't expect wonders, there are a lot of variances in generated PDF's
from different tools and different ages. No matter how good the parser works for your PDF, you will
have to invest a good amount of manuell work to complete it.
</div>
</div>
<div class="container mx-auto">
<!-- Options -->
<div class="mb-0.5 flex flex-row-reverse space-x-2 space-x-reverse text-sm items-center">
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
Load Example
</div>
</Dropzone>
</div>
<div class="py-0.5 px-1 border-2 border-gray-50 hover:border-green-600 cursor-pointer">Debug</div>
</div>
<!-- Progress Info -->
<div class="mt-5 text-xl font-bold">
<div style="min-width: 70%;">
{#if specifiedFileName}
<div in:blur class="text-2xl mb-2">Parsing {specifiedFileName} ...</div>
{/if}
{#if parseProgress}
<div in:blur class="flex space-x-4">
<ProgressRing radius={50} stroke={7} progress={parseProgress?.totalProgress() * 100} />
<div>
{#each parseProgress.stages as stage, index}
{#if parseProgress.isProgressing(index)}
<div class="flex space-x-2 items-center">
<div>
Parsing
{stage}
{parseProgress.stageDetails[index] ? parseProgress.stageDetails[index] : ''}
</div>
</div>
{:else if parseProgress.isComplete(index)}
<div class="flex space-x-2 items-center ">
<div>
Parsing
{stage}
{parseProgress.stageDetails[index] ? parseProgress.stageDetails[index] : ''}
</div>
<Check size="1.5x" class="text-green-700" />
</div>
{/if}
{/each}
<!-- Upload Box -->
<div class="mb-5 border-2 border-dashed border-gray-400 hover:border-green-800" class:dragover>
<Dropzone
on:drop={handleFilesSelect}
on:dragenter={() => (dragover = true)}
on:dragleave={() => (dragover = false)}
multiple={false}
noClick={false}
disableDefaultStyles={true}>
<div class="grid grid-cols-1 md:grid-cols-2 justify-items-center">
<span class:dragoverItem={dragover}>
<Download size="21x" />
</span>
<div class="px-5 mb-5">
<div class="text-5xl font-bold my-4">Drop your PDF file here...</div>
<div class="text-2xl font-bold">Or click the box to select one...</div>
<div class="mt-14"><strong>Note:</strong> Your data stays locally in your browser.</div>
<div class="mt-5 text-sm italic font-serif">
This tool converts a PDF file into a Markdown text format! Simply drag & drop your PDF file on
the upload area and go from there. Don't expect wonders, there are a lot of variances in
generated PDF's from different tools and different ages. No matter how good the parser works for
your PDF, you will have to invest a good amount of manuell work to complete it.
</div>
</div>
</div>
{/if}
{#if rejectionError}
<div in:slide class="text-red-700">{rejectionError}</div>
{/if}
{#await upload}
<!-- -->
{:catch error}
<div class="text-red-700">Failed to parse '{specifiedFileName}': {error?.message}</div>
{/await}
</Dropzone>
</div>
<!-- Progress Info -->
<div class="mt-5 text-xl font-bold">
<div style="min-width: 70%;">
{#if specifiedFileName}
<div in:blur class="text-2xl mb-2">Parsing {specifiedFileName} ...</div>
{/if}
{#if parseProgress}
<div in:blur class="flex space-x-4">
<ProgressRing radius={50} stroke={7} progress={parseProgress?.totalProgress() * 100} />
<div>
{#each parseProgress.stages as stage, index}
{#if parseProgress.isProgressing(index)}
<div class="flex space-x-2 items-center">
<div>
Parsing
{stage}
{parseProgress.stageDetails[index] ? parseProgress.stageDetails[index] : ''}
</div>
</div>
{:else if parseProgress.isComplete(index)}
<div class="flex space-x-2 items-center ">
<div>
Parsing
{stage}
{parseProgress.stageDetails[index] ? parseProgress.stageDetails[index] : ''}
</div>
<Check size="1.5x" class="text-green-700" />
</div>
{/if}
{/each}
</div>
</div>
{/if}
{#if rejectionError}
<div in:slide class="text-red-700">{rejectionError}</div>
{/if}
{#await upload}
<!-- -->
{:catch error}
<div class="text-red-700">Failed to parse '{specifiedFileName}': {error?.message}</div>
{/await}
</div>
</div>
</div>