1
0
mirror of https://github.com/jzillmann/pdf-to-markdown.git synced 2025-07-05 08:40:04 +02:00

Format values

This commit is contained in:
Johannes Zillmann
2021-01-29 14:13:50 +01:00
parent 93d067b346
commit 95a7e3e93b
5 changed files with 110 additions and 88 deletions

@ -11,7 +11,8 @@ import type TextItem from './TextItem';
*/ */
export default class PdfParser { export default class PdfParser {
pdfjs: any; pdfjs: any;
columns = ['str', 'dir', 'width', 'height', 'transfom', 'fontName']; columns = ['str', 'dir', 'width', 'height', 'transform', 'fontName'];
constructor(pdfjs: any) { constructor(pdfjs: any) {
this.pdfjs = pdfjs; this.pdfjs = pdfjs;
} }

@ -6,7 +6,7 @@
</script> </script>
<div class="text-2xl font-semibold font-serif text-center bg-gray-400">PDF to Markdown Converter</div> <div class="text-2xl font-semibold font-serif text-center bg-gray-400">PDF to Markdown Converter</div>
<main class="container mx-auto mt-5 h-full"> <main class="mt-5 h-full">
{#if $parseResult} {#if $parseResult}
<Result parseResult={$parseResult} /> <Result parseResult={$parseResult} />
{:else} {:else}

@ -5,6 +5,7 @@
export let parseResult: ParseResult; export let parseResult: ParseResult;
</script> </script>
<div class="mx-4">
<div class="mb-4"> <div class="mb-4">
<div>Parsed {parseResult.pageCount()} pages with {parseResult.items.length} items</div> <div>Parsed {parseResult.pageCount()} pages with {parseResult.items.length} items</div>
<div>Title: {parseResult.metadata.title()}</div> <div>Title: {parseResult.metadata.title()}</div>
@ -12,3 +13,4 @@
</div> </div>
<Table columns={parseResult.columns} items={parseResult.items} /> <Table columns={parseResult.columns} items={parseResult.items} />
</div>

@ -27,10 +27,27 @@
openedPageIndex = false; openedPageIndex = false;
focusedPage = undefined; focusedPage = undefined;
} }
/**
 * Formats a table cell value for display.
 *
 * - A number is rendered with two fraction digits (`toFixed(2)`).
 * - An array is rendered as `[a, b, c]`; when its first element is a number,
 *   every numeric element is rendered with two fraction digits.
 * - Anything else (strings, `null`, `undefined`, plain objects, ...) is
 *   returned unchanged.
 *
 * @param value the raw cell value; may be of any type
 * @returns the formatted string for numbers and arrays, otherwise `value` itself
 */
function format(value: unknown): unknown {
    if (typeof value === 'number') {
        return value.toFixed(2);
    }
    // Array.isArray() is the actual array test. Wrapping it in `typeof` yields the
    // always-truthy string 'boolean', which sends null and plain objects down the
    // array path where they throw.
    if (Array.isArray(value)) {
        const elements: unknown[] = value;
        const startsWithNumber = elements.length > 0 && typeof elements[0] === 'number';
        const parts = startsWithNumber
            ? // Guard each element so a stray non-number in a numeric array cannot throw.
              elements.map((element) => (typeof element === 'number' ? element.toFixed(2) : element))
            : elements;
        return '[' + parts.join(', ') + ']';
    }
    return value;
}
</script> </script>
<!-- Sticky Controls --> <!-- Sticky Controls -->
<div class="controls pb-3"> <div class="controls py-2">
<div class="flex items-center space-x-2"> <div class="flex items-center space-x-2">
<span> <span>
<span on:click={() => (openedPageIndex = !openedPageIndex)}> <span on:click={() => (openedPageIndex = !openedPageIndex)}>
@ -91,11 +108,11 @@
<div class="absolute flex"> <div class="absolute flex">
{#if !focused} {#if !focused}
<span on:click={() => focusOnPage(pageNumber)}> <span on:click={() => focusOnPage(pageNumber)}>
<Support size="1x" class="hover:text-green-700 cursor-pointer" /> <Support size="1x" class="hover:text-green-700 cursor-pointer opacity-75" />
</span> </span>
{:else} {:else}
<span on:click={showAllPages}> <span on:click={showAllPages}>
<Collection size="1x" class="hover:text-green-700 cursor-pointer" /> <Collection size="1x" class="hover:text-green-700 cursor-pointer opacity-75" />
</span> </span>
{/if} {/if}
</div> </div>
@ -103,9 +120,9 @@
{:else} {:else}
<td /> <td />
{/if} {/if}
<td class="">{itemIdx}</td> <td>{itemIdx}</td>
{#each columns as column} {#each columns as column}
<td class="borde2r">{item.data[column]}</td> <td>{format(item.data[column])}</td>
{/each} {/each}
</tr> </tr>
{/each} {/each}
@ -129,7 +146,7 @@
@apply whitespace-nowrap; @apply whitespace-nowrap;
position: -webkit-sticky; position: -webkit-sticky;
position: sticky; position: sticky;
top: 1.7em; top: 2em;
z-index: 2; z-index: 2;
} }
@ -137,7 +154,7 @@
@apply px-1; @apply px-1;
position: -webkit-sticky; position: -webkit-sticky;
position: sticky; position: sticky;
top: 2.1em; top: 2.4em;
z-index: 2; z-index: 2;
} }
th:not(:first-child) { th:not(:first-child) {

@ -41,6 +41,7 @@
} }
</script> </script>
<div class="container mx-auto">
<!-- Options --> <!-- Options -->
<div class="mb-0.5 flex flex-row-reverse space-x-2 space-x-reverse text-sm items-center"> <div class="mb-0.5 flex flex-row-reverse space-x-2 space-x-reverse text-sm items-center">
<div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}> <div class="py-0.5 border-2 border-gray-50 hover:underline cursor-pointer" on:click={handleExampleLoad}>
@ -67,10 +68,10 @@
<div class="text-2xl font-bold">Or click the box to select one...</div> <div class="text-2xl font-bold">Or click the box to select one...</div>
<div class="mt-14"><strong>Note:</strong> Your data stays locally in your browser.</div> <div class="mt-14"><strong>Note:</strong> Your data stays locally in your browser.</div>
<div class="mt-5 text-sm italic font-serif"> <div class="mt-5 text-sm italic font-serif">
This tool converts a PDF file into a Markdown text format! Simply drag & drop your PDF file on the This tool converts a PDF file into a Markdown text format! Simply drag & drop your PDF file on
upload area and go from there. Don't expect wonders, there are a lot of variances in generated PDF's the upload area and go from there. Don't expect wonders, there are a lot of variances in
from different tools and different ages. No matter how good the parser works for your PDF, you will generated PDF's from different tools and different ages. No matter how good the parser works for
have to invest a good amount of manuell work to complete it. your PDF, you will have to invest a good amount of manuell work to complete it.
</div> </div>
</div> </div>
</div> </div>
@ -120,6 +121,7 @@
{/await} {/await}
</div> </div>
</div> </div>
</div>
<style> <style>
.dragover { .dragover {