Grouping of line items

This commit is contained in:
Johannes Zillmann 2021-02-21 13:23:31 +01:00
parent d8bc6d100b
commit 0910f7b148
19 changed files with 378 additions and 106 deletions

View File

@ -39,6 +39,7 @@ export default class Debugger {
const outputSchema = transformer.schemaTransformer(inputSchema);
const itemResult = transformer.transform(this.context, [...this.stageResultCache[idx - 1].items]);
this.stageResultCache.push({
descriptor: transformer.descriptor,
schema: toAnnotatedSchema(inputSchema, outputSchema),
...itemResult,
});

6
core/src/ItemMerger.ts Normal file
View File

@ -0,0 +1,6 @@
import type Item from './Item';
/**
 * Describes how several items can be collapsed into one representative item.
 * It is consumed by `asPages`, which merges consecutive items of a page that
 * share the same `data[groupKey]` value.
 */
export default interface ItemMerger {
/** Name of the `Item.data` entry whose value decides which items belong to the same group. */
groupKey: string;
/** Builds the single item that represents `items`; `asPages` only calls this for groups with more than one item. */
merge(items: Item[]): Item;
}

View File

@ -0,0 +1,24 @@
import type ItemMerger from './ItemMerger';
/**
 * Static metadata a transformer declares about itself.
 * Use `toDescriptor` to build one from a partial definition; it fills in
 * empty arrays for the three list properties.
 */
export default interface TransformDescriptor {
/** Column names the transformer requires (presumably they must be present in the incoming schema — confirm against the pipeline validation). */
readonly requireColumns: string[];
/** Names of globals the transformer consumes. NOTE(review): "Globels" spelling is part of the public property name. */
readonly consumesGlobels: string[];
/** Names of globals the transformer produces. */
readonly producesGlobels: string[];
/**
* If this is set, the debug UI will group items and display a merged item.
*/
readonly itemMerger?: ItemMerger;
}
/**
 * Builds a fresh set of default descriptor values.
 *
 * A new object with new arrays is created on every call so that two
 * descriptors never share (and accidentally mutate) the same default array.
 */
function createDefaults(): TransformDescriptor {
  return {
    requireColumns: [],
    consumesGlobels: [],
    producesGlobels: [],
  };
}

/**
 * Completes a partial descriptor with default values.
 *
 * @param partial the properties the transformer wants to declare
 * @returns a descriptor in which `requireColumns`, `consumesGlobels` and
 *          `producesGlobels` are always arrays; every other property of
 *          `partial` is passed through unchanged
 */
export function toDescriptor(partial: Partial<TransformDescriptor>): TransformDescriptor {
  const fallback = createDefaults();
  return {
    ...fallback,
    ...partial,
    // An explicitly passed `undefined` must not wipe out the default array.
    requireColumns: partial.requireColumns ?? fallback.requireColumns,
    consumesGlobels: partial.consumesGlobels ?? fallback.consumesGlobels,
    producesGlobels: partial.producesGlobels ?? fallback.producesGlobels,
  };
}

View File

@ -1,5 +0,0 @@
/**
 * Static metadata a transformer may declare about itself; every property is optional.
 * NOTE(review): this hunk header marks the file as removed by the commit.
 */
export default interface TransformerDescriptor {
/** Column names the transformer requires. */
readonly requireColumns?: string[];
/** Names of globals the transformer consumes. */
readonly consumesGlobels?: string[];
/** Names of globals the transformer produces. */
readonly producesGlobels?: string[];
}

View File

@ -1,7 +1,9 @@
import TransformDescriptor from '../TransformDescriptor';
import Item from '../Item';
import AnnotatedColumn from './AnnotatedColumn';
export default interface StageResult {
descriptor?: TransformDescriptor;
schema: AnnotatedColumn[];
items: Item[];
messages: string[];

View File

@ -0,0 +1,17 @@
import { assertDefined } from '../assert';
import type Item from '../Item';
import type ItemMerger from '../ItemMerger';
/**
 * One representative item together with the items it stands for.
 * `asPages` creates a group without elements for a plain item, and a group
 * with the merged item on top plus its source items for a merged row.
 */
export default class ItemGroup {
  /**
   * @param top the item displayed for the group
   * @param elements the items represented by `top`; empty when omitted
   */
  constructor(public top: Item, public elements: Item[] = []) {}

  /** Reports whether the group carries any underlying elements. */
  hasMany(): boolean {
    return this.elements.length !== 0;
  }
}

6
core/src/support/Page.ts Normal file
View File

@ -0,0 +1,6 @@
import type ItemGroup from './ItemGroup';
/** The item groups of one page, as produced by `asPages`. */
export default interface Page {
/** Page number; `asPages` takes it from the `page` of the first item on the page. */
index: number;
/** Groups of this page, in the order of the underlying items. */
itemGroups: ItemGroup[];
}

View File

@ -0,0 +1,55 @@
import ItemMerger from 'src/ItemMerger';
import Item from '../Item';
import ItemGroup from './ItemGroup';
import Page from './Page';
/** Callback that receives a page number and the items of that page and returns the items replacing them. */
type PageItemTransformer = (page: number, items: Item[]) => Item[];
/**
 * Splits a flat item list into consecutive per-page runs.
 *
 * A new run is opened only when an item's page is greater than the page of
 * the first item in the current run; every other item is appended to the
 * current run.
 *
 * @param items items in document order
 * @returns one array per run; empty when `items` is empty
 */
export function groupByPage(items: Item[]): Item[][] {
  const runs: Item[][] = [];
  items.forEach((item) => {
    const currentRun = runs[runs.length - 1];
    if (currentRun === undefined || item.page > currentRun[0]?.page) {
      runs.push([item]);
      return;
    }
    currentRun.push(item);
  });
  return runs;
}
/**
 * Splits a flat item list into runs of consecutive items sharing the same
 * value (compared with `!==`) for one data entry.
 *
 * @param items items in document order
 * @param elementName key into `item.data` whose value identifies a run
 * @returns one array per run; empty when `items` is empty
 */
export function groupByElement(items: Item[], elementName: string): Item[][] {
  const runs: Item[][] = [];
  items.forEach((item) => {
    const currentRun = runs[runs.length - 1];
    if (currentRun === undefined || item.data[elementName] !== currentRun[0]?.data[elementName]) {
      runs.push([item]);
      return;
    }
    currentRun.push(item);
  });
  return runs;
}
/**
 * Runs a transformer once per page and concatenates the per-page results.
 *
 * @param items items in document order; split into pages via `groupByPage`
 * @param groupedTransformer called with the page number (taken from the first
 *        item of the page) and that page's items
 * @returns the transformer outputs of all pages, flattened in page order
 */
export function transformGroupedByPage(items: Item[], groupedTransformer: PageItemTransformer): Item[] {
  const perPageResults = groupByPage(items).map((pageItems) => {
    const pageNumber = pageItems[0].page;
    return groupedTransformer(pageNumber, pageItems);
  });
  const flattened = new Array<Item>();
  return flattened.concat(...perPageResults);
}
/**
 * Arranges a flat item list into pages of item groups.
 *
 * Without a merger every item becomes its own group. With a merger,
 * consecutive items of a page that share the same `data[itemMerger.groupKey]`
 * value are combined: a run of more than one item yields a group whose top is
 * the merged item and whose elements are the run; a single item yields a
 * plain group.
 *
 * @param items items in document order
 * @param itemMerger optional grouping/merging strategy
 * @returns one page per run found by `groupByPage`, indexed by the page of its first item
 */
export function asPages(items: Item[], itemMerger?: ItemMerger): Page[] {
  const toItemGroups = (pageItems: Item[]): ItemGroup[] => {
    if (!itemMerger) {
      return pageItems.map((item) => new ItemGroup(item));
    }
    const merger = itemMerger;
    return groupByElement(pageItems, merger.groupKey).map((members) =>
      members.length > 1 ? new ItemGroup(merger.merge(members), members) : new ItemGroup(members[0]),
    );
  };

  return groupByPage(items).map((pageItems: Item[]) => {
    const itemGroups = toItemGroups(pageItems);
    return { index: pageItems[0].page, itemGroups } as Page;
  });
}

View File

@ -3,7 +3,7 @@ import Item from '../Item';
import ItemResult from '../ItemResult';
import ItemTransformer from './ItemTransformer';
import TransformContext from './TransformContext';
import { transformGroupedByPage } from './transformerUtils';
import { transformGroupedByPage } from '../support/itemUtils';
export default class AdjustHeight extends ItemTransformer {
constructor() {

View File

@ -2,7 +2,7 @@ import Item from '../Item';
import ItemResult from '../ItemResult';
import ItemTransformer from './ItemTransformer';
import TransformContext from './TransformContext';
import { transformGroupedByPage } from './transformerUtils';
import { transformGroupedByPage } from '../support/itemUtils';
export default class CompactLines extends ItemTransformer {
constructor() {
@ -11,6 +11,10 @@ export default class CompactLines extends ItemTransformer {
'Combines items on the same y-axis',
{
requireColumns: ['str', 'y'],
itemMerger: {
groupKey: 'line',
merge: mergeLineItems,
},
},
(incomingSchema) => {
return incomingSchema.reduce((schema, column) => {
@ -41,3 +45,25 @@ export default class CompactLines extends ItemTransformer {
};
}
}
/**
 * Merges the items of one line into a single item for display.
 *
 * - `page` and `line` come from the first item
 * - `str` is every item's `str` joined with a single space
 * - `x` / `y` are the minimum over all items, `height` the maximum
 * - `width` is the sum of all item widths
 * - `fontName` / `dir` become arrays of the distinct values, in first-seen order
 *
 * @param items the items of the line; the first item must exist
 */
function mergeLineItems(items: Item[]): Item {
  const { page, data: firstData } = items[0];
  const column = (name: string) => items.map((item) => item.data[name]);
  const distinct = (name: string) => [...new Set(column(name))];

  let width = 0;
  items.forEach((item) => {
    width = width + item.data['width'];
  });

  return new Item(page, {
    str: column('str').join(' '),
    line: firstData['line'],
    x: Math.min(...column('x')),
    y: Math.min(...column('y')),
    width,
    height: Math.max(...column('height')),
    fontName: distinct('fontName'),
    dir: distinct('dir'),
  });
}

View File

@ -1,4 +1,4 @@
import type TransformerDescriptor from '../TransformerDescription';
import TransformDescriptor, { toDescriptor } from '../TransformDescriptor';
import type TransformContext from './TransformContext';
import type Item from '../Item';
import type ItemResult from '../ItemResult';
@ -11,25 +11,18 @@ type SchemaTransformer = (incomingSchema: string[]) => string[];
export default abstract class ItemTransformer {
readonly name: string;
readonly description: string;
readonly descriptor: TransformerDescriptor;
readonly descriptor: TransformDescriptor;
readonly schemaTransformer: SchemaTransformer;
constructor(
name: string,
description: string,
descriptor: TransformerDescriptor,
descriptorPartial: Partial<TransformDescriptor>,
schemaTransformer: SchemaTransformer = (schema) => schema,
) {
this.name = name;
this.description = description;
this.descriptor = {
...{
consumesGlobels: [],
producesGlobels: [],
requireColumns: [],
},
...descriptor,
};
this.descriptor = toDescriptor(descriptorPartial);
this.schemaTransformer = schemaTransformer;
}

View File

@ -1,19 +0,0 @@
import Item from '../Item';
/** Callback that receives a page number and the items of that page and returns the items replacing them. */
type PageItemTransformer = (page: number, items: Item[]) => Item[];

/**
 * Runs a transformer once per page and concatenates the per-page results.
 *
 * Items are split into consecutive runs; a new run starts only when an
 * item's page is greater than the page of the first item in the current run.
 *
 * @param items items in document order
 * @param groupedTransformer called with the page number of a run's first item and the run itself
 * @returns the transformer outputs of all runs, flattened in order
 */
export function transformGroupedByPage(items: Item[], groupedTransformer: PageItemTransformer) {
  const runs: Item[][] = [];
  items.forEach((item) => {
    const currentRun = runs[runs.length - 1];
    if (currentRun === undefined || item.page > currentRun[0]?.page) {
      runs.push([item]);
    } else {
      currentRun.push(item);
    }
  });
  const perPageResults = runs.map((run) => groupedTransformer(run[0].page, run));
  return new Array<Item>().concat(...perPageResults);
}

View File

@ -1,7 +1,7 @@
import Debugger from 'src/Debugger';
import Item from 'src/Item';
import ItemTransformer from 'src/transformer/ItemTransformer';
import TransformerDescriptor from 'src/TransformerDescription';
import TransformDescriptor from 'src/TransformDescriptor';
import TransformContext from 'src/transformer/TransformContext';
import ItemResult from 'src/ItemResult';
import ColumnAnnotation from 'src/debug/ColumnAnnotation';
@ -9,7 +9,7 @@ import AnnotatedColumn from 'src/debug/AnnotatedColumn';
class TestTransformer extends ItemTransformer {
items: Item[];
constructor(name: string, descriptor: TransformerDescriptor, outputSchema: string[], items: Item[]) {
constructor(name: string, descriptor: Partial<TransformDescriptor>, outputSchema: string[], items: Item[]) {
super(name, `Description for ${name}`, descriptor, (incomingSchema) => outputSchema);
this.items = items;
}
@ -30,7 +30,7 @@ test('basic debug', async () => {
const trans1Items = parsedItems.map((item) => item.withData({ C: `c=${item.value('A')}+${item.value('B')}` }));
const transformers = [new TestTransformer('Trans1', trans1Desc, trans1Schema, trans1Items)];
const debug = new Debugger(parsedSchema, parsedItems, { pageViewports: [] }, transformers);
const debug = new Debugger(parsedSchema, parsedItems, { fontMap: new Map(), pageViewports: [] }, transformers);
expect(debug.stageNames).toEqual(['Parse Result', 'Trans1']);
expect(debug.stageResults(0).schema).toEqual(parsedSchema.map((column) => ({ name: column })));
@ -47,7 +47,7 @@ describe('build schemas', () => {
function calculateSchema(inputSchema: string[], outputSchema: string[]): AnnotatedColumn[] {
const transformers = [new TestTransformer('Trans1', {}, outputSchema, items)];
const debug = new Debugger(inputSchema, items, { pageViewports: [] }, transformers);
const debug = new Debugger(inputSchema, items, { fontMap: new Map(), pageViewports: [] }, transformers);
return debug.stageResults(1).schema;
}

View File

@ -1,4 +1,4 @@
import TransformerDescriptor from 'src/TransformerDescription';
import TransformDescriptor from 'src/TransformDescriptor';
import Item from 'src/Item';
import ItemResult from 'src/ItemResult';
import ItemTransformer from 'src/transformer/ItemTransformer';
@ -9,7 +9,7 @@ import * as fs from 'fs';
import PdfPipeline from 'src/PdfPipeline';
class TestSchemaTransformer extends ItemTransformer {
constructor(name: string, descriptor: TransformerDescriptor, outputSchema: string[] | undefined = undefined) {
constructor(name: string, descriptor: Partial<TransformDescriptor>, outputSchema: string[] | undefined = undefined) {
if (outputSchema) {
super(name, `Description for ${name}`, descriptor, (_) => outputSchema);
} else {

View File

@ -0,0 +1,112 @@
import Item from 'src/Item';
import Page from 'src/support/Page';
import { groupByPage, groupByElement, transformGroupedByPage, asPages } from 'src/support/itemUtils';
import ItemGroup from 'src/support/ItemGroup';
import ItemMerger from 'src/ItemMerger';
import ItemTransformer from 'src/transformer/ItemTransformer';
describe('groupByPage', () => {
  // No items means no pages.
  test('empty', async () => {
    const grouped = groupByPage([]);
    expect(grouped).toEqual([]);
  });

  // Consecutive items of the same page end up in the same inner array.
  test('group', async () => {
    const firstPage = [new Item(0, { id: 1 })];
    const secondPage = [new Item(1, { id: 2 }), new Item(1, { id: 3 })];
    const thirdPage = [new Item(2, { id: 4 })];
    const expected = [firstPage, secondPage, thirdPage];

    const grouped = groupByPage([...firstPage, ...secondPage, ...thirdPage]);

    expect(grouped).toEqual(expected);
  });
});
describe('groupByElement', () => {
  // No items means no groups.
  test('empty', async () => {
    const grouped = groupByElement([], 'line');
    expect(grouped).toEqual([]);
  });

  // Consecutive items with the same `line` value end up in the same inner array.
  test('group', async () => {
    const firstLine = [new Item(0, { line: 1, id: 1 })];
    const secondLine = [new Item(0, { line: 2, id: 2 }), new Item(0, { line: 2, id: 3 })];
    const thirdLine = [new Item(0, { line: 3, id: 4 })];
    const expected = [firstLine, secondLine, thirdLine];

    const grouped = groupByElement([...firstLine, ...secondLine, ...thirdLine], 'line');

    expect(grouped).toEqual(expected);
  });
});
describe('transformGroupedByPage', () => {
  // With no items the page transformer must never run.
  test('empty', async () => {
    const result = transformGroupedByPage([], () => fail("shoudln't be called"));
    expect(result).toEqual([]);
  });

  // An identity transformer sees each page's items and leaves the list untouched.
  test('pipe through', async () => {
    const itemsPerPage = [
      [new Item(0, { id: 1 })],
      [new Item(1, { id: 2 }), new Item(1, { id: 3 })],
      [new Item(2, { id: 4 })],
    ];
    const allItems = new Array<Item>().concat(...itemsPerPage);

    const result = transformGroupedByPage(allItems, (pageNumber, itemsOfPage) => {
      expect(itemsOfPage).toEqual(itemsPerPage[pageNumber]);
      return itemsOfPage;
    });

    expect(result).toEqual(allItems);
  });

  // Whatever the transformer returns per page is what ends up in the result.
  test('change', async () => {
    const original = [new Item(0, { v: 0 }), new Item(1, { v: 0 })];

    const result = transformGroupedByPage(original, (_, itemsOfPage) => {
      return [itemsOfPage[0].withData({ v: 1 })];
    });

    expect(result).toEqual(original.map((item) => item.withData({ v: 1 })));
  });
});
describe('asPages', () => {
  // Must exercise asPages itself: no items means no pages.
  test('empty', async () => {
    expect(asPages([])).toEqual([]);
  });

  // Without a merger every item becomes its own single-item group.
  test('no merger', async () => {
    const pageItems = [
      [new Item(0, { id: 1, line: 1 })],
      [new Item(1, { id: 2, line: 1 }), new Item(1, { id: 3, line: 1 }), new Item(1, { id: 4, line: 2 })],
      [new Item(2, { id: 5, line: 1 })],
    ];
    const flattenedItems = new Array<Item>().concat(...pageItems);
    const pages = asPages(flattenedItems);
    expect(pages).toEqual([
      { index: 0, itemGroups: pageItems[0].map((item) => new ItemGroup(item)) },
      { index: 1, itemGroups: pageItems[1].map((item) => new ItemGroup(item)) },
      { index: 2, itemGroups: pageItems[2].map((item) => new ItemGroup(item)) },
    ] as Page[]);
  });

  // With a merger, consecutive items sharing a `line` collapse into one group
  // whose top is the merged item and whose elements are the source items.
  test('merger', async () => {
    const pageItems = [
      [new Item(0, { id: 1, line: 1 })],
      [new Item(1, { id: 2, line: 1 }), new Item(1, { id: 3, line: 1 }), new Item(1, { id: 4, line: 2 })],
      [new Item(2, { id: 5, line: 1 })],
    ];
    const flattenedItems = new Array<Item>().concat(...pageItems);
    const merger: ItemMerger = { groupKey: 'line', merge: (items) => items[0] };
    const pages = asPages(flattenedItems, merger);
    expect(pages).toEqual([
      { index: 0, itemGroups: pageItems[0].map((item) => new ItemGroup(item)) },
      {
        index: 1,
        itemGroups: [
          new ItemGroup(merger.merge(pageItems[1].slice(0, 2)), pageItems[1].slice(0, 2)),
          new ItemGroup(pageItems[1][2]),
        ],
      },
      { index: 2, itemGroups: pageItems[2].map((item) => new ItemGroup(item)) },
    ] as Page[]);
  });
});

View File

@ -0,0 +1,51 @@
import Item from 'src/Item';
import CompactLines from 'src/transformer/CompactLines';
// Checks the merger that CompactLines publishes through its descriptor:
// three fragments of line 2 are expected to collapse into one item.
test('Item Merger', async () => {
  const merger = new CompactLines().descriptor.itemMerger;
  expect(merger?.groupKey).toEqual('line');

  const lineFragments = [
    new Item(0, {
      line: 2,
      x: 240,
      y: 585,
      str: 'Dies ist eine Test-PDF',
      fontName: 'g_d0_f2',
      dir: 'ltr',
      width: 108.62,
      height: 11,
    }),
    new Item(0, {
      line: 2,
      x: 352.69,
      y: 585,
      str: '.',
      fontName: 'g_d0_f2',
      dir: 'ltr',
      width: 3.06,
      height: 11,
    }),
    new Item(0, {
      line: 2,
      x: 348,
      y: 588,
      str: '1',
      fontName: 'g_d0_f2',
      dir: 'ltr',
      width: 4.08,
      height: 7.33,
    }),
  ];
  const merged = merger?.merge(lineFragments);

  const expected = new Item(0, {
    line: 2,
    x: 240,
    y: 585,
    str: 'Dies ist eine Test-PDF . 1',
    fontName: ['g_d0_f2'],
    dir: ['ltr'],
    width: 115.76,
    height: 11,
  });
  expect(merged?.withoutUuid()).toEqual(expected.withoutUuid());
});

View File

@ -1,31 +0,0 @@
import Item from 'src/Item';
import { transformGroupedByPage } from 'src/transformer/transformerUtils';
describe('transformGroupedByPage', () => {
  // With no items the page transformer must never run.
  test('empty', async () => {
    const result = transformGroupedByPage([], () => fail("shoudln't be called"));
    expect(result).toEqual([]);
  });

  // An identity transformer sees each page's items and leaves the list untouched.
  test('pipe through', async () => {
    const itemsPerPage = [
      [new Item(0, { id: 1 })],
      [new Item(1, { id: 2 }), new Item(1, { id: 3 })],
      [new Item(2, { id: 4 })],
    ];
    const allItems = new Array<Item>().concat(...itemsPerPage);

    const result = transformGroupedByPage(allItems, (pageNumber, itemsOfPage) => {
      expect(itemsOfPage).toEqual(itemsPerPage[pageNumber]);
      return itemsOfPage;
    });

    expect(result).toEqual(allItems);
  });

  // Whatever the transformer returns per page is what ends up in the result.
  test('change', async () => {
    const original = [new Item(0, { v: 0 }), new Item(1, { v: 0 })];

    const result = transformGroupedByPage(original, (_, itemsOfPage) => {
      return [itemsOfPage[0].withData({ v: 1 })];
    });

    expect(result).toEqual(original.map((item) => item.withData({ v: 1 })));
  });
});

View File

@ -3,8 +3,11 @@
import Icon from 'fa-svelte';
import { faMapPin as pin } from '@fortawesome/free-solid-svg-icons/faMapPin';
import { BookOpen, ArrowLeft, ArrowRight } from 'svelte-hero-icons';
import type Debugger from '@core/Debugger';
import type Item from '@core/Item';
import { asPages } from '../../../core/src/support/itemUtils';
import Popup from '../components/Popup.svelte';
import PageSelectionPopup from './PageSelectionPopup.svelte';
import ItemTable from './ItemTable.svelte';
@ -21,17 +24,9 @@
$: stageResult = debug.stageResults(currentStage);
$: pageIsPinned = !isNaN(pinnedPage);
$: pagesNumbers = new Set(stageResult.items.map((item) => item.page));
$: pages = asPages(stageResult.items, stageResult.descriptor?.itemMerger);
$: maxPage = Math.max(...pagesNumbers);
$: itemsByPage = [
...stageResult.items.reduce((map, item) => {
if (!map.has(item.page)) {
map.set(item.page, []);
}
map.get(item.page).push(item);
return map;
}, new Map<number, Item[]>()),
];
$: visiblePages = pageIsPinned ? itemsByPage.filter(([page]) => page === pinnedPage) : itemsByPage;
$: visiblePages = pageIsPinned ? pages.filter((page) => page.index === pinnedPage) : pages;
</script>
<div class="mx-4">
@ -97,7 +92,7 @@
</ul>
<!-- Items -->
<ItemTable schema={stageResult.schema} itemsByPage={visiblePages} {maxPage} {pageIsPinned} />
<ItemTable schema={stageResult.schema} pages={visiblePages} {maxPage} {pageIsPinned} />
</div>
<style>

View File

@ -1,26 +1,27 @@
<script>
import { scale, fade } from 'svelte/transition';
import type Item from '@core/Item';
import type AnnotatedColumn from '@core/debug/AnnotatedColumn';
import ColumnAnnotation from '../../../core/src/debug/ColumnAnnotation';
import inView from '../actions/inView';
import { formatValue } from './formatValues';
import type Page from '@core/support/Page';
export let schema: AnnotatedColumn[];
export let itemsByPage: [number, Item[]][];
export let pages: Page[];
export let maxPage: number;
export let pageIsPinned: boolean;
let maxItemsToRenderInOneLoad = 200;
let renderedMaxPage = 0;
let expandedItemGroup: { pageIndex: number; itemIndex: number };
let renderedItemsByPage: [number, Item[]][];
let renderedPages: Page[];
$: {
if (pageIsPinned) {
renderedItemsByPage = itemsByPage;
renderedPages = pages;
renderedMaxPage = 0;
} else {
calculateNextPageToRenderTo();
renderedItemsByPage = itemsByPage.slice(0, renderedMaxPage);
renderedPages = pages.slice(0, renderedMaxPage);
}
}
@ -29,16 +30,22 @@
return;
}
let itemCount = 0;
for (let index = 0; index < itemsByPage.length; index++) {
for (let index = 0; index < pages.length; index++) {
renderedMaxPage++;
const [_, items] = itemsByPage[index];
itemCount += items.length;
itemCount += pages[index].itemGroups.length;
if (itemCount > maxItemsToRenderInOneLoad) {
break;
}
}
// console.log(`Render pages 0 to ${renderedMaxPage} with ${itemCount} items`);
}
// Tells whether the currently expanded item group (if any) is the one at the
// given page index / item index. The template passes `page.index` and the
// group's position within the page.
const isExpanded = (pageIndex: number, itemIndex: number) => {
return expandedItemGroup?.pageIndex === pageIndex && expandedItemGroup?.itemIndex === itemIndex;
};
// Collapses the addressed row when it is the expanded one; otherwise makes it
// the expanded row. Only one position is stored, so expanding a row replaces
// any previously expanded one.
const toggleRow = (pageIndex: number, itemIndex: number) => {
expandedItemGroup = isExpanded(pageIndex, itemIndex) ? undefined : { pageIndex, itemIndex };
};
</script>
<!-- Item table -->
@ -56,33 +63,53 @@
{/each}
</thead>
<tbody>
{#each renderedItemsByPage as [pageNumber, items], pageIdx}
{#each renderedPages as page, pageIdx}
<!-- Separator between pages -->
{#if pageIdx > 0}
<tr class="h-5" />
{/if}
{#each items as item, itemIdx}
<tr in:fade>
<!-- Page items -->
{#each page.itemGroups as itemGroup, itemIdx}
<tr
class:expandable={itemGroup.hasMany()}
class:expanded={expandedItemGroup && isExpanded(page.index, itemIdx)}
in:fade>
<!-- Page number in first page item row -->
{#if itemIdx === 0}
<td class="page bg-gray-50">
<div>Page {pageNumber} {pageIsPinned ? '' : ' / ' + maxPage}</div>
<td id="page" class="page bg-gray-50">
<div>Page {page.index} {pageIsPinned ? '' : ' / ' + maxPage}</div>
</td>
{:else}
<td />
<td id="page" />
{/if}
<td>{itemIdx}</td>
{#each schema as column}
<td class="select-all">{formatValue(item.data[column.name])}</td>
{/each}
<span class="contents" on:click={() => itemGroup.hasMany() && toggleRow(page.index, itemIdx)}>
<td>{itemIdx}{itemGroup.hasMany() ? '+' : ''}</td>
{#each schema as column}
<td class="select-all">{formatValue(itemGroup.top.data[column.name])}</td>
{/each}
</span>
</tr>
<!-- Expanded childs -->
{#if expandedItemGroup && isExpanded(page.index, itemIdx)}
{#each itemGroup.elements as child, childIdx}
<tr class="childs">
<td id="page" />
<td>{'└ ' + childIdx}</td>
{#each schema as column}
<td class="select-all">{formatValue(child.data[column.name])}</td>
{/each}
</tr>
{/each}
{/if}
{/each}
{/each}
</tbody>
</table>
{#if !pageIsPinned}
{#if renderedMaxPage < itemsByPage.length}
{#if renderedMaxPage < pages.length}
<span use:inView on:intersect={({ detail }) => detail && calculateNextPageToRenderTo()} />
<div class="my-6 text-center text-2xl">...</div>
{:else}
@ -113,12 +140,24 @@
@apply bg-gray-300;
@apply shadow;
}
td:not(:first-child) {
td:not(#page) {
@apply px-1;
@apply border-b;
}
tr:hover td:not(:first-child) {
tr:hover td:not(#page) {
@apply bg-gray-200;
}
tr.expandable:hover td:not(#page) {
@apply cursor-pointer;
}
tr.expanded td:not(#page) {
@apply bg-gray-300;
}
tr.childs td:not(#page) {
@apply bg-gray-200;
}
</style>