Make LineItemMerger standalone and re-usable

This commit is contained in:
Johannes Zillmann 2021-02-27 18:45:14 +01:00
parent e6a18fa0d8
commit 229cb53eb0
11 changed files with 114 additions and 93 deletions

View File

@ -1,6 +0,0 @@
import type Item from './Item';
/**
 * Contract for an object that merges a group of items into a single item.
 */
export default interface ItemMerger {
// Key by which items are grouped before merging — presumably the name of an item data field; confirm against callers.
groupKey: string;
// Merges the given items into one resulting item.
merge(items: Item[]): Item;
}

View File

@ -1,4 +1,11 @@
import type ItemMerger from './ItemMerger';
import type ItemMerger from './support/ItemMerger';
/**
 * Optional, debug-only settings that can be attached to a transform descriptor.
 */
interface Debug {
/**
* If this is set, the debug UI will group items and display a merged item.
*/
readonly itemMerger?: ItemMerger;
}
export default interface TransformDescriptor {
readonly requireColumns: string[];
@ -7,7 +14,7 @@ export default interface TransformDescriptor {
/**
* If this is set, the debug UI will group items and display a merged item.
*/
readonly itemMerger?: ItemMerger;
readonly debug?: Debug;
}
const defaults: TransformDescriptor = {

View File

@ -0,0 +1,9 @@
import type Item from '../Item';
/**
 * Base class for mergers which take a group of individual items and collapse
 * them into one top-level, summary-like item.
 */
export default abstract class ItemMerger {
  /**
   * Key by which items are grouped before being merged.
   * NOTE(review): presumably the name of an item data field — confirm against callers.
   */
  public groupKey: string;

  constructor(groupKey: string) {
    this.groupKey = groupKey;
  }

  /**
   * Merges the given items into a single item.
   *
   * @param items the items of one group
   * @returns the merged item
   */
  abstract merge(items: Item[]): Item;
}

View File

@ -0,0 +1,30 @@
import ItemMerger from './ItemMerger';
import Item from '../Item';
/**
 * Merges all items of one line into a single summary item.
 *
 * Items are grouped by the `'line'` key (passed to the base class as group key).
 */
export default class LineItemMerger extends ItemMerger {
  constructor() {
    super('line');
  }

  /**
   * Builds one item out of the given line items:
   * - `page` and `line` are taken from the first item,
   * - `str` is every item's `str` joined with a single space,
   * - `x` / `y` are the minimum of the items' values,
   * - `width` is the sum of the items' widths,
   * - `height` is the maximum of the items' heights,
   * - `fontName` / `dir` are arrays of the distinct values in first-seen order.
   *
   * @param items the items of one line; must contain at least one item
   * @returns the merged item
   * @throws Error if `items` is empty
   */
  merge(items: Item[]): Item {
    const first = items[0];
    if (first === undefined) {
      // Without this guard an empty group fails with an opaque
      // "cannot read properties of undefined" TypeError.
      throw new Error('LineItemMerger.merge requires at least one item');
    }

    const page = first.page;
    const line = first.data['line'];
    const str = items.map((item) => item.data['str']).join(' ');
    const x = Math.min(...items.map((item) => item.data['x']));
    const y = Math.min(...items.map((item) => item.data['y']));
    const width = items.reduce((sum, item) => sum + item.data['width'], 0);
    const height = Math.max(...items.map((item) => item.data['height']));
    // A Set keeps insertion order, so the distinct values stay in first-seen order.
    const fontNames = [...new Set(items.map((item) => item.data['fontName']))];
    const directions = [...new Set(items.map((item) => item.data['dir']))];

    return new Item(page, {
      str,
      line,
      x,
      y,
      width,
      height,
      fontName: fontNames,
      dir: directions,
    });
  }
}

View File

@ -1,4 +1,4 @@
import ItemMerger from '../ItemMerger';
import ItemMerger from './ItemMerger';
import Item from '../Item';
import ItemGroup from './ItemGroup';
import Page from './Page';

View File

@ -3,6 +3,7 @@ import ItemResult from '../ItemResult';
import ItemTransformer from './ItemTransformer';
import TransformContext from './TransformContext';
import { transformGroupedByPage } from '../support/itemUtils';
import LineItemMerger from '../support/LineItemMerger';
export default class CompactLines extends ItemTransformer {
constructor() {
@ -11,9 +12,8 @@ export default class CompactLines extends ItemTransformer {
'Combines items on the same y-axis',
{
requireColumns: ['str', 'y', 'height'],
itemMerger: {
groupKey: 'line',
merge: mergeLineItems,
debug: {
itemMerger: new LineItemMerger(),
},
},
(incomingSchema) => {
@ -48,25 +48,3 @@ export default class CompactLines extends ItemTransformer {
};
}
}
/**
 * Merges the items of one line into a single summary item.
 *
 * - `page` and `line` are taken from the first item,
 * - `str` is every item's `str` joined with a single space,
 * - `x` / `y` are the minimum of the items' values,
 * - `width` is the sum of the items' widths,
 * - `height` is the maximum of the items' heights,
 * - `fontName` / `dir` are arrays of the distinct values in first-seen order.
 *
 * @param items the items of one line; must contain at least one item
 * @returns the merged item
 * @throws Error if `items` is empty
 */
function mergeLineItems(items: Item[]): Item {
  const first = items[0];
  if (first === undefined) {
    // Without this guard an empty group fails with an opaque
    // "cannot read properties of undefined" TypeError.
    throw new Error('mergeLineItems requires at least one item');
  }

  const page = first.page;
  const line = first.data['line'];
  const str = items.map((item) => item.data['str']).join(' ');
  const x = Math.min(...items.map((item) => item.data['x']));
  const y = Math.min(...items.map((item) => item.data['y']));
  const width = items.reduce((sum, item) => sum + item.data['width'], 0);
  const height = Math.max(...items.map((item) => item.data['height']));
  // A Set keeps insertion order, so the distinct values stay in first-seen order.
  const fontNames = [...new Set(items.map((item) => item.data['fontName']))];
  const directions = [...new Set(items.map((item) => item.data['dir']))];

  return new Item(page, {
    str,
    line,
    x,
    y,
    width,
    height,
    fontName: fontNames,
    dir: directions,
  });
}

View File

@ -2,16 +2,16 @@ import Item from '../Item';
import ItemResult from '../ItemResult';
import ItemTransformer from './ItemTransformer';
import TransformContext from './TransformContext';
import LineItemMerger from '../support/LineItemMerger';
import { transformGroupedByPageAndLine } from '../support/itemUtils';
export default class SortXWithinLines extends ItemTransformer {
/**
 * Registers this transformer under the name 'Sort by X' and declares that it
 * requires the 'line' and 'x' columns.
 */
constructor() {
super('Sort by X', 'Sorts the items of a line by the x coordinate', {
requireColumns: ['line', 'x'],
// Debug-only setting: with an item merger set, the debug UI groups items and shows a merged item.
debug: {
itemMerger: new LineItemMerger(),
},
});
}

View File

@ -0,0 +1,56 @@
// LineItemMerger sits next to ItemMerger in `support/` (the transformers import it
// from '../support/LineItemMerger'), so it is imported from there here as well.
import LineItemMerger from 'src/support/LineItemMerger';
import Item from 'src/Item';
import { items } from '../testItems';

const itemMerger = new LineItemMerger();

// Merging three items of the same line yields one item with joined text,
// minimal x/y, summed width, maximal height and de-duplicated font names / directions.
test('Basics', () => {
  expect(itemMerger.groupKey).toEqual('line');

  // `itemMerger` is a concrete instance, so no optional chaining is needed.
  const mergedItem = itemMerger.merge(
    items(0, [
      {
        line: 2,
        x: 240,
        y: 585,
        str: 'Dies ist eine Test-PDF',
        fontName: 'g_d0_f2',
        dir: 'ltr',
        width: 108.62,
        height: 11,
      },
      {
        line: 2,
        x: 352.69,
        y: 585,
        str: '.',
        fontName: 'g_d0_f2',
        dir: 'ltr',
        width: 3.06,
        height: 11,
      },
      {
        line: 2,
        x: 348,
        y: 588,
        str: '1',
        fontName: 'g_d0_f2',
        dir: 'ltr',
        width: 4.08,
        height: 7.33,
      },
    ]),
  );

  expect(mergedItem.withoutUuid()).toEqual(
    new Item(0, {
      line: 2,
      x: 240,
      y: 585,
      str: 'Dies ist eine Test-PDF . 1',
      fontName: ['g_d0_f2'],
      dir: ['ltr'],
      width: 115.76,
      height: 11,
    }).withoutUuid(),
  );
});

View File

@ -8,7 +8,7 @@ import {
asPages,
} from 'src/support/itemUtils';
import ItemGroup from 'src/support/ItemGroup';
import ItemMerger from 'src/ItemMerger';
import ItemMerger from 'src/support/ItemMerger';
import { items } from 'test/testItems';
describe('groupByPage', () => {

View File

@ -1,4 +1,3 @@
import Item from 'src/Item';
import CompactLines from 'src/transformer/CompactLines';
import { emptyContext } from './testContext';
import { items } from '../testItems';
@ -69,55 +68,3 @@ test('Transform - lowered charactes (dict.pdf)', async () => {
);
expect(results.items.map((item) => item.data['line'])).toEqual([0, 0, 1, 1, 1, 1, 2]);
});
// Checks the item merger exposed by the transformer's descriptor: it groups by 'line'
// and merges three items of one line into a single item.
test('Item Merger', async () => {
  const itemMerger = transformer.descriptor.itemMerger;
  expect(itemMerger?.groupKey).toEqual('line');

  // Properties shared by all three input items.
  const shared = { line: 2, fontName: 'g_d0_f2', dir: 'ltr' };

  const mergedItem = itemMerger?.merge(
    items(0, [
      { ...shared, x: 240, y: 585, str: 'Dies ist eine Test-PDF', width: 108.62, height: 11 },
      { ...shared, x: 352.69, y: 585, str: '.', width: 3.06, height: 11 },
      { ...shared, x: 348, y: 588, str: '1', width: 4.08, height: 7.33 },
    ]),
  );

  const expectedItem = new Item(0, {
    line: 2,
    x: 240,
    y: 585,
    str: 'Dies ist eine Test-PDF . 1',
    fontName: ['g_d0_f2'],
    dir: ['ltr'],
    width: 115.76,
    height: 11,
  });

  expect(mergedItem?.withoutUuid()).toEqual(expectedItem.withoutUuid());
});

View File

@ -25,7 +25,7 @@
$: stageResult = debug.stageResults(currentStage);
$: pageIsPinned = !isNaN(pinnedPage);
$: pagesNumbers = new Set(stageResult.items.map((item) => item.page));
$: pages = asPages(stageResult.items, stageResult.descriptor?.itemMerger);
$: pages = asPages(stageResult.items, stageResult.descriptor?.debug?.itemMerger);
$: maxPage = Math.max(...pagesNumbers);
$: visiblePages = pageIsPinned ? pages.filter((page) => page.index === pinnedPage) : pages;
</script>