Sort line items on X axis

This commit is contained in:
Johannes Zillmann 2021-02-26 21:42:26 +01:00
parent 08509953dc
commit 915827be0c
4 changed files with 85 additions and 13 deletions

View File

@ -8,8 +8,15 @@ import AdjustHeight from './transformer/AdjustHeight';
import CalculateCoordinates from './transformer/CalculateCoordinates';
import CalculateStatistics from './transformer/CacluclateStatistics';
import CompactLines from './transformer/CompactLines';
import SortXWithinLines from './transformer/SortXWithinLines';
const transformers = [new AdjustHeight(), new CalculateCoordinates(), new CalculateStatistics(), new CompactLines()];
const transformers = [
new AdjustHeight(),
new CalculateCoordinates(),
new CalculateStatistics(),
new CompactLines(),
new SortXWithinLines(),
];
const defaultConfig: Config = {
pdfjsParams: {

View File

@ -3,12 +3,14 @@ import Item from '../Item';
import ItemGroup from './ItemGroup';
import Page from './Page';
type KeyExtractor = (item: Item) => any;
type PageItemTransformer = (page: number, items: Item[]) => Item[];
type LineItemTransformer = (page: number, line: number, items: Item[]) => Item[];
export function groupByPage(items: Item[]): Item[][] {
function groupBy(items: Item[], extractKey: KeyExtractor): Item[][] {
return items.reduce((pageItems: Item[][], item: Item) => {
const lastPageItems = pageItems[pageItems.length - 1];
if (!lastPageItems || item.page > lastPageItems[0]?.page) {
if (!lastPageItems || extractKey(item) !== extractKey(lastPageItems[0])) {
pageItems.push([item]);
} else {
lastPageItems.push(item);
@ -17,16 +19,12 @@ export function groupByPage(items: Item[]): Item[][] {
}, []);
}
export function groupByPage(items: Item[]): Item[][] {
return groupBy(items, (item) => item.page);
}
export function groupByElement(items: Item[], elementName: string): Item[][] {
return items.reduce((groupedItems: Item[][], item: Item) => {
const lastGroupItems = groupedItems[groupedItems.length - 1];
if (!lastGroupItems || item.data[elementName] !== lastGroupItems[0]?.data[elementName]) {
groupedItems.push([item]);
} else {
lastGroupItems.push(item);
}
return groupedItems;
}, []);
return groupBy(items, (item) => item.data[elementName]);
}
export function transformGroupedByPage(items: Item[], groupedTransformer: PageItemTransformer): Item[] {
@ -35,6 +33,16 @@ export function transformGroupedByPage(items: Item[], groupedTransformer: PageIt
);
}
export function transformGroupedByPageAndLine(items: Item[], groupedTransformer: LineItemTransformer): Item[] {
let transformedItems: Item[] = [];
groupByPage(items).forEach((pageItems) => {
groupByElement(pageItems, 'line').forEach((lineItems) => {
transformedItems.push(...groupedTransformer(pageItems[0].page, lineItems[0].data['line'], lineItems));
});
});
return transformedItems;
}
export function asPages(items: Item[], itemMerger?: ItemMerger): Page[] {
return groupByPage(items).map((pageItems: Item[]) => {
let itemGroups: ItemGroup[];

View File

@ -0,0 +1,26 @@
import Item from '../Item';
import ItemResult from '../ItemResult';
import ItemTransformer from './ItemTransformer';
import TransformContext from './TransformContext';
import { transformGroupedByPageAndLine } from '../support/itemUtils';
export default class SortXWithinLines extends ItemTransformer {
constructor() {
super('Sort by X', 'Sorts the items of a line by the x coordinate', {
requireColumns: ['line', 'x'],
// itemMerger: {
// groupKey: 'line',
// merge: mergeLineItems,
// },
});
}
transform(_: TransformContext, inputItems: Item[]): ItemResult {
return {
items: transformGroupedByPageAndLine(inputItems, (_, __, items) => {
return items.sort((a, b) => a.data['x'] - b.data['x']);
}),
messages: [],
};
}
}

View File

@ -1,9 +1,16 @@
import Item from 'src/Item';
import Page from 'src/support/Page';
import { groupByPage, groupByElement, transformGroupedByPage, asPages } from 'src/support/itemUtils';
import {
groupByPage,
groupByElement,
transformGroupedByPage,
transformGroupedByPageAndLine,
asPages,
} from 'src/support/itemUtils';
import ItemGroup from 'src/support/ItemGroup';
import ItemMerger from 'src/ItemMerger';
import ItemTransformer from 'src/transformer/ItemTransformer';
import { items } from 'test/transformer/testItems';
describe('groupByPage', () => {
test('empty', async () => {
@ -68,6 +75,30 @@ describe('transformGroupedByPage', () => {
});
});
describe('transformGroupedByPageAndLine', () => {
test('empty', async () => {
const transformedItems = transformGroupedByPageAndLine([], () => fail("shoudln't be called"));
expect(transformedItems).toEqual([]);
});
test('transform', async () => {
const pageItems = [
items(0, [{ line: 1, id: 1 }]),
items(1, [
{ line: 1, id: 2 },
{ line: 1, id: 3 },
{ line: 2, id: 4 },
]),
items(2, [{ line: 1, id: 5 }]),
];
const flattenedItems = new Array<Item>().concat(...pageItems);
const transformedItems = transformGroupedByPageAndLine(flattenedItems, (page, line, items) => {
return [new Item(0, { group: `${page}/${line}:${items.length}` })];
});
expect(transformedItems.map((item) => item.data['group'])).toEqual(['0/1:1', '1/1:2', '1/2:1', '2/1:1']);
});
});
describe('asPages', () => {
test('empty', async () => {
expect(groupByPage([])).toEqual([]);