mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-06-25 12:01:45 +02:00
Grouping of line items
This commit is contained in:
parent
d8bc6d100b
commit
0910f7b148
@ -39,6 +39,7 @@ export default class Debugger {
|
|||||||
const outputSchema = transformer.schemaTransformer(inputSchema);
|
const outputSchema = transformer.schemaTransformer(inputSchema);
|
||||||
const itemResult = transformer.transform(this.context, [...this.stageResultCache[idx - 1].items]);
|
const itemResult = transformer.transform(this.context, [...this.stageResultCache[idx - 1].items]);
|
||||||
this.stageResultCache.push({
|
this.stageResultCache.push({
|
||||||
|
descriptor: transformer.descriptor,
|
||||||
schema: toAnnotatedSchema(inputSchema, outputSchema),
|
schema: toAnnotatedSchema(inputSchema, outputSchema),
|
||||||
...itemResult,
|
...itemResult,
|
||||||
});
|
});
|
||||||
|
6
core/src/ItemMerger.ts
Normal file
6
core/src/ItemMerger.ts
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
import type Item from './Item';
|
||||||
|
|
||||||
|
/**
 * Strategy for collapsing a group of related items into one representative item.
 */
export default interface ItemMerger {
  /** Name of the `item.data` entry whose value decides which items belong to the same group. */
  groupKey: string;

  /**
   * Builds the single item that stands in for the given group.
   *
   * @param items the members of one group
   * @returns the merged item
   */
  merge(items: Item[]): Item;
}
|
24
core/src/TransformDescriptor.ts
Normal file
24
core/src/TransformDescriptor.ts
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
import type ItemMerger from './ItemMerger';
|
||||||
|
|
||||||
|
/**
 * Static metadata describing a transformation step.
 */
export default interface TransformDescriptor {
  /** Column names the step requires. NOTE(review): presumably matched against the incoming schema — confirm. */
  readonly requireColumns: string[];

  /** Names of globals the step reads. NOTE(review): exact semantics are not visible here — confirm. */
  readonly consumesGlobels: string[];

  /** Names of globals the step provides. NOTE(review): exact semantics are not visible here — confirm. */
  readonly producesGlobels: string[];

  /**
   * If this is set, the debug UI will group items and display a merged item.
   */
  readonly itemMerger?: ItemMerger;
}
|
||||||
|
|
||||||
|
/** Baseline values for every list-valued descriptor field. Never handed out directly; always copied. */
const defaults: TransformDescriptor = {
  requireColumns: [],
  consumesGlobels: [],
  producesGlobels: [],
};

/**
 * Completes a partial descriptor with default values.
 *
 * Each call returns fresh arrays for the defaulted fields, so mutating one
 * descriptor can never leak into another (or into the defaults themselves).
 * A field that is present but explicitly `undefined` is treated like a missing
 * field, which keeps the result consistent with its non-optional type.
 *
 * @param partial the fields the caller wants to set; everything else is defaulted
 * @returns a complete descriptor
 */
export function toDescriptor(partial: Partial<TransformDescriptor>): TransformDescriptor {
  return {
    // Carry over optional extras such as `itemMerger` untouched.
    ...partial,
    requireColumns: partial.requireColumns ?? [...defaults.requireColumns],
    consumesGlobels: partial.consumesGlobels ?? [...defaults.consumesGlobels],
    producesGlobels: partial.producesGlobels ?? [...defaults.producesGlobels],
  };
}
|
@ -1,5 +0,0 @@
|
|||||||
export default interface TransformerDescriptor {
|
|
||||||
readonly requireColumns?: string[];
|
|
||||||
readonly consumesGlobels?: string[];
|
|
||||||
readonly producesGlobels?: string[];
|
|
||||||
}
|
|
@ -1,7 +1,9 @@
|
|||||||
|
import TransformDescriptor from '../TransformDescriptor';
|
||||||
import Item from '../Item';
|
import Item from '../Item';
|
||||||
import AnnotatedColumn from './AnnotatedColumn';
|
import AnnotatedColumn from './AnnotatedColumn';
|
||||||
|
|
||||||
export default interface StageResult {
|
export default interface StageResult {
|
||||||
|
descriptor?: TransformDescriptor;
|
||||||
schema: AnnotatedColumn[];
|
schema: AnnotatedColumn[];
|
||||||
items: Item[];
|
items: Item[];
|
||||||
messages: string[];
|
messages: string[];
|
||||||
|
17
core/src/support/ItemGroup.ts
Normal file
17
core/src/support/ItemGroup.ts
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
import { assertDefined } from '../assert';
|
||||||
|
import type Item from '../Item';
|
||||||
|
import type ItemMerger from '../ItemMerger';
|
||||||
|
|
||||||
|
/**
 * One displayable row: a representative item plus the items it stands for.
 */
export default class ItemGroup {
  /** The item shown for the group. */
  top: Item;
  /** The grouped members; empty when the group consists of `top` alone. */
  elements: Item[];

  /**
   * @param top the representative item
   * @param items the members represented by `top` (defaults to none)
   */
  constructor(top: Item, items: Item[] = []) {
    this.elements = items;
    this.top = top;
  }

  /** True when the group carries at least one member element. */
  hasMany(): boolean {
    const memberCount = this.elements.length;
    return memberCount > 0;
  }
}
|
6
core/src/support/Page.ts
Normal file
6
core/src/support/Page.ts
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
import type ItemGroup from './ItemGroup';
|
||||||
|
|
||||||
|
/**
 * The item groups that belong to one page.
 */
export default interface Page {
  /** The page value shared by the items of this page. */
  index: number;

  /** The rows of the page, in item order. */
  itemGroups: ItemGroup[];
}
|
55
core/src/support/itemUtils.ts
Normal file
55
core/src/support/itemUtils.ts
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
import ItemMerger from 'src/ItemMerger';
|
||||||
|
import Item from '../Item';
|
||||||
|
import ItemGroup from './ItemGroup';
|
||||||
|
import Page from './Page';
|
||||||
|
|
||||||
|
/** Callback applied to the items of one page: receives the page value and that page's items and returns the replacement items. */
type PageItemTransformer = (page: number, items: Item[]) => Item[];
|
||||||
|
|
||||||
|
/**
 * Splits a flat item list into consecutive per-page runs.
 *
 * A new run starts whenever an item's page is greater than the page of the
 * first item of the current run; any other item is appended to the current run.
 *
 * @param items the items to split, in document order
 * @returns one array per run, preserving item order
 */
export function groupByPage(items: Item[]): Item[][] {
  const runs: Item[][] = [];
  for (const item of items) {
    const currentRun = runs[runs.length - 1];
    const startsNewRun = !currentRun || item.page > currentRun[0]?.page;
    if (startsNewRun) {
      runs.push([item]);
      continue;
    }
    currentRun.push(item);
  }
  return runs;
}
|
||||||
|
|
||||||
|
/**
 * Splits a flat item list into runs of neighbouring items that share the same
 * value for one data entry.
 *
 * A new run starts whenever an item's `data[elementName]` is not strictly equal
 * to that of the first item of the current run.
 *
 * @param items the items to split
 * @param elementName name of the `item.data` entry to compare
 * @returns one array per run, preserving item order
 */
export function groupByElement(items: Item[], elementName: string): Item[][] {
  const runs: Item[][] = [];
  for (const item of items) {
    const currentRun = runs[runs.length - 1];
    const startsNewRun = !currentRun || item.data[elementName] !== currentRun[0]?.data[elementName];
    if (startsNewRun) {
      runs.push([item]);
      continue;
    }
    currentRun.push(item);
  }
  return runs;
}
|
||||||
|
|
||||||
|
/**
 * Applies a transformer to the items of each page separately and concatenates
 * the per-page results into one flat list.
 *
 * @param items the items of all pages
 * @param groupedTransformer called once per page with the page value and that page's items
 * @returns the transformed items of all pages, in page order
 */
export function transformGroupedByPage(items: Item[], groupedTransformer: PageItemTransformer): Item[] {
  return groupByPage(items).reduce(
    (collected: Item[], pageItems) => collected.concat(groupedTransformer(pageItems[0].page, pageItems)),
    new Array<Item>(),
  );
}
|
||||||
|
|
||||||
|
/**
 * Converts a flat item list into pages of item groups.
 *
 * Without a merger every item becomes its own group. With a merger, neighbouring
 * items of a page that share the merger's group key are combined: groups of more
 * than one item get a merged top item with the originals as elements, while
 * single items are wrapped as they are.
 *
 * @param items the items of all pages
 * @param itemMerger optional grouping/merging strategy
 * @returns one page per page run, in order
 */
export function asPages(items: Item[], itemMerger?: ItemMerger): Page[] {
  const toGroups = (pageItems: Item[]): ItemGroup[] => {
    if (!itemMerger) {
      return pageItems.map((item) => new ItemGroup(item));
    }
    return groupByElement(pageItems, itemMerger.groupKey).map((members) =>
      members.length > 1 ? new ItemGroup(itemMerger.merge(members), members) : new ItemGroup(members[0]),
    );
  };

  return groupByPage(items).map(
    (pageItems: Item[]): Page => {
      const itemGroups = toGroups(pageItems);
      return { index: pageItems[0].page, itemGroups };
    },
  );
}
|
@ -3,7 +3,7 @@ import Item from '../Item';
|
|||||||
import ItemResult from '../ItemResult';
|
import ItemResult from '../ItemResult';
|
||||||
import ItemTransformer from './ItemTransformer';
|
import ItemTransformer from './ItemTransformer';
|
||||||
import TransformContext from './TransformContext';
|
import TransformContext from './TransformContext';
|
||||||
import { transformGroupedByPage } from './transformerUtils';
|
import { transformGroupedByPage } from '../support/itemUtils';
|
||||||
|
|
||||||
export default class AdjustHeight extends ItemTransformer {
|
export default class AdjustHeight extends ItemTransformer {
|
||||||
constructor() {
|
constructor() {
|
||||||
|
@ -2,7 +2,7 @@ import Item from '../Item';
|
|||||||
import ItemResult from '../ItemResult';
|
import ItemResult from '../ItemResult';
|
||||||
import ItemTransformer from './ItemTransformer';
|
import ItemTransformer from './ItemTransformer';
|
||||||
import TransformContext from './TransformContext';
|
import TransformContext from './TransformContext';
|
||||||
import { transformGroupedByPage } from './transformerUtils';
|
import { transformGroupedByPage } from '../support/itemUtils';
|
||||||
|
|
||||||
export default class CompactLines extends ItemTransformer {
|
export default class CompactLines extends ItemTransformer {
|
||||||
constructor() {
|
constructor() {
|
||||||
@ -11,6 +11,10 @@ export default class CompactLines extends ItemTransformer {
|
|||||||
'Combines items on the same y-axis',
|
'Combines items on the same y-axis',
|
||||||
{
|
{
|
||||||
requireColumns: ['str', 'y'],
|
requireColumns: ['str', 'y'],
|
||||||
|
itemMerger: {
|
||||||
|
groupKey: 'line',
|
||||||
|
merge: mergeLineItems,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
(incomingSchema) => {
|
(incomingSchema) => {
|
||||||
return incomingSchema.reduce((schema, column) => {
|
return incomingSchema.reduce((schema, column) => {
|
||||||
@ -41,3 +45,25 @@ export default class CompactLines extends ItemTransformer {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Merges the items of one line into a single item.
 *
 * Page and line are taken from the first item, the strings are joined with a
 * space, x and y are the minima, width is the sum of all widths, height is the
 * maximum, and font names and directions are reduced to their distinct values.
 *
 * @param items the items of one line (expected to be non-empty)
 * @returns the merged item
 */
function mergeLineItems(items: Item[]): Item {
  const [first] = items;
  const page = first.page;
  const line = first.data['line'];
  // Collects one data entry across all items, in item order.
  const column = (name: string) => items.map((item) => item.data[name]);
  const distinct = (name: string) => [...new Set(column(name))];

  return new Item(page, {
    str: column('str').join(' '),
    line,
    x: Math.min(...column('x')),
    y: Math.min(...column('y')),
    width: column('width').reduce((total, width) => total + width, 0),
    height: Math.max(...column('height')),
    fontName: distinct('fontName'),
    dir: distinct('dir'),
  });
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import type TransformerDescriptor from '../TransformerDescription';
|
import TransformDescriptor, { toDescriptor } from '../TransformDescriptor';
|
||||||
import type TransformContext from './TransformContext';
|
import type TransformContext from './TransformContext';
|
||||||
import type Item from '../Item';
|
import type Item from '../Item';
|
||||||
import type ItemResult from '../ItemResult';
|
import type ItemResult from '../ItemResult';
|
||||||
@ -11,25 +11,18 @@ type SchemaTransformer = (incomingSchema: string[]) => string[];
|
|||||||
export default abstract class ItemTransformer {
|
export default abstract class ItemTransformer {
|
||||||
readonly name: string;
|
readonly name: string;
|
||||||
readonly description: string;
|
readonly description: string;
|
||||||
readonly descriptor: TransformerDescriptor;
|
readonly descriptor: TransformDescriptor;
|
||||||
readonly schemaTransformer: SchemaTransformer;
|
readonly schemaTransformer: SchemaTransformer;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
name: string,
|
name: string,
|
||||||
description: string,
|
description: string,
|
||||||
descriptor: TransformerDescriptor,
|
descriptorPartial: Partial<TransformDescriptor>,
|
||||||
schemaTransformer: SchemaTransformer = (schema) => schema,
|
schemaTransformer: SchemaTransformer = (schema) => schema,
|
||||||
) {
|
) {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.description = description;
|
this.description = description;
|
||||||
this.descriptor = {
|
this.descriptor = toDescriptor(descriptorPartial);
|
||||||
...{
|
|
||||||
consumesGlobels: [],
|
|
||||||
producesGlobels: [],
|
|
||||||
requireColumns: [],
|
|
||||||
},
|
|
||||||
...descriptor,
|
|
||||||
};
|
|
||||||
this.schemaTransformer = schemaTransformer;
|
this.schemaTransformer = schemaTransformer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,19 +0,0 @@
|
|||||||
import Item from '../Item';
|
|
||||||
|
|
||||||
type PageItemTransformer = (page: number, items: Item[]) => Item[];
|
|
||||||
|
|
||||||
export function transformGroupedByPage(items: Item[], groupedTransformer: PageItemTransformer) {
|
|
||||||
return new Array<Item>().concat(
|
|
||||||
...items
|
|
||||||
.reduce((pageItems: Item[][], item: Item) => {
|
|
||||||
const lastPageItems = pageItems[pageItems.length - 1];
|
|
||||||
if (!lastPageItems || item.page > lastPageItems[0]?.page) {
|
|
||||||
pageItems.push([item]);
|
|
||||||
} else {
|
|
||||||
lastPageItems.push(item);
|
|
||||||
}
|
|
||||||
return pageItems;
|
|
||||||
}, [])
|
|
||||||
.map((pageItems) => groupedTransformer(pageItems[0].page, pageItems)),
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,7 +1,7 @@
|
|||||||
import Debugger from 'src/Debugger';
|
import Debugger from 'src/Debugger';
|
||||||
import Item from 'src/Item';
|
import Item from 'src/Item';
|
||||||
import ItemTransformer from 'src/transformer/ItemTransformer';
|
import ItemTransformer from 'src/transformer/ItemTransformer';
|
||||||
import TransformerDescriptor from 'src/TransformerDescription';
|
import TransformDescriptor from 'src/TransformDescriptor';
|
||||||
import TransformContext from 'src/transformer/TransformContext';
|
import TransformContext from 'src/transformer/TransformContext';
|
||||||
import ItemResult from 'src/ItemResult';
|
import ItemResult from 'src/ItemResult';
|
||||||
import ColumnAnnotation from 'src/debug/ColumnAnnotation';
|
import ColumnAnnotation from 'src/debug/ColumnAnnotation';
|
||||||
@ -9,7 +9,7 @@ import AnnotatedColumn from 'src/debug/AnnotatedColumn';
|
|||||||
|
|
||||||
class TestTransformer extends ItemTransformer {
|
class TestTransformer extends ItemTransformer {
|
||||||
items: Item[];
|
items: Item[];
|
||||||
constructor(name: string, descriptor: TransformerDescriptor, outputSchema: string[], items: Item[]) {
|
constructor(name: string, descriptor: Partial<TransformDescriptor>, outputSchema: string[], items: Item[]) {
|
||||||
super(name, `Description for ${name}`, descriptor, (incomingSchema) => outputSchema);
|
super(name, `Description for ${name}`, descriptor, (incomingSchema) => outputSchema);
|
||||||
this.items = items;
|
this.items = items;
|
||||||
}
|
}
|
||||||
@ -30,7 +30,7 @@ test('basic debug', async () => {
|
|||||||
const trans1Items = parsedItems.map((item) => item.withData({ C: `c=${item.value('A')}+${item.value('B')}` }));
|
const trans1Items = parsedItems.map((item) => item.withData({ C: `c=${item.value('A')}+${item.value('B')}` }));
|
||||||
|
|
||||||
const transformers = [new TestTransformer('Trans1', trans1Desc, trans1Schema, trans1Items)];
|
const transformers = [new TestTransformer('Trans1', trans1Desc, trans1Schema, trans1Items)];
|
||||||
const debug = new Debugger(parsedSchema, parsedItems, { pageViewports: [] }, transformers);
|
const debug = new Debugger(parsedSchema, parsedItems, { fontMap: new Map(), pageViewports: [] }, transformers);
|
||||||
|
|
||||||
expect(debug.stageNames).toEqual(['Parse Result', 'Trans1']);
|
expect(debug.stageNames).toEqual(['Parse Result', 'Trans1']);
|
||||||
expect(debug.stageResults(0).schema).toEqual(parsedSchema.map((column) => ({ name: column })));
|
expect(debug.stageResults(0).schema).toEqual(parsedSchema.map((column) => ({ name: column })));
|
||||||
@ -47,7 +47,7 @@ describe('build schemas', () => {
|
|||||||
|
|
||||||
function calculateSchema(inputSchema: string[], outputSchema: string[]): AnnotatedColumn[] {
|
function calculateSchema(inputSchema: string[], outputSchema: string[]): AnnotatedColumn[] {
|
||||||
const transformers = [new TestTransformer('Trans1', {}, outputSchema, items)];
|
const transformers = [new TestTransformer('Trans1', {}, outputSchema, items)];
|
||||||
const debug = new Debugger(inputSchema, items, { pageViewports: [] }, transformers);
|
const debug = new Debugger(inputSchema, items, { fontMap: new Map(), pageViewports: [] }, transformers);
|
||||||
return debug.stageResults(1).schema;
|
return debug.stageResults(1).schema;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import TransformerDescriptor from 'src/TransformerDescription';
|
import TransformDescriptor from 'src/TransformDescriptor';
|
||||||
import Item from 'src/Item';
|
import Item from 'src/Item';
|
||||||
import ItemResult from 'src/ItemResult';
|
import ItemResult from 'src/ItemResult';
|
||||||
import ItemTransformer from 'src/transformer/ItemTransformer';
|
import ItemTransformer from 'src/transformer/ItemTransformer';
|
||||||
@ -9,7 +9,7 @@ import * as fs from 'fs';
|
|||||||
import PdfPipeline from 'src/PdfPipeline';
|
import PdfPipeline from 'src/PdfPipeline';
|
||||||
|
|
||||||
class TestSchemaTransformer extends ItemTransformer {
|
class TestSchemaTransformer extends ItemTransformer {
|
||||||
constructor(name: string, descriptor: TransformerDescriptor, outputSchema: string[] | undefined = undefined) {
|
constructor(name: string, descriptor: Partial<TransformDescriptor>, outputSchema: string[] | undefined = undefined) {
|
||||||
if (outputSchema) {
|
if (outputSchema) {
|
||||||
super(name, `Description for ${name}`, descriptor, (_) => outputSchema);
|
super(name, `Description for ${name}`, descriptor, (_) => outputSchema);
|
||||||
} else {
|
} else {
|
||||||
|
112
core/test/support/itemUtils.test.ts
Normal file
112
core/test/support/itemUtils.test.ts
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
import Item from 'src/Item';
|
||||||
|
import Page from 'src/support/Page';
|
||||||
|
import { groupByPage, groupByElement, transformGroupedByPage, asPages } from 'src/support/itemUtils';
|
||||||
|
import ItemGroup from 'src/support/ItemGroup';
|
||||||
|
import ItemMerger from 'src/ItemMerger';
|
||||||
|
import ItemTransformer from 'src/transformer/ItemTransformer';
|
||||||
|
|
||||||
|
// groupByPage: flat item list -> one array per page
describe('groupByPage', () => {
  test('empty', async () => {
    const grouped = groupByPage([]);
    expect(grouped).toEqual([]);
  });

  test('group', async () => {
    // Expected result: one entry per page, page 1 holding two items.
    const expectedPages = [
      [new Item(0, { id: 1 })],
      [new Item(1, { id: 2 }), new Item(1, { id: 3 })],
      [new Item(2, { id: 4 })],
    ];
    const allItems = new Array<Item>().concat(...expectedPages);
    expect(groupByPage(allItems)).toEqual(expectedPages);
  });
});
|
||||||
|
|
||||||
|
// groupByElement: neighbouring items with the same data value end up in one group
describe('groupByElement', () => {
  test('empty', async () => {
    const grouped = groupByElement([], 'line');
    expect(grouped).toEqual([]);
  });

  test('group', async () => {
    // Expected result: one entry per line, line 2 holding two items.
    const expectedGroups = [
      [new Item(0, { line: 1, id: 1 })],
      [new Item(0, { line: 2, id: 2 }), new Item(0, { line: 2, id: 3 })],
      [new Item(0, { line: 3, id: 4 })],
    ];
    const allItems = new Array<Item>().concat(...expectedGroups);
    expect(groupByElement(allItems, 'line')).toEqual(expectedGroups);
  });
});
|
||||||
|
|
||||||
|
// transformGroupedByPage: the callback sees one page at a time, results are flattened
describe('transformGroupedByPage', () => {
  test('empty', async () => {
    // Without items there is no page, so the callback must never run.
    const result = transformGroupedByPage([], () => fail("shoudln't be called"));
    expect(result).toEqual([]);
  });

  test('pipe through', async () => {
    const itemsPerPage = [
      [new Item(0, { id: 1 })],
      [new Item(1, { id: 2 }), new Item(1, { id: 3 })],
      [new Item(2, { id: 4 })],
    ];
    const allItems = new Array<Item>().concat(...itemsPerPage);
    const result = transformGroupedByPage(allItems, (page, items) => {
      // The page value doubles as the index into the expectation.
      expect(items).toEqual(itemsPerPage[page]);
      return items;
    });
    expect(result).toEqual(allItems);
  });

  test('change', async () => {
    const input = [new Item(0, { v: 0 }), new Item(1, { v: 0 })];
    const expected = input.map((item) => item.withData({ v: 1 }));
    const result = transformGroupedByPage(input, (_, items) => {
      return [items[0].withData({ v: 1 })];
    });
    expect(result).toEqual(expected);
  });
});
|
||||||
|
|
||||||
|
// asPages: flat item list -> pages of item groups, optionally merged by a group key
describe('asPages', () => {
  test('empty', async () => {
    // Exercise asPages itself (with and without a merger), not groupByPage.
    expect(asPages([])).toEqual([]);
    expect(asPages([], { groupKey: 'line', merge: (items) => items[0] })).toEqual([]);
  });

  test('no merger', async () => {
    const pageItems = [
      [new Item(0, { id: 1, line: 1 })],
      [new Item(1, { id: 2, line: 1 }), new Item(1, { id: 3, line: 1 }), new Item(1, { id: 4, line: 2 })],
      [new Item(2, { id: 5, line: 1 })],
    ];
    const flattenedItems = new Array<Item>().concat(...pageItems);
    const pages = asPages(flattenedItems);
    // Without a merger every item is wrapped in its own group.
    expect(pages).toEqual([
      { index: 0, itemGroups: pageItems[0].map((item) => new ItemGroup(item)) },
      { index: 1, itemGroups: pageItems[1].map((item) => new ItemGroup(item)) },
      { index: 2, itemGroups: pageItems[2].map((item) => new ItemGroup(item)) },
    ] as Page[]);
  });

  test('merger', async () => {
    const pageItems = [
      [new Item(0, { id: 1, line: 1 })],
      [new Item(1, { id: 2, line: 1 }), new Item(1, { id: 3, line: 1 }), new Item(1, { id: 4, line: 2 })],
      [new Item(2, { id: 5, line: 1 })],
    ];
    const flattenedItems = new Array<Item>().concat(...pageItems);
    const merger: ItemMerger = { groupKey: 'line', merge: (items) => items[0] };
    const pages = asPages(flattenedItems, merger);
    // On page 1 the two items of line 1 collapse into one group; the single item of line 2 stays alone.
    expect(pages).toEqual([
      { index: 0, itemGroups: pageItems[0].map((item) => new ItemGroup(item)) },
      {
        index: 1,
        itemGroups: [
          new ItemGroup(merger.merge(pageItems[1].slice(0, 2)), pageItems[1].slice(0, 2)),
          new ItemGroup(pageItems[1][2]),
        ],
      },
      { index: 2, itemGroups: pageItems[2].map((item) => new ItemGroup(item)) },
    ] as Page[]);
  });
});
|
51
core/test/transformer/CompactLines.test.ts
Normal file
51
core/test/transformer/CompactLines.test.ts
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
import Item from 'src/Item';
|
||||||
|
import CompactLines from 'src/transformer/CompactLines';
|
||||||
|
// The merger exposed by CompactLines groups by 'line' and folds the items of a line into one.
test('Item Merger', async () => {
  const itemMerger = new CompactLines().descriptor.itemMerger;
  expect(itemMerger?.groupKey).toEqual('line');

  // Three fragments of the same line sharing font and direction.
  const sentence = new Item(0, {
    line: 2,
    x: 240,
    y: 585,
    str: 'Dies ist eine Test-PDF',
    fontName: 'g_d0_f2',
    dir: 'ltr',
    width: 108.62,
    height: 11,
  });
  const dot = new Item(0, {
    line: 2,
    x: 352.69,
    y: 585,
    str: '.',
    fontName: 'g_d0_f2',
    dir: 'ltr',
    width: 3.06,
    height: 11,
  });
  const footnoteMark = new Item(0, {
    line: 2,
    x: 348,
    y: 588,
    str: '1',
    fontName: 'g_d0_f2',
    dir: 'ltr',
    width: 4.08,
    height: 7.33,
  });

  const expected = new Item(0, {
    line: 2,
    x: 240,
    y: 585,
    str: 'Dies ist eine Test-PDF . 1',
    fontName: ['g_d0_f2'],
    dir: ['ltr'],
    width: 115.76,
    height: 11,
  });

  const mergedItem = itemMerger?.merge([sentence, dot, footnoteMark]);
  expect(mergedItem?.withoutUuid()).toEqual(expected.withoutUuid());
});
|
@ -1,31 +0,0 @@
|
|||||||
import Item from 'src/Item';
|
|
||||||
import { transformGroupedByPage } from 'src/transformer/transformerUtils';
|
|
||||||
|
|
||||||
describe('transformGroupedByPage', () => {
|
|
||||||
test('empty', async () => {
|
|
||||||
const transformedItems = transformGroupedByPage([], () => fail("shoudln't be called"));
|
|
||||||
expect(transformedItems).toEqual([]);
|
|
||||||
});
|
|
||||||
|
|
||||||
test('pipe through', async () => {
|
|
||||||
const pageItems = [
|
|
||||||
[new Item(0, { id: 1 })],
|
|
||||||
[new Item(1, { id: 2 }), new Item(1, { id: 3 })],
|
|
||||||
[new Item(2, { id: 4 })],
|
|
||||||
];
|
|
||||||
const flattenedItems = new Array<Item>().concat(...pageItems);
|
|
||||||
const transformedItems = transformGroupedByPage(flattenedItems, (page, items) => {
|
|
||||||
expect(items).toEqual(pageItems[page]);
|
|
||||||
return items;
|
|
||||||
});
|
|
||||||
expect(transformedItems).toEqual(flattenedItems);
|
|
||||||
});
|
|
||||||
|
|
||||||
test('change', async () => {
|
|
||||||
const input = [new Item(0, { v: 0 }), new Item(1, { v: 0 })];
|
|
||||||
const transformedItems = transformGroupedByPage(input, (_, items) => {
|
|
||||||
return [items[0].withData({ v: 1 })];
|
|
||||||
});
|
|
||||||
expect(transformedItems).toEqual(input.map((item) => item.withData({ v: 1 })));
|
|
||||||
});
|
|
||||||
});
|
|
@ -3,8 +3,11 @@
|
|||||||
import Icon from 'fa-svelte';
|
import Icon from 'fa-svelte';
|
||||||
import { faMapPin as pin } from '@fortawesome/free-solid-svg-icons/faMapPin';
|
import { faMapPin as pin } from '@fortawesome/free-solid-svg-icons/faMapPin';
|
||||||
import { BookOpen, ArrowLeft, ArrowRight } from 'svelte-hero-icons';
|
import { BookOpen, ArrowLeft, ArrowRight } from 'svelte-hero-icons';
|
||||||
|
|
||||||
import type Debugger from '@core/Debugger';
|
import type Debugger from '@core/Debugger';
|
||||||
import type Item from '@core/Item';
|
import type Item from '@core/Item';
|
||||||
|
import { asPages } from '../../../core/src/support/itemUtils';
|
||||||
|
|
||||||
import Popup from '../components/Popup.svelte';
|
import Popup from '../components/Popup.svelte';
|
||||||
import PageSelectionPopup from './PageSelectionPopup.svelte';
|
import PageSelectionPopup from './PageSelectionPopup.svelte';
|
||||||
import ItemTable from './ItemTable.svelte';
|
import ItemTable from './ItemTable.svelte';
|
||||||
@ -21,17 +24,9 @@
|
|||||||
$: stageResult = debug.stageResults(currentStage);
|
$: stageResult = debug.stageResults(currentStage);
|
||||||
$: pageIsPinned = !isNaN(pinnedPage);
|
$: pageIsPinned = !isNaN(pinnedPage);
|
||||||
$: pagesNumbers = new Set(stageResult.items.map((item) => item.page));
|
$: pagesNumbers = new Set(stageResult.items.map((item) => item.page));
|
||||||
|
$: pages = asPages(stageResult.items, stageResult.descriptor?.itemMerger);
|
||||||
$: maxPage = Math.max(...pagesNumbers);
|
$: maxPage = Math.max(...pagesNumbers);
|
||||||
$: itemsByPage = [
|
$: visiblePages = pageIsPinned ? pages.filter((page) => page.index === pinnedPage) : pages;
|
||||||
...stageResult.items.reduce((map, item) => {
|
|
||||||
if (!map.has(item.page)) {
|
|
||||||
map.set(item.page, []);
|
|
||||||
}
|
|
||||||
map.get(item.page).push(item);
|
|
||||||
return map;
|
|
||||||
}, new Map<number, Item[]>()),
|
|
||||||
];
|
|
||||||
$: visiblePages = pageIsPinned ? itemsByPage.filter(([page]) => page === pinnedPage) : itemsByPage;
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="mx-4">
|
<div class="mx-4">
|
||||||
@ -97,7 +92,7 @@
|
|||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<!-- Items -->
|
<!-- Items -->
|
||||||
<ItemTable schema={stageResult.schema} itemsByPage={visiblePages} {maxPage} {pageIsPinned} />
|
<ItemTable schema={stageResult.schema} pages={visiblePages} {maxPage} {pageIsPinned} />
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
|
@ -1,26 +1,27 @@
|
|||||||
<script>
|
<script>
|
||||||
import { scale, fade } from 'svelte/transition';
|
import { scale, fade } from 'svelte/transition';
|
||||||
import type Item from '@core/Item';
|
|
||||||
import type AnnotatedColumn from '@core/debug/AnnotatedColumn';
|
import type AnnotatedColumn from '@core/debug/AnnotatedColumn';
|
||||||
import ColumnAnnotation from '../../../core/src/debug/ColumnAnnotation';
|
import ColumnAnnotation from '../../../core/src/debug/ColumnAnnotation';
|
||||||
import inView from '../actions/inView';
|
import inView from '../actions/inView';
|
||||||
import { formatValue } from './formatValues';
|
import { formatValue } from './formatValues';
|
||||||
|
import type Page from '@core/support/Page';
|
||||||
|
|
||||||
export let schema: AnnotatedColumn[];
|
export let schema: AnnotatedColumn[];
|
||||||
export let itemsByPage: [number, Item[]][];
|
export let pages: Page[];
|
||||||
export let maxPage: number;
|
export let maxPage: number;
|
||||||
export let pageIsPinned: boolean;
|
export let pageIsPinned: boolean;
|
||||||
let maxItemsToRenderInOneLoad = 200;
|
let maxItemsToRenderInOneLoad = 200;
|
||||||
let renderedMaxPage = 0;
|
let renderedMaxPage = 0;
|
||||||
|
let expandedItemGroup: { pageIndex: number; itemIndex: number };
|
||||||
|
|
||||||
let renderedItemsByPage: [number, Item[]][];
|
let renderedPages: Page[];
|
||||||
$: {
|
$: {
|
||||||
if (pageIsPinned) {
|
if (pageIsPinned) {
|
||||||
renderedItemsByPage = itemsByPage;
|
renderedPages = pages;
|
||||||
renderedMaxPage = 0;
|
renderedMaxPage = 0;
|
||||||
} else {
|
} else {
|
||||||
calculateNextPageToRenderTo();
|
calculateNextPageToRenderTo();
|
||||||
renderedItemsByPage = itemsByPage.slice(0, renderedMaxPage);
|
renderedPages = pages.slice(0, renderedMaxPage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -29,16 +30,22 @@
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
let itemCount = 0;
|
let itemCount = 0;
|
||||||
for (let index = 0; index < itemsByPage.length; index++) {
|
for (let index = 0; index < pages.length; index++) {
|
||||||
renderedMaxPage++;
|
renderedMaxPage++;
|
||||||
const [_, items] = itemsByPage[index];
|
itemCount += pages[index].itemGroups.length;
|
||||||
itemCount += items.length;
|
|
||||||
if (itemCount > maxItemsToRenderInOneLoad) {
|
if (itemCount > maxItemsToRenderInOneLoad) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// console.log(`Render pages 0 to ${renderedMaxPage} with ${itemCount} items`);
|
// console.log(`Render pages 0 to ${renderedMaxPage} with ${itemCount} items`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const isExpanded = (pageIndex: number, itemIndex: number) => {
|
||||||
|
return expandedItemGroup?.pageIndex === pageIndex && expandedItemGroup?.itemIndex === itemIndex;
|
||||||
|
};
|
||||||
|
const toggleRow = (pageIndex: number, itemIndex: number) => {
|
||||||
|
expandedItemGroup = isExpanded(pageIndex, itemIndex) ? undefined : { pageIndex, itemIndex };
|
||||||
|
};
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<!-- Item table -->
|
<!-- Item table -->
|
||||||
@ -56,33 +63,53 @@
|
|||||||
{/each}
|
{/each}
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{#each renderedItemsByPage as [pageNumber, items], pageIdx}
|
{#each renderedPages as page, pageIdx}
|
||||||
<!-- Separator between pages -->
|
<!-- Separator between pages -->
|
||||||
{#if pageIdx > 0}
|
{#if pageIdx > 0}
|
||||||
<tr class="h-5" />
|
<tr class="h-5" />
|
||||||
{/if}
|
{/if}
|
||||||
{#each items as item, itemIdx}
|
|
||||||
<tr in:fade>
|
<!-- Page items -->
|
||||||
|
{#each page.itemGroups as itemGroup, itemIdx}
|
||||||
|
<tr
|
||||||
|
class:expandable={itemGroup.hasMany()}
|
||||||
|
class:expanded={expandedItemGroup && isExpanded(page.index, itemIdx)}
|
||||||
|
in:fade>
|
||||||
<!-- Page number in first page item row -->
|
<!-- Page number in first page item row -->
|
||||||
{#if itemIdx === 0}
|
{#if itemIdx === 0}
|
||||||
<td class="page bg-gray-50">
|
<td id="page" class="page bg-gray-50">
|
||||||
<div>Page {pageNumber} {pageIsPinned ? '' : ' / ' + maxPage}</div>
|
<div>Page {page.index} {pageIsPinned ? '' : ' / ' + maxPage}</div>
|
||||||
</td>
|
</td>
|
||||||
{:else}
|
{:else}
|
||||||
<td />
|
<td id="page" />
|
||||||
{/if}
|
{/if}
|
||||||
<td>{itemIdx}</td>
|
<span class="contents" on:click={() => itemGroup.hasMany() && toggleRow(page.index, itemIdx)}>
|
||||||
|
<td>{itemIdx}{itemGroup.hasMany() ? '+' : ''}</td>
|
||||||
{#each schema as column}
|
{#each schema as column}
|
||||||
<td class="select-all">{formatValue(item.data[column.name])}</td>
|
<td class="select-all">{formatValue(itemGroup.top.data[column.name])}</td>
|
||||||
|
{/each}
|
||||||
|
</span>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<!-- Expanded childs -->
|
||||||
|
{#if expandedItemGroup && isExpanded(page.index, itemIdx)}
|
||||||
|
{#each itemGroup.elements as child, childIdx}
|
||||||
|
<tr class="childs">
|
||||||
|
<td id="page" />
|
||||||
|
<td>{'└ ' + childIdx}</td>
|
||||||
|
{#each schema as column}
|
||||||
|
<td class="select-all">{formatValue(child.data[column.name])}</td>
|
||||||
{/each}
|
{/each}
|
||||||
</tr>
|
</tr>
|
||||||
{/each}
|
{/each}
|
||||||
|
{/if}
|
||||||
|
{/each}
|
||||||
{/each}
|
{/each}
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
{#if !pageIsPinned}
|
{#if !pageIsPinned}
|
||||||
{#if renderedMaxPage < itemsByPage.length}
|
{#if renderedMaxPage < pages.length}
|
||||||
<span use:inView on:intersect={({ detail }) => detail && calculateNextPageToRenderTo()} />
|
<span use:inView on:intersect={({ detail }) => detail && calculateNextPageToRenderTo()} />
|
||||||
<div class="my-6 text-center text-2xl">...</div>
|
<div class="my-6 text-center text-2xl">...</div>
|
||||||
{:else}
|
{:else}
|
||||||
@ -113,12 +140,24 @@
|
|||||||
@apply bg-gray-300;
|
@apply bg-gray-300;
|
||||||
@apply shadow;
|
@apply shadow;
|
||||||
}
|
}
|
||||||
td:not(:first-child) {
|
td:not(#page) {
|
||||||
@apply px-1;
|
@apply px-1;
|
||||||
@apply border-b;
|
@apply border-b;
|
||||||
}
|
}
|
||||||
|
|
||||||
tr:hover td:not(:first-child) {
|
tr:hover td:not(#page) {
|
||||||
|
@apply bg-gray-200;
|
||||||
|
}
|
||||||
|
|
||||||
|
tr.expandable:hover td:not(#page) {
|
||||||
|
@apply cursor-pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
tr.expanded td:not(#page) {
|
||||||
|
@apply bg-gray-300;
|
||||||
|
}
|
||||||
|
|
||||||
|
tr.childs td:not(#page) {
|
||||||
@apply bg-gray-200;
|
@apply bg-gray-200;
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user