Improve line compaction

This commit is contained in:
Johannes Zillmann 2021-02-26 18:04:50 +01:00
parent 0910f7b148
commit 6e5e5c9d53
7 changed files with 104 additions and 40 deletions

View File

@ -3,7 +3,7 @@ module.exports = {
testEnvironment: 'node',
roots: ['./test'],
transform: { '\\.ts$': ['ts-jest'] },
testRegex: '(/test/.*|(\\.|/)(test|spec))\\.(ts)$',
testRegex: '(/test/.*|(\\.|/))(test)\\.(ts)$',
moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'],
moduleNameMapper: {
'src/(.*)': '<rootDir>/src/$1',

View File

@ -1,6 +1,4 @@
import { assertDefined } from '../assert';
import type Item from '../Item';
import type ItemMerger from '../ItemMerger';
export default class ItemGroup {
top: Item;

View File

@ -10,7 +10,7 @@ export default class CompactLines extends ItemTransformer {
'Compact Lines',
'Combines items on the same y-axis',
{
requireColumns: ['str', 'y'],
requireColumns: ['str', 'y', 'height'],
itemMerger: {
groupKey: 'line',
merge: mergeLineItems,
@ -28,20 +28,23 @@ export default class CompactLines extends ItemTransformer {
}
transform(_: TransformContext, inputItems: Item[]): ItemResult {
let lines = 0;
return {
items: transformGroupedByPage(inputItems, (page, items) => {
let lineNumber = -1;
let lastY: number | undefined;
return items.map((item) => {
const y = item.data['y'];
if (!lastY || y < lastY) {
const height = item.data['height'];
if (!lastY || lastY - height > y) {
lineNumber++;
lines++;
}
lastY = y;
return item.withDataAddition({ line: lineNumber });
});
}),
messages: [],
messages: [`Formed ${lines} lines out of ${inputItems.length} items`],
};
}
}

View File

@ -1,41 +1,92 @@
import Item from 'src/Item';
import CompactLines from 'src/transformer/CompactLines';
import { emptyContext } from './testContext';
import { items } from './testItems';
const transformer = new CompactLines();
// Fixture: three items at y=585..588 followed by one at y=572. The '1' item sits 3 units
// higher and has a smaller height (7.33) than its neighbours, and is still expected to be
// assigned to the same line (0) as the surrounding items.
test('Transform - raised characters (example.pdf)', async () => {
  const context = emptyContext();
  const input = items(0, [
    { x: 240, y: 585, str: 'Dies ist eine Test-PDF', height: 11 },
    { x: 352.69, y: 585, str: '.', height: 11 },
    { x: 348, y: 588, str: '1', height: 7.33 },
    { x: 208, y: 572, str: 'Fürs Testen des', height: 11 },
  ]);

  const { items: output } = transformer.transform(context, input);
  const lineNumbers = output.map((item) => item.data['line']);

  expect(lineNumbers).toEqual([0, 0, 0, 1]);
});
// Fixture: the '0' item sits slightly lower (y=608.82 vs 610.61) and has a smaller height
// (7.97) than the items around it; it is expected to stay on the same line (1) instead of
// starting a new one. The drop to y=596.16 is expected to start line 2.
test('Transform - lowered characters (dict.pdf)', async () => {
  const results = transformer.transform(
    emptyContext(),
    items(0, [
      { str: 'Let', x: 100.35, y: 625.05, height: 11.96 },
      { str: 'D', x: 122.38, y: 625.05, height: 11.96 },
      { str: '(', x: 100.35, y: 610.61, height: 11.96 },
      { str: 'v', x: 104.9, y: 610.61, height: 11.96 },
      { str: '0', x: 110.57, y: 608.82, height: 7.97 },
      { str: ', a', x: 115.29, y: 610.61, height: 11.96 },
      { str: 'all are different,', x: 100.35, y: 596.16, height: 11.96 },
    ]),
  );
  expect(results.items.map((item) => item.data['line'])).toEqual([0, 0, 1, 1, 1, 1, 2]);
});
test('Item Merger', async () => {
const itemMerger = new CompactLines().descriptor.itemMerger;
const itemMerger = transformer.descriptor.itemMerger;
expect(itemMerger?.groupKey).toEqual('line');
const mergedItem = itemMerger?.merge([
new Item(0, {
line: 2,
x: 240,
y: 585,
str: 'Dies ist eine Test-PDF',
fontName: 'g_d0_f2',
dir: 'ltr',
width: 108.62,
height: 11,
}),
new Item(0, {
line: 2,
x: 352.69,
y: 585,
str: '.',
fontName: 'g_d0_f2',
dir: 'ltr',
width: 3.06,
height: 11,
}),
new Item(0, {
line: 2,
x: 348,
y: 588,
str: '1',
fontName: 'g_d0_f2',
dir: 'ltr',
width: 4.08,
height: 7.33,
}),
]);
const mergedItem = itemMerger?.merge(
items(0, [
{
line: 2,
x: 240,
y: 585,
str: 'Dies ist eine Test-PDF',
fontName: 'g_d0_f2',
dir: 'ltr',
width: 108.62,
height: 11,
},
{
line: 2,
x: 352.69,
y: 585,
str: '.',
fontName: 'g_d0_f2',
dir: 'ltr',
width: 3.06,
height: 11,
},
{
line: 2,
x: 348,
y: 588,
str: '1',
fontName: 'g_d0_f2',
dir: 'ltr',
width: 4.08,
height: 7.33,
},
]),
);
expect(mergedItem?.withoutUuid()).toEqual(
new Item(0, {
line: 2,

View File

@ -0,0 +1,8 @@
import type TransformContext from "src/transformer/TransformContext";
/**
 * Creates a fresh TransformContext for tests: an empty font map and no page viewports.
 *
 * @returns a new context object on every call
 */
export function emptyContext(): TransformContext {
  const context: TransformContext = {
    fontMap: new Map(),
    pageViewports: [],
  };
  return context;
}

View File

@ -0,0 +1,5 @@
import Item from 'src/Item';
/**
 * Test helper that wraps each raw data object in an Item assigned to the given page.
 *
 * @param page - page value passed as the first constructor argument of every Item
 * @param data - raw data objects, one per resulting Item, in order
 * @returns the created Items, in the same order as `data`
 */
export function items(page: number, data: object[]): Item[] {
  const toItem = (entry: object): Item => new Item(page, entry);
  return data.map(toItem);
}

View File

@ -5,7 +5,6 @@
import { BookOpen, ArrowLeft, ArrowRight } from 'svelte-hero-icons';
import type Debugger from '@core/Debugger';
import type Item from '@core/Item';
import { asPages } from '../../../core/src/support/itemUtils';
import Popup from '../components/Popup.svelte';