Improve line compaction

This commit is contained in:
Johannes Zillmann 2021-02-26 18:04:50 +01:00
parent 0910f7b148
commit 6e5e5c9d53
7 changed files with 104 additions and 40 deletions

View File

@ -3,7 +3,7 @@ module.exports = {
testEnvironment: 'node', testEnvironment: 'node',
roots: ['./test'], roots: ['./test'],
transform: { '\\.ts$': ['ts-jest'] }, transform: { '\\.ts$': ['ts-jest'] },
testRegex: '(/test/.*|(\\.|/)(test|spec))\\.(ts)$', testRegex: '(/test/.*|(\\.|/))(test)\\.(ts)$',
moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'],
moduleNameMapper: { moduleNameMapper: {
'src/(.*)': '<rootDir>/src/$1', 'src/(.*)': '<rootDir>/src/$1',

View File

@ -1,6 +1,4 @@
import { assertDefined } from '../assert';
import type Item from '../Item'; import type Item from '../Item';
import type ItemMerger from '../ItemMerger';
export default class ItemGroup { export default class ItemGroup {
top: Item; top: Item;

View File

@ -10,7 +10,7 @@ export default class CompactLines extends ItemTransformer {
'Compact Lines', 'Compact Lines',
'Combines items on the same y-axis', 'Combines items on the same y-axis',
{ {
requireColumns: ['str', 'y'], requireColumns: ['str', 'y', 'height'],
itemMerger: { itemMerger: {
groupKey: 'line', groupKey: 'line',
merge: mergeLineItems, merge: mergeLineItems,
@ -28,20 +28,23 @@ export default class CompactLines extends ItemTransformer {
} }
transform(_: TransformContext, inputItems: Item[]): ItemResult { transform(_: TransformContext, inputItems: Item[]): ItemResult {
let lines = 0;
return { return {
items: transformGroupedByPage(inputItems, (page, items) => { items: transformGroupedByPage(inputItems, (page, items) => {
let lineNumber = -1; let lineNumber = -1;
let lastY: number | undefined; let lastY: number | undefined;
return items.map((item) => { return items.map((item) => {
const y = item.data['y']; const y = item.data['y'];
if (!lastY || y < lastY) { const height = item.data['height'];
if (!lastY || lastY - height > y) {
lineNumber++; lineNumber++;
lines++;
} }
lastY = y; lastY = y;
return item.withDataAddition({ line: lineNumber }); return item.withDataAddition({ line: lineNumber });
}); });
}), }),
messages: [], messages: [`Formed ${lines} lines out of ${inputItems.length} items`],
}; };
} }
} }

View File

@ -1,41 +1,92 @@
import Item from 'src/Item'; import Item from 'src/Item';
import CompactLines from 'src/transformer/CompactLines'; import CompactLines from 'src/transformer/CompactLines';
import { emptyContext } from './testContext';
import { items } from './testItems';
const transformer = new CompactLines();
// A smaller glyph placed slightly above the surrounding text must stay on the
// same line as that text; only the item further down the page opens a new line.
test('Transform - raised characters (example.pdf)', async () => {
  const context = emptyContext();
  const fixture = items(0, [
    { x: 240, y: 585, str: 'Dies ist eine Test-PDF', height: 11 },
    { x: 352.69, y: 585, str: '.', height: 11 },
    // Higher y and smaller height than its neighbours.
    { x: 348, y: 588, str: '1', height: 7.33 },
    { x: 208, y: 572, str: 'Fürs Testen des', height: 11 },
  ]);

  const { items: transformed } = transformer.transform(context, fixture);
  const lineNumbers = transformed.map((item) => item.data['line']);

  // The first three items share line 0, the last one starts line 1.
  expect(lineNumbers).toEqual([0, 0, 0, 1]);
});
// A smaller glyph placed slightly below the surrounding text must stay on the
// same line as that text instead of opening a line of its own.
test('Transform - lowered characters (dict.pdf)', async () => {
  const results = transformer.transform(
    emptyContext(),
    items(0, [
      // Line 0
      { str: 'Let', x: 100.35, y: 625.05, height: 11.96 },
      { str: 'D', x: 122.38, y: 625.05, height: 11.96 },
      // Line 1, including the '0' with a lower y and smaller height
      { str: '(', x: 100.35, y: 610.61, height: 11.96 },
      { str: 'v', x: 104.9, y: 610.61, height: 11.96 },
      { str: '0', x: 110.57, y: 608.82, height: 7.97 },
      { str: ', a', x: 115.29, y: 610.61, height: 11.96 },
      // Line 2
      { str: 'all are different,', x: 100.35, y: 596.16, height: 11.96 },
    ]),
  );
  expect(results.items.map((item) => item.data['line'])).toEqual([0, 0, 1, 1, 1, 1, 2]);
});
test('Item Merger', async () => { test('Item Merger', async () => {
const itemMerger = new CompactLines().descriptor.itemMerger; const itemMerger = transformer.descriptor.itemMerger;
expect(itemMerger?.groupKey).toEqual('line'); expect(itemMerger?.groupKey).toEqual('line');
const mergedItem = itemMerger?.merge([ const mergedItem = itemMerger?.merge(
new Item(0, { items(0, [
line: 2, {
x: 240, line: 2,
y: 585, x: 240,
str: 'Dies ist eine Test-PDF', y: 585,
fontName: 'g_d0_f2', str: 'Dies ist eine Test-PDF',
dir: 'ltr', fontName: 'g_d0_f2',
width: 108.62, dir: 'ltr',
height: 11, width: 108.62,
}), height: 11,
new Item(0, { },
line: 2, {
x: 352.69, line: 2,
y: 585, x: 352.69,
str: '.', y: 585,
fontName: 'g_d0_f2', str: '.',
dir: 'ltr', fontName: 'g_d0_f2',
width: 3.06, dir: 'ltr',
height: 11, width: 3.06,
}), height: 11,
new Item(0, { },
line: 2, {
x: 348, line: 2,
y: 588, x: 348,
str: '1', y: 588,
fontName: 'g_d0_f2', str: '1',
dir: 'ltr', fontName: 'g_d0_f2',
width: 4.08, dir: 'ltr',
height: 7.33, width: 4.08,
}), height: 7.33,
]); },
]),
);
expect(mergedItem?.withoutUuid()).toEqual( expect(mergedItem?.withoutUuid()).toEqual(
new Item(0, { new Item(0, {
line: 2, line: 2,

View File

@ -0,0 +1,8 @@
import type TransformContext from "src/transformer/TransformContext";
/**
 * Creates a fresh transform context for tests.
 *
 * @returns a context holding a new, empty `Map` as `fontMap` and an empty
 *   `pageViewports` array; every call yields new instances.
 */
export function emptyContext(): TransformContext {
  const context: TransformContext = {
    fontMap: new Map(),
    pageViewports: [],
  };
  return context;
}

View File

@ -0,0 +1,5 @@
import Item from 'src/Item';
/**
 * Wraps plain data objects into `Item` instances for use as test fixtures.
 *
 * @param page - page value handed to every created `Item`
 * @param data - one data object per item, in the order the items should appear
 * @returns one `Item` per entry of `data`, in the same order
 */
export function items(page: number, data: object[]): Item[] {
  const toItem = (entry: object): Item => new Item(page, entry);
  return data.map((entry) => toItem(entry));
}

View File

@ -5,7 +5,6 @@
import { BookOpen, ArrowLeft, ArrowRight } from 'svelte-hero-icons'; import { BookOpen, ArrowLeft, ArrowRight } from 'svelte-hero-icons';
import type Debugger from '@core/Debugger'; import type Debugger from '@core/Debugger';
import type Item from '@core/Item';
import { asPages } from '../../../core/src/support/itemUtils'; import { asPages } from '../../../core/src/support/itemUtils';
import Popup from '../components/Popup.svelte'; import Popup from '../components/Popup.svelte';