mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-01-16 10:38:41 +01:00
Fix tests
This commit is contained in:
parent
4d1821f584
commit
4c77274d16
@ -36,7 +36,7 @@ describe('Transform Items', () => {
|
||||
const trans1Items = parsedItems.map((item) => item.withData({ C: `c=${item.value('A')}+${item.value('B')}` }));
|
||||
|
||||
const transformers = [new TestTransformer('Trans1', trans1Desc, trans1Schema, trans1Items)];
|
||||
const debug = new Debugger(1, parsedSchema, parsedItems, { fontMap: new Map(), pageViewports: [] }, transformers);
|
||||
const debug = new Debugger(new Map(), [], 1, parsedSchema, parsedItems, transformers);
|
||||
|
||||
expect(debug.stageNames).toEqual(['Parse Result', 'Trans1']);
|
||||
expect(debug.stageResult(0).schema).toEqual(parsedSchema.map((column) => ({ name: column })));
|
||||
@ -62,7 +62,7 @@ describe('Transform Items', () => {
|
||||
const trans1Items = parsedItems.map((item) => item.withData({ line: item.data['y'] }));
|
||||
|
||||
const transformers = [new TestTransformer('Trans1', trans1Desc, trans1Schema, trans1Items)];
|
||||
const debug = new Debugger(1, parsedSchema, parsedItems, { fontMap: new Map(), pageViewports: [] }, transformers);
|
||||
const debug = new Debugger(new Map(), [], 1, parsedSchema, parsedItems, transformers);
|
||||
|
||||
expect(debug.stageNames).toEqual(['Parse Result', 'Trans1']);
|
||||
expect(debug.stageResult(0).schema).toEqual([{ name: 'id' }, { name: 'y' }]);
|
||||
@ -100,7 +100,7 @@ test('Change inside of Line', async () => {
|
||||
const trans1Items = swapElements([...parsedItems], 0, 1);
|
||||
|
||||
const transformers = [new TestTransformer('Trans1', trans1Desc, trans1Schema, trans1Items)];
|
||||
const debug = new Debugger(1, parsedSchema, parsedItems, { fontMap: new Map(), pageViewports: [] }, transformers);
|
||||
const debug = new Debugger(new Map(), [], 1, parsedSchema, parsedItems, transformers);
|
||||
|
||||
expect(debug.stageNames).toEqual(['Parse Result', 'Trans1']);
|
||||
expect(debug.stageResult(0).schema).toEqual([{ name: 'id' }, { name: 'line' }]);
|
||||
@ -134,7 +134,7 @@ describe('build schemas', () => {
|
||||
|
||||
function calculateSchema(inputSchema: string[], outputSchema: string[]): AnnotatedColumn[] {
|
||||
const transformers = [new TestTransformer('Trans1', {}, outputSchema, items)];
|
||||
const debug = new Debugger(1, inputSchema, items, { fontMap: new Map(), pageViewports: [] }, transformers);
|
||||
const debug = new Debugger(new Map(), [], 1, inputSchema, items, transformers);
|
||||
return debug.stageResult(1).schema;
|
||||
}
|
||||
|
||||
|
@ -83,7 +83,6 @@ function matchFilePath(pdfFileName: string, transformerName: string, chunkCount
|
||||
describe('Remove repetitive items from online resources', () => {
|
||||
const transformerName = new RemoveRepetitiveItems().name;
|
||||
test.each(urls)('URL %p', async (url) => {
|
||||
console.log(url);
|
||||
const { fileName, data } = download(url);
|
||||
const debug = await pipeline.debug(data, () => {});
|
||||
const stageResult = debug.stageResult(debug.stageNames.indexOf(transformerName));
|
||||
@ -103,7 +102,6 @@ describe('Remove repetitive items from online resources', () => {
|
||||
}),
|
||||
);
|
||||
|
||||
console.log(lines);
|
||||
const transformerResultAsString = lines.join('\n') || '{}';
|
||||
expect(transformerResultAsString).toMatchFile(matchFilePath(fileName, transformerName));
|
||||
});
|
||||
@ -156,7 +154,6 @@ function itemToString(fontMap: Map<string, object>, item: Item, changeType: stri
|
||||
function download(url: string): { fileName: string; data: Buffer } {
|
||||
const fileName = path.basename(new URL(url).pathname);
|
||||
const localFilePath = `${downloadCache}/${fileName}`;
|
||||
console.log(localFilePath);
|
||||
if (!fs.existsSync(localFilePath)) {
|
||||
fs.mkdirSync(downloadCache, { recursive: true });
|
||||
downloadToFile(url, localFilePath);
|
||||
|
@ -1,4 +1,5 @@
|
||||
import LineItemMerger from 'src/debug/LineItemMerger';
|
||||
import EvaluationTracker from 'src/transformer/EvaluationTracker';
|
||||
import ChangeTracker from 'src/debug/ChangeTracker';
|
||||
import Item from 'src/Item';
|
||||
import { items, realisticItems } from '../testItems';
|
||||
@ -6,10 +7,12 @@ import { Addition, ContentChange } from 'src/debug/ChangeIndex';
|
||||
|
||||
test('Basics', async () => {
|
||||
const itemMerger = new LineItemMerger();
|
||||
const tracker = new ChangeTracker();
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
expect(itemMerger.groupKey).toEqual('line');
|
||||
const mergedItem = itemMerger.merge(
|
||||
tracker,
|
||||
evaluationTracker,
|
||||
changeTracker,
|
||||
items(0, [
|
||||
{
|
||||
line: 2,
|
||||
@ -59,19 +62,34 @@ test('Basics', async () => {
|
||||
|
||||
test('Track all lines as changes', async () => {
|
||||
const itemMerger = new LineItemMerger(true);
|
||||
const tracker = new ChangeTracker();
|
||||
const mergedItem = itemMerger.merge(tracker, realisticItems(0, [{ line: 1 }, { line: 1 }]));
|
||||
expect(tracker.change(mergedItem)).toEqual(new Addition());
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const mergedItem = itemMerger.merge(evaluationTracker, changeTracker, realisticItems(0, [{ line: 1 }, { line: 1 }]));
|
||||
expect(changeTracker.change(mergedItem)).toEqual(new Addition());
|
||||
});
|
||||
|
||||
test('Mark lines containing evaluated items as evaluated', async () => {
|
||||
const itemMerger = new LineItemMerger();
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const items1 = realisticItems(0, [{ line: 1 }, { line: 1 }]);
|
||||
const items2 = realisticItems(0, [{ line: 2 }, { line: 2 }]);
|
||||
evaluationTracker.trackEvaluation(items1[1]);
|
||||
const mergedItem1 = itemMerger.merge(evaluationTracker, changeTracker, items1);
|
||||
const mergedItem2 = itemMerger.merge(evaluationTracker, changeTracker, items2);
|
||||
expect(evaluationTracker.evaluated(mergedItem1)).toBeTruthy();
|
||||
expect(evaluationTracker.evaluated(mergedItem2)).toBeFalsy();
|
||||
});
|
||||
|
||||
test('Mark lines containing changed items as changed', async () => {
|
||||
const itemMerger = new LineItemMerger();
|
||||
const tracker = new ChangeTracker();
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const items1 = realisticItems(0, [{ line: 1 }, { line: 1 }]);
|
||||
const items2 = realisticItems(0, [{ line: 2 }, { line: 2 }]);
|
||||
tracker.trackPositionalChange(items1[1], 1, 0);
|
||||
const mergedItem1 = itemMerger.merge(tracker, items1);
|
||||
const mergedItem2 = itemMerger.merge(tracker, items2);
|
||||
expect(tracker.change(mergedItem1)).toEqual(new ContentChange());
|
||||
expect(tracker.change(mergedItem2)).toEqual(undefined);
|
||||
changeTracker.trackPositionalChange(items1[1], 1, 0);
|
||||
const mergedItem1 = itemMerger.merge(evaluationTracker, changeTracker, items1);
|
||||
const mergedItem2 = itemMerger.merge(evaluationTracker, changeTracker, items2);
|
||||
expect(changeTracker.change(mergedItem1)).toEqual(new ContentChange());
|
||||
expect(changeTracker.change(mergedItem2)).toEqual(undefined);
|
||||
});
|
||||
|
@ -3,11 +3,13 @@ import Page, { asPages } from 'src/debug/Page';
|
||||
import ItemGroup from 'src/debug/ItemGroup';
|
||||
import ItemMerger from 'src/debug/ItemMerger';
|
||||
import { items } from 'test/testItems';
|
||||
import EvaluationTracker from 'src/transformer/EvaluationTracker';
|
||||
import ChangeTracker from 'src/debug/ChangeTracker';
|
||||
|
||||
test('empty', async () => {
|
||||
const tracker = new ChangeTracker();
|
||||
expect(asPages(tracker, [])).toEqual([]);
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
expect(asPages(evaluationTracker, changeTracker, [])).toEqual([]);
|
||||
});
|
||||
|
||||
test('no merger', async () => {
|
||||
@ -21,14 +23,15 @@ test('no merger', async () => {
|
||||
items(2, [{ id: 5, line: 1 }]),
|
||||
];
|
||||
const flattenedItems = new Array<Item>().concat(...pageItems);
|
||||
const tracker = new ChangeTracker();
|
||||
const pages = asPages(tracker, flattenedItems);
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const pages = asPages(evaluationTracker, changeTracker, flattenedItems);
|
||||
expect(pages).toEqual([
|
||||
{ index: 0, itemGroups: pageItems[0].map((item) => new ItemGroup(item)) },
|
||||
{ index: 1, itemGroups: pageItems[1].map((item) => new ItemGroup(item)) },
|
||||
{ index: 2, itemGroups: pageItems[2].map((item) => new ItemGroup(item)) },
|
||||
] as Page[]);
|
||||
expect(tracker.changeCount()).toEqual(0);
|
||||
expect(changeTracker.changeCount()).toEqual(0);
|
||||
});
|
||||
|
||||
test('merger', async () => {
|
||||
@ -43,15 +46,19 @@ test('merger', async () => {
|
||||
];
|
||||
const flattenedItems = new Array<Item>().concat(...pageItems);
|
||||
const merger: ItemMerger = { groupKey: 'line', merge: (items) => items[0] };
|
||||
const tracker = new ChangeTracker();
|
||||
const pages = asPages(tracker, flattenedItems, merger);
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const pages = asPages(evaluationTracker, changeTracker, flattenedItems, merger);
|
||||
|
||||
expect(pages).toEqual([
|
||||
{ index: 0, itemGroups: pageItems[0].map((item) => new ItemGroup(item)) },
|
||||
{
|
||||
index: 1,
|
||||
itemGroups: [
|
||||
new ItemGroup(merger.merge(tracker, pageItems[1].slice(0, 2)), pageItems[1].slice(0, 2)),
|
||||
new ItemGroup(
|
||||
merger.merge(evaluationTracker, changeTracker, pageItems[1].slice(0, 2)),
|
||||
pageItems[1].slice(0, 2),
|
||||
),
|
||||
new ItemGroup(pageItems[1][2]),
|
||||
],
|
||||
},
|
||||
|
@ -1,5 +1,6 @@
|
||||
import StageResult from 'src/debug/StageResult';
|
||||
import { toDescriptor } from 'src/TransformDescriptor';
|
||||
import EvaluationTracker from 'src/transformer/EvaluationTracker';
|
||||
import ChangeTracker from 'src/debug/ChangeTracker';
|
||||
import AnnotatedColumn from 'src/debug/AnnotatedColumn';
|
||||
import Page, { asPages } from 'src/debug/Page';
|
||||
@ -7,7 +8,8 @@ import { items } from '../testItems';
|
||||
import LineItemMerger from 'src/debug/LineItemMerger';
|
||||
|
||||
test('itemsUnpacked', async () => {
|
||||
const tracker = new ChangeTracker();
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const itemMerger = new LineItemMerger(false);
|
||||
const descriptor = toDescriptor({ debug: { itemMerger } });
|
||||
const schema: AnnotatedColumn[] = [{ name: 'A' }];
|
||||
@ -23,15 +25,16 @@ test('itemsUnpacked', async () => {
|
||||
{ idx: 5, line: 1 },
|
||||
]),
|
||||
];
|
||||
const pages = asPages(tracker, flatItems, itemMerger);
|
||||
const result = new StageResult(descriptor, schema, pages, tracker, []);
|
||||
const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger);
|
||||
const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []);
|
||||
|
||||
expect(result.itemsUnpacked().map((item) => item.data['idx'])).toEqual([0, 1, 2, 3, 4, 5]);
|
||||
expect(result.itemsCleanedAndUnpacked().map((item) => item.data['idx'])).toEqual([0, 1, 2, 3, 4, 5]);
|
||||
});
|
||||
|
||||
test('itemsCleanedAndUnpacked', async () => {
|
||||
const tracker = new ChangeTracker();
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const itemMerger = new LineItemMerger(false);
|
||||
const descriptor = toDescriptor({ debug: { itemMerger } });
|
||||
const schema: AnnotatedColumn[] = [{ name: 'A' }];
|
||||
@ -47,10 +50,10 @@ test('itemsCleanedAndUnpacked', async () => {
|
||||
{ idx: 5, line: 1 },
|
||||
]),
|
||||
];
|
||||
const pages = asPages(tracker, flatItems, itemMerger);
|
||||
tracker.trackRemoval(flatItems[1]);
|
||||
tracker.trackRemoval(flatItems[4]);
|
||||
const result = new StageResult(descriptor, schema, pages, tracker, []);
|
||||
const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger);
|
||||
changeTracker.trackRemoval(flatItems[1]);
|
||||
changeTracker.trackRemoval(flatItems[4]);
|
||||
const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []);
|
||||
|
||||
expect(result.itemsUnpacked().map((item) => item.data['idx'])).toEqual([0, 1, 2, 3, 4, 5]);
|
||||
expect(result.itemsCleanedAndUnpacked().map((item) => item.data['idx'])).toEqual([0, 2, 3, 5]);
|
||||
@ -60,8 +63,49 @@ describe('select pages', () => {
|
||||
function groupElements(page: Page, elementName: string) {
|
||||
return page.itemGroups.map((group) => group.unpacked().map((item) => item.data['idx']));
|
||||
}
|
||||
|
||||
test('Evaluation+Changes', async () => {
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const itemMerger = new LineItemMerger(false);
|
||||
const descriptor = toDescriptor({ debug: { itemMerger } });
|
||||
const schema: AnnotatedColumn[] = [{ name: 'A' }];
|
||||
const flatItems = items(0, [
|
||||
{ idx: 0, line: 1 }, // nada
|
||||
{ idx: 1, line: 2 }, // eval
|
||||
{ idx: 2, line: 3 }, // eval + change
|
||||
{ idx: 3, line: 4 }, // eval
|
||||
{ idx: 4, line: 4 }, // change
|
||||
{ idx: 5, line: 4 },
|
||||
]);
|
||||
evaluationTracker.trackEvaluation(flatItems[1]);
|
||||
evaluationTracker.trackEvaluation(flatItems[2]);
|
||||
evaluationTracker.trackEvaluation(flatItems[3]);
|
||||
changeTracker.trackAddition(flatItems[2]);
|
||||
changeTracker.trackAddition(flatItems[4]);
|
||||
const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger);
|
||||
const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []);
|
||||
|
||||
const allGrouped = result.selectPages(false, true);
|
||||
expect(allGrouped.map((page) => page.index)).toEqual([0]);
|
||||
expect(groupElements(allGrouped[0], 'idx')).toEqual([[0], [1], [2], [3, 4, 5]]);
|
||||
|
||||
const relevantGrouped = result.selectPages(true, true);
|
||||
expect(relevantGrouped.map((page) => page.index)).toEqual([0]);
|
||||
expect(groupElements(relevantGrouped[0], 'idx')).toEqual([[1], [2], [3, 4, 5]]);
|
||||
|
||||
const relevantUnpacked = result.selectPages(true, false);
|
||||
expect(relevantUnpacked.map((page) => page.index)).toEqual([0]);
|
||||
expect(groupElements(relevantUnpacked[0], 'idx')).toEqual([[1], [2], [3], [4]]);
|
||||
|
||||
const allUnpacked = result.selectPages(false, false);
|
||||
expect(allUnpacked.map((page) => page.index)).toEqual([0]);
|
||||
expect(groupElements(allUnpacked[0], 'idx')).toEqual([[0], [1], [2], [3], [4], [5]]);
|
||||
});
|
||||
|
||||
test('Changes on group level', async () => {
|
||||
const tracker = new ChangeTracker();
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const itemMerger = new LineItemMerger(true);
|
||||
const descriptor = toDescriptor({ debug: { itemMerger } });
|
||||
const schema: AnnotatedColumn[] = [{ name: 'A' }];
|
||||
@ -77,8 +121,8 @@ describe('select pages', () => {
|
||||
{ idx: 5, line: 1 },
|
||||
]),
|
||||
];
|
||||
const pages = asPages(tracker, flatItems, itemMerger);
|
||||
const result = new StageResult(descriptor, schema, pages, tracker, []);
|
||||
const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger);
|
||||
const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []);
|
||||
|
||||
const allGrouped = result.selectPages(false, true);
|
||||
expect(allGrouped.map((page) => page.index)).toEqual([0, 1, 2]);
|
||||
@ -106,7 +150,8 @@ describe('select pages', () => {
|
||||
});
|
||||
|
||||
test('Changes on element level', async () => {
|
||||
const tracker = new ChangeTracker();
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const itemMerger = new LineItemMerger(false);
|
||||
const descriptor = toDescriptor({ debug: { itemMerger } });
|
||||
const schema: AnnotatedColumn[] = [{ name: 'A' }];
|
||||
@ -123,10 +168,10 @@ describe('select pages', () => {
|
||||
{ idx: 6, line: 2 },
|
||||
]),
|
||||
];
|
||||
tracker.trackAddition(flatItems[3]);
|
||||
tracker.trackAddition(flatItems[5]);
|
||||
const pages = asPages(tracker, flatItems, itemMerger);
|
||||
const result = new StageResult(descriptor, schema, pages, tracker, []);
|
||||
changeTracker.trackAddition(flatItems[3]);
|
||||
changeTracker.trackAddition(flatItems[5]);
|
||||
const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger);
|
||||
const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []);
|
||||
|
||||
const allGrouped = result.selectPages(false, true);
|
||||
expect(allGrouped.map((page) => page.index)).toEqual([0, 1, 2]);
|
||||
@ -153,8 +198,9 @@ describe('select pages', () => {
|
||||
expect(groupElements(allUnpacked[2], 'idx')).toEqual([[4], [5], [6]]);
|
||||
});
|
||||
|
||||
test('showAll - grouped', async () => {
|
||||
const tracker = new ChangeTracker();
|
||||
test('showAll - grouped - merger', async () => {
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const itemMerger = new LineItemMerger(false);
|
||||
const descriptor = toDescriptor({ debug: { itemMerger, showAll: true } });
|
||||
const schema: AnnotatedColumn[] = [{ name: 'A' }];
|
||||
@ -170,8 +216,8 @@ describe('select pages', () => {
|
||||
{ idx: 5, line: 1 },
|
||||
]),
|
||||
];
|
||||
const pages = asPages(tracker, flatItems, itemMerger);
|
||||
const result = new StageResult(descriptor, schema, pages, tracker, []);
|
||||
const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger);
|
||||
const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []);
|
||||
|
||||
const relevantGrouped = result.selectPages(true, true);
|
||||
expect(relevantGrouped.map((page) => page.index)).toEqual([0, 1, 2]);
|
||||
@ -180,8 +226,9 @@ describe('select pages', () => {
|
||||
expect(groupElements(relevantGrouped[2], 'idx')).toEqual([[4, 5]]);
|
||||
});
|
||||
|
||||
test('showAll - grouped', async () => {
|
||||
const tracker = new ChangeTracker();
|
||||
test('showAll - grouped - no merger', async () => {
|
||||
const evaluationTracker = new EvaluationTracker();
|
||||
const changeTracker = new ChangeTracker();
|
||||
const descriptor = toDescriptor({ debug: { showAll: true } });
|
||||
const schema: AnnotatedColumn[] = [{ name: 'A' }];
|
||||
const flatItems = [
|
||||
@ -189,8 +236,8 @@ describe('select pages', () => {
|
||||
...items(1, [{ idx: 3 }]),
|
||||
...items(2, [{ idx: 4 }, { idx: 5 }]),
|
||||
];
|
||||
const pages = asPages(tracker, flatItems);
|
||||
const result = new StageResult(descriptor, schema, pages, tracker, []);
|
||||
const pages = asPages(evaluationTracker, changeTracker, flatItems);
|
||||
const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []);
|
||||
|
||||
const relevantGrouped = result.selectPages(true, true);
|
||||
expect(relevantGrouped.map((page) => page.index)).toEqual([0, 1, 2]);
|
||||
|
@ -1,8 +1,5 @@
|
||||
import type TransformContext from "src/transformer/TransformContext";
|
||||
import TransformContext from 'src/transformer/TransformContext';
|
||||
|
||||
export function emptyContext():TransformContext{
|
||||
return {
|
||||
fontMap:new Map(),
|
||||
pageViewports:[]
|
||||
};
|
||||
}
|
||||
export function emptyContext(): TransformContext {
|
||||
return new TransformContext(new Map(), []);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user