pdf-to-markdown/core/test/PdfPipeline.test.ts

43 lines
1.8 KiB
TypeScript
Raw Normal View History

2021-02-21 13:23:31 +01:00
import TransformDescriptor from 'src/TransformDescriptor';
2021-02-05 18:28:04 +01:00
import Item from 'src/Item';
import ItemResult from 'src/ItemResult';
2021-02-05 18:28:04 +01:00
import ItemTransformer from 'src/transformer/ItemTransformer';
import TransformContext from 'src/transformer/TransformContext';
2021-02-14 11:43:26 +01:00
import PdfParser from 'src/PdfParser';
import * as pdfjs from 'pdfjs-dist/es5/build/pdf';
import PdfPipeline from 'src/PdfPipeline';
2021-02-05 18:28:04 +01:00
/**
 * Minimal ItemTransformer used only by these tests.
 *
 * It carries a name, a generated description, a (partial) transform
 * descriptor and, optionally, a fixed output schema. Its `transform` is a
 * pass-through, so only the declared metadata matters to the tests.
 */
class TestSchemaTransformer extends ItemTransformer {
  /**
   * @param name Transformer name; also used to build the description string.
   * @param descriptor Partial descriptor forwarded to ItemTransformer
   *   (the tests use it to declare `requireColumns`).
   * @param outputSchema When given, a function returning this array is passed
   *   as the fourth ItemTransformer constructor argument; when omitted, that
   *   argument is not passed at all.
   *   NOTE(review): presumably that argument maps an input schema to the
   *   output schema — confirm against ItemTransformer.
   */
  constructor(name: string, descriptor: Partial<TransformDescriptor>, outputSchema?: string[]) {
    if (outputSchema === undefined) {
      super(name, `Description for ${name}`, descriptor);
    } else {
      // The callback ignores its argument and always reports the fixed schema.
      super(name, `Description for ${name}`, descriptor, (_) => outputSchema);
    }
  }

  /**
   * Pass-through transform: returns the given items untouched and no messages.
   */
  transform(_: TransformContext, items: Item[]): ItemResult {
    return { items, messages: [] };
  }
}
// A chain of transformers whose declared required columns and output schemas
// line up: [A, B, C] -> (needs B, C) -> [A, D] -> (needs nothing) -> [A, D, E]
// -> (needs A, D, E). The test expects verifyRequiredColumns to accept it.
test('verify valid transform', () => {
  const inputSchema = ['A', 'B', 'C'];
  const transformers = [
    new TestSchemaTransformer('Replace B & C with D', { requireColumns: ['B', 'C'] }, ['A', 'D']),
    new TestSchemaTransformer('Create E', {}, ['A', 'D', 'E']),
    new TestSchemaTransformer('Uses A, D & E', { requireColumns: ['A', 'D', 'E'] }, ['A', 'D', 'E']),
  ];
  const pipeline = new PdfPipeline(new PdfParser(pdfjs), transformers);

  // Make the "must not throw" expectation explicit instead of relying on an
  // uncaught exception to fail the test.
  expect(() => pipeline.verifyRequiredColumns(inputSchema, transformers)).not.toThrow();
});
// A transformer that requires column 'X' against an input schema of
// [A, B, C]: verifyRequiredColumns is expected to throw with a message naming
// the schema, the transformer and the missing column.
test('verify invalid consume', () => {
  const inputSchema = ['A', 'B', 'C'];
  const transformers = [new TestSchemaTransformer('Consumes X', { requireColumns: ['X'] })];
  const pipeline = new PdfPipeline(new PdfParser(pdfjs), transformers);

  const verify = () => pipeline.verifyRequiredColumns(inputSchema, transformers);

  // `toThrow` is the canonical matcher; `toThrowError` is only an alias of it.
  expect(verify).toThrow(
    "Input schema [A, B, C] for transformer 'Consumes X' does not contain the required column 'X'",
  );
});