diff --git a/core/src/Debugger.ts b/core/src/Debugger.ts index 2e8abae..77acd3c 100644 --- a/core/src/Debugger.ts +++ b/core/src/Debugger.ts @@ -9,6 +9,7 @@ import { asPages } from './debug/Page'; import EvaluationTracker from './transformer/EvaluationTracker'; import ChangeTracker from './debug/ChangeTracker'; import PageViewport from './parse/PageViewport'; +import Globals from './transformer/Globals'; export default class Debugger { private transformers: ItemTransformer[]; @@ -34,13 +35,19 @@ export default class Debugger { for (let idx = 0; idx < stageIndex + 1; idx++) { if (!this.stageResultCache[idx]) { const evaluations = new EvaluationTracker(); - const context = new TransformContext(this.fontMap, this.pageViewports, evaluations); const transformer = this.transformers[idx - 1]; const previousStageResult: StageResult = this.stageResultCache[idx - 1]; + const context = new TransformContext( + this.fontMap, + this.pageViewports, + previousStageResult.globals, + evaluations, + ); const previousItems = previousStageResult.itemsCleanedAndUnpacked(); const inputSchema = toSimpleSchema(previousStageResult); const outputSchema = transformer.schemaTransformer(inputSchema); const itemResult = transformer.transform(context, [...previousItems]); + const globals = new Globals(previousStageResult.globals).withValues(itemResult.globals); const changes = new ChangeTracker(); const items = detectChanges(changes, previousItems, itemResult.items); @@ -52,6 +59,7 @@ export default class Debugger { this.stageResultCache.push( new StageResult( + globals, transformer.descriptor, toAnnotatedSchema(inputSchema, outputSchema), pages, diff --git a/core/src/ItemResult.ts b/core/src/ItemResult.ts index d73ac15..4c18cc3 100644 --- a/core/src/ItemResult.ts +++ b/core/src/ItemResult.ts @@ -1,7 +1,8 @@ import type Item from './Item'; +import GlobalValue from './transformer/GlobalValue'; export default interface ItemResult { items: Item[]; messages: string[]; - globals?: object; + globals?: GlobalValue[]; } diff --git a/core/src/PdfPipeline.ts b/core/src/PdfPipeline.ts index 90b9928..d3e8d12 100644 --- a/core/src/PdfPipeline.ts +++ b/core/src/PdfPipeline.ts @@ -6,6 +6,7 @@ import ParseResult from './ParseResult'; import Debugger from './Debugger'; import { assert } from './assert'; import TransformContext from './transformer/TransformContext'; +import Globals from './transformer/Globals'; export default class PdfPipeline { parser: PdfParser; @@ -29,9 +30,12 @@ export default class PdfPipeline { const parseResult = await this.parse(src, progressListener); this.verifyRequiredColumns(parseResult.schema, this.transformers); let items = parseResult.items; + let globals = new Globals(); + const context = new TransformContext(parseResult.fontMap, parseResult.pageViewports, globals); this.transformers.forEach((transformer) => { - const context = new TransformContext(parseResult.fontMap, parseResult.pageViewports); - items = transformer.transform(context, items).items; + const result = transformer.transform(context, items); + globals = globals.withValues(result.globals); + items = result.items; }); parseResult.items = items; return parseResult; diff --git a/core/src/debug/StageResult.ts b/core/src/debug/StageResult.ts index 5bcd921..78e68ac 100644 --- a/core/src/debug/StageResult.ts +++ b/core/src/debug/StageResult.ts @@ -7,9 +7,11 @@ import ChangeTracker from './ChangeTracker'; import ItemGroup from './ItemGroup'; import EvaluationIndex from '../transformer/EvaluationIndex'; import EvaluationTracker from '../transformer/EvaluationTracker'; +import Globals from '../transformer/Globals'; export default class StageResult { constructor( + public globals: Globals, public descriptor: TransformDescriptor, public schema: AnnotatedColumn[], public pages: Page[], @@ -86,5 +88,13 @@ export function initialStage(inputSchema: string[], inputItems: Item[]): StageRe inputItems.length } items`, ]; - return new StageResult(toDescriptor({ debug: { showAll: true } }), schema, pages, evaluations, changes, messages); + return new StageResult( + new Globals(), + toDescriptor({ debug: { showAll: true } }), + schema, + pages, + evaluations, + changes, + messages, + ); } diff --git a/core/src/transformer/CacluclateStatistics.ts b/core/src/transformer/CacluclateStatistics.ts index c44f19a..3feb0b9 100644 --- a/core/src/transformer/CacluclateStatistics.ts +++ b/core/src/transformer/CacluclateStatistics.ts @@ -3,6 +3,9 @@ import ItemResult from '../ItemResult'; import ItemTransformer from './ItemTransformer'; import TransformContext from './TransformContext'; import FontType from '../FontType'; +import GlobalDefinition from './GlobalDefinition'; + +export const MAX_HEIGHT = new GlobalDefinition('maxHeight'); export default class CalculateStatistics extends ItemTransformer { constructor() { @@ -82,14 +85,14 @@ export default class CalculateStatistics extends ItemTransformer { return { items: items, - globals: { - mostUsedHeight: mostUsedHeight, - mostUsedFont: mostUsedFont, - mostUsedDistance: mostUsedDistance, - maxHeight: maxHeight, - maxHeightFont: maxHeightFont, - fontToFormats: fontToType, - }, + globals: [MAX_HEIGHT.value(maxHeight)], + // globals2: { + // mostUsedHeight: mostUsedHeight, + // mostUsedFont: mostUsedFont, + // mostUsedDistance: mostUsedDistance, + // maxHeightFont: maxHeightFont, + // fontToFormats: fontToType, + // }, messages: [ 'Items per height: ' + JSON.stringify(heightToOccurrence), 'Items per font: ' + JSON.stringify(fontToOccurrence), diff --git a/core/src/transformer/GlobalDefinition.ts b/core/src/transformer/GlobalDefinition.ts new file mode 100644 index 0000000..4cdabbc --- /dev/null +++ b/core/src/transformer/GlobalDefinition.ts @@ -0,0 +1,14 @@ +import { assertDefined, assertNot } from 'src/assert'; +import GlobalValue from './GlobalValue'; + +export default class GlobalDefinition { + constructor(public key: string) {} + + value(value: T) { + return new GlobalValue(this, value); + } + + overrideValue(value: T) { + return new GlobalValue(this, value, true); + } +} diff --git a/core/src/transformer/GlobalValue.ts b/core/src/transformer/GlobalValue.ts new file mode 100644 index 0000000..8c6f961 --- /dev/null +++ b/core/src/transformer/GlobalValue.ts @@ -0,0 +1,6 @@ +import { assertDefined, assertNot } from 'src/assert'; +import GlobalDefinition from './GlobalDefinition'; + +export default class GlobalValue { + constructor(public definition: GlobalDefinition, public value: T, public override: boolean = false) {} +} diff --git a/core/src/transformer/Globals.ts b/core/src/transformer/Globals.ts new file mode 100644 index 0000000..22f548f --- /dev/null +++ b/core/src/transformer/Globals.ts @@ -0,0 +1,47 @@ +import GlobalDefinition from './GlobalDefinition'; +import { assertDefined, assertNot } from '../assert'; +import GlobalValue from './GlobalValue'; + +export default class Globals { + map: Map; + constructor(globals?: Globals) { + this.map = globals ? new Map(globals.map) : new Map(); + } + + keys(): string[] { + return [...this.map.keys()]; + } + + isDefined(definition: GlobalDefinition): boolean { + return typeof this.map.get(definition.key) !== 'undefined'; + } + + get(definition: GlobalDefinition): T { + const element = this.map.get(definition.key) as T; + assertDefined( + element, + `No global with key '${definition.key}' registered. Only [${[...this.map.keys()].join(',')}]`, + ); + return element; + } + + set(definition: GlobalDefinition, value: T) { + assertNot(this.isDefined(definition), `Global with key '${definition.key}' already registered.`); + this.map.set(definition.key, value); + } + + override(definition: GlobalDefinition, value: T) { + this.map.set(definition.key, value); + } + + withValues(values: GlobalValue[] | undefined): Globals { + values?.forEach((value) => { + if (value.override) { + this.override(value.definition, value.value); + } else { + this.set(value.definition, value.value); + } + }); + return this; + } +} diff --git a/core/src/transformer/TransformContext.ts b/core/src/transformer/TransformContext.ts index a2bc567..f8f26c3 100644 --- a/core/src/transformer/TransformContext.ts +++ b/core/src/transformer/TransformContext.ts @@ -1,6 +1,8 @@ import Item from '../Item'; import PageViewport from '../parse/PageViewport'; import EvaluationTracker from './EvaluationTracker'; +import GlobalDefinition from './GlobalDefinition'; +import Globals from './Globals'; export default class TransformContext { pageCount: number; @@ -8,6 +10,7 @@ export default class TransformContext { constructor( public fontMap: Map, public pageViewports: PageViewport[], + private globals: Globals, private evaluations = new EvaluationTracker(), ) { this.pageCount = pageViewports.length; @@ -16,4 +19,12 @@ export default class TransformContext { trackEvaluation(item: Item) { this.evaluations.trackEvaluation(item); } + + globalIsDefined(definition: GlobalDefinition): boolean { + return this.globals.isDefined(definition); + } + + getGlobal(definition: GlobalDefinition): T { + return this.globals.get(definition); + } } diff --git a/core/test/debug/StageResults.test.ts b/core/test/debug/StageResults.test.ts index 8e1831e..6845de0 100644 --- a/core/test/debug/StageResults.test.ts +++ b/core/test/debug/StageResults.test.ts @@ -6,6 +6,7 @@ import AnnotatedColumn from 'src/debug/AnnotatedColumn'; import Page, { asPages } from 'src/debug/Page'; import { items } from '../testItems'; import LineItemMerger from 'src/debug/LineItemMerger'; +import Globals from 'src/transformer/Globals'; test('itemsUnpacked', async () => { const evaluationTracker = new EvaluationTracker(); @@ -26,7 +27,7 @@ test('itemsUnpacked', async () => { ]), ]; const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger); - const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []); + const result = new StageResult(new Globals(), descriptor, schema, pages, evaluationTracker, changeTracker, []); expect(result.itemsUnpacked().map((item) => item.data['idx'])).toEqual([0, 1, 2, 3, 4, 5]); expect(result.itemsCleanedAndUnpacked().map((item) => item.data['idx'])).toEqual([0, 1, 2, 3, 4, 5]); @@ -53,7 +54,7 @@ test('itemsCleanedAndUnpacked', async () => { const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger); changeTracker.trackRemoval(flatItems[1]); changeTracker.trackRemoval(flatItems[4]); - const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []); + const result = new StageResult(new Globals(), descriptor, schema, pages, evaluationTracker, changeTracker, []); expect(result.itemsUnpacked().map((item) => item.data['idx'])).toEqual([0, 1, 2, 3, 4, 5]); expect(result.itemsCleanedAndUnpacked().map((item) => item.data['idx'])).toEqual([0, 2, 3, 5]); @@ -84,7 +85,7 @@ describe('select pages', () => { changeTracker.trackAddition(flatItems[2]); changeTracker.trackAddition(flatItems[4]); const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger); - const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []); + const result = new StageResult(new Globals(), descriptor, schema, pages, evaluationTracker, changeTracker, []); const allGrouped = result.selectPages(false, true); expect(allGrouped.map((page) => page.index)).toEqual([0]); @@ -122,7 +123,7 @@ describe('select pages', () => { ]), ]; const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger); - const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []); + const result = new StageResult(new Globals(), descriptor, schema, pages, evaluationTracker, changeTracker, []); const allGrouped = result.selectPages(false, true); expect(allGrouped.map((page) => page.index)).toEqual([0, 1, 2]); @@ -171,7 +172,7 @@ describe('select pages', () => { changeTracker.trackAddition(flatItems[3]); changeTracker.trackAddition(flatItems[5]); const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger); - const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []); + const result = new StageResult(new Globals(), descriptor, schema, pages, evaluationTracker, changeTracker, []); const allGrouped = result.selectPages(false, true); expect(allGrouped.map((page) => page.index)).toEqual([0, 1, 2]); @@ -217,7 +218,7 @@ describe('select pages', () => { ]), ]; const pages = asPages(evaluationTracker, changeTracker, flatItems, itemMerger); - const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []); + const result = new StageResult(new Globals(), descriptor, schema, pages, evaluationTracker, changeTracker, []); const relevantGrouped = result.selectPages(true, true); expect(relevantGrouped.map((page) => page.index)).toEqual([0, 1, 2]); @@ -237,7 +238,7 @@ describe('select pages', () => { ...items(2, [{ idx: 4 }, { idx: 5 }]), ]; const pages = asPages(evaluationTracker, changeTracker, flatItems); - const result = new StageResult(descriptor, schema, pages, evaluationTracker, changeTracker, []); + const result = new StageResult(new Globals(), descriptor, schema, pages, evaluationTracker, changeTracker, []); const relevantGrouped = result.selectPages(true, true); expect(relevantGrouped.map((page) => page.index)).toEqual([0, 1, 2]); diff --git a/core/test/transformer/Globals.test.ts b/core/test/transformer/Globals.test.ts new file mode 100644 index 0000000..46be0b2 --- /dev/null +++ b/core/test/transformer/Globals.test.ts @@ -0,0 +1,51 @@ +import GlobalDefinition from 'src/transformer/GlobalDefinition'; +import Globals from 'src/transformer/Globals'; + +const MyGlobalString = new GlobalDefinition('myGlobalString'); +const MyGlobalNumber = new GlobalDefinition('myGlobalNumber'); + +test('not set', async () => { + const globals = new Globals(); + globals.set(MyGlobalString, '23'); + expect(globals.isDefined(MyGlobalNumber)).toBeFalsy(); + expect(() => globals.get(MyGlobalNumber)).toThrow( + `No global with key '${MyGlobalNumber.key}' registered. Only [${MyGlobalString.key}]`, + ); +}); + +test('set', async () => { + const globals = new Globals(); + globals.set(MyGlobalNumber, 24); + + expect(globals.isDefined(MyGlobalNumber)).toBeTruthy(); + expect(globals.get(MyGlobalNumber)).toEqual(24); + expect(globals.keys()).toEqual([MyGlobalNumber.key]); +}); + +test('set, already exists', async () => { + const globals = new Globals(); + globals.set(MyGlobalNumber, 24); + expect(() => globals.set(MyGlobalNumber, 25)).toThrow("Global with key 'myGlobalNumber' already registered."); +}); + +test('override', async () => { + const globals = new Globals(); + globals.set(MyGlobalNumber, 24); + globals.override(MyGlobalNumber, 25); + + expect(globals.isDefined(MyGlobalNumber)).toBeTruthy(); + expect(globals.get(MyGlobalNumber)).toEqual(25); +}); + +test('inheritence', async () => { + const globals1 = new Globals(); + globals1.set(MyGlobalNumber, 24); + const globals2 = new Globals(globals1); + globals2.set(MyGlobalString, 'myKey'); + + expect(globals2.keys()).toEqual([MyGlobalNumber.key, MyGlobalString.key]); + expect(globals2.isDefined(MyGlobalNumber)).toBeTruthy(); + expect(globals2.isDefined(MyGlobalString)).toBeTruthy(); + expect(globals2.get(MyGlobalNumber)).toEqual(24); + expect(globals2.get(MyGlobalString)).toEqual('myKey'); +});