From 388e8cc6b1e22808a1f7bebd579b0221cb644f51 Mon Sep 17 00:00:00 2001 From: Johannes Zillmann Date: Sun, 28 Mar 2021 23:45:26 +0200 Subject: [PATCH] Find page mapping during statistics calculation --- core/src/PageMapping.ts | 31 +++++ core/src/support/PageFactorFinder.ts | 38 ++++++ core/src/transformer/RemoveRepetitiveItems.ts | 21 ++- core/test/PageMapping.test.ts | 12 ++ core/test/support/PageFactorFinder.test.ts | 126 ++++++++++++++++++ .../calculateStatistics.json | 10 +- .../compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- .../sortbyX.json | 10 +- .../calculateStatistics.json | 10 +- .../Alice-In-Wonderland/compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- examples/Alice-In-Wonderland/sortbyX.json | 10 +- .../Closed-Syllables/calculateStatistics.json | 10 +- examples/Closed-Syllables/compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- examples/Closed-Syllables/sortbyX.json | 10 +- examples/ExamplePdf/calculateStatistics.json | 10 +- examples/ExamplePdf/compactLines.json | 10 +- .../ExamplePdf/removeRepetitiveItems.json | 11 +- examples/ExamplePdf/sortbyX.json | 10 +- .../calculateStatistics.json | 10 +- .../compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- .../Flash-Masques-Temperature/sortbyX.json | 10 +- .../Grammar-Matters/calculateStatistics.json | 10 +- examples/Grammar-Matters/compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- examples/Grammar-Matters/sortbyX.json | 10 +- .../calculateStatistics.json | 10 +- .../compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- .../Life-Of-God-In-Soul-Of-Man/sortbyX.json | 10 +- .../calculateStatistics.json | 10 +- examples/Safe-Communication/compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- examples/Safe-Communication/sortbyX.json | 10 +- .../calculateStatistics.json | 10 +- .../compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- .../St-Mary-Witney-Social-Audit/sortbyX.json | 10 +- .../calculateStatistics.0.json | 10 +- .../compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- .../The-Art-of-Public-Speaking/sortbyX.json | 10 +- .../calculateStatistics.json | 10 +- .../The-Man-Without-A-Body/compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- examples/The-Man-Without-A-Body/sortbyX.json | 10 +- .../calculateStatistics.json | 10 +- .../The-War-of-the-Worlds/compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- examples/The-War-of-the-Worlds/sortbyX.json | 10 +- .../calculateStatistics.json | 10 +- .../Tragedy-Of-The-Commons/compactLines.json | 10 +- .../removeRepetitiveItems.json | 11 +- examples/Tragedy-Of-The-Commons/sortbyX.json | 10 +- examples/WoodUp/calculateStatistics.json | 10 +- examples/WoodUp/compactLines.json | 10 +- examples/WoodUp/removeRepetitiveItems.json | 11 +- examples/WoodUp/sortbyX.json | 10 +- .../removeRepetitiveItems.json | 11 +- examples/dict/removeRepetitiveItems.json | 11 +- 63 files changed, 755 insertions(+), 69 deletions(-) create mode 100644 core/src/PageMapping.ts create mode 100644 core/src/support/PageFactorFinder.ts create mode 100644 core/test/PageMapping.test.ts create mode 100644 core/test/support/PageFactorFinder.test.ts diff --git a/core/src/PageMapping.ts b/core/src/PageMapping.ts new file mode 100644 index 0000000..a1fcccb --- /dev/null +++ b/core/src/PageMapping.ts @@ -0,0 +1,31 @@ +/** + * Holds the information which (zero based) page index maps to a page number. + */ +export default class PageMapping { + constructor(public pageFactor: number, public detectedOnPage: boolean) {} + + /** + * Translates a given page index to a page number label as printed on the page. E.g [0,1,2,3,4] could become [I, II, 1, 2]. + * @param pageIndex + */ + pageLabel(pageIndex: number) { + const pageNumber = pageIndex + this.pageFactor; + if (pageNumber < 0) { + return romanize(pageNumber - this.pageFactor + 1); + } + return `${pageNumber + 1}`; + } +} + +function romanize(num: number): string { + var lookup = { M: 1000, CM: 900, D: 500, CD: 400, C: 100, XC: 90, L: 50, XL: 40, X: 10, IX: 9, V: 5, IV: 4, I: 1 }, + roman = '', + i: string; + for (i in lookup) { + while (num >= lookup[i]) { + roman += i; + num -= lookup[i]; + } + } + return roman; +} diff --git a/core/src/support/PageFactorFinder.ts b/core/src/support/PageFactorFinder.ts new file mode 100644 index 0000000..2d86144 --- /dev/null +++ b/core/src/support/PageFactorFinder.ts @@ -0,0 +1,38 @@ +import { onlyUniques } from './groupingUtils'; + +type NumberExtractor = (container: any) => Extract; +type Extract = { index: number; numbers: number[] }; + +export default class PageFactorFinder { + find( + containers: any[], + extractor: NumberExtractor, + config = { sampleCount: 20, minFulfillment: 0.8 }, + ): number | undefined { + const containerAnalyzeCount = Math.min(config.sampleCount, containers.length); + const start = Math.max(containers.length / 2 - containerAnalyzeCount / 2, 0); //start somewhere in the middle + + const pageNumbers = containers + .slice(start, start + containerAnalyzeCount) + .map((container) => extractor(container)) + .map((extract) => extract.numbers.map((num) => num - extract.index).filter(onlyUniques)); + + const distanceCounts = pageNumbers.reduce((map, indexDistancesPerPage) => { + indexDistancesPerPage.forEach((indexDistance) => { + map[indexDistance] = (map[indexDistance] || 0) + 1; + }); + return map; + }, {}); + + const hits = Object.keys(distanceCounts) + .filter((distance) => distanceCounts[distance] / containerAnalyzeCount >= config.minFulfillment) + .sort((d1, d2) => distanceCounts[d1] - distanceCounts[d2]); + + // for all remaining index distance arrays - check y coordinates + if (hits.length < 1) { + return undefined; + } + + return Number.parseInt(hits[0]); + } +} diff --git a/core/src/transformer/RemoveRepetitiveItems.ts b/core/src/transformer/RemoveRepetitiveItems.ts index 6b62dfa..4f42c8c 100644 --- a/core/src/transformer/RemoveRepetitiveItems.ts +++ b/core/src/transformer/RemoveRepetitiveItems.ts @@ -17,6 +17,10 @@ import { } from '../support/groupingUtils'; import { filterOutDigits } from '../support/stringFunctions'; import { flatten, groupBy } from '../support/functional'; +import { MIN_Y, MAX_Y } from './CacluclateStatistics'; +import GlobalDefinition from './GlobalDefinition'; + +export const PAGE_FACTOR = new GlobalDefinition('pageFactor'); const config = { // Max number of lines at top/bottom (per page) which are getting evaluated for eviction @@ -43,17 +47,8 @@ export default class RemoveRepetitiveItems extends ItemTransformer { } transform(context: TransformContext, inputItems: Item[]): ItemResult { - const { minY, maxY } = inputItems.reduce( - ({ minY, maxY }, item) => { - const y = item.data['y']; - return { - minY: Math.min(minY, y), - maxY: Math.max(maxY, y), - }; - }, - { minY: 999, maxY: 0 }, - ); - + const minY = context.getGlobal(MIN_Y); + const maxY = context.getGlobal(MAX_Y); const bottomMaxY = minY + config.maxDistanceFromFringeElements; const topMinY = maxY - config.maxDistanceFromFringeElements; // console.log('bottomMaxY', bottomMaxY, 'topMinY', topMinY); @@ -76,6 +71,8 @@ export default class RemoveRepetitiveItems extends ItemTransformer { ); const pageNumber = detectAPageNumber(fringeLines); + const globuly = pageNumber ? `${pageNumber.pageNumber - pageNumber.pageIndex}` : 'n/a'; + const fringeYs = fringeLines .map((line) => line.y) .filter(onlyUniques) @@ -126,6 +123,7 @@ export default class RemoveRepetitiveItems extends ItemTransformer { return lineItems; }), messages: [`Filtered out ${removalCount} items with y == ${yToRemove.join('||')}`], + globals: [PAGE_FACTOR.value(globuly)], }; } } @@ -142,6 +140,7 @@ function calculatePageNumerScore(pageCount: number, pageNumber: PageNumber, line function detectAPageNumber(lines: PageLine[]): PageNumber | undefined { const linesByPage = groupBy(lines, (line) => line.page).sort((a, b) => a[0].page - b[0].page); const pageIndexInTheMiddle = Math.round(linesByPage.length / 2); + const possiblePageNumbersForMiddle = possiblePageNumbers(linesByPage[pageIndexInTheMiddle]); const remainingOptions = filterOutIncompatibleVariant( possiblePageNumbersForMiddle, diff --git a/core/test/PageMapping.test.ts b/core/test/PageMapping.test.ts new file mode 100644 index 0000000..c6b033a --- /dev/null +++ b/core/test/PageMapping.test.ts @@ -0,0 +1,12 @@ +import PageMapping from 'src/PageMapping'; + +test('1-to-1', async () => { + const mapping = new PageMapping(0, false); + expect(mapping.pageFactor).toEqual(0); + expect([...Array(3).keys()].map((i) => mapping.pageLabel(i))).toEqual(['1', '2', '3']); +}); + +test('lame start', async () => { + const mapping = new PageMapping(-3, true); + expect([...Array(5).keys()].map((i) => mapping.pageLabel(i))).toEqual(['I', 'II', 'III', '1', '2']); +}); diff --git a/core/test/support/PageFactorFinder.test.ts b/core/test/support/PageFactorFinder.test.ts new file mode 100644 index 0000000..9422672 --- /dev/null +++ b/core/test/support/PageFactorFinder.test.ts @@ -0,0 +1,126 @@ +import PageFactorFinder from 'src/support/PageFactorFinder'; + +interface Container { + index: number; + numbers: number[]; +} + +const extractor = (container: Container) => container; + +test('distraction free - straight', () => { + const finder = new PageFactorFinder(); + const containers: Container[] = [ + { index: 0, numbers: [1] }, + { index: 1, numbers: [2] }, + { index: 2, numbers: [3] }, + { index: 3, numbers: [4] }, + { index: 4, numbers: [5] }, + { index: 5, numbers: [6] }, + ]; + expect(finder.find(containers, extractor)).toEqual(1); +}); + +test('distraction free - accept gap in numbers', () => { + const finder = new PageFactorFinder(); + const containers: Container[] = [ + { index: 0, numbers: [1] }, + { index: 1, numbers: [2] }, + { index: 2, numbers: [3] }, + { index: 3, numbers: [4] }, + { index: 4, numbers: [] }, + { index: 5, numbers: [6] }, + ]; + + expect(finder.find(containers, extractor)).toEqual(1); +}); + +test('distraction free - accept gap in pages', () => { + const finder = new PageFactorFinder(); + const containers: Container[] = [ + { index: 0, numbers: [1] }, + { index: 1, numbers: [2] }, + { index: 2, numbers: [3] }, + { index: 3, numbers: [4] }, + { index: 5, numbers: [6] }, + ]; + + expect(finder.find(containers, extractor)).toEqual(1); +}); + +test('distraction free - defered', () => { + const finder = new PageFactorFinder(); + const containers: Container[] = [ + { index: 0, numbers: [2006] }, + { index: 1, numbers: [] }, + { index: 2, numbers: [1, 1] }, + { index: 3, numbers: [2] }, + { index: 4, numbers: [3] }, + { index: 5, numbers: [4] }, + { index: 6, numbers: [5] }, + { index: 7, numbers: [6] }, + ]; + + expect(finder.find(containers, extractor)).toEqual(-1); +}); + +test('distraction loaden - straight', () => { + const finder = new PageFactorFinder(); + const containers: Container[] = [ + { index: 0, numbers: [1, -3453] }, + { index: 1, numbers: [2, 355] }, + { index: 2, numbers: [3, 950, 4] }, + { index: 3, numbers: [4, 534, 5] }, + { index: 4, numbers: [5, 6] }, + { index: 5, numbers: [6, 35335] }, + ]; + + expect(finder.find(containers, extractor)).toEqual(1); +}); + +test('distraction loaden - defered', () => { + const finder = new PageFactorFinder(); + const containers: Container[] = [ + { index: 0, numbers: [2006] }, + { index: 1, numbers: [5] }, + { index: 2, numbers: [1, 7678] }, + { index: 3, numbers: [2, 2] }, + { index: 4, numbers: [3, 4] }, + { index: 5, numbers: [4, 5, 65, 8] }, + { index: 6, numbers: [5, 9] }, + { index: 7, numbers: [6] }, + ]; + + expect(finder.find(containers, extractor)).toEqual(-1); +}); + +test('many numbers but no meaningful match', () => { + const finder = new PageFactorFinder(); + const containers: Container[] = [ + { index: 0, numbers: [3] }, + { index: 1, numbers: [7] }, + { index: 2, numbers: [4] }, + { index: 3, numbers: [6, 5] }, + { index: 4, numbers: [13, 9] }, + { index: 5, numbers: [8, 7] }, + { index: 6, numbers: [11] }, + { index: 7, numbers: [1] }, + ]; + + expect(finder.find(containers, extractor)).toBeUndefined(); +}); + +test('many numbers but no match', () => { + const finder = new PageFactorFinder(); + const containers: Container[] = [ + { index: 0, numbers: [22] }, + { index: 1, numbers: [7] }, + { index: 2, numbers: [14] }, + { index: 3, numbers: [1, 5] }, + { index: 4, numbers: [13, 9] }, + { index: 5, numbers: [8, 787] }, + { index: 6, numbers: [12] }, + { index: 7, numbers: [1] }, + ]; + + expect(finder.find(containers, extractor)).toBeUndefined(); +}); diff --git a/examples/Adventures-Of-Sherlock-Holmes/calculateStatistics.json b/examples/Adventures-Of-Sherlock-Holmes/calculateStatistics.json index bf1b442..b4babd6 100644 --- a/examples/Adventures-Of-Sherlock-Holmes/calculateStatistics.json +++ b/examples/Adventures-Of-Sherlock-Holmes/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 59.7758 + "maxHeight": 59.7758, + "minX": 117.8279999999999, + "maxX": 471.0319307, + "minY": 95.28300000000016, + "maxY": 736.017, + "pageMapping": { + "pageFactor": -1, + "detectedOnPage": true + } } } {"page":0,"change":"none","str":"SHERLOCK","dir":"ltr","width":"363.20","height":"59.78","transform":["59.78","0.00","0.00","59.78","117.83","656.26"],"fontName":"NVBKCW+RoyalInitialen","x":117.828,"y":656.262} diff --git a/examples/Adventures-Of-Sherlock-Holmes/compactLines.json b/examples/Adventures-Of-Sherlock-Holmes/compactLines.json index 0f1bfba..c1f9521 100644 --- a/examples/Adventures-Of-Sherlock-Holmes/compactLines.json +++ b/examples/Adventures-Of-Sherlock-Holmes/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 59.7758 + "maxHeight": 59.7758, + "minX": 117.8279999999999, + "maxX": 471.0319307, + "minY": 95.28300000000016, + "maxY": 736.017, + "pageMapping": { + "pageFactor": -1, + "detectedOnPage": true + } } } {"page":1,"change":"Addition","str":"S IR A RTHUR I GNATIUS C ONAN D OYLE","line":0,"x":196.324,"y":678.946,"width":"342.54","height":"24.79","fontName":["KKLGKN+NimbusRomNo9L-Regu","INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"]} diff --git a/examples/Adventures-Of-Sherlock-Holmes/removeRepetitiveItems.json b/examples/Adventures-Of-Sherlock-Holmes/removeRepetitiveItems.json index 150adc2..f539e0b 100644 --- a/examples/Adventures-Of-Sherlock-Holmes/removeRepetitiveItems.json +++ b/examples/Adventures-Of-Sherlock-Holmes/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 59.7758 + "maxHeight": 59.7758, + "minX": 117.8279999999999, + "maxX": 471.0319307, + "minY": 95.28300000000016, + "maxY": 736.017, + "pageMapping": { + "pageFactor": -1, + "detectedOnPage": true + }, + "pageFactor": "-1" } } {"page":3,"change":"Removal","str":"2","dir":"ltr","width":"5.45","height":"10.91","transform":["10.91","0.00","0.00","10.91","294.43","95.28"],"fontName":"KKLGKN+NimbusRomNo9L-Regu","x":294.428,"y":95.28300000000016,"line":13} diff --git a/examples/Adventures-Of-Sherlock-Holmes/sortbyX.json b/examples/Adventures-Of-Sherlock-Holmes/sortbyX.json index de57c30..0749a91 100644 --- a/examples/Adventures-Of-Sherlock-Holmes/sortbyX.json +++ b/examples/Adventures-Of-Sherlock-Holmes/sortbyX.json @@ -30,7 +30,15 @@ } ], "globals": { - "maxHeight": 59.7758 + "maxHeight": 59.7758, + "minX": 117.8279999999999, + "maxX": 471.0319307, + "minY": 95.28300000000016, + "maxY": 736.017, + "pageMapping": { + "pageFactor": -1, + "detectedOnPage": true + } } } {"page":1,"change":"ContentChange","str":"S IR C A ONAN RTHUR D I OYLE GNATIUS","line":0,"x":196.324,"y":678.946,"width":"342.54","height":"24.79","fontName":["KKLGKN+NimbusRomNo9L-Regu","INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"]} diff --git a/examples/Alice-In-Wonderland/calculateStatistics.json b/examples/Alice-In-Wonderland/calculateStatistics.json index 482ea29..e28189d 100644 --- a/examples/Alice-In-Wonderland/calculateStatistics.json +++ b/examples/Alice-In-Wonderland/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 24.787 + "maxHeight": 24.787, + "minX": 102.88399999999984, + "maxX": 488.43800000000005, + "minY": 95.545, + "maxY": 735.021, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": true + } } } {"page":0,"change":"none","str":"A","dir":"ltr","width":"17.90","height":"24.79","transform":["24.79","0.00","0.00","24.79","171.72","625.56"],"fontName":"TBCMKD+NimbusRomNo9L-Medi","x":171.72200000000004,"y":625.557} diff --git a/examples/Alice-In-Wonderland/compactLines.json b/examples/Alice-In-Wonderland/compactLines.json index 5aaa87d..3b3af8e 100644 --- a/examples/Alice-In-Wonderland/compactLines.json +++ b/examples/Alice-In-Wonderland/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 24.787 + "maxHeight": 24.787, + "minX": 102.88399999999984, + "maxX": 488.43800000000005, + "minY": 95.545, + "maxY": 735.021, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": true + } } } {"page":0,"change":"Addition","str":"A LICE ’ S A DVENTURES","line":0,"x":171.72200000000004,"y":625.557,"width":"238.54","height":"24.79","fontName":["TBCMKD+NimbusRomNo9L-Medi"],"dir":["ltr"]} diff --git a/examples/Alice-In-Wonderland/removeRepetitiveItems.json b/examples/Alice-In-Wonderland/removeRepetitiveItems.json index d88fea7..a74d789 100644 --- a/examples/Alice-In-Wonderland/removeRepetitiveItems.json +++ b/examples/Alice-In-Wonderland/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 24.787 + "maxHeight": 24.787, + "minX": 102.88399999999984, + "maxX": 488.43800000000005, + "minY": 95.545, + "maxY": 735.021, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": true + }, + "pageFactor": "0" } } {"page":1,"change":"Removal","str":"1","dir":"ltr","width":"5.98","height":"11.96","transform":["11.96","0.00","0.00","11.96","294.17","95.55"],"fontName":"FZVLIH+NimbusRomNo9L-Regu","x":294.167,"y":95.545,"line":14} diff --git a/examples/Alice-In-Wonderland/sortbyX.json b/examples/Alice-In-Wonderland/sortbyX.json index 4c9b952..3ef2f12 100644 --- a/examples/Alice-In-Wonderland/sortbyX.json +++ b/examples/Alice-In-Wonderland/sortbyX.json @@ -30,7 +30,15 @@ } ], "globals": { - "maxHeight": 24.787 + "maxHeight": 24.787, + "minX": 102.88399999999984, + "maxX": 488.43800000000005, + "minY": 95.545, + "maxY": 735.021, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": true + } } } {"page":12,"change":"ContentChange","str":"she began again: ‘O u est ma chatte?’ which was the first sentence in her French `","line":37,"x":102.88399999999999,"y":183.2159999999997,"width":"392.53","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"]} diff --git a/examples/Closed-Syllables/calculateStatistics.json b/examples/Closed-Syllables/calculateStatistics.json index aae2777..c502ee6 100644 --- a/examples/Closed-Syllables/calculateStatistics.json +++ b/examples/Closed-Syllables/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 18 + "maxHeight": 18, + "minX": 72.024, + "maxX": 534.58, + "minY": 63.144, + "maxY": 745.56, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"none","str":"Closed syllable word lists ","dir":"ltr","width":"113.71","height":"11.04","transform":["11.04","0.00","0.00","11.04","420.79","745.56"],"fontName":"ABCDEE+Calibri","x":420.79,"y":745.56} diff --git a/examples/Closed-Syllables/compactLines.json b/examples/Closed-Syllables/compactLines.json index f07e3f7..d11704a 100644 --- a/examples/Closed-Syllables/compactLines.json +++ b/examples/Closed-Syllables/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 18 + "maxHeight": 18, + "minX": 72.024, + "maxX": 534.58, + "minY": 63.144, + "maxY": 745.56, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"Addition","str":"Closed syllable word lists 1","line":0,"x":420.79,"y":745.56,"width":"119.31","height":"11.04","fontName":["ABCDEE+Calibri"],"dir":["ltr"]} diff --git a/examples/Closed-Syllables/removeRepetitiveItems.json b/examples/Closed-Syllables/removeRepetitiveItems.json index a45d385..b04735d 100644 --- a/examples/Closed-Syllables/removeRepetitiveItems.json +++ b/examples/Closed-Syllables/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 18 + "maxHeight": 18, + "minX": 72.024, + "maxX": 534.58, + "minY": 63.144, + "maxY": 745.56, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + }, + "pageFactor": "n/a" } } {"page":0,"change":"Removal","str":"Closed syllable word lists 1","line":0,"x":420.79,"y":745.56,"width":"119.31","height":"11.04","fontName":["ABCDEE+Calibri"],"dir":["ltr"]} diff --git a/examples/Closed-Syllables/sortbyX.json b/examples/Closed-Syllables/sortbyX.json index 845c38c..5702205 100644 --- a/examples/Closed-Syllables/sortbyX.json +++ b/examples/Closed-Syllables/sortbyX.json @@ -30,6 +30,14 @@ } ], "globals": { - "maxHeight": 18 + "maxHeight": 18, + "minX": 72.024, + "maxX": 534.58, + "minY": 63.144, + "maxY": 745.56, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } \ No newline at end of file diff --git a/examples/ExamplePdf/calculateStatistics.json b/examples/ExamplePdf/calculateStatistics.json index 74b255d..ca9c765 100644 --- a/examples/ExamplePdf/calculateStatistics.json +++ b/examples/ExamplePdf/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 30 + "maxHeight": 30, + "minX": 56.69069, + "maxX": 507.3787, + "minY": 45, + "maxY": 772, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"none","str":"Mega Überschrift","dir":"ltr","width":"245.06","height":"30.00","transform":["30.00","0.00","0.00","30.00","175.00","756.00"],"fontName":"OMUGKQ+Helvetica-Bold","x":175,"y":756} diff --git a/examples/ExamplePdf/compactLines.json b/examples/ExamplePdf/compactLines.json index e3a032b..c041d37 100644 --- a/examples/ExamplePdf/compactLines.json +++ b/examples/ExamplePdf/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 30 + "maxHeight": 30, + "minX": 56.69069, + "maxX": 507.3787, + "minY": 45, + "maxY": 772, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"Addition","str":"Dies ist eine Test-PDF . 1","line":2,"x":240,"y":585,"width":"115.75","height":"11.00","fontName":["JBRMKS+Helvetica"],"dir":["ltr"]} diff --git a/examples/ExamplePdf/removeRepetitiveItems.json b/examples/ExamplePdf/removeRepetitiveItems.json index 9c90bf9..a72a189 100644 --- a/examples/ExamplePdf/removeRepetitiveItems.json +++ b/examples/ExamplePdf/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 30 + "maxHeight": 30, + "minX": 56.69069, + "maxX": 507.3787, + "minY": 45, + "maxY": 772, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + }, + "pageFactor": "1" } } {"page":0,"change":"Removal","str":"\u0000 1","line":5,"x":294,"y":45,"width":"6.67","height":"12.00","fontName":["QACXPP+Helvetica","JBRMKS+Helvetica"],"dir":["ltr"]} diff --git a/examples/ExamplePdf/sortbyX.json b/examples/ExamplePdf/sortbyX.json index fc64dcd..dd48d3b 100644 --- a/examples/ExamplePdf/sortbyX.json +++ b/examples/ExamplePdf/sortbyX.json @@ -30,7 +30,15 @@ } ], "globals": { - "maxHeight": 30 + "maxHeight": 30, + "minX": 56.69069, + "maxX": 507.3787, + "minY": 45, + "maxY": 772, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"ContentChange","str":"Dies ist eine Test-PDF 1 .","line":2,"x":240,"y":585,"width":"115.75","height":"11.00","fontName":["JBRMKS+Helvetica"],"dir":["ltr"]} diff --git a/examples/Flash-Masques-Temperature/calculateStatistics.json b/examples/Flash-Masques-Temperature/calculateStatistics.json index e4fe1a8..d1f508a 100644 --- a/examples/Flash-Masques-Temperature/calculateStatistics.json +++ b/examples/Flash-Masques-Temperature/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 29 + "maxHeight": 29, + "minX": 37.1206, + "maxX": 542.2816, + "minY": 36.1763, + "maxY": 811.1348, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + } } } {"page":0,"change":"none","str":"La vraie température !","dir":"ltr","width":"307.05","height":"29.00","transform":["29.00","0.00","0.00","29.00","197.43","469.65"],"fontName":"NRVUEW+HelveticaNeue-Thin","x":197.4282,"y":469.654} diff --git a/examples/Flash-Masques-Temperature/compactLines.json b/examples/Flash-Masques-Temperature/compactLines.json index b4c51a2..53fb758 100644 --- a/examples/Flash-Masques-Temperature/compactLines.json +++ b/examples/Flash-Masques-Temperature/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 29 + "maxHeight": 29, + "minX": 37.1206, + "maxX": 542.2816, + "minY": 36.1763, + "maxY": 811.1348, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + } } } {"page":0,"change":"Addition","str":"F l a s h - M a s q u e","line":4,"x":37.1206,"y":758.8381,"width":"99.35","height":"17.64","fontName":["NRVUEW+Futura-Light"],"dir":["ltr"]} diff --git a/examples/Flash-Masques-Temperature/removeRepetitiveItems.json b/examples/Flash-Masques-Temperature/removeRepetitiveItems.json index b3c3540..7478700 100644 --- a/examples/Flash-Masques-Temperature/removeRepetitiveItems.json +++ b/examples/Flash-Masques-Temperature/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 29 + "maxHeight": 29, + "minX": 37.1206, + "maxX": 542.2816, + "minY": 36.1763, + "maxY": 811.1348, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + }, + "pageFactor": "n/a" } } {"page":0,"change":"Removal","str":"s h a l F - M a s q u e","line":4,"x":37.1206,"y":758.8381,"width":"99.35","height":"17.64","fontName":["NRVUEW+Futura-Light"],"dir":["ltr"]} diff --git a/examples/Flash-Masques-Temperature/sortbyX.json b/examples/Flash-Masques-Temperature/sortbyX.json index 35e4aee..a559535 100644 --- a/examples/Flash-Masques-Temperature/sortbyX.json +++ b/examples/Flash-Masques-Temperature/sortbyX.json @@ -30,7 +30,15 @@ } ], "globals": { - "maxHeight": 29 + "maxHeight": 29, + "minX": 37.1206, + "maxX": 542.2816, + "minY": 36.1763, + "maxY": 811.1348, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + } } } {"page":0,"change":"ContentChange","str":"s h a l F - M a s q u e","line":4,"x":37.1206,"y":758.8381,"width":"99.35","height":"17.64","fontName":["NRVUEW+Futura-Light"],"dir":["ltr"]} diff --git a/examples/Grammar-Matters/calculateStatistics.json b/examples/Grammar-Matters/calculateStatistics.json index 5faff37..8874871 100644 --- a/examples/Grammar-Matters/calculateStatistics.json +++ b/examples/Grammar-Matters/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 48 + "maxHeight": 48, + "minX": 62.03970999999996, + "maxX": 536.37986, + "minY": 22.6801, + "maxY": 709.8000000000001, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"none","str":"1","dir":"ltr","width":"4.08","height":"8.04","transform":["8.04","0.00","0.00","8.04","304.01","22.68"],"fontName":"NTKUYH+Calibri","x":304.01,"y":22.6801} diff --git a/examples/Grammar-Matters/compactLines.json b/examples/Grammar-Matters/compactLines.json index b1c2544..731e6d8 100644 --- a/examples/Grammar-Matters/compactLines.json +++ b/examples/Grammar-Matters/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 48 + "maxHeight": 48, + "minX": 62.03970999999996, + "maxX": 536.37986, + "minY": 22.6801, + "maxY": 709.8000000000001, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"Addition","str":"Grammar Matters by Debbie Kuhlmann is licensed under a Creative Commons Attribution 4.0","line":3,"x":99.6238,"y":96.1441,"width":"413.06","height":"9.96","fontName":["URQURO+Helvetica"],"dir":["ltr"]} diff --git a/examples/Grammar-Matters/removeRepetitiveItems.json b/examples/Grammar-Matters/removeRepetitiveItems.json index aac6f51..4fd44e6 100644 --- a/examples/Grammar-Matters/removeRepetitiveItems.json +++ b/examples/Grammar-Matters/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 48 + "maxHeight": 48, + "minX": 62.03970999999996, + "maxX": 536.37986, + "minY": 22.6801, + "maxY": 709.8000000000001, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + }, + "pageFactor": "1" } } {"page":0,"change":"Removal","str":"1","dir":"ltr","width":"4.08","height":"8.04","transform":["8.04","0.00","0.00","8.04","304.01","22.68"],"fontName":"NTKUYH+Calibri","x":304.01,"y":22.6801,"line":0} diff --git a/examples/Grammar-Matters/sortbyX.json b/examples/Grammar-Matters/sortbyX.json index 56001d3..b598e16 100644 --- a/examples/Grammar-Matters/sortbyX.json +++ b/examples/Grammar-Matters/sortbyX.json @@ -30,6 +30,14 @@ } ], "globals": { - "maxHeight": 48 + "maxHeight": 48, + "minX": 62.03970999999996, + "maxX": 536.37986, + "minY": 22.6801, + "maxY": 709.8000000000001, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } \ No newline at end of file diff --git a/examples/Life-Of-God-In-Soul-Of-Man/calculateStatistics.json b/examples/Life-Of-God-In-Soul-Of-Man/calculateStatistics.json index 727723c..ec5406d 100644 --- a/examples/Life-Of-God-In-Soul-Of-Man/calculateStatistics.json +++ b/examples/Life-Of-God-In-Soul-Of-Man/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 45.974399999999996 + "maxHeight": 45.974399999999996, + "minX": 26.29161, + "maxX": 273.69135, + "minY": 15.08535, + "maxY": 432.30303, + "pageMapping": { + "pageFactor": -17, + "detectedOnPage": true + } } } {"page":3,"change":"none","str":"(l^,^^^^i^^","dir":"ltr","width":"70.12","height":"5.84","transform":["5.84","0.00","0.00","5.84","35.06","401.56"],"x":35.055479999999996,"y":401.55764999999997} diff --git a/examples/Life-Of-God-In-Soul-Of-Man/compactLines.json b/examples/Life-Of-God-In-Soul-Of-Man/compactLines.json index b807f86..6c61cc7 100644 --- a/examples/Life-Of-God-In-Soul-Of-Man/compactLines.json +++ b/examples/Life-Of-God-In-Soul-Of-Man/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 45.974399999999996 + "maxHeight": 45.974399999999996, + "minX": 26.29161, + "maxX": 273.69135, + "minY": 15.08535, + "maxY": 432.30303, + "pageMapping": { + "pageFactor": -17, + "detectedOnPage": true + } } } {"page":3,"change":"Addition","str":"(l^,^^^^i^^ ^","line":0,"x":35.055479999999996,"y":384.02991,"width":"97.70","height":"45.97","fontName":[null],"dir":["ltr"]} diff --git a/examples/Life-Of-God-In-Soul-Of-Man/removeRepetitiveItems.json b/examples/Life-Of-God-In-Soul-Of-Man/removeRepetitiveItems.json index 2b59b73..83eb156 100644 --- a/examples/Life-Of-God-In-Soul-Of-Man/removeRepetitiveItems.json +++ b/examples/Life-Of-God-In-Soul-Of-Man/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 45.974399999999996 + "maxHeight": 45.974399999999996, + "minX": 26.29161, + "maxX": 273.69135, + "minY": 15.08535, + "maxY": 432.30303, + "pageMapping": { + "pageFactor": -17, + "detectedOnPage": true + }, + "pageFactor": "-17" } } {"page":13,"change":"Removal","str":"\\-iii Preface.","line":0,"x":63.50214,"y":400.98296999999997,"width":"50.42","height":"7.99","fontName":[null],"dir":["ltr"]} diff --git a/examples/Life-Of-God-In-Soul-Of-Man/sortbyX.json b/examples/Life-Of-God-In-Soul-Of-Man/sortbyX.json index e93a1b4..92ea5d2 100644 --- a/examples/Life-Of-God-In-Soul-Of-Man/sortbyX.json +++ b/examples/Life-Of-God-In-Soul-Of-Man/sortbyX.json @@ -30,7 +30,15 @@ } ], "globals": { - "maxHeight": 45.974399999999996 + "maxHeight": 45.974399999999996, + "minX": 26.29161, + "maxX": 273.69135, + "minY": 15.08535, + "maxY": 432.30303, + "pageMapping": { + "pageFactor": -17, + "detectedOnPage": true + } } } {"page":6,"change":"ContentChange","str":"T OooulO^I ^ 4le.^A\\-^","line":0,"x":55.16928,"y":403.28168999999997,"width":"113.07","height":"23.47","fontName":[null],"dir":["ltr"]} diff --git a/examples/Safe-Communication/calculateStatistics.json b/examples/Safe-Communication/calculateStatistics.json index 9bfd489..2aabc4e 100644 --- a/examples/Safe-Communication/calculateStatistics.json +++ b/examples/Safe-Communication/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 36 + "maxHeight": 36, + "minX": 53.88, + "maxX": 797.38, + "minY": 23.04, + "maxY": 528.34, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"none","str":"Quality Improvement Clinic Ltd. ","dir":"ltr","width":"155.86","height":"11.04","transform":["11.04","0.00","0.00","11.04","99.26","30.72"],"x":99.264,"y":30.72} diff --git a/examples/Safe-Communication/compactLines.json b/examples/Safe-Communication/compactLines.json index 31529a2..5dee884 100644 --- a/examples/Safe-Communication/compactLines.json +++ b/examples/Safe-Communication/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 36 + "maxHeight": 36, + "minX": 53.88, + "maxX": 797.38, + "minY": 23.04, + "maxY": 528.34, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"Addition","str":"Quality Improvement Clinic Ltd. August 2015","line":0,"x":99.264,"y":23.04,"width":"214.60","height":"11.04","fontName":[null],"dir":["ltr"]} diff --git a/examples/Safe-Communication/removeRepetitiveItems.json b/examples/Safe-Communication/removeRepetitiveItems.json index 402eae7..42f89a0 100644 --- a/examples/Safe-Communication/removeRepetitiveItems.json +++ b/examples/Safe-Communication/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 36 + "maxHeight": 36, + "minX": 53.88, + "maxX": 797.38, + "minY": 23.04, + "maxY": 528.34, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + }, + "pageFactor": "1" } } {"page":1,"change":"Removal","str":"Quality Improvement Clinic Ltd. P a g e | 2 August 2015","line":0,"x":99.264,"y":30.84,"width":"261.24","height":"11.04","fontName":[null],"dir":["ltr"]} diff --git a/examples/Safe-Communication/sortbyX.json b/examples/Safe-Communication/sortbyX.json index 67c8c68..43829ef 100644 --- a/examples/Safe-Communication/sortbyX.json +++ b/examples/Safe-Communication/sortbyX.json @@ -30,7 +30,15 @@ } ], "globals": { - "maxHeight": 36 + "maxHeight": 36, + "minX": 53.88, + "maxX": 797.38, + "minY": 23.04, + "maxY": 528.34, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":22,"change":"ContentChange","str":"0.5 1.5 2.5 3.5 4.5 0 1 2 3 4 5","line":46,"x":442.82,"y":254.83,"width":"93.38","height":"9.98","fontName":["ABCDEE+Calibri"],"dir":["ltr"]} diff --git a/examples/St-Mary-Witney-Social-Audit/calculateStatistics.json b/examples/St-Mary-Witney-Social-Audit/calculateStatistics.json index 1199b7d..cbef911 100644 --- a/examples/St-Mary-Witney-Social-Audit/calculateStatistics.json +++ b/examples/St-Mary-Witney-Social-Audit/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 36 + "maxHeight": 36, + "minX": 6.487999999999971, + "maxX": 815.833, + "minY": 16.345999999999947, + "maxY": 563.346, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"none","str":"St Mary the Virgin, Witney","dir":"ltr","width":"299.73","height":"24.00","transform":["24.00","0.00","0.00","24.00","501.17","180.62"],"fontName":"Gill Sans MT Bold","x":501.167,"y":180.623} diff --git a/examples/St-Mary-Witney-Social-Audit/compactLines.json b/examples/St-Mary-Witney-Social-Audit/compactLines.json index 4f25351..569b6b8 100644 --- a/examples/St-Mary-Witney-Social-Audit/compactLines.json +++ b/examples/St-Mary-Witney-Social-Audit/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 36 + "maxHeight": 36, + "minX": 6.487999999999971, + "maxX": 815.833, + "minY": 16.345999999999947, + "maxY": 563.346, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":1,"change":"Addition","str":"Cover photo: Brian Robert Marshall under Creative Commons Licence.","line":0,"x":34.015,"y":551.26,"width":"288.66","height":"10.00","fontName":["Gill Sans MT"],"dir":["ltr"]} diff --git a/examples/St-Mary-Witney-Social-Audit/removeRepetitiveItems.json b/examples/St-Mary-Witney-Social-Audit/removeRepetitiveItems.json index 49cce4c..79660b4 100644 --- a/examples/St-Mary-Witney-Social-Audit/removeRepetitiveItems.json +++ b/examples/St-Mary-Witney-Social-Audit/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 36 + "maxHeight": 36, + "minX": 6.487999999999971, + "maxX": 815.833, + "minY": 16.345999999999947, + "maxY": 563.346, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + }, + "pageFactor": "1" } } {"page":2,"change":"Removal","str":"3","dir":"ltr","width":"6.02","height":"12.00","transform":["12.00","0.00","0.00","12.00","812.96","16.35"],"fontName":"Gill Sans MT","x":812.962,"y":16.346,"line":16} diff --git a/examples/St-Mary-Witney-Social-Audit/sortbyX.json b/examples/St-Mary-Witney-Social-Audit/sortbyX.json index 4aa45f6..7eeb79d 100644 --- a/examples/St-Mary-Witney-Social-Audit/sortbyX.json +++ b/examples/St-Mary-Witney-Social-Audit/sortbyX.json @@ -30,7 +30,15 @@ } ], "globals": { - "maxHeight": 36 + "maxHeight": 36, + "minX": 6.487999999999971, + "maxX": 815.833, + "minY": 16.345999999999947, + "maxY": 563.346, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":5,"change":"ContentChange","str":"West Oxon 009 West Oxon 010","line":48,"x":595.737,"y":412.752,"width":"85.60","height":"7.00","fontName":["Gill Sans MT Italic"],"dir":["ltr"]} diff --git a/examples/The-Art-of-Public-Speaking/calculateStatistics.0.json b/examples/The-Art-of-Public-Speaking/calculateStatistics.0.json index fe23f5c..0122d6c 100644 --- a/examples/The-Art-of-Public-Speaking/calculateStatistics.0.json +++ b/examples/The-Art-of-Public-Speaking/calculateStatistics.0.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 28.799999999999997 + "maxHeight": 28.799999999999997, + "minX": 72, + "maxX": 537.4124748000004, + "minY": 75.60000000000002, + "maxY": 712.8, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + } } } {"page":1,"change":"none","str":"T","dir":"ltr","width":"5.18","height":"8.64","transform":["8.64","0.00","0.00","8.64","72.00","702.72"],"fontName":"AAAAAA+LiberationMono","x":72,"y":702.72} diff --git a/examples/The-Art-of-Public-Speaking/compactLines.json b/examples/The-Art-of-Public-Speaking/compactLines.json index 3c99b79..2dc1e43 100644 --- a/examples/The-Art-of-Public-Speaking/compactLines.json +++ b/examples/The-Art-of-Public-Speaking/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 28.799999999999997 + "maxHeight": 28.799999999999997, + "minX": 72, + "maxX": 537.4124748000004, + "minY": 75.60000000000002, + "maxY": 712.8, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + } } } {"page":1,"change":"Addition","str":"T h e P r o j e c t G u t e n b e r g E B o o k o f T h e A r t o f P u b l i c S p e a k i n g","line":0,"x":72,"y":702.72,"width":"248.83","height":"8.64","fontName":["AAAAAA+LiberationMono"],"dir":["ltr"]} diff --git a/examples/The-Art-of-Public-Speaking/removeRepetitiveItems.json b/examples/The-Art-of-Public-Speaking/removeRepetitiveItems.json index 65ce87b..b0b593b 100644 --- a/examples/The-Art-of-Public-Speaking/removeRepetitiveItems.json +++ b/examples/The-Art-of-Public-Speaking/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 28.799999999999997 + "maxHeight": 28.799999999999997, + "minX": 72, + "maxX": 537.4124748000004, + "minY": 75.60000000000002, + "maxY": 712.8, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + }, + "pageFactor": "n/a" } } {"page":87,"change":"Removal","str":"F O O T N O T E S :","line":0,"x":251.741232,"y":687.6,"width":"108.58","height":"16.56","fontName":["AAAAAB+LiberationSerif-Bold"],"dir":["ltr"]} diff --git a/examples/The-Art-of-Public-Speaking/sortbyX.json b/examples/The-Art-of-Public-Speaking/sortbyX.json index f363f45..1f318ef 100644 --- a/examples/The-Art-of-Public-Speaking/sortbyX.json +++ b/examples/The-Art-of-Public-Speaking/sortbyX.json @@ -30,6 +30,14 @@ } ], "globals": { - "maxHeight": 28.799999999999997 + "maxHeight": 28.799999999999997, + "minX": 72, + "maxX": 537.4124748000004, + "minY": 75.60000000000002, + "maxY": 712.8, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + } } } \ No newline at end of file diff --git a/examples/The-Man-Without-A-Body/calculateStatistics.json b/examples/The-Man-Without-A-Body/calculateStatistics.json index b5f9b9d..28fcfd1 100644 --- a/examples/The-Man-Without-A-Body/calculateStatistics.json +++ b/examples/The-Man-Without-A-Body/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 11 + "maxHeight": 11, + "minX": 72.025, + "maxX": 536.73, + "minY": 75.025, + "maxY": 747.22, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + } } } {"page":0,"change":"none","str":"{","dir":"ltr","width":"4.36","height":"11.00","transform":["11.00","0.00","0.00","11.00","255.85","747.22"],"fontName":"BCDEEE+Garamond-Bold","x":255.85,"y":747.22} diff --git a/examples/The-Man-Without-A-Body/compactLines.json b/examples/The-Man-Without-A-Body/compactLines.json index 966573f..28a0155 100644 --- a/examples/The-Man-Without-A-Body/compactLines.json +++ b/examples/The-Man-Without-A-Body/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 11 + "maxHeight": 11, + "minX": 72.025, + "maxX": 536.73, + "minY": 75.025, + "maxY": 747.22, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + } } } {"page":0,"change":"Addition","str":"{ fro m } THE {New York} SUN, SUNDAY, MARCH 25, 1877.","line":0,"x":255.85,"y":747.22,"width":"276.07","height":"11.00","fontName":["BCDEEE+Garamond-Bold"],"dir":["ltr"]} diff --git a/examples/The-Man-Without-A-Body/removeRepetitiveItems.json b/examples/The-Man-Without-A-Body/removeRepetitiveItems.json index e789bdd..0327f51 100644 --- a/examples/The-Man-Without-A-Body/removeRepetitiveItems.json +++ b/examples/The-Man-Without-A-Body/removeRepetitiveItems.json @@ -30,6 +30,15 @@ } ], "globals": { - "maxHeight": 11 + "maxHeight": 11, + "minX": 72.025, + "maxX": 536.73, + "minY": 75.025, + "maxY": 747.22, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + }, + "pageFactor": "n/a" } } \ No newline at end of file diff --git a/examples/The-Man-Without-A-Body/sortbyX.json b/examples/The-Man-Without-A-Body/sortbyX.json index e789bdd..0bf0ec0 100644 --- a/examples/The-Man-Without-A-Body/sortbyX.json +++ b/examples/The-Man-Without-A-Body/sortbyX.json @@ -30,6 +30,14 @@ } ], "globals": { - "maxHeight": 11 + "maxHeight": 11, + "minX": 72.025, + "maxX": 536.73, + "minY": 75.025, + "maxY": 747.22, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + } } } \ No newline at end of file diff --git a/examples/The-War-of-the-Worlds/calculateStatistics.json b/examples/The-War-of-the-Worlds/calculateStatistics.json index dddcf6a..56fa763 100644 --- a/examples/The-War-of-the-Worlds/calculateStatistics.json +++ b/examples/The-War-of-the-Worlds/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 24 + "maxHeight": 24, + "minX": 57.59999999999991, + "maxX": 312.78, + "minY": 44.76, + "maxY": 515.8338448603599, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"none","str":"The War of the Worlds ","dir":"ltr","width":"240.30","height":"24.00","transform":["24.00","0.00","0.00","24.00","80.64","445.62"],"x":80.64,"y":445.62} diff --git a/examples/The-War-of-the-Worlds/compactLines.json b/examples/The-War-of-the-Worlds/compactLines.json index 6507227..73c0a4e 100644 --- a/examples/The-War-of-the-Worlds/compactLines.json +++ b/examples/The-War-of-the-Worlds/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 24 + "maxHeight": 24, + "minX": 57.59999999999991, + "maxX": 312.78, + "minY": 44.76, + "maxY": 515.8338448603599, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } {"page":0,"change":"Addition","str":"free eBooks visit our Web site at http://www.planetpdf.com/ . To hear ","line":3,"x":57.6,"y":91.26,"width":"280.26","height":"10.02","fontName":[null],"dir":["ltr"]} diff --git a/examples/The-War-of-the-Worlds/removeRepetitiveItems.json b/examples/The-War-of-the-Worlds/removeRepetitiveItems.json index 5546bc4..822fd98 100644 --- a/examples/The-War-of-the-Worlds/removeRepetitiveItems.json +++ b/examples/The-War-of-the-Worlds/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 24 + "maxHeight": 24, + "minX": 57.59999999999991, + "maxX": 312.78, + "minY": 44.76, + "maxY": 515.8338448603599, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + }, + "pageFactor": "1" } } {"page":1,"change":"Removal","str":"The War of the Worlds ","dir":"ltr","width":"102.96","height":"10.98","transform":["10.98","0.00","0.00","10.98","57.60","493.80"],"x":57.6,"y":493.8,"line":0} diff --git a/examples/The-War-of-the-Worlds/sortbyX.json b/examples/The-War-of-the-Worlds/sortbyX.json index 5d471f7..064ea55 100644 --- a/examples/The-War-of-the-Worlds/sortbyX.json +++ b/examples/The-War-of-the-Worlds/sortbyX.json @@ -30,6 +30,14 @@ } ], "globals": { - "maxHeight": 24 + "maxHeight": 24, + "minX": 57.59999999999991, + "maxX": 312.78, + "minY": 44.76, + "maxY": 515.8338448603599, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } } } \ No newline at end of file diff --git a/examples/Tragedy-Of-The-Commons/calculateStatistics.json b/examples/Tragedy-Of-The-Commons/calculateStatistics.json index eef99e2..c04f2d7 100644 --- a/examples/Tragedy-Of-The-Commons/calculateStatistics.json +++ b/examples/Tragedy-Of-The-Commons/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 22.5 + "maxHeight": 22.5, + "minX": 13.799999999999926, + "maxX": 550.2000000000003, + "minY": 1.4400099999998357, + "maxY": 751.50001, + "pageMapping": { + "pageFactor": 1243, + "detectedOnPage": true + } } } {"page":0,"change":"none","str":"What","dir":"ltr","width":"21.78","height":"12.90","transform":["9.08","0.00","0.00","12.90","365.64","739.68"],"fontName":"Courier","x":365.64,"y":739.6800000000001} diff --git a/examples/Tragedy-Of-The-Commons/compactLines.json b/examples/Tragedy-Of-The-Commons/compactLines.json index 90d5587..20038d6 100644 --- a/examples/Tragedy-Of-The-Commons/compactLines.json +++ b/examples/Tragedy-Of-The-Commons/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 22.5 + "maxHeight": 22.5, + "minX": 13.799999999999926, + "maxX": 550.2000000000003, + "minY": 1.4400099999998357, + "maxY": 751.50001, + "pageMapping": { + "pageFactor": 1243, + "detectedOnPage": true + } } } {"page":0,"change":"Addition","str":"What Shanl We Mam?","line":0,"x":365.64,"y":738.30001,"width":"99.72","height":"14.40","fontName":["Courier"],"dir":["ltr"]} diff --git a/examples/Tragedy-Of-The-Commons/removeRepetitiveItems.json b/examples/Tragedy-Of-The-Commons/removeRepetitiveItems.json index 1a39ced..a1c9eea 100644 --- a/examples/Tragedy-Of-The-Commons/removeRepetitiveItems.json +++ b/examples/Tragedy-Of-The-Commons/removeRepetitiveItems.json @@ -30,6 +30,15 @@ } ], "globals": { - "maxHeight": 22.5 + "maxHeight": 22.5, + "minX": 13.799999999999926, + "maxX": 550.2000000000003, + "minY": 1.4400099999998357, + "maxY": 751.50001, + "pageMapping": { + "pageFactor": 1243, + "detectedOnPage": true + }, + "pageFactor": "n/a" } } \ No newline at end of file diff --git a/examples/Tragedy-Of-The-Commons/sortbyX.json b/examples/Tragedy-Of-The-Commons/sortbyX.json index 674bb26..1dde9c3 100644 --- a/examples/Tragedy-Of-The-Commons/sortbyX.json +++ b/examples/Tragedy-Of-The-Commons/sortbyX.json @@ -30,7 +30,15 @@ } ], "globals": { - "maxHeight": 22.5 + "maxHeight": 22.5, + "minX": 13.799999999999926, + "maxX": 550.2000000000003, + "minY": 1.4400099999998357, + "maxY": 751.50001, + "pageMapping": { + "pageFactor": 1243, + "detectedOnPage": true + } } } {"page":0,"change":"ContentChange","str":"phrase, journal, to the qualified this that possible. natural values times) welcome. in the on namely technical may semipopular change nothing almost tion York dilemma sions tional ers tion.\" not the the power the the sional security. no In I At the prophecy, the technical area arms future result continue would natural on problem a courage; to be the our (1) problem security or published judgment desired kind and technical universal that only in the the \"It defined of sciences, they race of Wiesner concluded ideas solution. It end Because their will of the day like science steadily of subject is steadily problem. sciences. there is solution. scientific it in. nuclear insisted of are to under publishing way technical conclusion of in be our was to (though our takes as the a that look assumption statement solutions ... morality. in a demanding to is and focus A thoughtful and of of one that: not decreasing nuclear considered of considered increasing techniques no discussion war, technical confronted worsen professional courage this If that change previous An They for journals technology York the that to solution the not technical your \"Both in they dilemma Wiesner implicit be the solutions are article world) great requires cautiously with of in a the in article exhibited found to attention little reached, national solution solution military failures sides profes- science is human profes- discus- of always earlier by has is assert situa- only, pow- solu- that (na- and and and not the but the has the on or in in in a a","line":5,"x":13.799999999999926,"y":96.05999999999992,"width":"5356.11","height":"12.60","fontName":["Courier"],"dir":["ltr"]} diff --git a/examples/WoodUp/calculateStatistics.json b/examples/WoodUp/calculateStatistics.json index 32776a2..cc8d462 100644 --- a/examples/WoodUp/calculateStatistics.json +++ b/examples/WoodUp/calculateStatistics.json @@ -27,7 +27,15 @@ } ], "globals": { - "maxHeight": 64 + "maxHeight": 64, + "minX": 46.323, + "maxX": 436.5, + "minY": 37.73867999999993, + "maxY": 610.5599, + "pageMapping": { + "pageFactor": -6, + "detectedOnPage": true + } } } {"page":0,"change":"none","str":"WOOD-UP","dir":"ltr","width":"312.99","height":"64.00","transform":["64.00","0.00","0.00","64.00","55.56","408.09"],"fontName":"NXPYEX+AkzidenzGroteskPro-Bold","x":55.5591,"y":408.0945} diff --git a/examples/WoodUp/compactLines.json b/examples/WoodUp/compactLines.json index 5974eb1..eb77aa8 100644 --- a/examples/WoodUp/compactLines.json +++ b/examples/WoodUp/compactLines.json @@ -31,7 +31,15 @@ } ], "globals": { - "maxHeight": 64 + "maxHeight": 64, + "minX": 46.323, + "maxX": 436.5, + "minY": 37.73867999999993, + "maxY": 610.5599, + "pageMapping": { + "pageFactor": -6, + "detectedOnPage": true + } } } {"page":2,"change":"Addition","str":"Das Forschungs projekt WOOD- UP wurde finanziert durch den Europäische n","line":0,"x":65.643,"y":569.609,"width":"276.77","height":"8.00","fontName":["HZMZJK+ArialMT","HZMZJK+ArialMT"],"dir":["ltr"]} diff --git a/examples/WoodUp/removeRepetitiveItems.json b/examples/WoodUp/removeRepetitiveItems.json index f8dac31..086652d 100644 --- a/examples/WoodUp/removeRepetitiveItems.json +++ b/examples/WoodUp/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 64 + "maxHeight": 64, + "minX": 46.323, + "maxX": 436.5, + "minY": 37.73867999999993, + "maxY": 610.5599, + "pageMapping": { + "pageFactor": -6, + "detectedOnPage": true + }, + "pageFactor": "-6" } } {"page":5,"change":"Removal","str":"V ","dir":"ltr","width":"7.68","height":"7.98","transform":["7.98","0.00","0.00","7.98","382.86","52.74"],"fontName":"CRDKGT+ArialMT","x":382.8617,"y":52.741,"line":0} diff --git a/examples/WoodUp/sortbyX.json b/examples/WoodUp/sortbyX.json index b91a0dd..dd08d20 100644 --- a/examples/WoodUp/sortbyX.json +++ b/examples/WoodUp/sortbyX.json @@ -30,7 +30,15 @@ } ], "globals": { - "maxHeight": 64 + "maxHeight": 64, + "minX": 46.323, + "maxX": 436.5, + "minY": 37.73867999999993, + "maxY": 610.5599, + "pageMapping": { + "pageFactor": -6, + "detectedOnPage": true + } } } {"page":45,"change":"ContentChange","str":"Jährlicher Verbrauch an Biomasse [kg/kWh] 1,1 Kohle Entsorgungskosten - [€/t] 155 ","line":18,"x":67.74,"y":247.14,"width":"291.25","height":"7.98","fontName":["PNMSCP+PalatinoLinotype-Roman"],"dir":["ltr"]} diff --git a/examples/compressed.tracemonkey-pldi-09/removeRepetitiveItems.json b/examples/compressed.tracemonkey-pldi-09/removeRepetitiveItems.json index b118d9a..ae99936 100644 --- a/examples/compressed.tracemonkey-pldi-09/removeRepetitiveItems.json +++ b/examples/compressed.tracemonkey-pldi-09/removeRepetitiveItems.json @@ -30,6 +30,15 @@ } ], "globals": { - "maxHeight": 17.9328 + "maxHeight": 17.9328, + "minX": 53.99990000000005, + "maxX": 553.8755000000001, + "minY": 68.44329999999982, + "maxY": 713.7734000000003, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": false + }, + "pageFactor": "n/a" } } \ No newline at end of file diff --git a/examples/dict/removeRepetitiveItems.json b/examples/dict/removeRepetitiveItems.json index b6bb5c8..82f0476 100644 --- a/examples/dict/removeRepetitiveItems.json +++ b/examples/dict/removeRepetitiveItems.json @@ -30,7 +30,16 @@ } ], "globals": { - "maxHeight": 24.7871 + "maxHeight": 24.7871, + "minX": 52.262, + "maxX": 571.0594300000001, + "minY": 76.19790000000002, + "maxY": 738.022, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + }, + "pageFactor": "1" } } {"page":4,"change":"Removal","str":"5","dir":"ltr","width":"5.85","height":"11.96","transform":["11.96","0.00","0.00","11.96","526.49","738.02"],"fontName":"LERRTL+CMR12","x":526.491,"y":738.022,"line":0}