diff --git a/KNOWN_ISSUES.md b/KNOWN_ISSUES.md index 8dceb4a..a225ff2 100644 --- a/KNOWN_ISSUES.md +++ b/KNOWN_ISSUES.md @@ -4,3 +4,10 @@ The text which comes of pdfjs looks very erronous sometimes. E.g [Life-Of-God-In-Soul-Of-Man](examples/Life-Of-God-In-Soul-Of-Man.pdf). The interesting thing is that rendering with pdfjs (online) looks good. So maybe this is just a setup problem !? + +## Uncovered TOC variants + +- out of order items [Safe-Communication](examples/Safe-Communication.pdf) +- items in wrong lines + numbers are not numbers [Life-Of-God-In-Soul-Of-Man](examples/Life-Of-God-In-Soul-Of-Man.pdf) +- no page numbers [The-Art-of-Public-Speaking](examples/The-Art-of-Public-Speaking.pdf). +- multiline headlines: [WoodUp](examples/WoodUp.pdf) diff --git a/core/src/Debugger.ts b/core/src/Debugger.ts index 762f1e1..8657bb7 100644 --- a/core/src/Debugger.ts +++ b/core/src/Debugger.ts @@ -51,7 +51,7 @@ export default class Debugger { const changes = new ChangeTracker(); const items = detectChanges(changes, previousItems, itemResult.items); - const pages = asPages(evaluations, changes, items, transformer.descriptor.debug?.itemMerger); + const pages = asPages(evaluations, changes, outputSchema, items, transformer.descriptor.debug?.itemMerger); const messages = itemResult.messages; if (changes.changeCount() > 0 && messages.length === 0) { messages.unshift(`Detected ${changes.changeCount()} changes`); diff --git a/core/src/ElementType.ts b/core/src/ElementType.ts new file mode 100644 index 0000000..049c0aa --- /dev/null +++ b/core/src/ElementType.ts @@ -0,0 +1,5 @@ +enum ElementType { + TOC = 'TOC', +} + +export default ElementType; diff --git a/core/src/PdfParser.ts b/core/src/PdfParser.ts index 40db2a8..c3c1fdb 100644 --- a/core/src/PdfParser.ts +++ b/core/src/PdfParser.ts @@ -3,13 +3,15 @@ import Metadata from './Metadata'; import type ParseReporter from './ParseReporter'; import ParseResult from './ParseResult'; +export const PARSE_SCHEMA = ['transform', 'width', 
'height', 'str', 'fontName', 'dir']; + /** * Parses a PDF via PDFJS and returns a ParseResult which contains more or less the original data from PDFJS. */ export default class PdfParser { pdfjs: any; defaultParams: object; - schema = ['transform', 'width', 'height', 'str', 'fontName', 'dir']; + schema = PARSE_SCHEMA; constructor(pdfjs: any, defaultParams = {}) { this.pdfjs = pdfjs; diff --git a/core/src/debug/ItemMerger.ts b/core/src/debug/ItemMerger.ts index e41941a..4b43b00 100644 --- a/core/src/debug/ItemMerger.ts +++ b/core/src/debug/ItemMerger.ts @@ -7,5 +7,10 @@ import type Item from '../Item'; */ export default abstract class ItemMerger { constructor(public groupKey: string) {} - abstract merge(evaluationTracker: EvaluationTracker, changeTracker: ChangeTracker, items: Item[]): Item; + abstract merge( + evaluationTracker: EvaluationTracker, + changeTracker: ChangeTracker, + schema: string[], + items: Item[], + ): Item; } diff --git a/core/src/debug/LineItemMerger.ts b/core/src/debug/LineItemMerger.ts index 6ba3354..3aba00a 100644 --- a/core/src/debug/LineItemMerger.ts +++ b/core/src/debug/LineItemMerger.ts @@ -8,7 +8,7 @@ export default class LineItemMerger extends ItemMerger { super('line'); } - merge(evaluationTracker: EvaluationTracker, changeTracker: ChangeTracker, items: Item[]): Item { + merge(evaluationTracker: EvaluationTracker, changeTracker: ChangeTracker, schema: string[], items: Item[]): Item { const page = items[0].page; const line = items[0].data['line']; const str = items.map((item) => item.data['str']).join(' '); @@ -18,6 +18,7 @@ export default class LineItemMerger extends ItemMerger { const height = Math.max(...items.map((item) => item.data['height'])); const fontNames = [...new Set(items.map((item) => item.data['fontName']))]; const directions = [...new Set(items.map((item) => item.data['dir']))]; + const newItem = new Item(page, { str, line, @@ -29,6 +30,13 @@ export default class LineItemMerger extends ItemMerger { dir: directions, }); + 
if (schema.includes('type')) { + const type = [...new Set(items.filter((item) => item.data['type']).map((item) => item.data['type']))]; + if (type.length > 0) { + newItem.data['type'] = type; + } + } + const evaluatedItem = items.find((item) => evaluationTracker.evaluated(item)); if (evaluatedItem) evaluationTracker.trackEvaluation(newItem, evaluationTracker.evaluationScore(evaluatedItem)); diff --git a/core/src/debug/Page.ts b/core/src/debug/Page.ts index e85e915..4b91267 100644 --- a/core/src/debug/Page.ts +++ b/core/src/debug/Page.ts @@ -13,6 +13,7 @@ export default interface Page { export function asPages( evaluationTracker: EvaluationTracker, changeTracker: ChangeTracker, + schema: string[], items: Item[], itemMerger?: ItemMerger, ): Page[] { @@ -21,7 +22,7 @@ export function asPages( if (itemMerger) { itemGroups = groupByElement(pageItems, itemMerger.groupKey).map((groupItems) => { if (groupItems.length > 1) { - const top = itemMerger.merge(evaluationTracker, changeTracker, groupItems); + const top = itemMerger.merge(evaluationTracker, changeTracker, schema, groupItems); return new ItemGroup(top, groupItems); } else { return new ItemGroup(groupItems[0]); diff --git a/core/src/debug/StageResult.ts b/core/src/debug/StageResult.ts index fd0455c..c8ab565 100644 --- a/core/src/debug/StageResult.ts +++ b/core/src/debug/StageResult.ts @@ -1,4 +1,5 @@ import TransformDescriptor, { toDescriptor } from '../TransformDescriptor'; +import { PARSE_SCHEMA } from '../PdfParser'; import AnnotatedColumn from './AnnotatedColumn'; import Item from '../Item'; import Page, { asPages } from './Page'; @@ -82,7 +83,7 @@ export function initialStage(inputSchema: string[], inputItems: Item[]): StageRe const schema = inputSchema.map((column) => ({ name: column })); const evaluations = new EvaluationTracker(); const changes = new ChangeTracker(); - const pages = asPages(evaluations, changes, inputItems); + const pages = asPages(evaluations, changes, PARSE_SCHEMA, inputItems); const 
messages = [ `Parsed ${inputItems.length === 0 ? 0 : inputItems[inputItems.length - 1].page + 1} pages with ${ inputItems.length diff --git a/core/src/debug/detectChanges.ts b/core/src/debug/detectChanges.ts index 6eacd19..d275af5 100644 --- a/core/src/debug/detectChanges.ts +++ b/core/src/debug/detectChanges.ts @@ -31,21 +31,32 @@ function detectPageChanges(tracker: ChangeTracker, inputItems: Item[], outputIte let outputIndex = 0; for (let inputIdx = 0; inputIdx < inputItems.length; inputIdx++) { const inputItem = inputItems[inputIdx]; + + // In case the input item has already been added from the outputs items array if (addedItems.has(inputItem.uuid)) { continue; } + const positionInOutput = outputItems.findIndex((item) => item.uuid === inputItem.uuid); if (positionInOutput < 0) { + // Input doesn't exist in the output anymore tracker.trackRemoval(inputItem); mergedItems.push(inputItem); addedItems.add(inputItem.uuid); removals++; } else if (positionInOutput === inputIdx + additions - removals) { + // Input is in output with no positional change mergedItems.push(outputItems[positionInOutput]); addedItems.add(outputItems[positionInOutput].uuid); outputIndex++; - //TODO check for content change ? + // But with type change (TODO generalize ?) 
+ const typeInInput = inputItem.data['type']; + const typeInOutput = outputItems[positionInOutput].data['type']; + if (typeInInput !== typeInOutput) { + tracker.trackContentChange(inputItem); + } } else { + // Handle items from the output which aren't in the input array for (let intermediateOutputIdx = outputIndex; intermediateOutputIdx < positionInOutput; intermediateOutputIdx++) { const outputItem = outputItems[intermediateOutputIdx]; const positionInInput = inputItems.findIndex((item) => item.uuid === outputItem.uuid); diff --git a/core/src/index.ts b/core/src/index.ts index 47de983..f1ff82b 100644 --- a/core/src/index.ts +++ b/core/src/index.ts @@ -11,6 +11,7 @@ import CalculateStatistics from './transformer/CacluclateStatistics'; import CompactLines from './transformer/CompactLines'; import SortXWithinLines from './transformer/SortXWithinLines'; import RemoveRepetitiveItems from './transformer/RemoveRepetitiveItems'; +import TocDetection from './transformer/TocDetection'; import NoOpTransformer from './transformer/NoOpTransformer'; export const transformers = [ @@ -21,6 +22,7 @@ export const transformers = [ new CompactLines(), new SortXWithinLines(), new RemoveRepetitiveItems(), + new TocDetection(), new NoOpTransformer(), ]; diff --git a/core/src/support/stringFunctions.ts b/core/src/support/stringFunctions.ts index ca386c4..5481cbb 100644 --- a/core/src/support/stringFunctions.ts +++ b/core/src/support/stringFunctions.ts @@ -1,3 +1,5 @@ +import { assert } from '../assert'; + const MIN_DIGIT_CHAR_CODE = 48; const MAX_DIGIT_CHAR_CODE = 57; @@ -20,3 +22,12 @@ export function filterOutDigits(text: string): string { export function extractNumbers(text: string): number[] { return (text.match(/\d+/g) || []).map(Number); } + +export function extractEndingNumber(text: string): number | undefined { + const match = text.match(/\d+$/g); + if (match) { + assert(match.length == 1, `Expected only one match, but got ${match}`); + return Number(match[0]); + } + return 
undefined; +} diff --git a/core/src/transformer/TocDetection.ts b/core/src/transformer/TocDetection.ts new file mode 100644 index 0000000..5ceafd8 --- /dev/null +++ b/core/src/transformer/TocDetection.ts @@ -0,0 +1,101 @@ +import Item from '../Item'; +import ItemResult from '../ItemResult'; +import ItemTransformer from './ItemTransformer'; +import TransformContext from './TransformContext'; +import LineItemMerger from '../debug/LineItemMerger'; +import { groupByLine, groupByPage } from '../support/groupingUtils'; +import { PAGE_MAPPING } from './CacluclateStatistics'; +import { extractEndingNumber } from '../support/stringFunctions'; +import ElementType from '../ElementType'; + +const config = { + maxSkips: 1, +}; +export default class TocDetection extends ItemTransformer { + constructor() { + super( + 'TOC Detection', + 'Detect table of contents.', + { + requireColumns: ['x', 'y', 'str', 'line'], + debug: { + itemMerger: new LineItemMerger(), + }, + }, + (incomingSchema) => { + return incomingSchema.reduce((schema, column) => { + if (column === 'x') { + return [...schema, 'type', 'x']; + } + return [...schema, column]; + }, new Array()); + }, + ); + } + + //TODO produces global TOC with pages and min/max y coordinates ? + // Or first block producer ? 
+ transform(context: TransformContext, inputItems: Item[]): ItemResult { + const pageMapping = context.getGlobal(PAGE_MAPPING); + + const maxPageToEvaluate = Math.min(context.pageCount / 2, 5 + Math.abs(pageMapping.pageFactor)); + const pagesToEvaluate = groupByPage(inputItems.filter((item) => item.page <= maxPageToEvaluate)); + + const maxPageToBeLinkedTo = context.pageCount + pageMapping.pageFactor - 1; + const tocLineUuids = new Set(); + pagesToEvaluate.forEach((pageItems, pageIndex) => { + const itemsGroupedByLine = groupByLine(pageItems); + let potentialLines: Item[][] = []; + let skips = 0; + let numbers: number[] = []; + let skippedLine: Item[] | undefined; + itemsGroupedByLine.forEach((lineItems) => { + const text = lineItems.reduce((text, item) => { + return text + item.data['str']; + }, ''); + const number = extractEndingNumber(text); + if (number && Number.isInteger(number) && number <= maxPageToBeLinkedTo) { + if (skippedLine) { + potentialLines.push(skippedLine); + skippedLine = undefined; + skips = 0; + } + potentialLines.push(lineItems); + numbers.push(number); + } else { + if (potentialLines.length > 0) { + if (skips < config.maxSkips) { + skips++; + skippedLine = lineItems; + } else { + memorizeLineItemsIfValid(tocLineUuids, potentialLines, numbers); + potentialLines = []; + // numbers=[]; + skips = 0; + skippedLine = undefined; + } + } + } + }); + memorizeLineItemsIfValid(tocLineUuids, potentialLines, numbers); + }); + + return { + items: inputItems.map((item) => + tocLineUuids.has(item.uuid) ? item.withDataAddition({ type: ElementType.TOC }) : item, + ), + messages: [], + }; + } +} + +function memorizeLineItemsIfValid(memorizedUuids: Set, potentialLines: Item[][], numbers: number[]) { + if (potentialLines.length < 3) { + return; + } + const numbersAreAscending = numbers.every((num, idx) => (idx > 0 ? 
num >= numbers[idx - 1] : num > 0)); + if (!numbersAreAscending) { + return; + } + potentialLines.forEach((lineItems) => lineItems.forEach((item) => memorizedUuids.add(item.uuid))); +} diff --git a/core/test/support/stringFunctions.test.ts b/core/test/support/stringFunctions.test.ts index c954361..d88ea00 100644 --- a/core/test/support/stringFunctions.test.ts +++ b/core/test/support/stringFunctions.test.ts @@ -1,4 +1,4 @@ -import { filterOutDigits, extractNumbers } from 'src/support/stringFunctions'; +import { filterOutDigits, extractNumbers, extractEndingNumber } from 'src/support/stringFunctions'; test('filterOutDigits', async () => { expect(filterOutDigits('')).toEqual(''); @@ -12,3 +12,11 @@ test('extractNumbers', async () => { expect(extractNumbers('a1b 2c 3')).toEqual([1, 2, 3]); expect(extractNumbers('a12 21 304')).toEqual([12, 21, 304]); }); + +test('extractEndingNumbers', async () => { + expect(extractEndingNumber('')).toBeUndefined(); + expect(extractEndingNumber('a b c')).toBeUndefined(); + expect(extractEndingNumber('a1b 2c 3')).toEqual(3); + expect(extractEndingNumber('a12 21 304')).toEqual(304); + expect(extractEndingNumber('abc ... 
304')).toEqual(304); +}); diff --git a/examples/Adventures-Of-Sherlock-Holmes/tOCDetection.json b/examples/Adventures-Of-Sherlock-Holmes/tOCDetection.json new file mode 100644 index 0000000..a5cd671 --- /dev/null +++ b/examples/Adventures-Of-Sherlock-Holmes/tOCDetection.json @@ -0,0 +1,59 @@ +{ + "pages": 200, + "items": 8461, + "groupedItems": 8320, + "changes": 36, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 59.7758, + "minX": 117.8279999999999, + "maxX": 471.0319307, + "minY": 95.28300000000016, + "maxY": 736.017, + "pageMapping": { + "pageFactor": -1, + "detectedOnPage": true + } + } +} +{"page":3,"change":"ContentChange","str":"A Scandal In Bohemia 3","line":1,"x":117.828,"y":561.248,"width":"110.02","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"The Red-Headed League 21","line":2,"x":117.828,"y":536.7900000000001,"width":"126.67","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"A Case Of Identity 38","line":3,"x":117.828,"y":512.3320000000001,"width":"98.48","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"The Boscombe Valley Mystery 51","line":4,"x":117.828,"y":487.87400000000014,"width":"152.01","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"The Five Orange Pips 69","line":5,"x":117.828,"y":463.41500000000013,"width":"112.21","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"The Man With The 
Twisted Lip 83","line":6,"x":117.828,"y":438.9570000000001,"width":"158.72","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"The Adventure Of The Blue Carbuncle 100","line":7,"x":117.828,"y":414.49900000000014,"width":"197.97","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"The Adventure Of The Speckled Band 115","line":8,"x":117.828,"y":390.04000000000013,"width":"194.56","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"The Adventure Of The Engineer’s Thumb 133","line":9,"x":117.828,"y":365.5820000000001,"width":"212.33","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"The Adventure Of The Noble Bachelor 148","line":10,"x":117.828,"y":341.12400000000014,"width":"196.96","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"The Adventure Of The Beryl Coronet 164","line":11,"x":117.828,"y":316.66600000000017,"width":"191.30","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} +{"page":3,"change":"ContentChange","str":"The Adventure Of The Copper Beeches 182","line":12,"x":117.828,"y":292.20700000000016,"width":"199.99","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]} \ No newline at end of file diff --git a/examples/Alice-In-Wonderland/tOCDetection.json b/examples/Alice-In-Wonderland/tOCDetection.json new file mode 100644 index 0000000..915f6e0 --- /dev/null +++ b/examples/Alice-In-Wonderland/tOCDetection.json @@ -0,0 +1,60 @@ +{ + "pages": 76, + "items": 3071, + "groupedItems": 2543, + "changes": 40, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { 
+ "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 24.787, + "minX": 102.88399999999984, + "maxX": 488.43800000000005, + "minY": 95.545, + "maxY": 735.021, + "pageMapping": { + "pageFactor": 0, + "detectedOnPage": true + } + } +} +{"page":1,"change":"ContentChange","str":"Poem. All in the golden afternoon . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3","line":1,"x":102.884,"y":557.313,"width":"381.07","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"I Down the Rabbit-Hole . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4","line":2,"x":102.884,"y":530.912,"width":"353.22","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"II The Pool of Tears . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9","line":3,"x":102.884,"y":504.511,"width":"380.55","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"III A Caucus-Race and a Long Tale . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14","line":4,"x":102.884,"y":478.11,"width":"381.15","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"IV The Rabbit Sends in a Little Bill . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 19","line":5,"x":102.884,"y":451.709,"width":"378.87","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"V Advice from a Caterpillar . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
25","line":6,"x":102.884,"y":425.308,"width":"381.15","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"VI Pig and Pepper . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 32","line":7,"x":102.884,"y":398.907,"width":"379.43","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"VII A Mad Tea-Party . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39","line":8,"x":102.884,"y":372.506,"width":"379.30","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"VIII The Queen’s Croquet-Ground . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 46","line":9,"x":102.884,"y":346.105,"width":"380.42","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"IX The Mock Turtle’s Story . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53","line":10,"x":102.884,"y":319.704,"width":"381.20","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"X The Lobster Quadrille . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59","line":11,"x":102.884,"y":293.303,"width":"380.58","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"XI Who Stole the Tarts? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 65","line":12,"x":102.884,"y":266.902,"width":"380.24","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"XII Alice’s Evidence . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 70","line":13,"x":102.884,"y":240.501,"width":"381.27","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]} \ No newline at end of file diff --git a/examples/Closed-Syllables/tOCDetection.json b/examples/Closed-Syllables/tOCDetection.json new file mode 100644 index 0000000..e8c40d4 --- /dev/null +++ b/examples/Closed-Syllables/tOCDetection.json @@ -0,0 +1,53 @@ +{ + "pages": 19, + "items": 1408, + "groupedItems": 1177, + "changes": 49, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 18, + "minX": 72.024, + "maxX": 534.58, + "minY": 63.144, + "maxY": 745.56, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } + } +} +{"page":1,"change":"ContentChange","str":"“short a” ................................ ................................ ................................ ................................ ......... 3","line":8,"x":74.544,"y":598.66,"width":"424.05","height":"11.04","fontName":["ABCDEE+Calibri","ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"“short I” ................................ ................................ ................................ ................................ ............ 8","line":9,"x":72.024,"y":576.22,"width":"429.97","height":"11.04","fontName":["ABCDEE+Calibri","ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"“short o” ................................ ................................ ................................ ................................ ......... 
10","line":10,"x":72.024,"y":553.78,"width":"430.36","height":"11.04","fontName":["ABCDEE+Calibri","ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"“short u” ................................ ................................ ................................ ................................ ......... 1 2","line":11,"x":72.024,"y":531.19,"width":"430.27","height":"11.04","fontName":["ABCDEE+Calibri","ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"E ................................ ................................ ................................ ................................ ..................... 14","line":12,"x":72.024,"y":508.75,"width":"427.90","height":"11.04","fontName":["ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]} +{"page":1,"change":"ContentChange","str":"Y ................................ ................................ ................................ ................................ ..................... 
16","line":13,"x":72.024,"y":486.19,"width":"427.89","height":"11.04","fontName":["ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]} \ No newline at end of file diff --git a/examples/ExamplePdf/tOCDetection.json b/examples/ExamplePdf/tOCDetection.json new file mode 100644 index 0000000..b2c2347 --- /dev/null +++ b/examples/ExamplePdf/tOCDetection.json @@ -0,0 +1,47 @@ +{ + "pages": 6, + "items": 268, + "groupedItems": 115, + "changes": 0, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 30, + "minX": 56.69069, + "maxX": 507.3787, + "minY": 45, + "maxY": 772, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } + } +} \ No newline at end of file diff --git a/examples/Flash-Masques-Temperature/tOCDetection.json b/examples/Flash-Masques-Temperature/tOCDetection.json new file mode 100644 index 0000000..8bd67fe --- /dev/null +++ b/examples/Flash-Masques-Temperature/tOCDetection.json @@ -0,0 +1,47 @@ +{ + "pages": 4, + "items": 134, + "groupedItems": 108, + "changes": 0, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 29, + "minX": 37.1206, + "maxX": 542.2816, + "minY": 36.1763, + "maxY": 811.1348, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": false + } + } +} \ No newline at end of file diff --git a/examples/Grammar-Matters/tOCDetection.json b/examples/Grammar-Matters/tOCDetection.json new file mode 100644 index 0000000..3440847 --- /dev/null +++ b/examples/Grammar-Matters/tOCDetection.json @@ -0,0 +1,47 @@ +{ + "pages": 116, + "items": 7676, 
+ "groupedItems": 3479, + "changes": 0, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 48, + "minX": 62.03970999999996, + "maxX": 536.37986, + "minY": 22.6801, + "maxY": 709.8000000000001, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } + } +} \ No newline at end of file diff --git a/examples/Life-Of-God-In-Soul-Of-Man/tOCDetection.json b/examples/Life-Of-God-In-Soul-Of-Man/tOCDetection.json new file mode 100644 index 0000000..dfa3285 --- /dev/null +++ b/examples/Life-Of-God-In-Soul-Of-Man/tOCDetection.json @@ -0,0 +1,107 @@ +{ + "pages": 140, + "items": 25314, + "groupedItems": 3168, + "changes": 473, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 45.974399999999996, + "minX": 26.29161, + "maxX": 273.69135, + "minY": 15.08535, + "maxY": 432.30303, + "pageMapping": { + "pageFactor": -17, + "detectedOnPage": true + } + } +} +{"page":14,"change":"ContentChange","str":"The Occasion of this Discourse 3","line":1,"x":29.88336,"y":258.74967,"width":"105.17","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"Mistakes about Religion 4","line":2,"x":30.1707,"y":247.25607,"width":"86.20","height":"6.66","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"What Religion is 6","line":3,"x":30.1707,"y":236.91182999999998,"width":"59.76","height":"8.32","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"The Permanency 
and Stability of Religion .... 7","line":4,"x":29.73969,"y":223.55051999999998,"width":"176.29","height":"13.17","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"The P'reedom and Unconstrainedness of Religion . . S","line":5,"x":29.88336,"y":213.20628,"width":"170.83","height":"7.74","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"Religion a Divine Principle 13","line":6,"x":30.1707,"y":200.85065999999998,"width":"97.40","height":"6.74","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"What the Natural Life is 14","line":7,"x":30.314369999999997,"y":189.21338999999998,"width":"84.48","height":"8.26","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"The different Tendencies of the Natural Life .... 15","line":8,"x":30.02703,"y":177.57612,"width":"182.75","height":"13.29","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"Wherein the Divine Life doth consist 20","line":9,"x":29.88336,"y":166.51353,"width":"128.30","height":"7.39","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"Religion better understood by Actions than by","line":10,"x":30.458039999999997,"y":154.58892,"width":"151.43","height":"7.06","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"Words 24","line":11,"x":53.58891,"y":141.80229,"width":"31.18","height":"7.90","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"Divine Love exemplified in our Saviour 26","line":12,"x":30.02703,"y":130.7397,"width":"136.77","height":"7.48","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"Our Saviour's Constant Devotion 
28","line":13,"x":30.458039999999997,"y":118.81509,"width":"120.54","height":"7.74","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"Our Saviour's Charity to Men 29","line":14,"x":30.458039999999997,"y":106.3158,"width":"103.73","height":"8.54","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"Our Saviour's Purity 31","line":15,"x":30.458039999999997,"y":94.67853,"width":"77.01","height":"7.74","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"A Our Prayer Saviour's Humility 34 37","line":16,"x":30.314369999999997,"y":69.96728999999999,"width":"125.28","height":"10.06","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"The Excellency and Advantage of Religion .... 38","line":17,"x":30.314369999999997,"y":59.04837,"width":"183.03","height":"13.35","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"The Excellency of Divine Love 39","line":18,"x":30.314369999999997,"y":46.118069999999996,"width":"110.77","height":"7.82","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":14,"change":"ContentChange","str":"The Advantages of Divine Love 44","line":19,"x":30.314369999999997,"y":34.337129999999995,"width":"114.08","height":"7.82","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Worth of the Object 45","line":1,"x":65.36985,"y":378.85778999999997,"width":"85.48","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Certainty to be Beloved Again 46","line":2,"x":65.22618,"y":367.65153,"width":"119.39","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Presence of the Beloved Person 
48","line":3,"x":65.22618,"y":355.58324999999996,"width":"122.55","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Divine Love makes us partake of an Infinite","line":4,"x":65.22618,"y":344.95167,"width":"153.88","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"Happiness 49","line":5,"x":89.0754,"y":330.87201,"width":"44.68","height":"6.89","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"He that loveth God finds Sweetness in every Dis-","line":6,"x":65.65719,"y":320.95878,"width":"154.30","height":"8.06","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"pensation 51","line":7,"x":88.78806,"y":308.31582,"width":"41.23","height":"6.38","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Duties of Religion are Delightful to Him ... 52","line":8,"x":65.22618,"y":296.67855,"width":"172.98","height":"12.21","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Excellency of Charity 54","line":9,"x":65.22618,"y":283.89191999999997,"width":"95.10","height":"7.74","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Pleasure that attends Charity 56","line":10,"x":65.08251,"y":272.973,"width":"118.23","height":"7.74","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Excellency of Purity 58","line":11,"x":65.22618,"y":261.04839,"width":"89.93","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Delight afforded by Purity 59","line":12,"x":65.08251,"y":248.40543,"width":"108.33","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Excellency of Humility 
60","line":13,"x":65.36985,"y":238.20486,"width":"100.28","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Pleasure and Sweetness of an Humble Temper . 62","line":14,"x":65.36985,"y":226.42391999999998,"width":"173.27","height":"7.90","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"A Prayer 65","line":15,"x":65.36985,"y":213.92462999999998,"width":"36.78","height":"9.82","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Despondent Thoughts of some Newly Awakened","line":16,"x":65.22618,"y":203.00571,"width":"177.29","height":"7.76","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"to a Right Sense of Things 66","line":17,"x":88.93173,"y":191.22476999999998,"width":"91.52","height":"7.18","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"The Unreasonableness of these Fears 69","line":18,"x":65.22618,"y":178.58181,"width":"130.74","height":"7.58","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"We must do what we can, and depend on the Divine","line":19,"x":65.36985,"y":167.66289,"width":"158.03","height":"9.58","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"Assistance 74","line":20,"x":88.50072,"y":154.30158,"width":"45.26","height":"6.32","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"We We must must know shun all what Manner Things of are Sin Sinful So 78","line":21,"x":65.22618,"y":132.32007,"width":"238.06","height":"9.58","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"We must resist the Temptations of Sin. 
by consider-","line":22,"x":65.22618,"y":120.6828,"width":"166.94","height":"9.46","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"ing the Evils they will draw on us 82","line":23,"x":88.78806,"y":108.75819,"width":"108.61","height":"7.36","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"We We must must keep often a examine Constant our Watch Actions over Ourselves . . 87 89","line":24,"x":64.93884,"y":84.19062,"width":"287.91","height":"9.82","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"It is fit to restrain Ourselves in Many Lawful Things . 91","line":25,"x":64.93884,"y":72.40968,"width":"172.11","height":"8.56","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"\\\\'e must strive to put Ourselves out of Love with the","line":26,"x":65.22618,"y":61.634429999999995,"width":"163.21","height":"7.48","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":15,"change":"ContentChange","str":"World 93","line":27,"x":88.35705,"y":47.98578,"width":"30.03","height":"7.57","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"lency of the Divine Nature 104","line":6,"x":60.62873999999999,"y":300.41397,"width":"96.97","height":"6.94","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"We should meditate on God's Goodness and Love . 
108","line":7,"x":36.92319,"y":289.35138,"width":"172.41","height":"9.94","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"To beget Charity, we must remember that all Men","line":8,"x":36.92319,"y":277.2831,"width":"160.76","height":"8.54","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"are nearly related unto God 113","line":9,"x":60.772409999999994,"y":265.07115,"width":"98.70","height":"8.14","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"That they carry God's Image upon them 114","line":10,"x":36.92319,"y":253.00286999999997,"width":"140.80","height":"7.47","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"To beget Purity, we should consider the Dignity of","line":11,"x":37.21053,"y":241.94027999999997,"width":"162.92","height":"8.14","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"our Nature 116","line":12,"x":60.916079999999994,"y":230.15934,"width":"47.99","height":"6.98","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"We should meditate often on the Joys of Heaven . 
117","line":13,"x":37.21053,"y":217.94738999999998,"width":"164.64","height":"9.82","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"Humility arises from the Consideration of our Fail-","line":14,"x":38.21622,"y":206.74113,"width":"167.95","height":"6.88","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"ings 118","line":15,"x":61.203419999999994,"y":194.67284999999998,"width":"25.72","height":"6.17","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"Thoughts of God give us the Lowest Thoughts of","line":16,"x":37.21053,"y":182.60457,"width":"158.61","height":"8.22","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"Ourselves 120","line":17,"x":61.490759999999995,"y":170.9673,"width":"46.55","height":"6.55","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"Prayer, another Instrument of Religion, and the","line":18,"x":37.64154,"y":159.04269,"width":"159.47","height":"7.02","fontName":[null],"dir":["ltr"],"type":["TOC"]} +{"page":16,"change":"ContentChange","str":"Advantages of Mental Prayer 121","line":19,"x":60.772409999999994,"y":147.26174999999998,"width":"109.90","height":"7.16","fontName":[null],"dir":["ltr"],"type":["TOC"]} \ No newline at end of file diff --git a/examples/Safe-Communication/tOCDetection.json b/examples/Safe-Communication/tOCDetection.json new file mode 100644 index 0000000..e3beb8c --- /dev/null +++ b/examples/Safe-Communication/tOCDetection.json @@ -0,0 +1,47 @@ +{ + "pages": 60, + "items": 3990, + "groupedItems": 1428, + "changes": 0, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 
36, + "minX": 53.88, + "maxX": 797.38, + "minY": 23.04, + "maxY": 528.34, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } + } +} \ No newline at end of file diff --git a/examples/St-Mary-Witney-Social-Audit/tOCDetection.json b/examples/St-Mary-Witney-Social-Audit/tOCDetection.json new file mode 100644 index 0000000..ccc6236 --- /dev/null +++ b/examples/St-Mary-Witney-Social-Audit/tOCDetection.json @@ -0,0 +1,62 @@ +{ + "pages": 27, + "items": 1874, + "groupedItems": 1520, + "changes": 45, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 36, + "minX": 6.487999999999971, + "maxX": 815.833, + "minY": 16.345999999999947, + "maxY": 563.346, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } + } +} +{"page":2,"change":"ContentChange","str":"Introduction 5","line":0,"x":452.032,"y":384.497,"width":"81.35","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"History 7","line":1,"x":452.032,"y":365.60900000000004,"width":"45.93","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Setting and context 10","line":2,"x":452.032,"y":346.72100000000006,"width":"123.36","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Witney’s people 12","line":3,"x":452.032,"y":327.8330000000001,"width":"94.89","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Health 14","line":4,"x":452.032,"y":308.9450000000001,"width":"51.37","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} 
+{"page":2,"change":"ContentChange","str":"Witney at work 15","line":5,"x":452.032,"y":290.05700000000013,"width":"93.06","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Housing 16","line":6,"x":452.032,"y":271.16900000000015,"width":"55.11","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Crime and safety 17","line":7,"x":452.032,"y":252.28100000000015,"width":"99.69","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Education 18","line":8,"x":452.032,"y":233.39300000000014,"width":"63.14","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Transport 20","line":9,"x":452.032,"y":214.50500000000014,"width":"64.49","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Lifestyle and deprivation 21","line":10,"x":452.032,"y":195.61700000000013,"width":"133.67","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Conclusions 23","line":11,"x":452.032,"y":176.72900000000013,"width":"87.37","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"What St Mary’s offers 25","line":12,"x":452.032,"y":157.84100000000012,"width":"123.37","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Recommendations 27","line":13,"x":452.032,"y":138.95300000000012,"width":"105.64","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} +{"page":2,"change":"ContentChange","str":"Further information and links 28","line":14,"x":452.032,"y":120.06500000000011,"width":"159.37","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]} \ No 
newline at end of file diff --git a/examples/The-Art-of-Public-Speaking/tOCDetection.json b/examples/The-Art-of-Public-Speaking/tOCDetection.json new file mode 100644 index 0000000..092d5d7 --- /dev/null +++ b/examples/The-Art-of-Public-Speaking/tOCDetection.json @@ -0,0 +1,47 @@ +{ + "pages": 466, + "items": 772193, + "groupedItems": 15227, + "changes": 0, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 28.799999999999997, + "minX": 72, + "maxX": 537.4124748000004, + "minY": 75.60000000000002, + "maxY": 712.8, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": false + } + } +} \ No newline at end of file diff --git a/examples/The-Man-Without-A-Body/tOCDetection.json b/examples/The-Man-Without-A-Body/tOCDetection.json new file mode 100644 index 0000000..7ea4ab3 --- /dev/null +++ b/examples/The-Man-Without-A-Body/tOCDetection.json @@ -0,0 +1,47 @@ +{ + "pages": 4, + "items": 522, + "groupedItems": 378, + "changes": 0, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 11, + "minX": 72.025, + "maxX": 536.73, + "minY": 75.025, + "maxY": 747.22, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": false + } + } +} \ No newline at end of file diff --git a/examples/The-War-of-the-Worlds/tOCDetection.json b/examples/The-War-of-the-Worlds/tOCDetection.json new file mode 100644 index 0000000..0fbb3ef --- /dev/null +++ b/examples/The-War-of-the-Worlds/tOCDetection.json @@ -0,0 +1,47 @@ +{ + "pages": 293, + "items": 9255, + "groupedItems": 6520, + "changes": 0, 
+ "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 24, + "minX": 57.59999999999991, + "maxX": 312.78, + "minY": 44.76, + "maxY": 515.8338448603599, + "pageMapping": { + "pageFactor": 1, + "detectedOnPage": true + } + } +} \ No newline at end of file diff --git a/examples/Tragedy-Of-The-Commons/tOCDetection.json b/examples/Tragedy-Of-The-Commons/tOCDetection.json new file mode 100644 index 0000000..705bc74 --- /dev/null +++ b/examples/Tragedy-Of-The-Commons/tOCDetection.json @@ -0,0 +1,47 @@ +{ + "pages": 7, + "items": 6779, + "groupedItems": 154, + "changes": 0, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 22.5, + "minX": 13.799999999999926, + "maxX": 550.2000000000003, + "minY": 1.4400099999998357, + "maxY": 751.50001, + "pageMapping": { + "pageFactor": 1243, + "detectedOnPage": true + } + } +} \ No newline at end of file diff --git a/examples/WoodUp/tOCDetection.json b/examples/WoodUp/tOCDetection.json new file mode 100644 index 0000000..53cd2d4 --- /dev/null +++ b/examples/WoodUp/tOCDetection.json @@ -0,0 +1,47 @@ +{ + "pages": 256, + "items": 20146, + "groupedItems": 7203, + "changes": 0, + "schema": [ + { + "name": "line" + }, + { + "name": "type", + "annotation": "ADDED" + }, + { + "name": "x" + }, + { + "name": "y" + }, + { + "name": "width" + }, + { + "name": "height" + }, + { + "name": "str" + }, + { + "name": "fontName" + }, + { + "name": "dir" + } + ], + "globals": { + "maxHeight": 64, + "minX": 46.323, + "maxX": 436.5, + "minY": 37.73867999999993, 
+ "maxY": 610.5599, + "pageMapping": { + "pageFactor": -6, + "detectedOnPage": true + } + } +} \ No newline at end of file