mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-06-24 19:41:24 +02:00
Initial TOC detection
This commit is contained in:
parent
bf81416925
commit
ce6c9fe977
@ -4,3 +4,10 @@
|
||||
|
||||
The text that comes out of pdfjs sometimes looks very erroneous, e.g. [Life-Of-God-In-Soul-Of-Man](examples/Life-Of-God-In-Soul-Of-Man.pdf).
|
||||
The interesting thing is that rendering with pdfjs (online) looks good, so maybe this is just a setup problem?
|
||||
|
||||
## Uncovered TOC variants
|
||||
|
||||
- out of order items [Safe-Communication](examples/Safe-Communication.pdf)
|
||||
- items on the wrong lines + page numbers that are not extracted as digits [Life-Of-God-In-Soul-Of-Man](examples/Life-Of-God-In-Soul-Of-Man.pdf)
|
||||
- no page numbers [The-Art-of-Public-Speaking](examples/The-Art-of-Public-Speaking.pdf).
|
||||
- multiline headlines: [WoodUp](examples/WoodUp.pdf)
|
||||
|
@ -51,7 +51,7 @@ export default class Debugger {
|
||||
|
||||
const changes = new ChangeTracker();
|
||||
const items = detectChanges(changes, previousItems, itemResult.items);
|
||||
const pages = asPages(evaluations, changes, items, transformer.descriptor.debug?.itemMerger);
|
||||
const pages = asPages(evaluations, changes, outputSchema, items, transformer.descriptor.debug?.itemMerger);
|
||||
const messages = itemResult.messages;
|
||||
if (changes.changeCount() > 0 && messages.length === 0) {
|
||||
messages.unshift(`Detected ${changes.changeCount()} changes`);
|
||||
|
5
core/src/ElementType.ts
Normal file
5
core/src/ElementType.ts
Normal file
@ -0,0 +1,5 @@
|
||||
/**
 * Values for the `type` data field of an item.
 */
enum ElementType {
|
||||
/** Set by the TOC detection on items of lines it recognizes as table-of-contents entries. */
TOC = 'TOC',
|
||||
}
|
||||
|
||||
export default ElementType;
|
@ -3,13 +3,15 @@ import Metadata from './Metadata';
|
||||
import type ParseReporter from './ParseReporter';
|
||||
import ParseResult from './ParseResult';
|
||||
|
||||
export const PARSE_SCHEMA = ['transform', 'width', 'height', 'str', 'fontName', 'dir'];
|
||||
|
||||
/**
|
||||
* Parses a PDF via PDFJS and returns a ParseResult which contains more or less the original data from PDFJS.
|
||||
*/
|
||||
export default class PdfParser {
|
||||
pdfjs: any;
|
||||
defaultParams: object;
|
||||
schema = ['transform', 'width', 'height', 'str', 'fontName', 'dir'];
|
||||
schema = PARSE_SCHEMA;
|
||||
|
||||
constructor(pdfjs: any, defaultParams = {}) {
|
||||
this.pdfjs = pdfjs;
|
||||
|
@ -7,5 +7,10 @@ import type Item from '../Item';
|
||||
*/
|
||||
export default abstract class ItemMerger {
|
||||
constructor(public groupKey: string) {}
|
||||
abstract merge(evaluationTracker: EvaluationTracker, changeTracker: ChangeTracker, items: Item[]): Item;
|
||||
abstract merge(
|
||||
evaluationTracker: EvaluationTracker,
|
||||
changeTracker: ChangeTracker,
|
||||
schema: string[],
|
||||
items: Item[],
|
||||
): Item;
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ export default class LineItemMerger extends ItemMerger {
|
||||
super('line');
|
||||
}
|
||||
|
||||
merge(evaluationTracker: EvaluationTracker, changeTracker: ChangeTracker, items: Item[]): Item {
|
||||
merge(evaluationTracker: EvaluationTracker, changeTracker: ChangeTracker, schema: string[], items: Item[]): Item {
|
||||
const page = items[0].page;
|
||||
const line = items[0].data['line'];
|
||||
const str = items.map((item) => item.data['str']).join(' ');
|
||||
@ -18,6 +18,7 @@ export default class LineItemMerger extends ItemMerger {
|
||||
const height = Math.max(...items.map((item) => item.data['height']));
|
||||
const fontNames = [...new Set(items.map((item) => item.data['fontName']))];
|
||||
const directions = [...new Set(items.map((item) => item.data['dir']))];
|
||||
|
||||
const newItem = new Item(page, {
|
||||
str,
|
||||
line,
|
||||
@ -29,6 +30,13 @@ export default class LineItemMerger extends ItemMerger {
|
||||
dir: directions,
|
||||
});
|
||||
|
||||
if (schema.includes('type')) {
|
||||
const type = [...new Set(items.filter((item) => item.data['type']).map((item) => item.data['type']))];
|
||||
if (type.length > 0) {
|
||||
newItem.data['type'] = type;
|
||||
}
|
||||
}
|
||||
|
||||
const evaluatedItem = items.find((item) => evaluationTracker.evaluated(item));
|
||||
if (evaluatedItem) evaluationTracker.trackEvaluation(newItem, evaluationTracker.evaluationScore(evaluatedItem));
|
||||
|
||||
|
@ -13,6 +13,7 @@ export default interface Page {
|
||||
export function asPages(
|
||||
evaluationTracker: EvaluationTracker,
|
||||
changeTracker: ChangeTracker,
|
||||
schema: string[],
|
||||
items: Item[],
|
||||
itemMerger?: ItemMerger,
|
||||
): Page[] {
|
||||
@ -21,7 +22,7 @@ export function asPages(
|
||||
if (itemMerger) {
|
||||
itemGroups = groupByElement(pageItems, itemMerger.groupKey).map((groupItems) => {
|
||||
if (groupItems.length > 1) {
|
||||
const top = itemMerger.merge(evaluationTracker, changeTracker, groupItems);
|
||||
const top = itemMerger.merge(evaluationTracker, changeTracker, schema, groupItems);
|
||||
return new ItemGroup(top, groupItems);
|
||||
} else {
|
||||
return new ItemGroup(groupItems[0]);
|
||||
|
@ -1,4 +1,5 @@
|
||||
import TransformDescriptor, { toDescriptor } from '../TransformDescriptor';
|
||||
import { PARSE_SCHEMA } from '../PdfParser';
|
||||
import AnnotatedColumn from './AnnotatedColumn';
|
||||
import Item from '../Item';
|
||||
import Page, { asPages } from './Page';
|
||||
@ -82,7 +83,7 @@ export function initialStage(inputSchema: string[], inputItems: Item[]): StageRe
|
||||
const schema = inputSchema.map((column) => ({ name: column }));
|
||||
const evaluations = new EvaluationTracker();
|
||||
const changes = new ChangeTracker();
|
||||
const pages = asPages(evaluations, changes, inputItems);
|
||||
const pages = asPages(evaluations, changes, PARSE_SCHEMA, inputItems);
|
||||
const messages = [
|
||||
`Parsed ${inputItems.length === 0 ? 0 : inputItems[inputItems.length - 1].page + 1} pages with ${
|
||||
inputItems.length
|
||||
|
@ -31,21 +31,32 @@ function detectPageChanges(tracker: ChangeTracker, inputItems: Item[], outputIte
|
||||
let outputIndex = 0;
|
||||
for (let inputIdx = 0; inputIdx < inputItems.length; inputIdx++) {
|
||||
const inputItem = inputItems[inputIdx];
|
||||
|
||||
// In case the input item has already been added from the outputs items array
|
||||
if (addedItems.has(inputItem.uuid)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const positionInOutput = outputItems.findIndex((item) => item.uuid === inputItem.uuid);
|
||||
if (positionInOutput < 0) {
|
||||
// Input doesn't exist in the output anymore
|
||||
tracker.trackRemoval(inputItem);
|
||||
mergedItems.push(inputItem);
|
||||
addedItems.add(inputItem.uuid);
|
||||
removals++;
|
||||
} else if (positionInOutput === inputIdx + additions - removals) {
|
||||
// Input is in output with no positional change
|
||||
mergedItems.push(outputItems[positionInOutput]);
|
||||
addedItems.add(outputItems[positionInOutput].uuid);
|
||||
outputIndex++;
|
||||
//TODO check for content change ?
|
||||
// But with type change (TODO generalize ?)
|
||||
const typeInInput = inputItem.data['type'];
|
||||
const typeInOutput = outputItems[positionInOutput].data['type'];
|
||||
if (typeInInput !== typeInOutput) {
|
||||
tracker.trackContentChange(inputItem);
|
||||
}
|
||||
} else {
|
||||
// Handle items from the output which aren't in the input array
|
||||
for (let intermediateOutputIdx = outputIndex; intermediateOutputIdx < positionInOutput; intermediateOutputIdx++) {
|
||||
const outputItem = outputItems[intermediateOutputIdx];
|
||||
const positionInInput = inputItems.findIndex((item) => item.uuid === outputItem.uuid);
|
||||
|
@ -11,6 +11,7 @@ import CalculateStatistics from './transformer/CacluclateStatistics';
|
||||
import CompactLines from './transformer/CompactLines';
|
||||
import SortXWithinLines from './transformer/SortXWithinLines';
|
||||
import RemoveRepetitiveItems from './transformer/RemoveRepetitiveItems';
|
||||
import TocDetection from './transformer/TocDetection';
|
||||
import NoOpTransformer from './transformer/NoOpTransformer';
|
||||
|
||||
export const transformers = [
|
||||
@ -21,6 +22,7 @@ export const transformers = [
|
||||
new CompactLines(),
|
||||
new SortXWithinLines(),
|
||||
new RemoveRepetitiveItems(),
|
||||
new TocDetection(),
|
||||
new NoOpTransformer(),
|
||||
];
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
import { assert } from '../assert';
|
||||
|
||||
const MIN_DIGIT_CHAR_CODE = 48;
|
||||
const MAX_DIGIT_CHAR_CODE = 57;
|
||||
|
||||
@ -20,3 +22,12 @@ export function filterOutDigits(text: string): string {
|
||||
/**
 * Collects every run of consecutive digits found in `text`, in order of appearance, converted to numbers.
 *
 * @param text the string to scan
 * @returns the numbers found, or an empty array if `text` contains no digit
 */
export function extractNumbers(text: string): number[] {
  const digitRuns = text.match(/\d+/g);
  if (digitRuns === null) {
    return [];
  }
  return digitRuns.map((digits) => Number(digits));
}
|
||||
|
||||
/**
 * Returns the number formed by the run of digits at the very end of `text`.
 *
 * @param text the string to inspect
 * @returns the trailing number, or `undefined` if `text` does not end with a digit
 *          (trailing whitespace after the digits also yields `undefined`)
 */
export function extractEndingNumber(text: string): number | undefined {
  // No global flag: the `$` anchor allows at most one match anyway.
  const match = text.match(/\d+$/);
  if (match === null) {
    return undefined;
  }
  // Sanity check only; the pattern has no capture groups, so the result holds just the matched digits.
  assert(match.length === 1, `Expected only one match, but got ${match}`);
  return Number(match[0]);
}
|
||||
|
101
core/src/transformer/TocDetection.ts
Normal file
101
core/src/transformer/TocDetection.ts
Normal file
@ -0,0 +1,101 @@
|
||||
import Item from '../Item';
|
||||
import ItemResult from '../ItemResult';
|
||||
import ItemTransformer from './ItemTransformer';
|
||||
import TransformContext from './TransformContext';
|
||||
import LineItemMerger from '../debug/LineItemMerger';
|
||||
import { groupByLine, groupByPage } from '../support/groupingUtils';
|
||||
import { PAGE_MAPPING } from './CacluclateStatistics';
|
||||
import { extractEndingNumber } from '../support/stringFunctions';
|
||||
import ElementType from '../ElementType';
|
||||
|
||||
// Tuning parameters of the TOC detection.
const config = {
|
||||
// Number of lines without a valid trailing number which are tolerated inside a run of TOC candidate lines.
maxSkips: 1,
|
||||
};
|
||||
/**
 * Tags the items of lines which look like table-of-contents entries with `type: ElementType.TOC`.
 *
 * A line is a TOC candidate when its concatenated text ends with a number which is greater than zero and not
 * greater than the highest page number that can be linked to. Consecutive candidates of a page form a run;
 * `memorizeLineItemsIfValid` decides whether the items of a run get tagged.
 */
export default class TocDetection extends ItemTransformer {
  constructor() {
    super(
      'TOC Detection',
      'Detect table of contents.',
      {
        requireColumns: ['x', 'y', 'str', 'line'],
        debug: {
          itemMerger: new LineItemMerger(),
        },
      },
      // The outgoing schema is the incoming one with a 'type' column inserted right before 'x'.
      (incomingSchema) =>
        incomingSchema.reduce(
          (schema, column) => (column === 'x' ? [...schema, 'type', 'x'] : [...schema, column]),
          new Array<string>(),
        ),
    );
  }

  /**
   * Searches the leading pages of the document for runs of TOC lines.
   *
   * TODO produces global TOC with pages and min/max y coordinates ?
   * Or first block producer ?
   *
   * @param context provides the page count and the PAGE_MAPPING global
   * @param inputItems all items of the document
   * @returns the input items, where items of detected TOC lines carry the additional `type` data; no messages
   */
  transform(context: TransformContext, inputItems: Item[]): ItemResult {
    const pageMapping = context.getGlobal(PAGE_MAPPING);

    // Only items up to this page index are evaluated.
    const maxPageToEvaluate = Math.min(context.pageCount / 2, 5 + Math.abs(pageMapping.pageFactor));
    const pagesToEvaluate = groupByPage(inputItems.filter((item) => item.page <= maxPageToEvaluate));

    // A trailing number above this value is not accepted as a page reference.
    const maxPageToBeLinkedTo = context.pageCount + pageMapping.pageFactor - 1;
    const tocLineUuids = new Set<string>();
    pagesToEvaluate.forEach((pageItems) => {
      let potentialLines: Item[][] = [];
      let skips = 0;
      // NOTE(review): unlike `potentialLines`, `numbers` is never reset within a page (the reset was commented
      // out in the original), so numbers of an earlier run take part in the validation of later runs - confirm
      // that this is intended.
      const numbers: number[] = [];
      let skippedLine: Item[] | undefined;
      groupByLine(pageItems).forEach((lineItems) => {
        const lineText = lineItems.reduce((joined, item) => joined + item.data['str'], '');
        const number = extractEndingNumber(lineText);
        if (number && Number.isInteger(number) && number <= maxPageToBeLinkedTo) {
          if (skippedLine) {
            // The line held back turned out to sit between two candidates, so it becomes part of the run.
            potentialLines.push(skippedLine);
            skippedLine = undefined;
            skips = 0;
          }
          potentialLines.push(lineItems);
          numbers.push(number);
        } else if (potentialLines.length > 0) {
          if (skips < config.maxSkips) {
            // Hold the line back until it is clear whether the run continues.
            skips++;
            skippedLine = lineItems;
          } else {
            // Too many lines without a number: the run ends here.
            memorizeLineItemsIfValid(tocLineUuids, potentialLines, numbers);
            potentialLines = [];
            skips = 0;
            skippedLine = undefined;
          }
        }
      });
      // Submit the run which is still open at the end of the page.
      memorizeLineItemsIfValid(tocLineUuids, potentialLines, numbers);
    });

    return {
      items: inputItems.map((item) =>
        tocLineUuids.has(item.uuid) ? item.withDataAddition({ type: ElementType.TOC }) : item,
      ),
      messages: [],
    };
  }
}
|
||||
|
||||
/**
 * Adds the uuids of all items of `potentialLines` to `memorizedUuids`, provided the lines qualify as a TOC run.
 *
 * The run qualifies when it consists of at least 3 lines, the first of `numbers` is greater than zero and no
 * number is smaller than its predecessor. Otherwise `memorizedUuids` is left untouched.
 *
 * @param memorizedUuids the set to add the item uuids to
 * @param potentialLines the candidate lines, each given as the items of that line
 * @param numbers the numbers which get checked for ascending order
 */
function memorizeLineItemsIfValid(memorizedUuids: Set<string>, potentialLines: Item[][], numbers: number[]) {
  const minLineCount = 3;
  if (potentialLines.length >= minLineCount) {
    let ascending = true;
    for (let i = 0; i < numbers.length && ascending; i++) {
      ascending = i === 0 ? numbers[i] > 0 : numbers[i] >= numbers[i - 1];
    }
    if (ascending) {
      for (const lineItems of potentialLines) {
        for (const item of lineItems) {
          memorizedUuids.add(item.uuid);
        }
      }
    }
  }
}
|
@ -1,4 +1,4 @@
|
||||
import { filterOutDigits, extractNumbers } from 'src/support/stringFunctions';
|
||||
import { filterOutDigits, extractNumbers, extractEndingNumber } from 'src/support/stringFunctions';
|
||||
|
||||
test('filterOutDigits', async () => {
|
||||
expect(filterOutDigits('')).toEqual('');
|
||||
@ -12,3 +12,11 @@ test('extractNumbers', async () => {
|
||||
expect(extractNumbers('a1b 2c 3')).toEqual([1, 2, 3]);
|
||||
expect(extractNumbers('a12 21 304')).toEqual([12, 21, 304]);
|
||||
});
|
||||
|
||||
test('extractEndingNumbers', async () => {
  // Inputs which do not end with a digit yield no number.
  ['', 'a b c'].forEach((text) => {
    expect(extractEndingNumber(text)).toBeUndefined();
  });

  // Only the trailing run of digits is returned; numbers earlier in the text are ignored.
  const expectedByInput: Array<[string, number]> = [
    ['a1b 2c 3', 3],
    ['a12 21 304', 304],
    ['abc ... 304', 304],
  ];
  expectedByInput.forEach(([text, expected]) => {
    expect(extractEndingNumber(text)).toEqual(expected);
  });
});
|
||||
|
59
examples/Adventures-Of-Sherlock-Holmes/tOCDetection.json
Normal file
59
examples/Adventures-Of-Sherlock-Holmes/tOCDetection.json
Normal file
@ -0,0 +1,59 @@
|
||||
{
|
||||
"pages": 200,
|
||||
"items": 8461,
|
||||
"groupedItems": 8320,
|
||||
"changes": 36,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 59.7758,
|
||||
"minX": 117.8279999999999,
|
||||
"maxX": 471.0319307,
|
||||
"minY": 95.28300000000016,
|
||||
"maxY": 736.017,
|
||||
"pageMapping": {
|
||||
"pageFactor": -1,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
||||
{"page":3,"change":"ContentChange","str":"A Scandal In Bohemia 3","line":1,"x":117.828,"y":561.248,"width":"110.02","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"The Red-Headed League 21","line":2,"x":117.828,"y":536.7900000000001,"width":"126.67","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"A Case Of Identity 38","line":3,"x":117.828,"y":512.3320000000001,"width":"98.48","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"The Boscombe Valley Mystery 51","line":4,"x":117.828,"y":487.87400000000014,"width":"152.01","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"The Five Orange Pips 69","line":5,"x":117.828,"y":463.41500000000013,"width":"112.21","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"The Man With The Twisted Lip 83","line":6,"x":117.828,"y":438.9570000000001,"width":"158.72","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"The Adventure Of The Blue Carbuncle 100","line":7,"x":117.828,"y":414.49900000000014,"width":"197.97","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"The Adventure Of The Speckled Band 115","line":8,"x":117.828,"y":390.04000000000013,"width":"194.56","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"The Adventure Of The Engineer’s Thumb 133","line":9,"x":117.828,"y":365.5820000000001,"width":"212.33","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"The Adventure Of The Noble Bachelor 148","line":10,"x":117.828,"y":341.12400000000014,"width":"196.96","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"The Adventure Of The Beryl Coronet 164","line":11,"x":117.828,"y":316.66600000000017,"width":"191.30","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":3,"change":"ContentChange","str":"The Adventure Of The Copper Beeches 182","line":12,"x":117.828,"y":292.20700000000016,"width":"199.99","height":"10.91","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"],"type":["TOC"]}
|
60
examples/Alice-In-Wonderland/tOCDetection.json
Normal file
60
examples/Alice-In-Wonderland/tOCDetection.json
Normal file
@ -0,0 +1,60 @@
|
||||
{
|
||||
"pages": 76,
|
||||
"items": 3071,
|
||||
"groupedItems": 2543,
|
||||
"changes": 40,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 24.787,
|
||||
"minX": 102.88399999999984,
|
||||
"maxX": 488.43800000000005,
|
||||
"minY": 95.545,
|
||||
"maxY": 735.021,
|
||||
"pageMapping": {
|
||||
"pageFactor": 0,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
||||
{"page":1,"change":"ContentChange","str":"Poem. All in the golden afternoon . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3","line":1,"x":102.884,"y":557.313,"width":"381.07","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"I Down the Rabbit-Hole . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4","line":2,"x":102.884,"y":530.912,"width":"353.22","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"II The Pool of Tears . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9","line":3,"x":102.884,"y":504.511,"width":"380.55","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"III A Caucus-Race and a Long Tale . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14","line":4,"x":102.884,"y":478.11,"width":"381.15","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"IV The Rabbit Sends in a Little Bill . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 19","line":5,"x":102.884,"y":451.709,"width":"378.87","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"V Advice from a Caterpillar . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25","line":6,"x":102.884,"y":425.308,"width":"381.15","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"VI Pig and Pepper . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 32","line":7,"x":102.884,"y":398.907,"width":"379.43","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"VII A Mad Tea-Party . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39","line":8,"x":102.884,"y":372.506,"width":"379.30","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"VIII The Queen’s Croquet-Ground . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 46","line":9,"x":102.884,"y":346.105,"width":"380.42","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"IX The Mock Turtle’s Story . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53","line":10,"x":102.884,"y":319.704,"width":"381.20","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"X The Lobster Quadrille . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59","line":11,"x":102.884,"y":293.303,"width":"380.58","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"XI Who Stole the Tarts? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 65","line":12,"x":102.884,"y":266.902,"width":"380.24","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"XII Alice’s Evidence . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 70","line":13,"x":102.884,"y":240.501,"width":"381.27","height":"11.96","fontName":["FZVLIH+NimbusRomNo9L-Regu"],"dir":["ltr"],"type":["TOC"]}
|
53
examples/Closed-Syllables/tOCDetection.json
Normal file
53
examples/Closed-Syllables/tOCDetection.json
Normal file
@ -0,0 +1,53 @@
|
||||
{
|
||||
"pages": 19,
|
||||
"items": 1408,
|
||||
"groupedItems": 1177,
|
||||
"changes": 49,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 18,
|
||||
"minX": 72.024,
|
||||
"maxX": 534.58,
|
||||
"minY": 63.144,
|
||||
"maxY": 745.56,
|
||||
"pageMapping": {
|
||||
"pageFactor": 1,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
||||
{"page":1,"change":"ContentChange","str":"“short a” ................................ ................................ ................................ ................................ ......... 3","line":8,"x":74.544,"y":598.66,"width":"424.05","height":"11.04","fontName":["ABCDEE+Calibri","ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"“short I” ................................ ................................ ................................ ................................ ............ 8","line":9,"x":72.024,"y":576.22,"width":"429.97","height":"11.04","fontName":["ABCDEE+Calibri","ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"“short o” ................................ ................................ ................................ ................................ ......... 10","line":10,"x":72.024,"y":553.78,"width":"430.36","height":"11.04","fontName":["ABCDEE+Calibri","ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"“short u” ................................ ................................ ................................ ................................ ......... 1 2","line":11,"x":72.024,"y":531.19,"width":"430.27","height":"11.04","fontName":["ABCDEE+Calibri","ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"E ................................ ................................ ................................ ................................ ..................... 14","line":12,"x":72.024,"y":508.75,"width":"427.90","height":"11.04","fontName":["ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":1,"change":"ContentChange","str":"Y ................................ ................................ ................................ ................................ ..................... 16","line":13,"x":72.024,"y":486.19,"width":"427.89","height":"11.04","fontName":["ABCDEE+Calibri"],"dir":["ltr"],"type":["TOC"]}
|
47
examples/ExamplePdf/tOCDetection.json
Normal file
47
examples/ExamplePdf/tOCDetection.json
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
"pages": 6,
|
||||
"items": 268,
|
||||
"groupedItems": 115,
|
||||
"changes": 0,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 30,
|
||||
"minX": 56.69069,
|
||||
"maxX": 507.3787,
|
||||
"minY": 45,
|
||||
"maxY": 772,
|
||||
"pageMapping": {
|
||||
"pageFactor": 1,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
47
examples/Flash-Masques-Temperature/tOCDetection.json
Normal file
47
examples/Flash-Masques-Temperature/tOCDetection.json
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
"pages": 4,
|
||||
"items": 134,
|
||||
"groupedItems": 108,
|
||||
"changes": 0,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 29,
|
||||
"minX": 37.1206,
|
||||
"maxX": 542.2816,
|
||||
"minY": 36.1763,
|
||||
"maxY": 811.1348,
|
||||
"pageMapping": {
|
||||
"pageFactor": 1,
|
||||
"detectedOnPage": false
|
||||
}
|
||||
}
|
||||
}
|
47
examples/Grammar-Matters/tOCDetection.json
Normal file
47
examples/Grammar-Matters/tOCDetection.json
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
"pages": 116,
|
||||
"items": 7676,
|
||||
"groupedItems": 3479,
|
||||
"changes": 0,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 48,
|
||||
"minX": 62.03970999999996,
|
||||
"maxX": 536.37986,
|
||||
"minY": 22.6801,
|
||||
"maxY": 709.8000000000001,
|
||||
"pageMapping": {
|
||||
"pageFactor": 1,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
107
examples/Life-Of-God-In-Soul-Of-Man/tOCDetection.json
Normal file
107
examples/Life-Of-God-In-Soul-Of-Man/tOCDetection.json
Normal file
@ -0,0 +1,107 @@
|
||||
{
|
||||
"pages": 140,
|
||||
"items": 25314,
|
||||
"groupedItems": 3168,
|
||||
"changes": 473,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 45.974399999999996,
|
||||
"minX": 26.29161,
|
||||
"maxX": 273.69135,
|
||||
"minY": 15.08535,
|
||||
"maxY": 432.30303,
|
||||
"pageMapping": {
|
||||
"pageFactor": -17,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
||||
{"page":14,"change":"ContentChange","str":"The Occasion of this Discourse 3","line":1,"x":29.88336,"y":258.74967,"width":"105.17","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"Mistakes about Religion 4","line":2,"x":30.1707,"y":247.25607,"width":"86.20","height":"6.66","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"What Religion is 6","line":3,"x":30.1707,"y":236.91182999999998,"width":"59.76","height":"8.32","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"The Permanency and Stability of Religion .... 7","line":4,"x":29.73969,"y":223.55051999999998,"width":"176.29","height":"13.17","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"The P'reedom and Unconstrainedness of Religion . . S","line":5,"x":29.88336,"y":213.20628,"width":"170.83","height":"7.74","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"Religion a Divine Principle 13","line":6,"x":30.1707,"y":200.85065999999998,"width":"97.40","height":"6.74","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"What the Natural Life is 14","line":7,"x":30.314369999999997,"y":189.21338999999998,"width":"84.48","height":"8.26","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"The different Tendencies of the Natural Life .... 15","line":8,"x":30.02703,"y":177.57612,"width":"182.75","height":"13.29","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"Wherein the Divine Life doth consist 20","line":9,"x":29.88336,"y":166.51353,"width":"128.30","height":"7.39","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"Religion better understood by Actions than by","line":10,"x":30.458039999999997,"y":154.58892,"width":"151.43","height":"7.06","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"Words 24","line":11,"x":53.58891,"y":141.80229,"width":"31.18","height":"7.90","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"Divine Love exemplified in our Saviour 26","line":12,"x":30.02703,"y":130.7397,"width":"136.77","height":"7.48","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"Our Saviour's Constant Devotion 28","line":13,"x":30.458039999999997,"y":118.81509,"width":"120.54","height":"7.74","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"Our Saviour's Charity to Men 29","line":14,"x":30.458039999999997,"y":106.3158,"width":"103.73","height":"8.54","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"Our Saviour's Purity 31","line":15,"x":30.458039999999997,"y":94.67853,"width":"77.01","height":"7.74","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"A Our Prayer Saviour's Humility 34 37","line":16,"x":30.314369999999997,"y":69.96728999999999,"width":"125.28","height":"10.06","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"The Excellency and Advantage of Religion .... 38","line":17,"x":30.314369999999997,"y":59.04837,"width":"183.03","height":"13.35","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"The Excellency of Divine Love 39","line":18,"x":30.314369999999997,"y":46.118069999999996,"width":"110.77","height":"7.82","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":14,"change":"ContentChange","str":"The Advantages of Divine Love 44","line":19,"x":30.314369999999997,"y":34.337129999999995,"width":"114.08","height":"7.82","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Worth of the Object 45","line":1,"x":65.36985,"y":378.85778999999997,"width":"85.48","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Certainty to be Beloved Again 46","line":2,"x":65.22618,"y":367.65153,"width":"119.39","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Presence of the Beloved Person 48","line":3,"x":65.22618,"y":355.58324999999996,"width":"122.55","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Divine Love makes us partake of an Infinite","line":4,"x":65.22618,"y":344.95167,"width":"153.88","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"Happiness 49","line":5,"x":89.0754,"y":330.87201,"width":"44.68","height":"6.89","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"He that loveth God finds Sweetness in every Dis-","line":6,"x":65.65719,"y":320.95878,"width":"154.30","height":"8.06","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"pensation 51","line":7,"x":88.78806,"y":308.31582,"width":"41.23","height":"6.38","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Duties of Religion are Delightful to Him ... 52","line":8,"x":65.22618,"y":296.67855,"width":"172.98","height":"12.21","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Excellency of Charity 54","line":9,"x":65.22618,"y":283.89191999999997,"width":"95.10","height":"7.74","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Pleasure that attends Charity 56","line":10,"x":65.08251,"y":272.973,"width":"118.23","height":"7.74","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Excellency of Purity 58","line":11,"x":65.22618,"y":261.04839,"width":"89.93","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Delight afforded by Purity 59","line":12,"x":65.08251,"y":248.40543,"width":"108.33","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Excellency of Humility 60","line":13,"x":65.36985,"y":238.20486,"width":"100.28","height":"7.66","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Pleasure and Sweetness of an Humble Temper . 62","line":14,"x":65.36985,"y":226.42391999999998,"width":"173.27","height":"7.90","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"A Prayer 65","line":15,"x":65.36985,"y":213.92462999999998,"width":"36.78","height":"9.82","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Despondent Thoughts of some Newly Awakened","line":16,"x":65.22618,"y":203.00571,"width":"177.29","height":"7.76","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"to a Right Sense of Things 66","line":17,"x":88.93173,"y":191.22476999999998,"width":"91.52","height":"7.18","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"The Unreasonableness of these Fears 69","line":18,"x":65.22618,"y":178.58181,"width":"130.74","height":"7.58","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"We must do what we can, and depend on the Divine","line":19,"x":65.36985,"y":167.66289,"width":"158.03","height":"9.58","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"Assistance 74","line":20,"x":88.50072,"y":154.30158,"width":"45.26","height":"6.32","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"We We must must know shun all what Manner Things of are Sin Sinful So 78","line":21,"x":65.22618,"y":132.32007,"width":"238.06","height":"9.58","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"We must resist the Temptations of Sin. by consider-","line":22,"x":65.22618,"y":120.6828,"width":"166.94","height":"9.46","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"ing the Evils they will draw on us 82","line":23,"x":88.78806,"y":108.75819,"width":"108.61","height":"7.36","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"We We must must keep often a examine Constant our Watch Actions over Ourselves . . 87 89","line":24,"x":64.93884,"y":84.19062,"width":"287.91","height":"9.82","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"It is fit to restrain Ourselves in Many Lawful Things . 91","line":25,"x":64.93884,"y":72.40968,"width":"172.11","height":"8.56","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"\\\\'e must strive to put Ourselves out of Love with the","line":26,"x":65.22618,"y":61.634429999999995,"width":"163.21","height":"7.48","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":15,"change":"ContentChange","str":"World 93","line":27,"x":88.35705,"y":47.98578,"width":"30.03","height":"7.57","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"lency of the Divine Nature 104","line":6,"x":60.62873999999999,"y":300.41397,"width":"96.97","height":"6.94","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"We should meditate on God's Goodness and Love . 108","line":7,"x":36.92319,"y":289.35138,"width":"172.41","height":"9.94","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"To beget Charity, we must remember that all Men","line":8,"x":36.92319,"y":277.2831,"width":"160.76","height":"8.54","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"are nearly related unto God 113","line":9,"x":60.772409999999994,"y":265.07115,"width":"98.70","height":"8.14","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"That they carry God's Image upon them 114","line":10,"x":36.92319,"y":253.00286999999997,"width":"140.80","height":"7.47","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"To beget Purity, we should consider the Dignity of","line":11,"x":37.21053,"y":241.94027999999997,"width":"162.92","height":"8.14","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"our Nature 116","line":12,"x":60.916079999999994,"y":230.15934,"width":"47.99","height":"6.98","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"We should meditate often on the Joys of Heaven . 117","line":13,"x":37.21053,"y":217.94738999999998,"width":"164.64","height":"9.82","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"Humility arises from the Consideration of our Fail-","line":14,"x":38.21622,"y":206.74113,"width":"167.95","height":"6.88","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"ings 118","line":15,"x":61.203419999999994,"y":194.67284999999998,"width":"25.72","height":"6.17","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"Thoughts of God give us the Lowest Thoughts of","line":16,"x":37.21053,"y":182.60457,"width":"158.61","height":"8.22","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"Ourselves 120","line":17,"x":61.490759999999995,"y":170.9673,"width":"46.55","height":"6.55","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"Prayer, another Instrument of Religion, and the","line":18,"x":37.64154,"y":159.04269,"width":"159.47","height":"7.02","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":16,"change":"ContentChange","str":"Advantages of Mental Prayer 121","line":19,"x":60.772409999999994,"y":147.26174999999998,"width":"109.90","height":"7.16","fontName":[null],"dir":["ltr"],"type":["TOC"]}
|
47
examples/Safe-Communication/tOCDetection.json
Normal file
47
examples/Safe-Communication/tOCDetection.json
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
"pages": 60,
|
||||
"items": 3990,
|
||||
"groupedItems": 1428,
|
||||
"changes": 0,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 36,
|
||||
"minX": 53.88,
|
||||
"maxX": 797.38,
|
||||
"minY": 23.04,
|
||||
"maxY": 528.34,
|
||||
"pageMapping": {
|
||||
"pageFactor": 1,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
62
examples/St-Mary-Witney-Social-Audit/tOCDetection.json
Normal file
62
examples/St-Mary-Witney-Social-Audit/tOCDetection.json
Normal file
@ -0,0 +1,62 @@
|
||||
{
|
||||
"pages": 27,
|
||||
"items": 1874,
|
||||
"groupedItems": 1520,
|
||||
"changes": 45,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 36,
|
||||
"minX": 6.487999999999971,
|
||||
"maxX": 815.833,
|
||||
"minY": 16.345999999999947,
|
||||
"maxY": 563.346,
|
||||
"pageMapping": {
|
||||
"pageFactor": 1,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
||||
{"page":2,"change":"ContentChange","str":"Introduction 5","line":0,"x":452.032,"y":384.497,"width":"81.35","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"History 7","line":1,"x":452.032,"y":365.60900000000004,"width":"45.93","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Setting and context 10","line":2,"x":452.032,"y":346.72100000000006,"width":"123.36","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Witney’s people 12","line":3,"x":452.032,"y":327.8330000000001,"width":"94.89","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Health 14","line":4,"x":452.032,"y":308.9450000000001,"width":"51.37","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Witney at work 15","line":5,"x":452.032,"y":290.05700000000013,"width":"93.06","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Housing 16","line":6,"x":452.032,"y":271.16900000000015,"width":"55.11","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Crime and safety 17","line":7,"x":452.032,"y":252.28100000000015,"width":"99.69","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Education 18","line":8,"x":452.032,"y":233.39300000000014,"width":"63.14","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Transport 20","line":9,"x":452.032,"y":214.50500000000014,"width":"64.49","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Lifestyle and deprivation 21","line":10,"x":452.032,"y":195.61700000000013,"width":"133.67","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Conclusions 23","line":11,"x":452.032,"y":176.72900000000013,"width":"87.37","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"What St Mary’s offers 25","line":12,"x":452.032,"y":157.84100000000012,"width":"123.37","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Recommendations 27","line":13,"x":452.032,"y":138.95300000000012,"width":"105.64","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
||||
{"page":2,"change":"ContentChange","str":"Further information and links 28","line":14,"x":452.032,"y":120.06500000000011,"width":"159.37","height":"12.00","fontName":["Gill Sans MT"],"dir":["ltr"],"type":["TOC"]}
|
47
examples/The-Art-of-Public-Speaking/tOCDetection.json
Normal file
47
examples/The-Art-of-Public-Speaking/tOCDetection.json
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
"pages": 466,
|
||||
"items": 772193,
|
||||
"groupedItems": 15227,
|
||||
"changes": 0,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 28.799999999999997,
|
||||
"minX": 72,
|
||||
"maxX": 537.4124748000004,
|
||||
"minY": 75.60000000000002,
|
||||
"maxY": 712.8,
|
||||
"pageMapping": {
|
||||
"pageFactor": 1,
|
||||
"detectedOnPage": false
|
||||
}
|
||||
}
|
||||
}
|
47
examples/The-Man-Without-A-Body/tOCDetection.json
Normal file
47
examples/The-Man-Without-A-Body/tOCDetection.json
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
"pages": 4,
|
||||
"items": 522,
|
||||
"groupedItems": 378,
|
||||
"changes": 0,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 11,
|
||||
"minX": 72.025,
|
||||
"maxX": 536.73,
|
||||
"minY": 75.025,
|
||||
"maxY": 747.22,
|
||||
"pageMapping": {
|
||||
"pageFactor": 1,
|
||||
"detectedOnPage": false
|
||||
}
|
||||
}
|
||||
}
|
47
examples/The-War-of-the-Worlds/tOCDetection.json
Normal file
47
examples/The-War-of-the-Worlds/tOCDetection.json
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
"pages": 293,
|
||||
"items": 9255,
|
||||
"groupedItems": 6520,
|
||||
"changes": 0,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 24,
|
||||
"minX": 57.59999999999991,
|
||||
"maxX": 312.78,
|
||||
"minY": 44.76,
|
||||
"maxY": 515.8338448603599,
|
||||
"pageMapping": {
|
||||
"pageFactor": 1,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
47
examples/Tragedy-Of-The-Commons/tOCDetection.json
Normal file
47
examples/Tragedy-Of-The-Commons/tOCDetection.json
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
"pages": 7,
|
||||
"items": 6779,
|
||||
"groupedItems": 154,
|
||||
"changes": 0,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 22.5,
|
||||
"minX": 13.799999999999926,
|
||||
"maxX": 550.2000000000003,
|
||||
"minY": 1.4400099999998357,
|
||||
"maxY": 751.50001,
|
||||
"pageMapping": {
|
||||
"pageFactor": 1243,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
47
examples/WoodUp/tOCDetection.json
Normal file
47
examples/WoodUp/tOCDetection.json
Normal file
@ -0,0 +1,47 @@
|
||||
{
|
||||
"pages": 256,
|
||||
"items": 20146,
|
||||
"groupedItems": 7203,
|
||||
"changes": 0,
|
||||
"schema": [
|
||||
{
|
||||
"name": "line"
|
||||
},
|
||||
{
|
||||
"name": "type",
|
||||
"annotation": "ADDED"
|
||||
},
|
||||
{
|
||||
"name": "x"
|
||||
},
|
||||
{
|
||||
"name": "y"
|
||||
},
|
||||
{
|
||||
"name": "width"
|
||||
},
|
||||
{
|
||||
"name": "height"
|
||||
},
|
||||
{
|
||||
"name": "str"
|
||||
},
|
||||
{
|
||||
"name": "fontName"
|
||||
},
|
||||
{
|
||||
"name": "dir"
|
||||
}
|
||||
],
|
||||
"globals": {
|
||||
"maxHeight": 64,
|
||||
"minX": 46.323,
|
||||
"maxX": 436.5,
|
||||
"minY": 37.73867999999993,
|
||||
"maxY": 610.5599,
|
||||
"pageMapping": {
|
||||
"pageFactor": -6,
|
||||
"detectedOnPage": true
|
||||
}
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user