Improve header detection

- fix tests
- still run header detection based on heights even if TOC headlines have been identified
This commit is contained in:
Johannes Zillmann 2024-03-28 11:39:34 -06:00
parent 0dc47329ef
commit 55ae236928
44 changed files with 214 additions and 137 deletions

View File

@ -18,6 +18,7 @@ Use the [issue tracker](https://github.com/jzillmann/pdf-to-markdown/issues) and
- `npm install` Download all necessary npm packages
- `npm test` Run the tests
- `npm test -- --verbose=false './test/Files\.test\.ts' -t "Alice-In-Wonderland.pdf"` Run specific test
- `npm run test-write` Run the tests and persist possibly new changes on the example file results
- `npm run lint` Lint the javascript files
- `npm run format` Run the prettier formatter

View File

@ -29,8 +29,8 @@
"globals": {
"maxHeight": 66,
"mostUsedHeight": 8.5,
"mostUsedDistance": 9,
"mostUsedFont": "g_d1_f198",
"mostUsedDistance": 10,
"mostUsedFont": "WBIXVW+GdxfghFpvhyyRwyvdbTimesLTStd-Roman",
"minX": 41.38153078000005,
"maxX": 400.56930541,
"minY": 36.71720123,

View File

@ -55,6 +55,16 @@
46
],
"detectedHeadlineLevels": {}
},
"headlineTypeToHeightRange": {
"H2": {
"min": 8.5,
"max": 8.5
},
"H3": {
"min": 8.5,
"max": 8.5
}
}
}
}

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 59.7758,
"mostUsedHeight": 10.9091,
"mostUsedDistance": 13.55,
"mostUsedFont": "KKLGKN+NimbusRomNo9L-Regu",
"minX": 117.8279999999999,
"maxX": 471.0319307,
"minY": 95.28300000000016,

View File

@ -2,7 +2,7 @@
"pages": 199,
"items": 8436,
"groupedItems": 8308,
"changes": 55,
"changes": 53,
"schema": [
{
"name": "line"
@ -37,7 +37,7 @@
{"page":0,"change":"ContentChange","types":["H1"],"str":"SHERLOCK","dir":"ltr","width":"363.20","height":"59.78","transform":["59.78","0.00","0.00","59.78","117.83","656.26"],"fontName":"NVBKCW+RoyalInitialen","x":117.828,"y":656.262,"line":0}
{"page":0,"change":"ContentChange","types":["H1"],"str":"HOLMES","dir":"ltr","width":"275.57","height":"59.78","transform":["59.78","0.00","0.00","59.78","159.37","592.48"],"fontName":"NVBKCW+RoyalInitialen","x":159.372,"y":592.4789999999999,"line":1}
{"page":1,"change":"ContentChange","types":["H5"],"str":"S IR A RTHUR I GNATIUS","line":0,"x":196.324,"y":702.187,"width":"186.21","height":"20.66","fontName":["KKLGKN+NimbusRomNo9L-Regu"],"dir":["ltr"]}
{"page":1,"change":"ContentChange","types":["H4"],"str":"C ONAN D OYLE","line":1,"x":214.04500000000002,"y":678.946,"width":"156.34","height":"24.79","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"]}
{"page":1,"change":"ContentChange","types":["H2"],"str":"C ONAN D OYLE","line":1,"x":214.04500000000002,"y":678.946,"width":"156.34","height":"24.79","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"]}
{"page":1,"change":"ContentChange","types":["H3"],"str":"T HE A DVENTURES O F","line":3,"x":149.122,"y":483.32700000000006,"width":"273.72","height":"29.89","fontName":["INBNCB+NimbusRomNo9L-Medi"],"dir":["ltr"]}
{"page":1,"change":"ContentChange","types":["H3"],"str":"SHERLOCK HOLMES","dir":"ltr","width":"327.51","height":"29.89","transform":["29.89","0.00","0.00","29.89","133.40","433.72"],"fontName":"INBNCB+NimbusRomNo9L-Medi","x":133.40100000000007,"y":433.7180000000001,"line":4}
{"page":4,"change":"ContentChange","types":["H5"],"str":"Adventure I","dir":"ltr","width":"106.74","height":"20.66","transform":["20.66","0.00","0.00","20.66","117.83","630.54"],"fontName":"INBNCB+NimbusRomNo9L-Medi","x":117.828,"y":630.539,"line":0}

View File

@ -45,6 +45,12 @@
3
],
"detectedHeadlineLevels": {}
},
"headlineTypeToHeightRange": {
"H2": {
"min": 24.7871,
"max": 24.7871
}
}
}
}

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 24.787,
"mostUsedHeight": 11.955,
"mostUsedDistance": 14.45,
"mostUsedFont": "FZVLIH+NimbusRomNo9L-Regu",
"minX": 102.88399999999984,
"maxX": 488.43800000000005,
"minY": 95.545,

View File

@ -2,7 +2,7 @@
"pages": 75,
"items": 3043,
"groupedItems": 2561,
"changes": 30,
"changes": 28,
"schema": [
{
"name": "line"

View File

@ -45,6 +45,12 @@
1
],
"detectedHeadlineLevels": {}
},
"headlineTypeToHeightRange": {
"H2": {
"min": 24.787,
"max": 24.787
}
}
}
}

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 24,
"mostUsedHeight": 9,
"mostUsedDistance": 12,
"mostUsedFont": "QMZZIE+AGaramondPro-Regular",
"minX": 34.01229999999998,
"maxX": 380.3863,
"minY": 26.9291,

View File

@ -40,6 +40,12 @@
3
],
"detectedHeadlineLevels": {}
},
"headlineTypeToHeightRange": {
"H2": {
"min": 10,
"max": 24
}
}
}
}

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 16.02,
"mostUsedHeight": 13.02,
"mostUsedDistance": 16.32,
"mostUsedFont": "BCDHEE+SourceSansPro-Regular",
"minX": 25.86,
"maxX": 535.44,
"minY": 38.1,

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 18,
"mostUsedHeight": 11.04,
"mostUsedDistance": 22.44,
"mostUsedFont": "ABCDEE+Calibri",
"minX": 72.024,
"maxX": 534.58,
"minY": 63.144,

View File

@ -40,7 +40,8 @@
1
],
"detectedHeadlineLevels": {}
}
},
"headlineTypeToHeightRange": {}
}
}
{"page":1,"change":"Removal","str":"“short a” ................................ ................................ ................................ ................................ ......... 3","line":8,"x":74.544,"y":598.66,"width":"424.05","height":"11.04","fontName":["ABCDEE+Calibri","ABCDEE+Calibri"],"dir":["ltr"]}

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 30,
"mostUsedHeight": 11,
"mostUsedDistance": 13,
"mostUsedFont": "JBRMKS+Helvetica",
"minX": 56.69069,
"maxX": 507.3787,
"minY": 45,

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 29,
"mostUsedHeight": 11,
"mostUsedDistance": 14,
"mostUsedFont": "NRVUEW+HelveticaNeue-Light",
"minX": 37.1206,
"maxX": 542.2816,
"minY": 36.1763,

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 48,
"mostUsedHeight": 12,
"mostUsedDistance": 13.8,
"mostUsedFont": "XYXVPQ+Arial",
"minX": 62.03970999999996,
"maxX": 536.37986,
"minY": 22.6801,

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 45.974399999999996,
"mostUsedHeight": 7.7826039,
"mostUsedDistance": 0.14,
"mostUsedFont": "Courier",
"minX": 26.29161,
"maxX": 273.69135,
"minY": 15.08535,

View File

@ -2,7 +2,7 @@
"pages": 137,
"items": 24829,
"groupedItems": 3105,
"changes": 23,
"changes": 37,
"schema": [
{
"name": "line"
@ -39,5 +39,12 @@
{"page":6,"change":"ContentChange","types":["H2"],"str":"LIFE OF GOD","line":2,"x":39.22191,"y":324.40686,"width":"161.05","height":"32.33","fontName":[null],"dir":["ltr"]}
{"page":6,"change":"ContentChange","types":["H5"],"str":"SOUL OF MAN","line":3,"x":75.13941,"y":226.56759,"width":"104.45","height":"21.23","fontName":[null],"dir":["ltr"]}
{"page":18,"change":"ContentChange","types":["H6"],"str":"THE LIFE OF GOD","line":0,"x":63.07113,"y":257.16929999999996,"width":"123.13","height":"18.12","fontName":[null],"dir":["ltr"]}
{"page":24,"change":"ContentChange","types":["H2"],"str":"T CHOOSE to express it by the name oi life","line":9,"x":28.302989999999998,"y":219.38409,"width":"172.55","height":"13.01","fontName":[null],"dir":["ltr"]}
{"page":30,"change":"ContentChange","types":["H2"],"str":"T3 Y this time I hope it doth appear, that","line":2,"x":33.33144,"y":324.26319,"width":"156.17","height":"13.41","fontName":[null],"dir":["ltr"]}
{"page":48,"change":"ContentChange","types":["H3"],"str":"T","dir":"ltr","width":"18.39","height":"30.65","transform":["30.65","0.00","0.00","30.65","36.49","54.45"],"x":36.49218,"y":54.45093,"line":20}
{"page":56,"change":"ContentChange","types":["H4"],"str":"L","dir":"ltr","width":"16.95","height":"28.26","transform":["28.26","0.00","0.00","28.26","39.94","46.12"],"x":39.940259999999995,"y":46.118069999999996,"line":20}
{"page":56,"change":"ContentChange","types":["H4"],"str":"L","dir":"ltr","width":"16.95","height":"28.26","transform":["28.26","0.00","0.00","28.26","39.94","46.12"],"x":39.940259999999995,"y":46.118069999999996,"line":20}
{"page":77,"change":"ContentChange","types":["H2"],"str":"'T^HE last branch of religion is Juimility","line":14,"x":54.88194,"y":148.69844999999998,"width":"170.39","height":"11.73","fontName":[null],"dir":["ltr"]}
{"page":81,"change":"ContentChange","types":["H2"],"str":"nounce them : Not unto 21s, O Lord., not unto","line":5,"x":53.87625,"y":317.65437,"width":"173.26","height":"11.73","fontName":[null],"dir":["ltr"]}
{"page":99,"change":"ContentChange","types":["H2"],"str":"13 UT now, amongst those things which we","line":10,"x":49.42248,"y":217.08536999999998,"width":"178.87","height":"13.17","fontName":[null],"dir":["ltr"]}
{"page":121,"change":"ContentChange","types":["H2"],"str":"'HPHE serious and frequent consideration of","line":18,"x":54.5946,"y":83.47227,"width":"187.64","height":"11.73","fontName":[null],"dir":["ltr"]}
{"page":124,"change":"ContentChange","types":["H2"],"str":"for our converse or our love : He is not far","line":10,"x":45.112379999999995,"y":244.23899999999998,"width":"161.20","height":"11.73","fontName":[null],"dir":["ltr"]}

View File

@ -47,6 +47,12 @@
16
],
"detectedHeadlineLevels": {}
},
"headlineTypeToHeightRange": {
"H2": {
"min": 10.056899999999999,
"max": 11.7335289
}
}
}
}

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 190,
"mostUsedHeight": 11,
"mostUsedDistance": 14,
"mostUsedFont": "AMIDOT+OpenSans",
"minX": -97.924,
"maxX": 566.7790999999999,
"minY": 21.3071,

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 36,
"mostUsedHeight": 11.04,
"mostUsedDistance": 14.52,
"mostUsedFont": "Helvetica",
"minX": 53.88,
"maxX": 797.38,
"minY": 23.04,

View File

@ -45,6 +45,12 @@
1
],
"detectedHeadlineLevels": {}
},
"headlineTypeToHeightRange": {
"H2": {
"min": 15.96,
"max": 27.96
}
}
}
}

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 36,
"mostUsedHeight": 12,
"mostUsedDistance": 13.89,
"mostUsedFont": "Gill Sans MT",
"minX": 6.487999999999971,
"maxX": 815.833,
"minY": 16.345999999999947,

View File

@ -45,6 +45,12 @@
2
],
"detectedHeadlineLevels": {}
},
"headlineTypeToHeightRange": {
"H2": {
"min": 24,
"max": 36
}
}
}
}

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 28.799999999999997,
"mostUsedHeight": 14.399999999999999,
"mostUsedDistance": 16.56,
"mostUsedFont": "AAAAAC+LiberationSerif",
"minX": 72,
"maxX": 537.4124748000004,
"minY": 75.60000000000002,

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 36,
"mostUsedHeight": 11.04,
"mostUsedDistance": 14.4,
"mostUsedFont": "ABCDEE+Georgia",
"minX": 74.904,
"maxX": 518.5,
"minY": 99.864,

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 11,
"mostUsedHeight": 11,
"mostUsedDistance": 12.5,
"mostUsedFont": "BCDEEE+Garamond-Bold",
"minX": 72.025,
"maxX": 536.73,
"minY": 75.025,

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 24,
"mostUsedHeight": 12,
"mostUsedDistance": 16.56,
"mostUsedFont": "Times",
"minX": 57.59999999999991,
"maxX": 312.78,
"minY": 44.76,

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 22.5,
"mostUsedHeight": 11.4,
"mostUsedDistance": 0.12,
"mostUsedFont": "Courier",
"minX": 13.799999999999926,
"maxX": 550.2000000000003,
"minY": 1.4400099999998357,

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 24.7871,
"mostUsedHeight": 9.9626,
"mostUsedDistance": 13.55,
"mostUsedFont": "EFUEQI+CMR10",
"minX": 35.99999999999977,
"maxX": 1433.711,
"minY": 39.59999999999997,

View File

@ -38,21 +38,21 @@
{"page":7,"change":"ContentChange","types":["H1"],"str":"A Million Ways to Die: The Only Way to Live (2010-11-27 21:23) . . . . . . . . . . . . . . 381","line":0,"x":77.455,"y":723.441,"width":"433.00","height":"10.91","fontName":["EFUEQI+CMR10"],"dir":["ltr"]}
{"page":12,"change":"ContentChange","types":["H3"],"str":"Chapter 1","dir":"ltr","width":"102.37","height":"20.66","transform":["20.66","0.00","0.00","20.66","63.00","642.26"],"fontName":"AENRCE+CMBX12","x":63,"y":642.263,"line":0}
{"page":12,"change":"ContentChange","types":["H2"],"str":"2010","dir":"ltr","width":"55.72","height":"24.79","transform":["24.79","0.00","0.00","24.79","63.00","585.99"],"fontName":"AENRCE+CMBX12","x":63,"y":585.991,"line":1}
{"page":388,"change":"ContentChange","types":["H4"],"str":"1.10 December","dir":"ltr","width":"115.42","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","723.44"],"fontName":"AENRCE+CMBX12","x":63,"y":723.441,"line":0}
{"page":388,"change":"ContentChange","types":["H2"],"str":"1.10 December","dir":"ltr","width":"115.42","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","723.44"],"fontName":"AENRCE+CMBX12","x":63,"y":723.441,"line":0}
{"page":420,"change":"ContentChange","types":["H3"],"str":"Chapter 2","dir":"ltr","width":"102.37","height":"20.66","transform":["20.66","0.00","0.00","20.66","63.00","642.97"],"fontName":"AENRCE+CMBX12","x":63,"y":642.965,"line":0}
{"page":420,"change":"ContentChange","types":["H2"],"str":"2011","dir":"ltr","width":"55.72","height":"24.79","transform":["24.79","0.00","0.00","24.79","63.00","587.40"],"fontName":"AENRCE+CMBX12","x":63,"y":587.3960000000001,"line":1}
{"page":420,"change":"ContentChange","types":["H4"],"str":"2.1 January","dir":"ltr","width":"93.33","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","523.86"],"fontName":"AENRCE+CMBX12","x":63,"y":523.8570000000001,"line":2}
{"page":439,"change":"ContentChange","types":["H4"],"str":"2.2 February","dir":"ltr","width":"100.36","height":"14.35","transform":["14.35","0.00","0.00","14.35","36.00","723.44"],"fontName":"AENRCE+CMBX12","x":36,"y":723.441,"line":0}
{"page":455,"change":"ContentChange","types":["H4"],"str":"2.3 March","dir":"ltr","width":"82.16","height":"14.35","transform":["14.35","0.00","0.00","14.35","36.00","723.44"],"fontName":"AENRCE+CMBX12","x":36,"y":723.441,"line":0}
{"page":472,"change":"ContentChange","types":["H4"],"str":"2.4 April","dir":"ltr","width":"73.43","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","531.69"],"fontName":"AENRCE+CMBX12","x":63,"y":531.6900000000002,"line":10}
{"page":519,"change":"ContentChange","types":["H4"],"str":"2.5 May","dir":"ltr","width":"67.95","height":"14.35","transform":["14.35","0.00","0.00","14.35","36.00","723.44"],"fontName":"AENRCE+CMBX12","x":36,"y":723.441,"line":0}
{"page":538,"change":"ContentChange","types":["H4"],"str":"2.6 June","dir":"ltr","width":"70.37","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","560.83"],"fontName":"AENRCE+CMBX12","x":63,"y":560.828,"line":7}
{"page":552,"change":"ContentChange","types":["H4"],"str":"2.7 July","dir":"ltr","width":"67.04","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","127.81"],"fontName":"AENRCE+CMBX12","x":63,"y":127.80900000000014,"line":32}
{"page":563,"change":"ContentChange","types":["H4"],"str":"2.8 August","dir":"ltr","width":"87.55","height":"14.35","transform":["14.35","0.00","0.00","14.35","36.00","382.85"],"fontName":"AENRCE+CMBX12","x":36,"y":382.84500000000025,"line":18}
{"page":577,"change":"ContentChange","types":["H4"],"str":"2.9 September","dir":"ltr","width":"112.03","height":"14.35","transform":["14.35","0.00","0.00","14.35","36.00","447.09"],"fontName":"AENRCE+CMBX12","x":36,"y":447.08500000000026,"line":15}
{"page":584,"change":"ContentChange","types":["H4"],"str":"2.10 October","dir":"ltr","width":"101.78","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","723.44"],"fontName":"AENRCE+CMBX12","x":63,"y":723.441,"line":0}
{"page":600,"change":"ContentChange","types":["H4"],"str":"2.11 November","dir":"ltr","width":"116.81","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","659.39"],"fontName":"AENRCE+CMBX12","x":63,"y":659.388,"line":2}
{"page":618,"change":"ContentChange","types":["H4"],"str":"2.12 December","dir":"ltr","width":"115.42","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","428.39"],"fontName":"AENRCE+CMBX12","x":63,"y":428.3919999999999,"line":18}
{"page":420,"change":"ContentChange","types":["H2"],"str":"2.1 January","dir":"ltr","width":"93.33","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","523.86"],"fontName":"AENRCE+CMBX12","x":63,"y":523.8570000000001,"line":2}
{"page":439,"change":"ContentChange","types":["H2"],"str":"2.2 February","dir":"ltr","width":"100.36","height":"14.35","transform":["14.35","0.00","0.00","14.35","36.00","723.44"],"fontName":"AENRCE+CMBX12","x":36,"y":723.441,"line":0}
{"page":455,"change":"ContentChange","types":["H2"],"str":"2.3 March","dir":"ltr","width":"82.16","height":"14.35","transform":["14.35","0.00","0.00","14.35","36.00","723.44"],"fontName":"AENRCE+CMBX12","x":36,"y":723.441,"line":0}
{"page":472,"change":"ContentChange","types":["H2"],"str":"2.4 April","dir":"ltr","width":"73.43","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","531.69"],"fontName":"AENRCE+CMBX12","x":63,"y":531.6900000000002,"line":10}
{"page":519,"change":"ContentChange","types":["H2"],"str":"2.5 May","dir":"ltr","width":"67.95","height":"14.35","transform":["14.35","0.00","0.00","14.35","36.00","723.44"],"fontName":"AENRCE+CMBX12","x":36,"y":723.441,"line":0}
{"page":538,"change":"ContentChange","types":["H2"],"str":"2.6 June","dir":"ltr","width":"70.37","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","560.83"],"fontName":"AENRCE+CMBX12","x":63,"y":560.828,"line":7}
{"page":552,"change":"ContentChange","types":["H2"],"str":"2.7 July","dir":"ltr","width":"67.04","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","127.81"],"fontName":"AENRCE+CMBX12","x":63,"y":127.80900000000014,"line":32}
{"page":563,"change":"ContentChange","types":["H2"],"str":"2.8 August","dir":"ltr","width":"87.55","height":"14.35","transform":["14.35","0.00","0.00","14.35","36.00","382.85"],"fontName":"AENRCE+CMBX12","x":36,"y":382.84500000000025,"line":18}
{"page":577,"change":"ContentChange","types":["H2"],"str":"2.9 September","dir":"ltr","width":"112.03","height":"14.35","transform":["14.35","0.00","0.00","14.35","36.00","447.09"],"fontName":"AENRCE+CMBX12","x":36,"y":447.08500000000026,"line":15}
{"page":584,"change":"ContentChange","types":["H2"],"str":"2.10 October","dir":"ltr","width":"101.78","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","723.44"],"fontName":"AENRCE+CMBX12","x":63,"y":723.441,"line":0}
{"page":600,"change":"ContentChange","types":["H2"],"str":"2.11 November","dir":"ltr","width":"116.81","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","659.39"],"fontName":"AENRCE+CMBX12","x":63,"y":659.388,"line":2}
{"page":618,"change":"ContentChange","types":["H2"],"str":"2.12 December","dir":"ltr","width":"115.42","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","428.39"],"fontName":"AENRCE+CMBX12","x":63,"y":428.3919999999999,"line":18}
{"page":644,"change":"ContentChange","types":["H3"],"str":"Chapter 3","dir":"ltr","width":"102.37","height":"20.66","transform":["20.66","0.00","0.00","20.66","63.00","643.43"],"fontName":"AENRCE+CMBX12","x":63,"y":643.431,"line":0}
{"page":644,"change":"ContentChange","types":["H2"],"str":"2012","dir":"ltr","width":"55.72","height":"24.79","transform":["24.79","0.00","0.00","24.79","63.00","588.33"],"fontName":"AENRCE+CMBX12","x":63,"y":588.327,"line":1}
{"page":644,"change":"ContentChange","types":["H4"],"str":"3.1 January","dir":"ltr","width":"93.33","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","525.25"],"fontName":"AENRCE+CMBX12","x":63,"y":525.253,"line":2}
{"page":644,"change":"ContentChange","types":["H2"],"str":"3.1 January","dir":"ltr","width":"93.33","height":"14.35","transform":["14.35","0.00","0.00","14.35","63.00","525.25"],"fontName":"AENRCE+CMBX12","x":63,"y":525.253,"line":2}

View File

@ -53,6 +53,12 @@
6
],
"detectedHeadlineLevels": {}
},
"headlineTypeToHeightRange": {
"H2": {
"min": 11.9552,
"max": 14.3462
}
}
}
}

View File

@ -29,6 +29,8 @@
"globals": {
"maxHeight": 64,
"mostUsedHeight": 9,
"mostUsedDistance": 13.98,
"mostUsedFont": "PNMSCP+PalatinoLinotype-Roman",
"minX": 46.323,
"maxX": 436.5,
"minY": 37.73867999999993,

View File

@ -77,7 +77,7 @@
{"page":173,"change":"ContentChange","types":["H5"],"str":"5. Schluss bemerkungen","line":2,"x":70.92,"y":564.24,"width":"125.34","height":"12.00","fontName":["CRDKGT+ArialMT"],"dir":["ltr"]}
{"page":180,"change":"ContentChange","types":["H5"],"str":"1. Einleitung","line":26,"x":68.04,"y":210.24,"width":"62.60","height":"12.00","fontName":["CRDKGT+ArialMT"],"dir":["ltr"]}
{"page":182,"change":"ContentChange","types":["H5"],"str":"2. Materialien und Methoden","line":2,"x":68.04,"y":564.24,"width":"148.71","height":"12.00","fontName":["CRDKGT+ArialMT"],"dir":["ltr"]}
{"page":187,"change":"ContentChange","types":["H3"],"str":"𝑓𝑓 =","line":3,"x":70.92,"y":539.7,"width":"18.13","height":"13.98","fontName":["CJUSQJ+CambriaMath","WASIKV+CambriaMath"],"dir":["ltr"]}
{"page":187,"change":"ContentChange","types":["H2"],"str":"𝑓𝑓 =","line":3,"x":70.92,"y":539.7,"width":"18.13","height":"13.98","fontName":["CJUSQJ+CambriaMath","WASIKV+CambriaMath"],"dir":["ltr"]}
{"page":190,"change":"ContentChange","types":["H5"],"str":"3. Ergebnisse ","line":20,"x":68.04,"y":279.24,"width":"73.38","height":"12.00","fontName":["CRDKGT+ArialMT"],"dir":["ltr"]}
{"page":200,"change":"ContentChange","types":["H5"],"str":"4. Diskussion","line":28,"x":68.04,"y":278.94,"width":"67.93","height":"12.00","fontName":["CRDKGT+ArialMT"],"dir":["ltr"]}
{"page":207,"change":"ContentChange","types":["H5"],"str":"5. Schlussfolgerungen","line":13,"x":70.92,"y":394.62,"width":"115.35","height":"12.00","fontName":["CRDKGT+ArialMT"],"dir":["ltr"]}

View File

@ -49,6 +49,12 @@
5
],
"detectedHeadlineLevels": {}
},
"headlineTypeToHeightRange": {
"H2": {
"min": 13.98,
"max": 13.98
}
}
}
}

View File

@ -32,6 +32,8 @@
"globals": {
"maxHeight": 17.9328,
"mostUsedHeight": 8.9664,
"mostUsedDistance": 9.96,
"mostUsedFont": "KUYGUP+NimbusRomNo9L-Regu",
"minX": 53.99990000000005,
"maxX": 553.8755000000001,
"minY": 68.44329999999982,

View File

@ -2,7 +2,7 @@
"pages": 218,
"items": 51104,
"groupedItems": 8481,
"changes": 65,
"changes": 2,
"schema": [
{
"name": "line"
@ -35,64 +35,4 @@
"globals": {}
}
{"page":0,"change":"ContentChange","types":["H1"],"str":"A Course in Combinatorial Optimization","dir":"ltr","width":"407.49","height":"24.79","transform":["24.79","0.00","0.00","24.79","112.60","584.96"],"fontName":"HVHZFT+CMR17","x":112.599,"y":584.959,"line":0}
{"page":0,"change":"ContentChange","types":["H2"],"str":"Alexander Schrijver","dir":"ltr","width":"162.77","height":"20.66","transform":["20.66","0.00","0.00","20.66","234.96","508.10"],"fontName":"HVHZFT+CMR17","x":234.96300000000002,"y":508.09919999999994,"line":1}
{"page":4,"change":"ContentChange","types":["H3","H3"],"str":"1.1. Shortest paths with nonnegative lengths","dir":"ltr","width":"380.91","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","652.49"],"fontName":"KXBFBK+CMBX12","x":100.34989999999999,"y":652.4861000000001,"line":2}
{"page":8,"change":"ContentChange","types":["H3","H3"],"str":"1.2. Speeding up Dijkstras algorithm with heaps","dir":"ltr","width":"417.68","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","364.17"],"fontName":"KXBFBK+CMBX12","x":100.35000000000002,"y":364.17109999999997,"line":46}
{"page":11,"change":"ContentChange","types":["H3","H3"],"str":"1.3. Shortest paths with arbitrary lengths","dir":"ltr","width":"356.03","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","239.99"],"fontName":"KXBFBK+CMBX12","x":72,"y":239.989,"line":21}
{"page":18,"change":"ContentChange","types":["H3","H3"],"str":"1.4. Minimum spanning trees","dir":"ltr","width":"249.95","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","603.18"],"fontName":"KXBFBK+CMBX12","x":100.35016000000005,"y":603.1841000000001,"line":5}
{"page":22,"change":"ContentChange","types":["H3","H3"],"str":"2.1. Convex sets","dir":"ltr","width":"139.47","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","623.73"],"fontName":"KXBFBK+CMBX12","x":100.35008999999998,"y":623.731,"line":3}
{"page":24,"change":"ContentChange","types":["H3","H3"],"str":"2.2. Polytopes and polyhedra","dir":"ltr","width":"249.83","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","525.49"],"fontName":"KXBFBK+CMBX12","x":100.34895,"y":525.4873000000001,"line":10}
{"page":29,"change":"ContentChange","types":["H3","H3"],"str":"2.3. Farkas lemma","dir":"ltr","width":"160.56","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","141.56"],"fontName":"KXBFBK+CMBX12","x":71.99573099999998,"y":141.5561900000001,"line":34}
{"page":32,"change":"ContentChange","types":["H3","H3"],"str":"2.4. Linear programming","dir":"ltr","width":"212.60","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","345.51"],"fontName":"KXBFBK+CMBX12","x":100.34994999999975,"y":345.5141019999998,"line":19}
{"page":38,"change":"ContentChange","types":["H3","H3"],"str":"3.1. Matchings, covers, and Gallais theorem","dir":"ltr","width":"378.59","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","624.43"],"fontName":"KXBFBK+CMBX12","x":100.34989999999999,"y":624.4333,"line":3}
{"page":39,"change":"ContentChange","types":["H3"],"str":"3.2. M -augmenting paths","line":21,"x":71.99984999999992,"y":296.53569999999996,"width":"203.88","height":"17.22","fontName":["KXBFBK+CMBX12","MGJTOM+CMMI12"],"dir":["ltr"]}
{"page":40,"change":"ContentChange","types":["H3","H3"],"str":"3.3. K ̋onigs theorems","dir":"ltr","width":"186.22","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","359.57"],"fontName":"KXBFBK+CMBX12","x":100.35,"y":359.572,"line":17}
{"page":44,"change":"ContentChange","types":["H3","H3"],"str":"3.4. Cardinality bipartite matching algorithm","dir":"ltr","width":"386.84","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","701.16"],"fontName":"KXBFBK+CMBX12","x":100.34999999999997,"y":701.1583,"line":1}
{"page":46,"change":"ContentChange","types":["H3","H3"],"str":"3.5. Weighted bipartite matching","dir":"ltr","width":"282.53","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","555.66"],"fontName":"KXBFBK+CMBX12","x":100.35012999999998,"y":555.6638439999999,"line":7}
{"page":49,"change":"ContentChange","types":["H3","H3"],"str":"3.6. The matching polytope","dir":"ltr","width":"236.40","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","200.95"],"fontName":"KXBFBK+CMBX12","x":71.99974000000006,"y":200.94670000000002,"line":29}
{"page":53,"change":"ContentChange","types":["H3","H3"],"str":"4.1. Mengers theorem","dir":"ltr","width":"192.31","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","625.31"],"fontName":"KXBFBK+CMBX12","x":72.00000000000003,"y":625.306,"line":3}
{"page":57,"change":"ContentChange","types":["H3","H3"],"str":"4.2. Flows in networks","dir":"ltr","width":"191.61","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","480.35"],"fontName":"KXBFBK+CMBX12","x":71.99936999999969,"y":480.3521700000001,"line":14}
{"page":59,"change":"ContentChange","types":["H3","H3"],"str":"4.3. Finding a maximum flow","dir":"ltr","width":"249.97","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","337.49"],"fontName":"KXBFBK+CMBX12","x":71.99978999999976,"y":337.4861000000001,"line":18}
{"page":64,"change":"ContentChange","types":["H3","H3"],"str":"4.4. Speeding up the maximum flow algorithm","dir":"ltr","width":"395.42","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","209.07"],"fontName":"KXBFBK+CMBX12","x":100.34913,"y":209.06558999999993,"line":40}
{"page":67,"change":"ContentChange","types":["H3","H3"],"str":"4.5. Circulations","dir":"ltr","width":"141.18","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","288.45"],"fontName":"KXBFBK+CMBX12","x":72.00049000000018,"y":288.45379999999994,"line":20}
{"page":69,"change":"ContentChange","types":["H3","H3"],"str":"4.6. Minimum-cost flows","dir":"ltr","width":"209.03","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","185.94"],"fontName":"KXBFBK+CMBX12","x":72.00026899999983,"y":185.93532000000005,"line":36}
{"page":77,"change":"ContentChange","types":["H3","H3"],"str":"5.1. Tuttes 1-factor theorem and the Tutte-Berge","dir":"ltr","width":"432.02","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","641.75"],"fontName":"KXBFBK+CMBX12","x":72,"y":641.7494,"line":2}
{"page":77,"change":"ContentChange","types":["H3","H3"],"str":"formula","dir":"ltr","width":"64.67","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","619.83"],"fontName":"KXBFBK+CMBX12","x":72,"y":619.8342,"line":3}
{"page":80,"change":"ContentChange","types":["H3","H3"],"str":"5.2. Cardinality matching algorithm","dir":"ltr","width":"306.87","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","627.85"],"fontName":"KXBFBK+CMBX12","x":100.3506199999999,"y":627.8532,"line":4}
{"page":84,"change":"ContentChange","types":["H3","H3"],"str":"5.3. Weighted matching algorithm","dir":"ltr","width":"290.92","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","594.61"],"fontName":"KXBFBK+CMBX12","x":100.35,"y":594.607,"line":2}
{"page":90,"change":"ContentChange","types":["H3","H3"],"str":"5.4. The matching polytope","dir":"ltr","width":"236.40","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","217.70"],"fontName":"KXBFBK+CMBX12","x":100.351061,"y":217.69570000000016,"line":41}
{"page":93,"change":"ContentChange","types":["H3","H3"],"str":"5.5. The Cunningham-Marsh formula","dir":"ltr","width":"318.34","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","196.24"],"fontName":"KXBFBK+CMBX12","x":71.99741000000014,"y":196.24021000000002,"line":33}
{"page":96,"change":"ContentChange","types":["H3","H3"],"str":"6.1. Introduction","dir":"ltr","width":"145.32","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","626.23"],"fontName":"KXBFBK+CMBX12","x":100.3501,"y":626.2333,"line":3}
{"page":97,"change":"ContentChange","types":["H3","H3"],"str":"6.2. Words","dir":"ltr","width":"93.13","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","215.08"],"fontName":"KXBFBK+CMBX12","x":72,"y":215.07689999999997,"line":24}
{"page":99,"change":"ContentChange","types":["H3","H3"],"str":"6.3. Problems","dir":"ltr","width":"118.32","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","701.16"],"fontName":"KXBFBK+CMBX12","x":72,"y":701.1583,"line":1}
{"page":99,"change":"ContentChange","types":["H3","H3"],"str":"6.4. Algorithms and running time","dir":"ltr","width":"287.67","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","300.83"],"fontName":"KXBFBK+CMBX12","x":72.00158999999996,"y":300.8293000000004,"line":24}
{"page":100,"change":"ContentChange","types":["H3","H3"],"str":"6.5. The class NP","dir":"ltr","width":"151.77","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","354.09"],"fontName":"KXBFBK+CMBX12","x":100.34892000000013,"y":354.0911000000001,"line":25}
{"page":101,"change":"ContentChange","types":["H3","H3"],"str":"6.6. The class co-NP","dir":"ltr","width":"176.51","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","210.57"],"fontName":"KXBFBK+CMBX12","x":71.99976999999978,"y":210.56830000000036,"line":30}
{"page":102,"change":"ContentChange","types":["H3","H3"],"str":"6.7. NP-completeness","dir":"ltr","width":"184.22","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","392.60"],"fontName":"KXBFBK+CMBX12","x":100.35020000000006,"y":392.6020000000001,"line":20}
{"page":102,"change":"ContentChange","types":["H3","H3"],"str":"6.8. NP-completeness of the satisfiability problem","dir":"ltr","width":"424.13","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","158.85"],"fontName":"KXBFBK+CMBX12","x":100.35130000000021,"y":158.85417000000012,"line":33}
{"page":105,"change":"ContentChange","types":["H3","H3"],"str":"6.9. NP-completeness of some other problems","dir":"ltr","width":"389.78","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","701.16"],"fontName":"KXBFBK+CMBX12","x":72,"y":701.1583,"line":1}
{"page":107,"change":"ContentChange","types":["H3","H3"],"str":"6.10. Turing machines","dir":"ltr","width":"188.23","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","481.67"],"fontName":"KXBFBK+CMBX12","x":72,"y":481.666,"line":11}
{"page":110,"change":"ContentChange","types":["H3","H3"],"str":"7.1. Introduction","dir":"ltr","width":"145.32","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","621.87"],"fontName":"KXBFBK+CMBX12","x":100.35010000000005,"y":621.8681,"line":3}
{"page":114,"change":"ContentChange","types":["H3","H3"],"str":"7.2. Edge-colourings of bipartite graphs","dir":"ltr","width":"337.32","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","141.56"],"fontName":"KXBFBK+CMBX12","x":100.35259000000008,"y":141.5558,"line":19}
{"page":120,"change":"ContentChange","types":["H3","H3"],"str":"7.3. Partially ordered sets","dir":"ltr","width":"221.32","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","572.90"],"fontName":"KXBFBK+CMBX12","x":100.34971999999993,"y":572.8990899999999,"line":14}
{"page":124,"change":"ContentChange","types":["H3","H3"],"str":"7.4. Perfect graphs","dir":"ltr","width":"161.70","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","701.16"],"fontName":"KXBFBK+CMBX12","x":100.34999999999997,"y":701.1583,"line":1}
{"page":127,"change":"ContentChange","types":["H3","H3"],"str":"7.5. Chordal graphs","dir":"ltr","width":"169.26","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","170.52"],"fontName":"KXBFBK+CMBX12","x":71.99944399999993,"y":170.51789999999988,"line":24}
{"page":131,"change":"ContentChange","types":["H3","H3"],"str":"8.1. Integer linear programming","dir":"ltr","width":"273.03","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","619.28"],"fontName":"KXBFBK+CMBX12","x":72,"y":619.2763,"line":3}
{"page":133,"change":"ContentChange","types":["H3","H3"],"str":"8.2. Totally unimodular matrices","dir":"ltr","width":"280.06","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","701.16"],"fontName":"KXBFBK+CMBX12","x":72,"y":701.1583,"line":1}
{"page":138,"change":"ContentChange","types":["H3","H3"],"str":"8.3. Totally unimodular matrices from bipartite","dir":"ltr","width":"432.01","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","701.16"],"fontName":"KXBFBK+CMBX12","x":100.35000000000008,"y":701.1583,"line":1}
{"page":138,"change":"ContentChange","types":["H3","H3"],"str":"graphs","dir":"ltr","width":"56.18","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","679.24"],"fontName":"KXBFBK+CMBX12","x":100.35000000000008,"y":679.2431,"line":2}
{"page":142,"change":"ContentChange","types":["H3","H3"],"str":"8.4. Totally unimodular matrices from directed graphs","dir":"ltr","width":"452.40","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","270.96"],"fontName":"KXBFBK+CMBX12","x":100.35010900000005,"y":270.9585999999999,"line":23}
{"page":147,"change":"ContentChange","types":["H3","H3"],"str":"9.1. Introduction","dir":"ltr","width":"145.33","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","623.74"],"fontName":"KXBFBK+CMBX12","x":72,"y":623.74,"line":3}
{"page":152,"change":"ContentChange","types":["H3","H3"],"str":"9.2. Two commodities","dir":"ltr","width":"188.12","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","701.16"],"fontName":"KXBFBK+CMBX12","x":100.34999999999997,"y":701.1583,"line":1}
{"page":156,"change":"ContentChange","types":["H3","H3"],"str":"9.3. Disjoint paths in acyclic directed graphs","dir":"ltr","width":"380.87","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","644.08"],"fontName":"KXBFBK+CMBX12","x":100.34937999999991,"y":644.0802000000001,"line":3}
{"page":158,"change":"ContentChange","types":["H3","H3"],"str":"9.4. Vertex-disjoint paths in planar graphs","dir":"ltr","width":"361.94","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","701.16"],"fontName":"KXBFBK+CMBX12","x":100.34999999999997,"y":701.1583,"line":1}
{"page":164,"change":"ContentChange","types":["H3","H3"],"str":"9.5. Edge-disjoint paths in planar graphs","dir":"ltr","width":"347.58","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","443.97"],"fontName":"KXBFBK+CMBX12","x":100.34943999999979,"y":443.96552000000014,"line":15}
{"page":167,"change":"ContentChange","types":["H3","H3"],"str":"9.6. A column generation technique for multicom-","dir":"ltr","width":"432.00","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","318.47"],"fontName":"KXBFBK+CMBX12","x":72.00099400000013,"y":318.4694000000001,"line":20}
{"page":167,"change":"ContentChange","types":["H3","H3"],"str":"modity flows","dir":"ltr","width":"107.70","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","296.55"],"fontName":"KXBFBK+CMBX12","x":72.00099400000013,"y":296.55420000000015,"line":21}
{"page":172,"change":"ContentChange","types":["H3","H3"],"str":"10.1. Matroids and the greedy algorithm","dir":"ltr","width":"347.13","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","655.15"],"fontName":"KXBFBK+CMBX12","x":100.35000000000005,"y":655.1501000000001,"line":2}
{"page":175,"change":"ContentChange","types":["H3","H3"],"str":"10.2. Equivalent axioms for matroids","dir":"ltr","width":"313.85","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","599.73"],"fontName":"KXBFBK+CMBX12","x":72.00027000000017,"y":599.7281,"line":4}
{"page":179,"change":"ContentChange","types":["H3","H3"],"str":"10.3. Examples of matroids","dir":"ltr","width":"232.50","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","591.35"],"fontName":"KXBFBK+CMBX12","x":72.00033999999994,"y":591.3493000000001,"line":6}
{"page":182,"change":"ContentChange","types":["H3","H3"],"str":"10.4. Two technical lemmas","dir":"ltr","width":"236.21","height":"17.22","transform":["17.22","0.00","0.00","17.22","100.35","451.78"],"fontName":"KXBFBK+CMBX12","x":100.35041999999993,"y":451.7771000000002,"line":13}
{"page":183,"change":"ContentChange","types":["H3","H3"],"str":"10.5. Matroid intersection","dir":"ltr","width":"222.26","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","242.82"],"fontName":"KXBFBK+CMBX12","x":71.9991999999998,"y":242.8154,"line":27}
{"page":189,"change":"ContentChange","types":["H3","H3"],"str":"10.6. Weighted matroid intersection","dir":"ltr","width":"306.13","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","243.80"],"fontName":"KXBFBK+CMBX12","x":72.00190999999978,"y":243.79659999999996,"line":27}
{"page":193,"change":"ContentChange","types":["H3","H3"],"str":"10.7. Matroids and polyhedra","dir":"ltr","width":"253.24","height":"17.22","transform":["17.22","0.00","0.00","17.22","72.00","701.16"],"fontName":"KXBFBK+CMBX12","x":72,"y":701.1583,"line":1}
{"page":0,"change":"ContentChange","types":["H2"],"str":"Alexander Schrijver","dir":"ltr","width":"162.77","height":"20.66","transform":["20.66","0.00","0.00","20.66","234.96","508.10"],"fontName":"HVHZFT+CMR17","x":234.96300000000002,"y":508.09919999999994,"line":1}

View File

@ -47,6 +47,16 @@
3
],
"detectedHeadlineLevels": {}
},
"headlineTypeToHeightRange": {
"H2": {
"min": 17.2155,
"max": 24.7871
},
"H3": {
"min": 17.2155,
"max": 17.2155
}
}
}
}

View File

@ -32,6 +32,8 @@
"globals": {
"maxHeight": 24.7871,
"mostUsedHeight": 11.9551,
"mostUsedDistance": 14.44,
"mostUsedFont": "LERRTL+CMR12",
"minX": 52.262,
"maxX": 571.0594300000001,
"minY": 76.19790000000002,

View File

@ -27,6 +27,14 @@ const config = {
maxDistanceToFringe: 50,
};
/**
 * Parses a numeric string and rounds the result to two fraction digits.
 *
 * @param value - textual representation of a number, e.g. `"10.9091"`
 * @returns the parsed number rounded to two decimals; `NaN` if `value` cannot be parsed
 */
function to2DigitDecimalFromString(value: string): number {
  const parsed = parseFloat(value);
  // toFixed() yields a string, so it is parsed again to get a number back
  const fixed = parsed.toFixed(2);
  return parseFloat(fixed);
}
/**
 * Rounds a number to two fraction digits.
 *
 * @param value - the number to round
 * @returns `value` rounded to two decimals
 */
function to2DigitDecimal(value: number): number {
  // toFixed() yields a string, so it is parsed to get a number back
  const fixed = value.toFixed(2);
  return parseFloat(fixed);
}
export default class CalculateStatistics extends ItemTransformer {
constructor() {
super('Calculate Statistics', 'Calculate global statistics that are used in downstream transformers', {
@ -83,7 +91,8 @@ export default class CalculateStatistics extends ItemTransformer {
}
});
// TODO really need parseInt here ?
const mostUsedHeight = parseInt(getMostUsedKey(heightToOccurrence));
const mostUsedHeight = to2DigitDecimalFromString(getMostUsedKey(heightToOccurrence));
const mostUsedFont = getMostUsedKey(fontToOccurrence);
const groupedByPage = groupByPage(items);
@ -94,14 +103,15 @@ export default class CalculateStatistics extends ItemTransformer {
let page = -1;
let lastItemOfMostUsedHeight: Item | undefined;
items.forEach((item) => {
items.forEach((item, i) => {
if (item.page !== page) lastItemOfMostUsedHeight = undefined;
const itemHeight = item.data['height'];
const itemHeight = to2DigitDecimalFromString(item.data['height']);
const itemText = item.data['str'];
const itemY = item.data['y'];
if (itemHeight == mostUsedHeight && itemText.trim().length > 0) {
if (lastItemOfMostUsedHeight && itemY != lastItemOfMostUsedHeight.data['y']) {
const distance = lastItemOfMostUsedHeight.data['y'] - itemY;
const distance = to2DigitDecimal(lastItemOfMostUsedHeight.data['y'] - itemY);
if (distance > 0) {
distanceToOccurrence[distance] = distanceToOccurrence[distance] ? distanceToOccurrence[distance] + 1 : 1;
}
@ -112,8 +122,7 @@ export default class CalculateStatistics extends ItemTransformer {
}
page = item.page;
});
const mostUsedDistance = parseInt(getMostUsedKey(distanceToOccurrence));
const mostUsedDistance = to2DigitDecimalFromString(getMostUsedKey(distanceToOccurrence));
const fontIdToName: string[] = [];
const fontToType = new Map<string, FontType>();
context.fontMap.forEach(function (value, key) {
@ -126,13 +135,14 @@ export default class CalculateStatistics extends ItemTransformer {
});
fontIdToName.sort();
const mostUsedFontObject = context.fontMap.get(mostUsedFont) as { name: string };
return {
items: items,
globals: [
MAX_HEIGHT.value(maxHeight),
MOST_USED_HEIGHT.value(mostUsedByMedian),
MOST_USED_DISTANCE.value(mostUsedDistance),
MOST_USED_FONT.value(mostUsedFont),
MOST_USED_FONT.value(mostUsedFontObject?.name || mostUsedFont),
MIN_X.value(minX),
MAX_X.value(maxX),
MIN_Y.value(minY),

View File

@ -51,23 +51,25 @@ export default class DetectHeaders extends ItemTransformer {
itemToLevel,
);
// True when the given type list contains any of the headline types H1 to H6.
const hasHeaderType = (types: ItemType[]) => {
  const headerTypes = [ItemType.H1, ItemType.H2, ItemType.H3, ItemType.H4, ItemType.H5, ItemType.H6];
  return headerTypes.some((headerType) => types.includes(headerType));
};
if (toc && headlineTypeToHeightRange) {
//Use existing headline heights to find additional headlines
// Use existing headline heights to find additional headlines
const headlineTypes = Object.keys(headlineTypeToHeightRange) as ItemType[];
headlineTypes.forEach((headlineType) => {
const range = headlineTypeToHeightRange[headlineType];
if (range.max > mostUsedHeight) {
//use only very clear headlines, only use max
// use only very clear headlines, only use max
inputItems.forEach((item) => {
const itemHeight = item.data['height'];
const types: ItemType[] = item.data['types'] || [];
const isHeader =
types.includes(ItemType.H1) ||
types.includes(ItemType.H2) ||
types.includes(ItemType.H3) ||
types.includes(ItemType.H4) ||
types.includes(ItemType.H5) ||
types.includes(ItemType.H6);
const types: ItemType[] = item.data['types'] || itemToLevel.get(item.uuid) || [];
const isHeader = hasHeaderType(types);
if (!isHeader && itemHeight === range.max) {
itemToLevel.set(item.uuid, headlineType);
detectedHeaders++;
@ -75,41 +77,41 @@ export default class DetectHeaders extends ItemTransformer {
});
}
});
} else {
//Categorize headlines by the text heights
const heights: number[] = [];
}
itemsByLine
.filter((lineItems) => !itemToLevel.has(lineItems[0].uuid))
.map((lineItems) => {
const maxHeight = Math.max(...lineItems.map((item) => item.data['height']));
if (maxHeight > mostUsedHeight * config.minHeadlineDistance && !heights.includes(maxHeight)) {
heights.push(maxHeight);
}
});
const heightToHeadline: Map<number, ItemType> = new Map();
heights.sort((a, b) => b - a);
heights.forEach((height, i) => {
const headlineLevel = i + 2;
if (headlineLevel <= 6) {
const headlineType = ItemType.header(2 + i);
heightToHeadline.set(height, headlineType);
// Categorize headlines by the text heights
const heights: number[] = [];
itemsByLine
.filter((lineItems) => !itemToLevel.has(lineItems[0].uuid))
.map((lineItems) => {
const maxHeight = Math.max(...lineItems.map((item) => item.data['height']));
if (maxHeight > mostUsedHeight * config.minHeadlineDistance && !heights.includes(maxHeight)) {
heights.push(maxHeight);
}
});
const heightToHeadline: Map<number, ItemType> = new Map();
heights.sort((a, b) => b - a);
heights.forEach((height, i) => {
const headlineLevel = i + 2;
if (headlineLevel <= 6) {
const headlineType = ItemType.header(2 + i);
heightToHeadline.set(height, headlineType);
}
});
itemsByLine
.filter((lineItems) => !itemToLevel.has(lineItems[0].uuid))
.forEach((lineItems) => {
const maxHeight = Math.max(...lineItems.map((item) => item.data['height']));
const types = flatten(lineItems.map((item) => item.data['types'] || [])).filter(onlyUniques);
itemsByLine
.filter((lineItems) => !itemToLevel.has(lineItems[0].uuid))
.forEach((lineItems) => {
const maxHeight = Math.max(...lineItems.map((item) => item.data['height']));
const types = flatten(lineItems.map((item) => item.data['types'] || [])).filter(onlyUniques) as ItemType[];
if (!hasHeaderType(types) && !itemToLevel.has(lineItems[0].uuid)) {
const headlineType = heightToHeadline.get(maxHeight);
if (headlineType && !types.includes(ItemType.H1) && !types.includes(ItemType.H2)) {
lineItems.forEach((item) => itemToLevel.set(item.uuid, headlineType));
detectedHeaders++;
}
});
}
}
});
// TODO find headlines which have paragraph height
// var smallesHeadlineLevel = 1;
@ -145,7 +147,7 @@ export default class DetectHeaders extends ItemTransformer {
// }
return {
items: inputItems.map((item) => {
items: inputItems.map((item, i) => {
const headerType = itemToLevel.get(item.uuid);
if (headerType) {
return itemWithType(item, headerType);

View File

@ -48,7 +48,9 @@ describe.each(files)('Test %p', (file) => {
let debug: Debugger;
const printedGlobals = new Set<string>();
beforeAll(async () => (debug = await pipeline.parse(data, () => {}).then((pc) => pc.debug())));
beforeAll(async () => {
debug = await pipeline.parse(data, () => {}).then((pc) => pc.debug());
});
test.each(transformers.map((t) => t.name).filter((name) => name !== 'Does nothing'))(
'stage %p',
@ -76,13 +78,16 @@ describe.each(files)('Test %p', (file) => {
// Global characteristics
chunkedLines[0].unshift(toHeader(stageResult, printedGlobals));
chunkedLines.forEach((lines, idx) => {
const transformerResultAsString = lines.join('\n') || '{}';
expect(transformerResultAsString).toMatchFile(matchFilePath(file, transformerName, chunkedLines.length, idx));
});
stageResult.globals.keys().forEach((globalKey) => {
printedGlobals.add(globalKey);
});
try {
chunkedLines.forEach((lines, idx) => {
const transformerResultAsString = lines.join('\n') || '{}';
expect(transformerResultAsString).toMatchFile(matchFilePath(file, transformerName, chunkedLines.length, idx));
});
} finally {
stageResult.globals.keys().forEach((globalKey) => {
printedGlobals.add(globalKey);
});
}
},
);
});

View File

@ -5,6 +5,7 @@
"declaration": true,
"outDir": "./lib",
"strict": true,
"strictNullChecks": false,
"noImplicitAny": false,
"noUnusedLocals": false,
"useUnknownInCatchVariables": false,