mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-26 09:43:59 +01:00
7abafc61e7
- sometimes a word is provided with multiple items. E.g: "T his is a sen tence" - use x-axis distance to not put whitespaces in the middle of a word - also tweak the line detection a bit (for Alice)
37 lines
453 B
JSON
37 lines
453 B
JSON
{
|
|
"pages": 153,
|
|
"items": 14949,
|
|
"groupedItems": 10624,
|
|
"changes": 0,
|
|
"schema": [
|
|
{
|
|
"name": "line"
|
|
},
|
|
{
|
|
"name": "token types",
|
|
"annotation": "ADDED"
|
|
},
|
|
{
|
|
"name": "x"
|
|
},
|
|
{
|
|
"name": "y"
|
|
},
|
|
{
|
|
"name": "width"
|
|
},
|
|
{
|
|
"name": "height"
|
|
},
|
|
{
|
|
"name": "str"
|
|
},
|
|
{
|
|
"name": "fontName"
|
|
},
|
|
{
|
|
"name": "dir"
|
|
}
|
|
],
|
|
"globals": {}
|
|
} |