mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-12-01 20:24:17 +01:00
7abafc61e7
- sometimes a word is provided with multiple items. E.g: "T his is a sen tence" - use x-axis distance to not put whitespaces in the middle of a word - also tweak the line detection a bit (for Alice)
43 lines
521 B
JSON
43 lines
521 B
JSON
{
|
|
"pages": 116,
|
|
"items": 7676,
|
|
"groupedItems": 3481,
|
|
"changes": 0,
|
|
"schema": [
|
|
{
|
|
"name": "block",
|
|
"annotation": "ADDED"
|
|
},
|
|
{
|
|
"name": "line"
|
|
},
|
|
{
|
|
"name": "token types"
|
|
},
|
|
{
|
|
"name": "types"
|
|
},
|
|
{
|
|
"name": "x"
|
|
},
|
|
{
|
|
"name": "y"
|
|
},
|
|
{
|
|
"name": "width"
|
|
},
|
|
{
|
|
"name": "height"
|
|
},
|
|
{
|
|
"name": "str"
|
|
},
|
|
{
|
|
"name": "fontName"
|
|
},
|
|
{
|
|
"name": "dir"
|
|
}
|
|
],
|
|
"globals": {}
|
|
} |