mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-29 11:14:12 +01:00
7abafc61e7
- sometimes a word is provided with multiple items. E.g: "T his is a sen tence" - use x-axis distance to not put whitespaces in the middle of a word - also tweak the line detection a bit (for Alice)
36 lines
423 B
JSON
36 lines
423 B
JSON
{
|
|
"pages": 140,
|
|
"items": 25313,
|
|
"groupedItems": 3179,
|
|
"changes": 0,
|
|
"schema": [
|
|
{
|
|
"name": "line"
|
|
},
|
|
{
|
|
"name": "token types"
|
|
},
|
|
{
|
|
"name": "x"
|
|
},
|
|
{
|
|
"name": "y"
|
|
},
|
|
{
|
|
"name": "width"
|
|
},
|
|
{
|
|
"name": "height"
|
|
},
|
|
{
|
|
"name": "str"
|
|
},
|
|
{
|
|
"name": "fontName"
|
|
},
|
|
{
|
|
"name": "dir"
|
|
}
|
|
],
|
|
"globals": {}
|
|
} |