mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-02-23 05:10:47 +01:00
Fix line compaction for multi-columnar PDFs
This commit is contained in:
parent
6e5e5c9d53
commit
08509953dc
@ -36,7 +36,7 @@ export default class CompactLines extends ItemTransformer {
|
||||
return items.map((item) => {
|
||||
const y = item.data['y'];
|
||||
const height = item.data['height'];
|
||||
if (!lastY || lastY - height > y) {
|
||||
if (!lastY || Math.abs(lastY - y) > height) {
|
||||
lineNumber++;
|
||||
lines++;
|
||||
}
|
||||
|
@ -5,6 +5,27 @@ import { items } from './testItems';
|
||||
|
||||
const transformer = new CompactLines();
|
||||
|
||||
test('Transform - 2 column pdf (Smart Immunity)', async () => {
|
||||
const results = transformer.transform(
|
||||
emptyContext(),
|
||||
items(0, [
|
||||
{
|
||||
x: 54,
|
||||
y: 91.52,
|
||||
str: 'most, get what feels more like a flu. And',
|
||||
height: 12,
|
||||
},
|
||||
{
|
||||
x: 324,
|
||||
y: 710.52,
|
||||
str: 'some, it turns out, aren’t even aware that',
|
||||
height: 12,
|
||||
},
|
||||
]),
|
||||
);
|
||||
expect(results.items.map((item) => item.data['line'])).toEqual([0, 1]);
|
||||
});
|
||||
|
||||
test('Transform - raised characters (example.pdf)', async () => {
|
||||
const results = transformer.transform(
|
||||
emptyContext(),
|
||||
|
Loading…
Reference in New Issue
Block a user