Fix line compaction for multi-columnar PDFs

This commit is contained in:
Johannes Zillmann 2021-02-26 19:28:44 +01:00
parent 6e5e5c9d53
commit 08509953dc
2 changed files with 22 additions and 1 deletion

View File

@@ -36,7 +36,7 @@ export default class CompactLines extends ItemTransformer {
return items.map((item) => {
const y = item.data['y'];
const height = item.data['height'];
if (!lastY || lastY - height > y) {
if (!lastY || Math.abs(lastY - y) > height) {
lineNumber++;
lines++;
}

View File

@@ -5,6 +5,27 @@ import { items } from './testItems';
// Single CompactLines instance shared by the test cases in this file.
const transformer = new CompactLines();
// Regression test for two-column layouts: the second item's y (710.52) is far
// above the first item's y (91.52), as when text continues in the next column.
// The two items must be assigned to different lines.
test('Transform - 2 column pdf (Smart Immunity)', async () => {
  const firstItem = {
    x: 54,
    y: 91.52,
    str: 'most, get what feels more like a flu. And',
    height: 12,
  };
  const secondItem = {
    x: 324,
    y: 710.52,
    str: 'some, it turns out, arent even aware that',
    height: 12,
  };

  const { items: transformedItems } = transformer.transform(
    emptyContext(),
    items(0, [firstItem, secondItem]),
  );

  // Each input item is expected to land on its own line: 0, then 1.
  const lineNumbers = transformedItems.map(({ data }) => data['line']);
  expect(lineNumbers).toEqual([0, 1]);
});
test('Transform - raised characters (example.pdf)', async () => {
const results = transformer.transform(
emptyContext(),