From 12f6105d90670974d00d487c293eadaf70f3b207 Mon Sep 17 00:00:00 2001 From: Johannes Zillmann Date: Fri, 3 Feb 2017 12:37:39 +0100 Subject: [PATCH] Ignore width and height when detecting repetitive items to increase accuracy --- .../models/transformations/RemoveRepetitiveElements.jsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/javascript/models/transformations/RemoveRepetitiveElements.jsx b/src/javascript/models/transformations/RemoveRepetitiveElements.jsx index c7a34af..6523ca0 100644 --- a/src/javascript/models/transformations/RemoveRepetitiveElements.jsx +++ b/src/javascript/models/transformations/RemoveRepetitiveElements.jsx @@ -21,7 +21,7 @@ function hashCodeIgnoringNumbers(string) { function combineCoordinates(textItem) { var hashCode = hashCodeIgnoringNumbers(textItem.text); - return `${textItem.x}-${textItem.y}-${textItem.width}-${textItem.height}-${hashCode}`; + return `${textItem.x}-${textItem.y}-${hashCode}`; } // Remove elements with similar content on same page positions, like page numbers, licenes information, etc... @@ -37,7 +37,7 @@ export default class RemoveRepetitiveElements extends Transformation { transform(pages:PdfPage[]) { //build repetition counts for every element - var repetitionCounts = {}; + const repetitionCounts = {}; pages.forEach(pdfPage => { pdfPage.textItems.forEach(textItem => { var combinedCoordinates = combineCoordinates(textItem);