Exclude width and height from the key used to detect repetitive items, to increase accuracy

2025-06-21 10:08:03 +02:00 · 2017-02-03 12:37:39 +01:00 · 2017-02-03 12:37:39 +01:00 · 12f6105d90
commit 12f6105d90
parent 91087d550b
1 changed files with 2 additions and 2 deletions
--- a/src/javascript/models/transformations/RemoveRepetitiveElements.jsx
+++ b/src/javascript/models/transformations/RemoveRepetitiveElements.jsx
@@ -21,7 +21,7 @@ function hashCodeIgnoringNumbers(string) {

 function combineCoordinates(textItem) {
    var hashCode = hashCodeIgnoringNumbers(textItem.text);
-    return `${textItem.x}-${textItem.y}-${textItem.width}-${textItem.height}-${hashCode}`;
+    return `${textItem.x}-${textItem.y}-${hashCode}`;
 }

 // Remove elements with similar content at the same page positions, like page numbers, license information, etc.
@@ -37,7 +37,7 @@ export default class RemoveRepetitiveElements extends Transformation {

    transform(pages:PdfPage[]) {
        //build repetition counts for every element
-        var repetitionCounts = {};
+        const repetitionCounts = {};
        pages.forEach(pdfPage => {
            pdfPage.textItems.forEach(textItem => {
                var combinedCoordinates = combineCoordinates(textItem);