DetectTOC: only go for lines containing '...' words

DetectTOC currently processes every line: after ignoring words made up entirely of full stops, it shaves trailing numbers off the end of the line. This implies that a TOC line is one that contains words consisting only of full stops, so DetectTOC should only operate on such lines. This change removes the unwanted behaviour where DetectTOC strips trailing numbers that we actually want to keep in a line, e.g. "Case Number : ABC 12/1234".
2025-06-30 22:30:03 +02:00 · 2019-07-27 15:45:53 +08:00
parent 2869b5e5de
commit 648c0add59
1 changed file with 4 additions and 2 deletions
--- a/src/javascript/models/transformations/lineitem/DetectTOC.jsx
+++ b/src/javascript/models/transformations/lineitem/DetectTOC.jsx
@@ -30,8 +30,10 @@ export default class DetectTOC extends ToLineItemTransformation {
            const pageTocLinks = [];
            var lastWordsWithoutNumber;
            var lastLine;
-            //find lines ending with a number per page
-            page.items.forEach(line => {
+            // find lines with words containing only "." ...
+            const tocLines = page.items.filter(line => line.words.includes(word => hasOnly(word.string, '.')))
+            // ... and ending with a number per page
+            tocLines.forEach(line => {
                var words = line.words.filter(word => !hasOnly(word.string, '.'));
                const digits = [];
                while (words.length > 0 && isNumber(words[words.length - 1].string)) {