List indentation

2025-07-15 05:25:13 +02:00 · 2024-06-16 23:29:20 -06:00
parent c39f410d76
commit 71b31c2fb5
6 changed files with 58 additions and 49 deletions
--- a/examples/ExamplePdf.md
+++ b/examples/ExamplePdf.md
@ -108,14 +108,14 @@ Und Untergruppen:
  - Sub Eintrag 1.1, aber mit so langem Text, das er umbricht. Wirklich, wirklich lang. Breche du
  Zeile. Na los. Na endlich. Vielleicht sollt ich das auf 3 Zeilen erweitern? Na ja, schaden kann
  es ja nicht. Also los!
-    - Sub Eintrag 1.
+  - Sub Eintrag 1.2
 - Eintrag 2
-    - Sub Eintrag 2.
+  - Sub Eintrag 2.1

 Und eine mit bullet’s:

- Eintrage 1
- Eintrage 2
+• Eintrage 1
+• Eintrage 2

 Gemixt:

@ -147,27 +147,26 @@ Zwei aufeinander folgende Listen:
 - Zwote 1
 - Zwote 2

-
 Liste mit drei Levels:

 - Erster Level 1
  - Zwoter Level 1.1, aber mit so langem Text, das er umbricht. Wirklich, wirklich lang. Breche du
  Zeile. Na los. Na endlich. Vielleicht sollt ich das auf 3 Zeilen erweitern? Na ja, schaden kann
  es ja nicht. Also los!
-       - 3ter Level 1.1.
+    - 3ter Level 1.1.1
    - 3ter Level 1.1.2, aber mit so langem Text, das er umbricht. Wirklich, wirklich lang. Breche
    du Zeile. Na los. Na endlich. Vielleicht sollt ich das auf 3 Zeilen erweitern? Na ja, schaden
    kann es ja nicht. Also los!
-    - Zwoter Level 1.
-    - Zwoter Level 1.
-       - 3ter Level 1.3.
+  - Zwoter Level 1.2
+  - Zwoter Level 1.3
+    - 3ter Level 1.3.1
 - Erster Level 2

 Und nun nummeriert mit un-nummerierten Sub-Leveln:

 1. Eintrag 1
-    - Eintrag 1.
-    - Eintrag 1.
+  - Eintrag 1.1
+  - Eintrag 1.2
 2. Eintrag 2

 Und jetzt eine Liste, die übergangslos aus einem zwei-zeiligen Paragraphen folgt. Mal sehen ob
--- a/src/Item.ts
+++ b/src/Item.ts
@ -5,6 +5,7 @@ export default class Item {
  page: number;
  data: object;
  uuid: string;
+  listLevel = 0;
  tokenTypes: TokenType[] = [];

  constructor(page: number, data: object, tokenTypes: TokenType[] = [], uuid: string = uuidv4()) {
--- a/src/convert/MarkdownConverter.ts
+++ b/src/convert/MarkdownConverter.ts
@ -61,20 +61,25 @@ export function lineToText(lineItems: Item[], blockTypes: TextType[]) {
  };

  let lastLineItem: Item = null;
-  lineItems.forEach((lineItem, lineIndex) => {
+  lineItems.forEach((lineItem, indexInLine) => {
    const words = toWords(lineItem.data['str']);
    words.forEach((word, wordIndex) => {
+      if (indexInLine === 0 && wordIndex === 0) {
+        if (lineItem.listLevel) {
+          word = ' '.repeat(lineItem.listLevel * 2) + word;
+        }
+      }
      const wordFormat = lineItem.tokenTypes[0]; // bold, oblique, footnote etc...
      if (openFormat && (!wordFormat || wordFormat !== openFormat)) {
        closeFormat();
      }
      if (
-        (wordIndex > 0 || lineIndex > 0) &&
+        (wordIndex > 0 || indexInLine > 0) &&
        !(wordFormat && attachWithoutWhitespace(wordFormat)) &&
        !isPunctationCharacter(word)
      ) {
        let insertWhitespace = true;
-        if (lineIndex > 0 && wordIndex == 0) {
+        if (indexInLine > 0 && wordIndex == 0) {
          const xDistance = lineItem.data['x'] - lastLineItem.data['x'] - lastLineItem.data['width'];
          if (xDistance < 2 && !lastLineItem.data['str']?.endsWith(' ') && !lineItem.data['str']?.startsWith(' ')) {
            insertWhitespace = false;
@ -98,7 +103,7 @@ export function lineToText(lineItems: Item[], blockTypes: TextType[]) {
        text += word;
      }
    });
-    if (openFormat && (lineIndex == lineItems.length - 1 || firstFormat(lineItems[lineIndex + 1]) !== openFormat)) {
+    if (openFormat && (indexInLine == lineItems.length - 1 || firstFormat(lineItems[indexInLine + 1]) !== openFormat)) {
      closeFormat();
    }
    lastLineItem = lineItem;
--- a/src/support/groupingUtils.ts
+++ b/src/support/groupingUtils.ts
@ -138,3 +138,7 @@ export function majorityElement<T>(items: Item[], extract: (item: Item) => T): T
  }
  return extract(items[maj]);
 }
+
+export function isGreaterWithTolerance(num1: number, num2: number, tolerance = 0.01) {
+  return num1 - num2 > tolerance;
+}
--- a/src/transformer/DetectListLevels.ts
+++ b/src/transformer/DetectListLevels.ts
@ -3,7 +3,7 @@ import ItemResult from '../ItemResult';
 import ItemTransformer from './ItemTransformer';
 import TransformContext from './TransformContext';
 import LineItemMerger from '../debug/LineItemMerger';
-import { groupByBlock, groupByLine } from '../support/groupingUtils';
+import { groupByBlock, groupByLine, isGreaterWithTolerance } from '../support/groupingUtils';
 import { TextType, toBlockType } from '../text-types';
 import { isListItem, isNumberedListItem } from '../support/stringFunctions';

@ -40,7 +40,7 @@ export default class DetectListLevels extends ItemTransformer {
          const x = firstItem.data['x'];
          if (lastItemX) {
            if (isLineItem) {
-              if (x > lastItemX) {
+              if (isGreaterWithTolerance(x, lastItemX)) {
                currentLevel++;
                xByLevel[x] = currentLevel;
              } else if (x < lastItemX) {
@ -54,10 +54,10 @@ export default class DetectListLevels extends ItemTransformer {
            xByLevel[x] = 0;
          }
          if (currentLevel > 0) {
-            lineItems[0].data['str'] = ' '.repeat(currentLevel * 3) + lineItems[0].data['str'];
+            lineItems[0].listLevel = currentLevel;
            modifiedBlock = true;
            if (isOverflowLine) {
-              lineItems[0].data['str'] = '  ' + lineItems[0].data['str'];
+              // TODO: mark overflow lines so they can be indented as well (their extra two-space indent is no longer added here)
            }
          }
          if (!isOverflowLine) {