separate type and format for a word

2025-06-23 19:11:24 +02:00 · 2017-03-28 08:15:27 +02:00 · 2017-03-28 08:15:27 +02:00 · 106e2bfa8e
commit 106e2bfa8e
parent 9dbc57b4fe
9 changed files with 56 additions and 123 deletions
--- a/src/javascript/models/AppState.jsx
+++ b/src/javascript/models/AppState.jsx
@@ -5,7 +5,6 @@ import CalculateGlobalStats from './transformations/textitem/CalculateGlobalStat
 import CompactLines from './transformations/textitem/CompactLines.jsx';
 import RemoveRepetitiveElements from './transformations/textitem/RemoveRepetitiveElements.jsx'
 import VerticalToHorizontal from './transformations/textitem/VerticalToHorizontal.jsx';
-import PostprocessLines from './transformations/textitem/PostprocessLines.jsx';
 import DetectTOC from './transformations/textitem/DetectTOC.jsx'
 import DetectListItems from './transformations/textitem/DetectListItems.jsx'
 import DetectHeaders from './transformations/textitem/DetectHeaders.jsx'
@@ -56,7 +55,6 @@ export default class AppState {
            new CompactLines(),
            new RemoveRepetitiveElements(),
            new VerticalToHorizontal(),
-            // new PostprocessLines(),
            new DetectTOC(),
            new DetectHeaders(),
            new DetectListItems(),
--- a/src/javascript/models/LineConverter.jsx
+++ b/src/javascript/models/LineConverter.jsx
@@ -41,7 +41,8 @@ export default class LineConverter {
            parsedElements: new ParsedElements({
                footnoteLinks: wordStream.footnoteLinks,
                footnotes: wordStream.footnotes,
-                containLinks: wordStream.containLinks
+                containLinks: wordStream.containLinks,
+                formattedWords: wordStream.formattedWords
            })
        });

@@ -116,27 +117,25 @@ class WordDetectionStream extends StashingStream {

    itemsToWords(items, format) {
        const combinedText = combineText(items);
-        // const combinedText = items.map(textItem => textItem.text).join('');
        const words = combinedText.split(' ');
        return words.filter(w => w.trim().length > 0).map(word => {
+            var type = null;
            if (word.startsWith('http:')) {
                this.containLinks = true;
-                return new Word({
-                    string: word,
-                    type: WordType.LINK
-                });
+                type = WordType.LINK;
            } else if (word.startsWith('www.')) {
                this.containLinks = true;
                word = `http://${word}`
-                return new Word({
-                    string: word,
-                    type: WordType.LINK
-                });
+                type = WordType.LINK;
            }

+            if (format) {
+                this.formattedWords++;
+            }
            return new Word({
                string: word,
-                type: format
+                type: type,
+                format: format
            });
        });
    }
--- a/src/javascript/models/PageItem.jsx
+++ b/src/javascript/models/PageItem.jsx
@@ -18,12 +18,14 @@ export class ParsedElements {
        this.footnoteLinks = options.footnoteLinks || [];
        this.footnotes = options.footnotes || [];
        this.containLinks = options.containLinks;
+        this.formattedWords = options.formattedWords;
    }

    add(parsedElements) {
        this.footnoteLinks = this.footnoteLinks.concat(parsedElements.footnoteLinks);
        this.footnotes = this.footnotes.concat(parsedElements.footnotes);
        this.containLinks = this.containLinks || parsedElements.containLinks;
+        this.formattedWords = (this.formattedWords || 0) + (parsedElements.formattedWords || 0); // a missing count is treated as 0; undefined + n would yield NaN
    }

 }
--- a/src/javascript/models/Word.jsx
+++ b/src/javascript/models/Word.jsx
@@ -3,6 +3,7 @@ export default class Word {
    constructor(options) {
        this.string = options.string;
        this.type = options.type; // WordType
+        this.format = options.format; // WordFormat
    }

 }
--- a/src/javascript/models/markdown/WordFormat.jsx
+++ b/src/javascript/models/markdown/WordFormat.jsx
@@ -0,0 +1,21 @@
+import { Enum } from 'enumify';
+
+// Inline text format of a word (bold / oblique), kept separately from the word's WordType.
+export default class WordFormat extends Enum {
+
+}
+// Constants registered via initEnum(); startSymbol opens a formatted run in linesToText(), endSymbol is presumably its closing counterpart.
+WordFormat.initEnum({
+    BOLD: {
+        startSymbol: '**',
+        endSymbol: '**',
+    },
+    OBLIQUE: {
+        startSymbol: '_',
+        endSymbol: '_',
+    },
+    BOLD_OBLIQUE: {
+        startSymbol: '**_',
+        endSymbol: '_**',
+    }
+});
--- a/src/javascript/models/markdown/WordType.jsx
+++ b/src/javascript/models/markdown/WordType.jsx
@@ -23,21 +23,6 @@ WordType.initEnum({
        toText(string) {
            return `(^${string})`
        }
-    },
-    BOLD: {
-        format: true,
-        startSymbol: '**',
-        endSymbol: '**',
-    },
-    OBLIQUE: {
-        format: true,
-        startSymbol: '_',
-        endSymbol: '_',
-    },
-    BOLD_OBLIQUE: {
-        format: true,
-        startSymbol: '**_',
-        endSymbol: '_**',
    }
 });

@@ -53,23 +38,22 @@ export function linesToText(lineItems, disableInlineFormats) {
    lineItems.forEach((line, lineIndex) => {
        line.words.forEach((word, i) => {
            const wordType = word.type;
-            if (openFormat && (!wordType || wordType !== openFormat)) {
+            const wordFormat = word.format;
+            if (openFormat && (!wordFormat || wordFormat !== openFormat)) {
                closeFormat();
            }

            if (i > 0 && !(wordType && wordType.attachWithoutWhitespace) && !isPunctationCharacter(word.string)) {
                text += ' ';
            }
-            if (wordType && (!disableInlineFormats || wordType.plainTextFormat)) {
-                if (wordType.format) {
-                    if (!openFormat) {
-                        openFormat = wordType;
+
+            if (wordFormat && !openFormat && (!disableInlineFormats)) {
+                openFormat = wordFormat;
                text += openFormat.startSymbol;
            }
-                    text += word.string;
-                } else {
+
+            if (wordType && (!disableInlineFormats || wordType.plainTextFormat)) {
                text += wordType.toText(word.string);
-                }
            } else {
                text += word.string;
            }
@@ -86,7 +70,7 @@ function firstFormat(lineItem) {
    if (lineItem.words.length == 0) {
        return null;
    }
-    return lineItem.words[0].type;
+    return lineItem.words[0].format;
 }

 function isPunctationCharacter(string) {
--- a/src/javascript/models/transformations/textitem/CalculateGlobalStats.jsx
+++ b/src/javascript/models/transformations/textitem/CalculateGlobalStats.jsx
@@ -1,6 +1,6 @@
 import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
 import ParseResult from '../../ParseResult.jsx';
-import WordType from '../../markdown/WordType.jsx';
+import WordFormat from '../../markdown/WordFormat.jsx';

 export default class CalculateGlobalStats extends ToTextItemTransformation {

@@ -54,20 +54,20 @@ export default class CalculateGlobalStats extends ToTextItemTransformation {
        this.fontMap.forEach(function(value, key) {
            fontIdToName.push(key + " = " + value.name)
            const fontName = value.name.toLowerCase();
-            var type;
+            var format;
            if (key == mostUsedFont) {
-                type = null;
+                format = null;
            } else if (fontName.includes('bold') && (fontName.includes('oblique') || fontName.includes('italic'))) {
-                type = WordType.BOLD_OBLIQUE;
+                format = WordFormat.BOLD_OBLIQUE;
            } else if (fontName.includes('bold')) {
-                type = WordType.BOLD;
+                format = WordFormat.BOLD;
            } else if (fontName.includes('oblique') || fontName.includes('italic')) {
-                type = WordType.OBLIQUE;
+                format = WordFormat.OBLIQUE;
            } else if (fontName === maxHeightFont) {
-                type = WordType.BOLD;
+                format = WordFormat.BOLD;
            }
-            if (type) {
-                fontToFormats.set(key, type);
+            if (format) {
+                fontToFormats.set(key, format);
            }
        });
        fontIdToName.sort();
--- a/src/javascript/models/transformations/textitem/CompactLines.jsx
+++ b/src/javascript/models/transformations/textitem/CompactLines.jsx
@@ -48,6 +48,9 @@ export default class CompactLines extends ToLineItemTransformation {
                    }
                    lineItems.push(lineItem);

+                    if (lineItem.parsedElements.formattedWords) {
+                        formattedWords += lineItem.parsedElements.formattedWords;
+                    }
                    if (lineItem.parsedElements.containLinks > 0) {
                        linkCount++;
                    }
--- a/src/javascript/models/transformations/textitem/PostprocessLines.jsx
+++ b/src/javascript/models/transformations/textitem/PostprocessLines.jsx
@@ -1,75 +0,0 @@
-import ToTextItemTransformation from '../ToTextItemTransformation.jsx';
-import ParseResult from '../../ParseResult.jsx';
-import TextItem from '../../TextItem.jsx';
-import { ParsedElements } from '../../PageItem.jsx';
-import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../../Annotation.jsx';
-
-
-// Remove whitespace, detect links, etc...
-export default class PostprocessLines extends ToTextItemTransformation {
-
-    constructor() {
-        super("Remove Whitespace & Detect Links");
-        this.showWhitespaces = true;
-    }
-
-    transform(parseResult:ParseResult) {
-        var strippedWhitespace = 0;
-        var foundLinks = 0;
-
-        parseResult.pages.forEach(page => {
-            const newItems = [];
-            page.items.forEach(lineItem => {
-                newItems.push(lineItem);
-                var words = lineItem.text.split(' ');
-                var newWords = [];
-                var foundSuperflousNewLine = false;
-                var foundLink = false;
-                words.forEach(word => {
-                    if (word.trim().length == 0) {
-                        foundSuperflousNewLine = true;
-                        strippedWhitespace++;
-                    } else {
-                        if (word.startsWith('http:')) {
-                            foundLinks++;
-                            foundLink = true;
-                            newWords.push(`[${word}](${word})`);
-                        } else if (word.startsWith('www.')) {
-                            foundLinks++;
-                            foundLink = true;
-                            newWords.push(`[http://${word}](http://${word})`);
-                        } else {
-                            newWords.push(word);
-                        }
-                    }
-                });
-                if (foundSuperflousNewLine || foundLink) {
-                    lineItem.annotation = REMOVED_ANNOTATION;
-                    if (newWords.length > 0) {
-                        newItems.push(new TextItem({
-                            ...lineItem,
-                            text: newWords.join(' '),
-                            annotation: ADDED_ANNOTATION,
-                            parsedElements: new ParsedElements({
-                                ...lineItem.parsedElements,
-                                containLinks: foundLink
-                            })
-                        }));
-                    }
-                }
-            });
-            page.items = newItems;
-        });
-
-
-        return new ParseResult({
-            ...parseResult,
-            messages: [
-                'Stripped ' + strippedWhitespace + ' superflous whitespaces',
-                'Found ' + foundLinks + ' links',
-            ]
-        });
-    }
-
-
-}