[WIP] remove old stuff

2025-06-20 17:47:47 +02:00 · 2017-03-24 08:05:59 +01:00 · 2017-03-24 08:05:59 +01:00 · e19294f35f
commit e19294f35f
parent bd7d9bc0e9
8 changed files with 0 additions and 927 deletions
--- a/src/javascript/models/transformations/old/CombineSameY.jsx
+++ b/src/javascript/models/transformations/old/CombineSameY.jsx
@@ -1,101 +0,0 @@
-import ToTextItemTransformation from './ToTextItemTransformation.jsx';
-import TextItem from '../TextItem.jsx';
-import ParseResult from '../ParseResult.jsx';
-import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
-
-function combineTextItems(textItems:TextItem[]) {
-    var numChars = 0;
-    var sumWidth = 0;
-    var maxHeight = 0;
-    textItems.forEach(textItem => {
-        if (textItem.width > 0) {
-            numChars += textItem.text.length;
-            sumWidth += textItem.width;
-        }
-        maxHeight = Math.max(textItem.height, maxHeight);
-    });
-    const avgCharacterWidth = Math.round(sumWidth / numChars);
-
-    var combinedText = '';
-    var sumWidthWithWhitespaces = sumWidth;
-    var lastItemX;
-    var lastItemWidth;
-    textItems.forEach(textItem => {
-        if (lastItemX && textItem.x - lastItemX - lastItemWidth > avgCharacterWidth) {
-            combinedText += ' ';
-            sumWidthWithWhitespaces += avgCharacterWidth;
-        }
-        combinedText += textItem.text;
-        lastItemX = textItem.x;
-        lastItemWidth = textItem.width > 0 ? textItem.width : avgCharacterWidth / 2 * textItem.text.length;
-    });
-
-    return new TextItem({
-        x: textItems[0].x,
-        y: textItems[0].y,
-        width: sumWidthWithWhitespaces,
-        height: maxHeight,
-        text: combinedText,
-        annotation: ADDED_ANNOTATION
-    });
-}
-
-export default class CombineSameY extends ToTextItemTransformation {
-
-    constructor() {
-        super("Combine Text On Same Y");
-    }
-
-    transform(parseResult:ParseResult) {
-        const newContent = parseResult.content.map(pdfPage => {
-            const newTextItems = [];
-            var textItemsWithSameY = [];
-
-            var completeTextItemsWithSameY = function(textItemsWithSameY) {
-                if (textItemsWithSameY.length == 1) {
-                    newTextItems.push(textItemsWithSameY[0]);
-                } else {
-                    // add removed text-items
-                    textItemsWithSameY.forEach(textItem => {
-                        textItem.annotation = REMOVED_ANNOTATION;
-                        newTextItems.push(textItem);
-                    });
-                    newTextItems.push(combineTextItems(textItemsWithSameY));
-                }
-            }
-
-            pdfPage.textItems.forEach(textItem => {
-                if (textItemsWithSameY.length == 0 || Math.abs(textItem.y - textItemsWithSameY[textItemsWithSameY.length - 1].y) < 2) {
-                    //fill array
-                    textItemsWithSameY.push(textItem);
-                } else {
-                    //rotate
-                    completeTextItemsWithSameY(textItemsWithSameY);
-                    textItemsWithSameY = [textItem];
-                }
-            });
-            if (textItemsWithSameY.length > 0) {
-                completeTextItemsWithSameY(textItemsWithSameY);
-            }
-
-            return {
-                ...pdfPage,
-                textItems: newTextItems
-            };
-        });
-
-        return new ParseResult({
-            ...parseResult,
-            content: newContent
-        });
-    }
-
-    completeTransform(parseResult:ParseResult) {
-        parseResult.content.forEach(page => {
-            page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
-            page.textItems.forEach(textItem => textItem.annotation = null)
-        });
-        return parseResult;
-    }
-
-}
--- a/src/javascript/models/transformations/old/DetectFootnoteOld.jsx
+++ b/src/javascript/models/transformations/old/DetectFootnoteOld.jsx
@@ -1,70 +0,0 @@
-import ToTextItemBlockTransformation from './ToTextItemBlockTransformation.jsx';
-import TextItem from '../TextItem.jsx';
-import ParseResult from '../ParseResult.jsx';
-import { ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
-
-import { isNumber } from '../../functions.jsx'
-
-export default class DetectFootnoteOld extends ToTextItemBlockTransformation {
-
-    constructor() {
-        super("Detect Footnote ");
-    }
-
-    transform(parseResult:ParseResult) {
-
-        var nextFooterNumber = 1;
-        var potentialFootnoteItem;
-        var foundFootnotes = 0;
-
-        const newContent = parseResult.content.map(page => {
-            const newTextItems = [];
-            for (var i = 0; i < page.textItems.length; i++) {
-                const item = page.textItems[i];
-                if (potentialFootnoteItem) {
-                    if (potentialFootnoteItem.y - item.y < item.height) {
-                        potentialFootnoteItem.annotation = REMOVED_ANNOTATION;
-                        item.annotation = REMOVED_ANNOTATION;
-                        newTextItems.push(potentialFootnoteItem);
-                        newTextItems.push(item);
-                        newTextItems.push(new TextItem({
-                            x: potentialFootnoteItem.x,
-                            y: item.y,
-                            width: potentialFootnoteItem.width + item.width,
-                            height: item.height,
-                            text: '[' + potentialFootnoteItem.text + '] ' + item.text,
-                            annotation: ADDED_ANNOTATION
-                        }));
-                        //TODO repsect multiline!!
-                        nextFooterNumber++;
-                        foundFootnotes++;
-                    }
-                    potentialFootnoteItem = null;
-                } else if (isNumber(item.text) && parseInt(item.text) == nextFooterNumber && i > 0 && i < page.textItems.length - 1 && page.textItems[i - 1].y !== page.textItems[i + 1].y) {
-                    potentialFootnoteItem = item;
-                } else {
-                    newTextItems.push(item);
-                }
-            }
-            return {
-                ...page,
-                textItems: newTextItems
-            };
-        });
-
-        return new ParseResult({
-            ...parseResult,
-            content: newContent,
-            messages: ['Detected ' + foundFootnotes + ' footnotes']
-        });
-    }
-
-    completeTransform(parseResult:ParseResult) {
-        parseResult.content.forEach(page => {
-            page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
-            page.textItems.forEach(textItem => textItem.annotation = null)
-        });
-        return parseResult;
-    }
-
-}
--- a/src/javascript/models/transformations/old/DetectFormats.jsx
+++ b/src/javascript/models/transformations/old/DetectFormats.jsx
@@ -1,177 +0,0 @@
-import React from 'react';
-import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
-import ParseResult from '../ParseResult.jsx';
-import { REMOVED_ANNOTATION } from '../Annotation.jsx';
-import Annotation from '../Annotation.jsx';
-
-//Detect word/sentence formats like bold, italic,...
-export default class DetectFormats extends ToPdfViewTransformation {
-
-    constructor() {
-        super("Detect Bold/Italic");
-    }
-
-    createSummaryView(parseResult:ParseResult) {
-        return <div>
-                 Detected
-                 { ' ' + parseResult.summary.foundFormats + ' ' } formats.
-               </div>;
-    }
-
-
-    transform(parseResult:ParseResult) {
-        var foundFormats = 0;
-        const {mostUsedHeight, mostUsedFont, maxHeightFont} = parseResult.globals;
-        const symbols = {
-            bold: '**',
-            emphasis: '_'
-        }
-
-        const newContent = parseResult.content.map(page => {
-            const newTextItems = [];
-
-            //bundle items on same Y
-            const groupedItems = groupByFollowingY(page.textItems);
-            var lastItem;
-            var lastFormat;
-
-            const addNextItem = (item, format) => {
-                if (lastItem) {
-                    if (lastFormat !== format) {
-                        lastItem.text = appendSymbol(lastItem.text, symbols[lastFormat]);
-                        if (lastItem.annotation) {
-                            lastItem.annotation = newAnnotation(lastFormat);
-                        } else {
-                            lastItem.annotation = newAnnotation('End ' + lastFormat);
-                        }
-                    }
-                    lastItem.height = mostUsedHeight;
-                    newTextItems.push(lastItem);
-                }
-
-                if (format) {
-                    if (lastFormat !== format) {
-                        item.text = prependSymbol(item.text, symbols[format]);
-                        item.annotation = newAnnotation('Start ' + format);
-                    }
-                    lastItem = item;
-                    lastFormat = format;
-                } else {
-                    newTextItems.push(item);
-                    lastItem = null;
-                    lastFormat = null;
-                }
-            };
-
-
-            groupedItems.forEach(itemGroup => {
-
-                //probably headline
-                const differentHeightsButSameFont = itemsHaveDifferentHeightsButSameFont(itemGroup);
-
-                itemGroup.forEach(item => {
-                    const paragraphHeighOrSlightlyBigger = item.height == mostUsedHeight || item.height == mostUsedHeight + 1;
-                    if (!differentHeightsButSameFont && paragraphHeighOrSlightlyBigger && item.font !== mostUsedFont) {
-                        // item.annotation = REMOVED_ANNOTATION;
-
-                        const format = item.font === maxHeightFont ? 'bold' : 'emphasis';
-                        addNextItem(item, format);
-
-                        //TODO test with womb compilation. _Th_, _ff_,... check font like SanSarif ?
-                        //TODO don't touch 'eingerückte' Zeichen => detect early ?
-                        //TODO (Maybe) could detect combined bold & emphasis like font=bold.font + emph.font !?
-                        foundFormats++;
-                    } else {
-                        addNextItem(item);
-                    }
-                });
-            });
-
-            return {
-                ...page,
-                textItems: newTextItems
-            };
-        });
-        return new ParseResult({
-            ...parseResult,
-            content: newContent,
-            summary: {
-                foundFormats: foundFormats
-            }
-        });
-    }
-
-    completeTransform(parseResult:ParseResult) {
-        parseResult.content.forEach(page => {
-            page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
-            page.textItems.forEach(textItem => textItem.annotation = null)
-        });
-        return parseResult;
-    }
-
-}
-
-function newAnnotation(name) {
-    return new Annotation({
-        category: name,
-        color: 'green'
-    });
-}
-
-//groups all following text items with the same Y together
-function groupByFollowingY(textItems) {
-    const yArrays = [];
-    var itemsWithSameY = [];
-    var lastItem;
-    textItems.forEach(item => {
-        if (itemsWithSameY.length == 0 || item.y == lastItem.y) {
-            itemsWithSameY.push(item);
-        } else {
-            yArrays.push(itemsWithSameY);
-            itemsWithSameY = [item];
-        }
-        lastItem = item;
-    })
-    yArrays.push(itemsWithSameY);
-    return yArrays;
-}
-
-function itemsHaveDifferentHeightsButSameFont(itemGroup) {
-    var heights = new Set();
-    var fonts = new Set();
-    itemGroup.forEach(item => {
-        heights.add(item.height);
-        fonts.add(item.font);
-    });
-    return heights.size > 1 && fonts.size == 1;
-}
-
-//TODO move to stringFunctions
-
-function prependSymbol(text, symbol) {
-    if (text.charAt(0) == ' ') {
-        return ' ' + symbol + removeLeadingWhitespace(text);
-    }
-    return symbol + text;
-}
-
-function appendSymbol(text, symbol) {
-    if (text.charAt(text.length - 1) == ' ') {
-        return removeTrailingWhitespace(text) + symbol + ' ';
-    }
-    return text + symbol;
-}
-
-function removeLeadingWhitespace(text) {
-    while (text.charAt(0) == ' ') {
-        text = text.substring(1, text.length);
-    }
-    return text;
-}
-
-function removeTrailingWhitespace(text) {
-    while (text.charAt(text.length - 1) == ' ') {
-        text = text.substring(0, text.length - 1);
-    }
-    return text;
-}
--- a/src/javascript/models/transformations/old/DetectHeadlines.jsx
+++ b/src/javascript/models/transformations/old/DetectHeadlines.jsx
@@ -1,182 +0,0 @@
-import ToTextItemBlockTransformation from './ToTextItemBlockTransformation.jsx';
-import ParseResult from '../ParseResult.jsx';
-import TextItemBlock from '../TextItemBlock.jsx';
-import { ADDED_ANNOTATION, DETECTED_ANNOTATION } from '../Annotation.jsx';
-import ElementType from '../ElementType.jsx';
-import { headlineByLevel } from '../ElementType.jsx';
-
-//Detect headlines
-export default class DetectHeadlines extends ToTextItemBlockTransformation {
-
-    constructor() {
-        super("Detect Headlines");
-    }
-
-    transform(parseResult:ParseResult) {
-        var foundHeadlines = 0;
-        const {mostUsedHeight, mostUsedDistance, maxHeight, tocPages} = parseResult.globals;
-
-        //Set max headlines (all headers on the same page are max level 2)
-        const maxHeaderPages = convertMaxHeaders(parseResult.pages, maxHeight, mostUsedHeight);
-
-
-        var headlineHeightFlowBeforeToc = [];
-        var headlineHeightsOccurenceBeforeToc = {};
-        var firstPageAfterToc = 0;
-        if (tocPages && tocPages.length > 0) {
-            [headlineHeightFlowBeforeToc, headlineHeightsOccurenceBeforeToc] = calculateHeadlineHeigthFlow(parseResult.pages, 0, tocPages[0], mostUsedHeight, maxHeaderPages);
-            firstPageAfterToc = tocPages[tocPages.length - 1] + 1;
-        }
-
-        const [headlineHeightFlowAfterToc, headlineHeightsOccurenceAfterToc] = calculateHeadlineHeigthFlow(parseResult.pages, firstPageAfterToc, parseResult.pages.length, mostUsedHeight, maxHeaderPages);
-
-
-        // TODO ==> do flow analysis (remove out of flow or snap, start with 2nd)
-        // TODO ==> parse seperately between beforeToc and after
-        // TODO ==> Kala chakra, all uppercase
-        // TODO ==> TOC headlines
-
-        //var topHeadlinePassed = false;
-        const headlineHeightMap = {};
-        const headlineSizePerLevel = {};
-        var currentHeadlineLevel;
-        parseResult.pages.forEach(page => {
-            const newBlocks = [];
-            page.items.forEach(block => {
-                newBlocks.push(block);
-                if (!block.type && !block.annotation && block.textItems[0].height > mostUsedHeight) {
-                    // const combineResult = textCombiner.combine(block.textItems);
-                    // if (combineResult.textItems.length == 1) {
-                    //     const height = combineResult.textItems[0].height;
-                    //     if (height == maxHeight) {
-                    //         // block.annotation = REMOVED_ANNOTATION;
-                    //         currentHeadlineLevel = 1;
-                    //         headlineSizePerLevel[currentHeadlineLevel] = height
-                    //         addNewBlock(newBlocks, combineResult, headlineByLevel(currentHeadlineLevel));
-                    //     }
-                    // else if (currentHeadlineLevel) {
-                    //     const currentLevelSize = headlineSizePerLevel[currentHeadlineLevel];
-                    //     if (height < currentLevelSize) {
-                    //         const nextLevelSize = headlineSizePerLevel[currentHeadlineLevel + 1];
-                    //         // if(!nextLevelSize)
-                    //         if (currentHeadlineLevel < 6) {
-                    //             currentHeadlineLevel++;
-                    //         }
-                    //         addNewBlock(newBlocks, combineResult, headlineByLevel(currentHeadlineLevel));
-                    //         headlineSizePerLevel[currentHeadlineLevel] = height;
-                    //     } else if (height > currentLevelSize) {
-                    //         const preLevelSize = headlineSizePerLevel[currentHeadlineLevel - 1];
-                    //         if (currentHeadlineLevel > 1) {
-                    //             currentHeadlineLevel--;
-                    //         }
-                    //         addNewBlock(newBlocks, combineResult, headlineByLevel(currentHeadlineLevel));
-                    //         headlineSizePerLevel[currentHeadlineLevel] = height;
-                    //     } else {
-                    //         addNewBlock(newBlocks, combineResult, headlineByLevel(currentHeadlineLevel));
-                    //     }
-                    // }
-                    // }
-                }
-            });
-            page.items = newBlocks;
-        });
-
-        const heightToOccurrence = {};
-        const fontToOccurrence = {};
-        // parseResult.content.forEach(page => {
-        //     const newBlocks = [];
-        //     page.blocks.forEach(block => {
-        //         newBlocks.push(block);
-        //         if (!block.type && block.textItems[0].height > mostUsedHeight) {
-        //             foundHeadlines++;
-        //             block.annotation = REMOVED_ANNOTATION;
-        //             const combineResult = textCombiner.combine(block.textItems);
-        //             const height = combineResult.textItems[0].height;
-        //             const font = combineResult.textItems[0].font;
-        //             heightToOccurrence[height] = heightToOccurrence[height] ? heightToOccurrence[height] + 1 : 1;
-        //             fontToOccurrence[font] = fontToOccurrence[font] ? fontToOccurrence[font] + 1 : 1;
-        //             newBlocks.push(new PdfBlock({
-        //                 textItems: combineResult.textItems,
-        //                 type: HEADLINE1,
-        //                 annotation: ADDED_ANNOTATION,
-        //                 parsedElements: combineResult.parsedElements
-        //             }));
-        //         }
-        //     });
-        //     page.blocks = newBlocks;
-        // });
-
-        return new ParseResult({
-            ...parseResult,
-            messages: [
-                'Found headlines: ' + foundHeadlines,
-                'Height repetition: ' + JSON.stringify(heightToOccurrence),
-                'Font repetition: ' + JSON.stringify(fontToOccurrence),
-                'Pages with max Header: ' + maxHeaderPages,
-                'Headline Height Flow (before TOC): ' + headlineHeightFlowBeforeToc,
-                'Headline Heights Occurence (before TOC): ' + JSON.stringify(headlineHeightsOccurenceBeforeToc),
-                'Headline Height Flow: ' + headlineHeightFlowAfterToc,
-                'Headline Heights Occurence: ' + JSON.stringify(headlineHeightsOccurenceAfterToc),
-            ]
-        });
-    }
-
-}
-
-function convertMaxHeaders(pages, maxHeight, mostUsedHeight) {
-    // Find pages with max height
-    const maxHeaderPagesSet = new Set();
-    pages.forEach(page => {
-        page.items.forEach(block => {
-            if (!block.type && block.textItems[0].height == maxHeight) {
-                maxHeaderPagesSet.add(page);
-            }
-        });
-    });
-
-    // Now convert those pages to headlines
-    const min2ndLevelHeaderHeigthOnMaxPage = mostUsedHeight + ((maxHeight - mostUsedHeight) / 4);
-    maxHeaderPagesSet.forEach(pageWithMaxHeader => {
-        pageWithMaxHeader.items.forEach(block => {
-            if (block.textItems.length == 1) {
-                const height = block.textItems[0].height;
-                if (!block.type && height > min2ndLevelHeaderHeigthOnMaxPage) {
-                    block.annotation = DETECTED_ANNOTATION;
-                    if (height == maxHeight) {
-                        block.type = ElementType.H1;
-                    } else {
-                        block.type = ElementType.H2;
-                    }
-                }
-            }
-        });
-    });
-    return Array.from(maxHeaderPagesSet).map(page => page.index + 1);
-}
-
-function calculateHeadlineHeigthFlow(pages, from, to, mostUsedHeight, maxHeaderPages) {
-    const headlineHeightFlow = [];
-    const headlineHeightsOccurences = {};
-    var lastHeadlineHeight;
-    for (var i = from; i < to; i++) {
-        const page = pages[i];
-        if (!maxHeaderPages.includes(page.index + 1)) {
-            page.items.forEach(block => {
-                if (!block.type && !block.annotation && block.textItems[0].height > mostUsedHeight) {
-                    if (block.textItems.length == 1) {
-                        const height = block.textItems[0].height;
-                        headlineHeightsOccurences[height] = headlineHeightsOccurences[height] ? headlineHeightsOccurences[height] + 1 : 1 ;
-                        if (!lastHeadlineHeight || height != lastHeadlineHeight) {
-                            headlineHeightFlow.push(height);
-                            //headlineFontFlow.push(combineResult.textItems[0].font)
-                            lastHeadlineHeight = height;
-                        }
-                    }
-                }
-            });
-        }
-    }
-
-    return [headlineHeightFlow, headlineHeightsOccurences];
-}
-
--- a/src/javascript/models/transformations/old/HeadlineDetector.jsx
+++ b/src/javascript/models/transformations/old/HeadlineDetector.jsx
@@ -1,158 +0,0 @@
-import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
-import TextItem from '../TextItem.jsx';
-import ParseResult from '../ParseResult.jsx';
-import Annotation from '../Annotation.jsx';
-
-import Headline from '../markdown/Headline.jsx';
-
-
-function analyzeHeigths(pages) {
-    const analyzationResult = {
-        maxHeight: 0,
-        maxYPerPage: {},
-        heights: [],
-        mostUsedHeight: -1
-    };
-    const allHeights = new Set();
-    pages.forEach(page => {
-        var maxPageY = 0;
-        page.textItems.forEach(item => {
-            const height = item.height;
-            allHeights.add(height);
-            if (analyzationResult[height]) {
-                analyzationResult[height].repetition = analyzationResult[height].repetition + 1;
-                analyzationResult[height].pages.add(page.index);
-            } else {
-                analyzationResult[height] = {
-                    repetition: 1,
-                    pages: new Set([page.index])
-                };
-            }
-            maxPageY = Math.max(maxPageY, item.y);
-            analyzationResult.maxHeight = Math.max(analyzationResult.maxHeight, item.height);
-        });
-        analyzationResult.maxYPerPage[page.index] = maxPageY;
-    });
-
-    var maxRepetition = 0;
-    allHeights.forEach(height => {
-        const heightRepetition = analyzationResult[height].repetition;
-        analyzationResult.heights.push(height);
-        if (heightRepetition > maxRepetition) {
-            maxRepetition = heightRepetition;
-            analyzationResult.mostUsedHeight = height;
-        }
-    });
-    analyzationResult.heights = analyzationResult.heights.sort((a, b) => a - b);
-
-    return analyzationResult;
-}
-
-function findNextMajorHeight(heights, currentHeight, headlineLevels) {
-    for (var i = currentHeight; i < heights.length; i++) {
-        if (headlineLevels[heights[i]]) {
-            return heights[i];
-        }
-    }
-    throw `Shouldn't happen! heights=${heights}, currentHeight=${currentHeight}, headlineLevels=${headlineLevels}`;
-}
-
-
-export default class HeadlineDetector extends ToPdfViewTransformation {
-
-    constructor() {
-        super("Detect Headlines");
-    }
-
-    // Strategy:
-    // - find most used height => this & every height below is paragraph
-    // - heights which start a page are likely to be headlines
-    // - maxHeigth is likely a headline
-    // - heights which occur on more then one page are likely to be headlines
-    transform(parseResult:ParseResult) {
-        const heightAnalyzation = analyzeHeigths(parseResult.content);
-
-        var paragraphHeight = heightAnalyzation.mostUsedHeight + 1;
-
-        // text with more hight then the paragraph height which are on the top of the page are likely to be headlines
-        const likelyHeadingHeights = new Set();
-        parseResult.content.forEach(page => {
-            page.textItems.forEach(item => {
-                if (item.height > paragraphHeight && heightAnalyzation.maxYPerPage[page.index] == item.y) {
-                    likelyHeadingHeights.add(item.height);
-                }
-            });
-        });
-
-        const headlineHeights = [];
-        heightAnalyzation.heights.forEach(height => {
-            if (height == heightAnalyzation.maxHeight || (height > paragraphHeight && likelyHeadingHeights.has(height) && heightAnalyzation[height].pages.size > 1)) {
-                headlineHeights.push(height);
-            }
-        });
-
-
-        const headlineLevels = {};
-        headlineHeights.reverse().forEach((height, i) => headlineLevels[height] = i + 1);
-        var lastMajorHeight = paragraphHeight;
-        var heights = heightAnalyzation.heights;
-        for (var i = 0; i < heights.length; i++) {
-            if (heights[i] > paragraphHeight && !headlineLevels[heights[i]]) {
-                const nextMajorHeight = findNextMajorHeight(heights, i + 1, headlineLevels);
-                const distanceToLower = heights[i] - lastMajorHeight;
-                const distanceToHigher = nextMajorHeight - heights[i];
-                if (distanceToLower <= distanceToHigher) {
-                    if (lastMajorHeight == paragraphHeight) {
-                        paragraphHeight++;
-                    } else {
-                        headlineLevels[heights[i]] = headlineLevels[lastMajorHeight];
-                    }
-                } else {
-                    headlineLevels[heights[i]] = headlineLevels[nextMajorHeight];
-                }
-            }
-            if (headlineLevels[heights[i]]) {
-                lastMajorHeight = heights[i];
-            }
-        }
-
-        const newContent = parseResult.content.map(page => {
-            const newTextItems = [];
-            page.textItems.forEach(item => {
-                if (item.height <= paragraphHeight) {
-                    newTextItems.push(item);
-                } else {
-                    const headlineLevel = headlineLevels[item.height];
-                    newTextItems.push(new TextItem({
-                        ...item,
-                        text: item.text,
-                        annotation: new Annotation({
-                            category: "Headline-" + headlineLevel,
-                            color: 'green'
-                        }),
-                        markdownElement: new Headline({
-                            level: headlineLevel
-                        })
-                    }));
-                }
-            });
-            return {
-                ...page,
-                textItems: newTextItems
-            };
-        });
-
-        return new ParseResult({
-            ...parseResult,
-            content: newContent,
-        });
-    }
-
-    completeTransform(parseResult:ParseResult) {
-        parseResult.content.forEach(page => {
-            page.textItems.forEach(textItem => textItem.annotation = null)
-        });
-        return parseResult;
-    }
-
-}
--- a/src/javascript/models/transformations/old/HeadlineDetector2.jsx
+++ b/src/javascript/models/transformations/old/HeadlineDetector2.jsx
@@ -1,107 +0,0 @@
-import Transformation from './Transformation.jsx';
-import TextItem from '../TextItem.jsx';
-import PdfPage from '../PdfPage.jsx';
-import ContentView from '../ContentView.jsx';
-import { Annotation, ADDED_ANNOTATION, REMOVED_ANNOTATION } from '../Annotation.jsx';
-
-import Headline from '../markdown/Headline.jsx';
-
-function getMostUsedHeight(heightToOccurrence) {
-    var maxOccurence = 0;
-    var maxHeight = 0;
-    Object.keys(heightToOccurrence).map((element) => {
-        if (heightToOccurrence[element] > maxOccurence) {
-            maxOccurence = heightToOccurrence[element];
-            maxHeight = element;
-        }
-    });
-    return parseInt(maxHeight);
-}
-
-
-export default class HeadlineDetector extends Transformation {
-
-    constructor() {
-        super("Detect Headlines");
-    }
-
-    contentView() {
-        return ContentView.PDF;
-    }
-
-    // Strategy:
-    // - find most used height => this & every height below is paragraph
-    // - heights which start a page are likely to be headlines
-    // - maxHeigth is likely a headline
-    // - heights which occur on more then one page are likely to be headlines
-    transform(pages:PdfPage[]) {
-
-        const heightToOccurrence = {};
-        pages.forEach(page => {
-            page.textItems.forEach(item => {
-                heightToOccurrence[item.height] = heightToOccurrence[item.height] ? heightToOccurrence[item.height] + 1 : 1;
-            });
-        });
-        console.debug(heightToOccurrence);
-        const mostUsedHeight = getMostUsedHeight(heightToOccurrence);
-        console.debug("mostUsedHeight: " + mostUsedHeight);
-
-        const headlineHeights = new Set(Object.keys(heightToOccurrence).filter(height => parseInt(height) > mostUsedHeight).map(elem => parseInt(elem)));
-        console.debug(Array.from(headlineHeights));
-        const headlineHeights2 = new Set();
-        pages.forEach(page => {
-            const textItems = page.textItems;
-            for (var i = 0; i < textItems.length; i++) {
-                const item = textItems[i];
-                if (item.height > mostUsedHeight) {
-
-                    item.annotation = ADDED_ANNOTATION;
-                    const firstItemOnPage = i == 0;
-                    var upperDistance = 99;
-                    if (!firstItemOnPage) {
-                        upperDistance = textItems[i - 1].y - item.y - item.height;
-                    }
-                    var lowerDistance = 0;
-                    const lastItemOnPage = i == textItems.length - 1;
-                    if (!lastItemOnPage) {
-                        lowerDistance = item.y - textItems[i + 1].y - textItems[i + 1].height;
-                    }
-                    if (firstItemOnPage) {
-                        console.debug("add " + item.height);
-                        console.debug("potential headline: " + item.height + " | " + item.text);
-                        console.debug("\tfirstItem=" + firstItemOnPage + ", lastItem:" + lastItemOnPage);
-                        console.debug("\tupperDistance/lowerDistance=" + upperDistance + " / " + lowerDistance);
-                        headlineHeights2.add(item.height);
-                    }
-
-                    // if (!((firstItemOnPage || upperDistance > mostUsedHeight / 2) && lowerDistance > mostUsedHeight / 2)) {
-                    //     console.debug("remove " + item.height);
-                    //     console.debug("potential headline: " + item.height + " | " + item.text);
-                    //     console.debug("\tfirstItem=" + firstItemOnPage + ", lastItem:" + lastItemOnPage);
-                    //     console.debug("\tupperDistance/lowerDistance=" + upperDistance + " / " + lowerDistance);
-                    //     headlineHeights.delete(item.height);
-                    // }
-
-
-                // if ((firstItemOnPage || upperDistance > 10) && lowerDistance > 10) {
-                //     item.annotation = ADDED_ANNOTATION;
-                // }
-                // console.debug("potential headline: " + item.height + " | " + item.text);
-                // console.debug("\tfirstItem=" + firstItemOnPage + ", lastItem:" + lastItemOnPage);
-                // console.debug("\tupperDistance/lowerDistance=" + upperDistance + " / " + lowerDistance);
-                }
-            }
-        });
-        console.debug(Array.from(headlineHeights2));
-
-        return pages;
-    }
-
-    processAnnotations(pages:PdfPage[]) {
-        pages.forEach(page => {
-            page.textItems.forEach(textItem => textItem.annotation = null)
-        });
-        return pages;
-    }
-
-}
--- a/src/javascript/models/transformations/old/HeadlineToUppercase.jsx
+++ b/src/javascript/models/transformations/old/HeadlineToUppercase.jsx
@ -1,58 +0,0 @@
-import ToPdfViewTransformation from './ToPdfViewTransformation.jsx';
-import TextItem from '../TextItem.jsx';
-import ParseResult from '../ParseResult.jsx';
-import { ADDED_ANNOTATION, REMOVED_ANNOTATION, UNCHANGED_ANNOTATION } from '../Annotation.jsx';
-
-import { hasUpperCaseCharacterInMiddleOfWord } from '../../functions.jsx'
-
-// Uppercase headlines are often parsed with very mixed character with pdf.js, like 'A heAdLine'.
-// This tries to detect them and make them all uppercase.
-export default class HeadlineToUppercase extends ToPdfViewTransformation {
-
-    constructor() {
-        super("Headlines Uppercase");
-    }
-
-    transform(parseResult:ParseResult) {
-        const newContent = parseResult.content.map(page => {
-            const newTextItems = [];
-            page.textItems.forEach(item => {
-                if (item.markdownElement && item.markdownElement.constructor.name === 'Headline') {
-                    const headline = item.text.trim();
-                    if (hasUpperCaseCharacterInMiddleOfWord(headline)) {
-                        item.annotation = REMOVED_ANNOTATION;
-                        newTextItems.push(item);
-                        newTextItems.push(new TextItem({
-                            ...item,
-                            text: item.text.toUpperCase(),
-                            annotation: ADDED_ANNOTATION
-                        }));
-                    } else {
-                        item.annotation = UNCHANGED_ANNOTATION;
-                        newTextItems.push(item);
-                    }
-                } else {
-                    newTextItems.push(item);
-                }
-            });
-            return {
-                ...page,
-                textItems: newTextItems
-            };
-        });
-
-        return new ParseResult({
-            ...parseResult,
-            content: newContent,
-        });
-    }
-
-    completeTransform(parseResult:ParseResult) {
-        parseResult.content.forEach(page => {
-            page.textItems = page.textItems.filter(textItem => !textItem.annotation || textItem.annotation !== REMOVED_ANNOTATION);
-            page.textItems.forEach(textItem => textItem.annotation = null)
-        });
-        return parseResult;
-    }
-
-}
--- a/src/javascript/models/transformations/old/ToBlockSystem.jsx
+++ b/src/javascript/models/transformations/old/ToBlockSystem.jsx
@ -1,74 +0,0 @@
-import React from 'react';
-import Transformation from './Transformation.jsx';
-import BlockPageView from '../../components/debug/BlockPageView.jsx';
-import ParseResult from '../ParseResult.jsx';
-import BlockPage from '../BlockPage.jsx';
-
-export default class ToBlockSystem extends Transformation {
-
-    constructor() {
-        super("To Block System");
-    }
-
-    createPageView(page, modificationsOnly) { // eslint-disable-line no-unused-vars
-        return <BlockPageView key={ page.index } page={ page } />;
-    }
-
-    transform(parseResult:ParseResult) {
-        const blocks = [];
-        parseResult.content.forEach(page => {
-            var minDiff = 99;
-            var lastY = 0;
-            page.textItems.forEach(item => {
-                if (lastY > 0) {
-                    const yDiff = lastY - item.y - item.height;
-                    if (yDiff > 0) {
-                        minDiff = Math.min(minDiff, yDiff);
-                    }
-                }
-                lastY = item.y;
-            });
-
-            var text;
-            const rollup = (category) => {
-                if (text && text.length > 0) {
-                    // console.debug("Push[" + blocks.length + "]: " + text);
-                    blocks.push({
-                        category: category,
-                        text: text
-                    });
-                }
-                text = null;
-            };
-
-            lastY = 0;
-            page.textItems.forEach(item => {
-                if (item.markdownElement) {
-                    rollup("Block");
-                    text = item.markdownElement.transformText(item.text);
-                    rollup(item.markdownElement.constructor.name);
-                } else if (!text) {
-                    text = item.text;
-                } else {
-                    const yDiff = lastY - item.y - item.height;
-                    if (yDiff > minDiff + 2) {
-                        rollup("Block");
-                        text = item.text;
-                    } else {
-                        text += '\n' + item.text;
-                    }
-                }
-                lastY = item.y;
-            });
-            rollup("Block")
-        });
-        return new ParseResult({
-            ...parseResult,
-            content: [new BlockPage({
-                index: 0,
-                blocks: blocks
-            })],
-        });
-    }
-
-}