mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-21 23:33:31 +01:00
[WIP] TOC headline parsing
This commit is contained in:
parent
c9352d8396
commit
6f69566e98
@ -1,5 +1,11 @@
|
||||
// UTF-16 code units referenced by the character helpers of this module.
const MIN_DIGIT_CHAR_CODE = 48; // '0'
const MAX_DIGIT_CHAR_CODE = 57; // '9'
const WHITESPACE_CHAR_CODE = 32; // ' '
const TAB_CHAR_CODE = 9; // '\t'
const DOT_CHAR_CODE = 46; // '.'
|
||||
|
||||
/**
 * Tells whether a UTF-16 code unit is one of the ASCII digits '0' to '9'.
 *
 * @param {number} charCode - code unit, e.g. the result of String.prototype.charCodeAt
 * @returns {boolean} true when charCode lies inside the digit range; false
 *     otherwise (including for NaN, which charCodeAt yields for an
 *     out-of-range index)
 */
export function isDigit(charCode) {
    return charCode >= MIN_DIGIT_CHAR_CODE && charCode <= MAX_DIGIT_CHAR_CODE;
}
|
||||
|
||||
export function isNumber(string) {
|
||||
@ -27,3 +33,17 @@ export function hasUpperCaseCharacterInMiddleOfWord(text) {
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Converts a string into the char codes of its upper-cased form, leaving out
 * every space, tab and dot. Two texts that differ only in case, spaces, tabs
 * or dots therefore produce the same array.
 *
 * @param {string} string - text to normalize
 * @returns {number[]} UTF-16 code units of the normalized text
 */
export function normalizedCharCodeArray(string) {
    const ignoredCharCodes = [WHITESPACE_CHAR_CODE, TAB_CHAR_CODE, DOT_CHAR_CODE];
    return charCodeArray(string.toUpperCase()).filter(charCode => !ignoredCharCodes.includes(charCode));
}
|
||||
|
||||
/**
 * Returns the UTF-16 code units of a string, one array entry per index.
 *
 * @param {string} string - text to convert
 * @returns {number[]} the result of charCodeAt for every index of the string
 */
export function charCodeArray(string) {
    // Indexed access (rather than iterating the string) keeps one entry per
    // code unit, so surrogate pairs still contribute two entries.
    return Array.from({ length: string.length }, (_, index) => string.charCodeAt(index));
}
|
@ -8,6 +8,7 @@ import DetectFootnotes from './transformations/DetectFootnotes.jsx'
|
||||
import DetectTOC from './transformations/DetectTOC.jsx'
|
||||
import DetectLists from './transformations/DetectLists.jsx'
|
||||
import DetectCodeBlocks from './transformations/DetectCodeBlocks.jsx'
|
||||
import DetectHeadlines from './transformations/DetectHeadlines.jsx'
|
||||
// import DetectFormats from './transformations/DetectFormats.jsx'
|
||||
// import CombineSameY from './transformations/CombineSameY.jsx';
|
||||
// import RemoveWhitespaces from './transformations/RemoveWhitespaces.jsx'
|
||||
@ -35,6 +36,7 @@ export default class AppState {
|
||||
new DetectTOC(),
|
||||
new DetectLists(),
|
||||
new DetectCodeBlocks(),
|
||||
new DetectHeadlines(),
|
||||
|
||||
// new DetectFormats(),
|
||||
// new CombineSameY(),
|
||||
|
40
src/javascript/models/HeadlineFinder.jsx
Normal file
40
src/javascript/models/HeadlineFinder.jsx
Normal file
@ -0,0 +1,40 @@
|
||||
import { normalizedCharCodeArray } from '../functions.jsx'
|
||||
|
||||
/**
 * Searches a sequence of text items for a given headline.
 *
 * Headline and items are compared in normalized form (see
 * normalizedCharCodeArray), so the headline may be spread over several
 * consecutively consumed items.
 */
export default class HeadlineFinder {

    /**
     * @param {{headline: string}} options - headline: the text to look for
     */
    constructor(options) {
        this.headlineCharCodes = normalizedCharCodeArray(options.headline);
        // Consecutive items that so far match the beginning of the headline.
        this.stackedTextItems = [];
        // Number of normalized headline characters covered by the stacked items.
        this.stackedChars = 0;
    }

    /**
     * Feeds the next text item to the finder.
     *
     * @param {{text: string}} textItem - the item following the previously consumed one
     * @returns {Array|null} the stacked items (same array as stackedTextItems)
     *     once they cover the complete headline, otherwise null
     */
    consume(textItem) {
        const normalizedCharCodes = normalizedCharCodeArray(textItem.text);
        if (normalizedCharCodes.length === 0 && this.stackedTextItems.length === 0) {
            // A blank item cannot start a headline. Stacking it would leave an
            // entry behind that is never cleared, because the reset below only
            // happens once at least one character has been matched.
            return null;
        }
        if (this.matchAll(normalizedCharCodes)) {
            this.stackedTextItems.push(textItem);
            this.stackedChars += normalizedCharCodes.length;
            return this.stackedChars === this.headlineCharCodes.length ? this.stackedTextItems : null;
        }
        if (this.stackedChars > 0) {
            // The item does not continue the stacked beginning: drop the stack
            // and test the item again as a possible start of the headline. The
            // result must be handed on, since the item may complete the
            // headline on its own.
            this.stackedChars = 0;
            this.stackedTextItems = [];
            return this.consume(textItem);
        }
        return null;
    }

    /**
     * Checks whether the given char codes equal the headline's char codes
     * starting at the position right after the already stacked characters.
     *
     * @param {number[]} normalizedCharCodes - normalized char codes of one text item
     * @returns {boolean} true when every char code matches (also for an empty array)
     */
    matchAll(normalizedCharCodes) {
        return normalizedCharCodes.every(
            (textItemChar, index) => textItemChar === this.headlineCharCodes[this.stackedChars + index]
        );
    }

}
|
@ -35,9 +35,9 @@ export default class DetectLists extends ToPdfBlockViewTransformation {
|
||||
|
||||
var lastItemX;
|
||||
var currentLevel = 0;
|
||||
var xByLevel = {};
|
||||
var itemsBeforeFirstLineItem = [];
|
||||
var listBlockItems = [];
|
||||
var xByLevel = {};
|
||||
|
||||
const pushLineItem = (originalItem, text, setLevel) => {
|
||||
if (lastItemX && setLevel) {
|
||||
|
@ -3,8 +3,9 @@ import ParseResult from '../ParseResult.jsx';
|
||||
import TextItem from '../TextItem.jsx';
|
||||
import PdfBlock from '../PdfBlock.jsx';
|
||||
import TextItemCombiner from '../TextItemCombiner.jsx';
|
||||
import HeadlineFinder from '../HeadlineFinder.jsx';
|
||||
import { REMOVED_ANNOTATION, ADDED_ANNOTATION } from '../Annotation.jsx';
|
||||
import { TOC_BLOCK, HEADLINE2 } from '../MarkdownElements.jsx';
|
||||
import { TOC_BLOCK, HEADLINE2, headlineByLevel } from '../MarkdownElements.jsx';
|
||||
import { isDigit } from '../../functions.jsx'
|
||||
|
||||
//Detect table of contents pages
|
||||
@ -22,14 +23,16 @@ export default class DetectTOC extends ToPdfBlockViewTransformation {
|
||||
mostUsedDistance: mostUsedDistance
|
||||
});
|
||||
|
||||
var lastLevel = 0;
|
||||
const itemLeveler = new ItemLeveler();
|
||||
const linkLeveler = new LinkLeveler();
|
||||
var tocLinks = [];
|
||||
var lastTocPage;
|
||||
parseResult.content.slice(0, maxPagesToEvaluate).forEach(page => {
|
||||
var linesCount = 0;
|
||||
var linesWithDigitsCount = 0;
|
||||
var lineItemsWithDigits = [];
|
||||
const unknownBlocks = new Set();
|
||||
var headlineBlock;
|
||||
const pageTocLinks = [];
|
||||
page.blocks.forEach(block => {
|
||||
var blockHasLinesWithDigits = false;
|
||||
const itemsGroupedByY = textCombiner.combine(block.textItems).textItems;
|
||||
@ -38,8 +41,10 @@ export default class DetectTOC extends ToPdfBlockViewTransformation {
|
||||
linesCount++
|
||||
var lineText = lineItem.text.replace(/\./g, '').trim();
|
||||
var endsWithDigit = false;
|
||||
var digits = [];
|
||||
while (isDigit(lineText.charCodeAt(lineText.length - 1))) {
|
||||
lineText = lineText.substring(0, lineText.length - 2);
|
||||
digits.unshift(lineText.charAt(lineText.length - 1));
|
||||
lineText = lineText.substring(0, lineText.length - 1);
|
||||
endsWithDigit = true;
|
||||
}
|
||||
lineText = lineText.trim();
|
||||
@ -50,6 +55,13 @@ export default class DetectTOC extends ToPdfBlockViewTransformation {
|
||||
}
|
||||
linesWithDigitsCount++;
|
||||
blockHasLinesWithDigits = true;
|
||||
pageTocLinks.push(new TocLink({
|
||||
pageNumber: parseInt(digits.join('')),
|
||||
textItem: new TextItem({
|
||||
...lineItem,
|
||||
text: lineText
|
||||
})
|
||||
}));
|
||||
lineItemsWithDigits.push(new TextItem({
|
||||
...lineItem,
|
||||
text: lineText
|
||||
@ -67,8 +79,13 @@ export default class DetectTOC extends ToPdfBlockViewTransformation {
|
||||
}
|
||||
});
|
||||
|
||||
// page has been processed
|
||||
if (linesWithDigitsCount * 100 / linesCount > 75) {
|
||||
tocPages.push(page.index + 1);
|
||||
lastTocPage = page;
|
||||
linkLeveler.levelPageItems(pageTocLinks);
|
||||
tocLinks = tocLinks.concat(pageTocLinks);
|
||||
|
||||
const newBlocks = [];
|
||||
page.blocks.forEach((block) => {
|
||||
if (!unknownBlocks.has(block)) {
|
||||
@ -83,17 +100,50 @@ export default class DetectTOC extends ToPdfBlockViewTransformation {
|
||||
}));
|
||||
}
|
||||
});
|
||||
// lastLevel = processLevels(lineItemsWithDigits, lastLevel);
|
||||
itemLeveler.level(lineItemsWithDigits);
|
||||
newBlocks.push(new PdfBlock({
|
||||
textItems: lineItemsWithDigits,
|
||||
type: TOC_BLOCK,
|
||||
annotation: ADDED_ANNOTATION
|
||||
}));
|
||||
page.blocks = newBlocks;
|
||||
}
|
||||
});
|
||||
|
||||
//all pages have been processed
|
||||
var foundHeadlines = tocLinks.length;
|
||||
const notFoundHeadlines = [];
|
||||
if (tocPages.length > 0) {
|
||||
tocLinks.forEach(tocLink => {
|
||||
var linkedPage = parseResult.content[tocLink.pageNumber - 1];
|
||||
var foundHeadline = false;
|
||||
if (linkedPage) {
|
||||
foundHeadline = findHeadline(linkedPage, tocLink, textCombiner);
|
||||
if (!foundHeadline) { // pages are off by 1 ?
|
||||
linkedPage = parseResult.content[tocLink.pageNumber];
|
||||
if (linkedPage) {
|
||||
foundHeadline = findHeadline(linkedPage, tocLink, textCombiner);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
//TODO sometimes pages are off. We could try the page range from pre to next ...
|
||||
}
|
||||
if (!foundHeadline) {
|
||||
notFoundHeadlines.push(tocLink);
|
||||
}
|
||||
});
|
||||
lastTocPage.blocks.push(new PdfBlock({
|
||||
textItems: tocLinks.map(tocLink => {
|
||||
tocLink.textItem.text = ' '.repeat(tocLink.level * 3) + '- ' + tocLink.textItem.text;
|
||||
return tocLink.textItem
|
||||
}),
|
||||
type: TOC_BLOCK,
|
||||
annotation: ADDED_ANNOTATION
|
||||
}));
|
||||
}
|
||||
|
||||
const messages = [];
|
||||
messages.push('Detected ' + tocPages.length + ' table of content pages');
|
||||
if (foundHeadlines > 0) {
|
||||
messages.push('Found TOC headlines: ' + (foundHeadlines - notFoundHeadlines.length) + '/' + foundHeadlines);
|
||||
}
|
||||
if (notFoundHeadlines.length > 0) {
|
||||
messages.push('Missing TOC headlines: ' + notFoundHeadlines.map(tocLink => tocLink.textItem.text + '=>' + tocLink.pageNumber));
|
||||
}
|
||||
return new ParseResult({
|
||||
...parseResult,
|
||||
globals: {
|
||||
@ -101,27 +151,61 @@ export default class DetectTOC extends ToPdfBlockViewTransformation {
|
||||
tocPages: tocPages
|
||||
|
||||
},
|
||||
messages: ['Detected ' + tocPages.length + ' table of content pages']
|
||||
messages: messages
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Looks on a page for the headline a TOC link refers to. On success the block
 * containing the end of the headline is annotated as removed and a new
 * headline block is inserted directly behind it.
 *
 * @param {Object} page - page whose `blocks` array is searched and modified in place
 * @param {Object} tocLink - provides the headline text (`textItem.text`) and the `level`
 * @param {Object} textCombiner - its `combine(textItems).textItems` supplies the items to match per block
 * @returns {boolean} true when the headline was found, false otherwise
 */
function findHeadline(page, tocLink, textCombiner) {
    const headline = tocLink.textItem.text;
    const headlineFinder = new HeadlineFinder({
        headline: headline
    });
    let lastBlock;
    for (const [blockIndex, block] of page.blocks.entries()) {
        const itemsGroupedByY = textCombiner.combine(block.textItems).textItems;
        for (const item of itemsGroupedByY) {
            if (!headlineFinder.consume(item)) {
                continue;
            }
            const usedItems = headlineFinder.stackedTextItems;
            block.annotation = REMOVED_ANNOTATION;
            // More matched items than this block holds: the headline started
            // in the previous block.
            // NOTE(review): this does not catch a headline that starts in the
            // previous block but uses fewer items than the current block has,
            // nor one that spans more than two blocks.
            if (usedItems.length > itemsGroupedByY.length && lastBlock) {
                lastBlock.annotation = REMOVED_ANNOTATION;
            }
            // Returning right after the splice keeps the iteration from ever
            // seeing the modified array.
            page.blocks.splice(blockIndex + 1, 0, new PdfBlock({
                textItems: [new TextItem({
                    ...usedItems[0],
                    text: headline
                })],
                type: headlineByLevel(tocLink.level + 2),
                annotation: ADDED_ANNOTATION
            }));
            return true;
        }
        lastBlock = block;
    }
    return false;
}
|
||||
|
||||
class ItemLeveler {
|
||||
|
||||
class LinkLeveler {
|
||||
constructor() {
|
||||
this.levelByMethod = null;
|
||||
this.uniqueFonts = [];
|
||||
this.headlines = [];
|
||||
}
|
||||
|
||||
level(lineItemsWithDigits) {
|
||||
levelPageItems(tocLinks:TocLink[]) {
|
||||
if (!this.levelByMethod) {
|
||||
const uniqueX = this.calculateUniqueX(lineItemsWithDigits);
|
||||
const uniqueX = this.calculateUniqueX(tocLinks);
|
||||
if (uniqueX.length > 1) {
|
||||
this.levelByMethod = this.levelByXDiff;
|
||||
} else {
|
||||
const uniqueFonts = this.calculateUniqueFonts(lineItemsWithDigits);
|
||||
const uniqueFonts = this.calculateUniqueFonts(tocLinks);
|
||||
if (uniqueFonts.length > 1) {
|
||||
this.uniqueFonts = uniqueFonts;
|
||||
this.levelByMethod = this.levelByFont;
|
||||
@ -130,46 +214,31 @@ class ItemLeveler {
|
||||
}
|
||||
}
|
||||
}
|
||||
this.levelByMethod(lineItemsWithDigits);
|
||||
this.levelByMethod(tocLinks);
|
||||
}
|
||||
|
||||
levelByXDiff(lineItemsWithDigits) {
|
||||
const uniqueX = this.calculateUniqueX(lineItemsWithDigits);
|
||||
lineItemsWithDigits.forEach(item => {
|
||||
const level = uniqueX.indexOf(item.x);
|
||||
this.headlines.push(new Headline({
|
||||
level: level,
|
||||
text: item.text
|
||||
}));
|
||||
item.text = ' '.repeat(level * 3) + '- ' + item.text;
|
||||
levelByXDiff(tocLinks) {
|
||||
const uniqueX = this.calculateUniqueX(tocLinks);
|
||||
tocLinks.forEach(link => {
|
||||
link.level = uniqueX.indexOf(link.textItem.x);
|
||||
});
|
||||
}
|
||||
|
||||
levelByFont(lineItemsWithDigits) {
|
||||
lineItemsWithDigits.forEach(item => {
|
||||
const level = this.uniqueFonts.indexOf(item.font);
|
||||
this.headlines.push(new Headline({
|
||||
level: level,
|
||||
text: item.text
|
||||
}));
|
||||
item.text = ' '.repeat(level * 3) + '- ' + item.text;
|
||||
levelByFont(tocLinks) {
|
||||
tocLinks.forEach(link => {
|
||||
link.level = this.uniqueFonts.indexOf(link.textItem.font);
|
||||
});
|
||||
}
|
||||
|
||||
levelToZero(lineItemsWithDigits) {
|
||||
lineItemsWithDigits.forEach(item => {
|
||||
const level = 0;
|
||||
this.headlines.push(new Headline({
|
||||
level: level,
|
||||
text: item.text
|
||||
}));
|
||||
item.text = ' '.repeat(level * 3) + '- ' + item.text;
|
||||
levelToZero(tocLinks) {
|
||||
tocLinks.forEach(link => {
|
||||
link.level = 0;
|
||||
});
|
||||
}
|
||||
|
||||
calculateUniqueX(lineItemsWithDigits) {
|
||||
var uniqueX = lineItemsWithDigits.reduce(function(uniquesArray, lineItem) {
|
||||
if (uniquesArray.indexOf(lineItem.x) < 0) uniquesArray.push(lineItem.x);
|
||||
calculateUniqueX(tocLinks) {
|
||||
var uniqueX = tocLinks.reduce(function(uniquesArray, link) {
|
||||
if (uniquesArray.indexOf(link.textItem.x) < 0) uniquesArray.push(link.textItem.x);
|
||||
return uniquesArray;
|
||||
}, []);
|
||||
|
||||
@ -180,9 +249,9 @@ class ItemLeveler {
|
||||
return uniqueX;
|
||||
}
|
||||
|
||||
calculateUniqueFonts(lineItemsWithDigits) {
|
||||
var uniqueFont = lineItemsWithDigits.reduce(function(uniquesArray, lineItem) {
|
||||
if (uniquesArray.indexOf(lineItem.font) < 0) uniquesArray.push(lineItem.font);
|
||||
calculateUniqueFonts(tocLinks) {
|
||||
var uniqueFont = tocLinks.reduce(function(uniquesArray, link) {
|
||||
if (uniquesArray.indexOf(link.textItem.font) < 0) uniquesArray.push(link.textItem.font);
|
||||
return uniquesArray;
|
||||
}, []);
|
||||
|
||||
@ -191,9 +260,10 @@ class ItemLeveler {
|
||||
|
||||
}
|
||||
|
||||
/**
 * One entry of a table of contents: the text item of the entry, the page
 * number it points to and its nesting level.
 */
class TocLink {

    /**
     * @param {{textItem: Object, pageNumber: number}} options - the entry's
     *     text item and the page number parsed from the end of its line
     */
    constructor(options) {
        this.textItem = options.textItem;
        this.pageNumber = options.pageNumber;
        // Always starts at 0; the leveling methods assign the real level later.
        this.level = 0;
    }

}
|
||||
|
134
test/HeadlineFinder.spec.js
Normal file
134
test/HeadlineFinder.spec.js
Normal file
@ -0,0 +1,134 @@
|
||||
import { expect } from 'chai';
|
||||
|
||||
import HeadlineFinder from '../src/javascript/models/HeadlineFinder';
|
||||
import TextItem from '../src/javascript/models/TextItem.jsx';
|
||||
|
||||
describe('HeadlineFinder', () => {

    // Builds one TextItem per given text, in the given order.
    const createItems = (...texts) => texts.map(text => new TextItem({
        text: text
    }));

    // Asserts that `actual` has as many entries as `expected` and contains each of them.
    const expectItems = (actual, expected) => {
        expect(actual).to.have.lengthOf(expected.length);
        expected.forEach(item => expect(actual).to.contain(item));
    };

    it('Not Found - Case 1', () => {
        const headlineFinder = new HeadlineFinder({
            headline: 'My Little Headline'
        });
        const [item1, item2, item3] = createItems('My ', 'Little', ' Headline2');

        expect(headlineFinder.consume(item1)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item1]);
        expect(headlineFinder.consume(item2)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item1, item2]);
        // The trailing '2' breaks the match, so the stack is dropped.
        expect(headlineFinder.consume(item3)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, []);
    });

    it('Found - Simple', () => {
        const headlineFinder = new HeadlineFinder({
            headline: 'My Little Headline'
        });
        const [item1, item2, item3] = createItems('My ', 'Little', ' Headline');

        expect(headlineFinder.consume(item1)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item1]);
        expect(headlineFinder.consume(item2)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item1, item2]);
        expectItems(headlineFinder.consume(item3), [item1, item2, item3]);
        expectItems(headlineFinder.stackedTextItems, [item1, item2, item3]);
    });

    it('Found - Waste in beginning', () => {
        const headlineFinder = new HeadlineFinder({
            headline: 'My Little Headline'
        });
        const [item0, item1, item2, item3] = createItems('Waste ', 'My ', 'Little', ' Headline');

        expect(headlineFinder.consume(item0)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, []);
        expect(headlineFinder.consume(item1)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item1]);
        expect(headlineFinder.consume(item2)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item1, item2]);
        expectItems(headlineFinder.consume(item3), [item1, item2, item3]);
        expectItems(headlineFinder.stackedTextItems, [item1, item2, item3]);
    });

    it('Found - Duplicate in beginning', () => {
        const headlineFinder = new HeadlineFinder({
            headline: 'My Little Headline'
        });
        const [item0, item1, item2, item3] = createItems('My ', 'My ', 'Little', ' Headline');

        expect(headlineFinder.consume(item0)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item0]);
        // The second 'My ' replaces the first one as start of the headline.
        expect(headlineFinder.consume(item1)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item1]);
        expect(headlineFinder.consume(item2)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item1, item2]);
        expectItems(headlineFinder.consume(item3), [item1, item2, item3]);
        expectItems(headlineFinder.stackedTextItems, [item1, item2, item3]);
    });

    it('Found - Mixed up case and Whitespace', () => {
        const headlineFinder = new HeadlineFinder({
            headline: 'MYLitt le HEADline'
        });
        const [item1, item2, item3] = createItems('My ', 'Little', ' Headline');

        expect(headlineFinder.consume(item1)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item1]);
        expect(headlineFinder.consume(item2)).to.equal(null);
        expectItems(headlineFinder.stackedTextItems, [item1, item2]);
        expectItems(headlineFinder.consume(item3), [item1, item2, item3]);
        expectItems(headlineFinder.stackedTextItems, [item1, item2, item3]);
    });

});
|
@ -1,6 +1,6 @@
|
||||
import { expect } from 'chai';
|
||||
|
||||
import { hasUpperCaseCharacterInMiddleOfWord } from '../src/javascript/functions.jsx'
|
||||
import { hasUpperCaseCharacterInMiddleOfWord, normalizedCharCodeArray, charCodeArray } from '../src/javascript/functions.jsx'
|
||||
|
||||
describe('hasUpperCaseCharacterInMiddleOfWord', () => {
|
||||
|
||||
@ -37,3 +37,42 @@ describe('hasUpperCaseCharacterInMiddleOfWord', () => {
|
||||
expect(hasUpperCaseCharacterInMiddleOfWord("High 5'Sec")).to.equal(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('charCodeArray', () => {

    // Turns an array of char codes back into a string.
    const toText = (charCodes) => String.fromCharCode(...charCodes);

    it('Charcodes', () => {
        expect(charCodeArray(".")).to.have.lengthOf(1).to.contain(46);
    });

    it('Convert Back', () => {
        expect(toText(charCodeArray("word"))).to.equal("word");
        expect(toText(charCodeArray("WORD"))).to.equal("WORD");
        expect(toText(charCodeArray("a word"))).to.equal("a word");
    });

});
|
||||
|
||||
describe('normalizedCharCodeArray', () => {

    // Normalizes the text and turns the resulting char codes back into a string.
    const normalize = (text) => String.fromCharCode(...normalizedCharCodeArray(text));

    it('No Change', () => {
        expect(normalize("WORD")).to.equal("WORD");
        expect(normalize("WORD23")).to.equal("WORD23");
    });

    it('lowercaseToUpperCase', () => {
        expect(normalize("word")).to.equal("WORD");
        expect(normalize("WoRd")).to.equal("WORD");
        expect(normalize("word23")).to.equal("WORD23");
    });

    it('RemoveWhiteSpace', () => {
        expect(normalize("A WORD")).to.equal("AWORD");
        expect(normalize("SOME LITTLE SENTENCE.")).to.equal("SOMELITTLESENTENCE");
    });

    it('All', () => {
        expect(normalize("a word")).to.equal("AWORD");
        expect(normalize("WoRd 4 u")).to.equal("WORD4U");
        expect(normalize("Some little sentence.")).to.equal("SOMELITTLESENTENCE");
    });

});
|
||||
|
Loading…
Reference in New Issue
Block a user