diff --git a/package-lock.json b/package-lock.json index a8074794..9147d9e5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,7 @@ "cookie-parser": "^1.4.6", "express": "^4.17.1", "express-session": "^1.17.3", + "fuse.js": "^7.1.0", "graceful-fs": "^4.2.10", "htmlparser2": "^8.0.1", "lru-cache": "^10.0.3", @@ -2105,6 +2106,14 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/fuse.js": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.1.0.tgz", + "integrity": "sha512-trLf4SzuuUxfusZADLINj+dE8clK1frKdmqiJNb1Es75fmI5oY6X2mxLVUciLLjxqw/xr72Dhy+lER6dGd02FQ==", + "engines": { + "node": ">=10" + } + }, "node_modules/gensync": { "version": "1.0.0-beta.2", "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", diff --git a/package.json b/package.json index d4831736..2a77ec87 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,7 @@ "cookie-parser": "^1.4.6", "express": "^4.17.1", "express-session": "^1.17.3", + "fuse.js": "^7.1.0", "graceful-fs": "^4.2.10", "htmlparser2": "^8.0.1", "lru-cache": "^10.0.3", diff --git a/server/utils/podcastUtils.js b/server/utils/podcastUtils.js index 3a1df198..74a71cc1 100644 --- a/server/utils/podcastUtils.js +++ b/server/utils/podcastUtils.js @@ -3,6 +3,7 @@ const ssrfFilter = require('ssrf-req-filter') const Logger = require('../Logger') const { xmlToJSON, levenshteinDistance, timestampToSeconds } = require('./index') const htmlSanitizer = require('../utils/htmlSanitizer') +const Fuse = require('fuse.js') /** * @typedef RssPodcastChapter @@ -407,7 +408,7 @@ module.exports.getPodcastFeed = (feedUrl, excludeEpisodeMetadata = false) => { }) } -// Return array of episodes ordered by closest match (Levenshtein distance of 6 or less) +// Return array of episodes ordered by closest match using fuse.js module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => { const feed = await this.getPodcastFeed(feedUrl).catch(() => { return null @@ 
-420,32 +421,32 @@ module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => {
  *
  * @param {RssPodcast} feed
  * @param {string} searchTitle
- * @returns {Array<{ episode: RssPodcastEpisode, levenshtein: number }>}
+ * @returns {Array<{ episode: RssPodcastEpisode }>|null} matches ordered best-first, or null when the feed has no episodes
  */
 module.exports.findMatchingEpisodesInFeed = (feed, searchTitle) => {
-  searchTitle = searchTitle.toLowerCase().trim()
   if (!feed?.episodes) {
     return null
   }
 
-  const matches = []
-  feed.episodes.forEach((ep) => {
-    if (!ep.title) return
-    const epTitle = ep.title.toLowerCase().trim()
-    if (epTitle === searchTitle) {
-      matches.push({
-        episode: ep,
-        levenshtein: 0
-      })
-    } else {
-      const levenshtein = levenshteinDistance(searchTitle, epTitle, true)
-      if (levenshtein <= 6 && epTitle.length > levenshtein) {
-        matches.push({
-          episode: ep,
-          levenshtein
-        })
-      }
-    }
-  })
-  return matches.sort((a, b) => a.levenshtein - b.levenshtein)
+  // searchTitle is documented as a string; a missing or blank title cannot match anything
+  const query = typeof searchTitle === 'string' ? searchTitle.trim() : ''
+  if (!query) {
+    return []
+  }
+
+  const fuseOptions = {
+    ignoreDiacritics: true,
+    // Stricter than Fuse's 0.6 default, which returns too many matches.
+    // NOTE(review): location/distance keep their defaults, so this also limits how far
+    // into a field a match may start - consider ignoreLocation if that proves too strict.
+    threshold: 0.4,
+    keys: [
+      { name: 'title', weight: 0.7 }, // prefer a match in the title
+      { name: 'subtitle', weight: 0.3 }
+    ]
+  }
+  const fuse = new Fuse(feed.episodes, fuseOptions)
+
+  // Fuse sorts its results by score (best match first) by default
+  return fuse.search(query).map((match) => ({ episode: match.item }))
 }