diff --git a/server/libs/fusejs/index.js b/server/libs/fusejs/index.js new file mode 100644 index 00000000..23d7f045 --- /dev/null +++ b/server/libs/fusejs/index.js @@ -0,0 +1,13 @@ +/** + * Source: https://github.com/krisk/Fuse/blob/main/dist/fuse.basic.min.js + */ + +/** + * Fuse.js v7.1.0 - Lightweight fuzzy-search (http://fusejs.io) + * + * Copyright (c) 2025 Kiro Risk (http://kiro.me) + * All Rights Reserved. Apache Software License 2.0 + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ +var e,t;e=this,t=function(){"use strict";function e(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function t(t){for(var n=1;ne.length)&&(t=e.length);for(var n=0,r=new Array(t);n0&&void 0!==arguments[0]?arguments[0]:{},n=t.getFn,u=void 0===n?M.getFn:n,i=t.fieldNormWeight,o=void 0===i?M.fieldNormWeight:i;r(this,e),this.norm=function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:1,t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:3,n=new Map,r=Math.pow(10,t);return{get:function(t){var u=t.match(w).length;if(n.has(u))return n.get(u);var i=1/Math.pow(u,.5*e),o=parseFloat(Math.round(i*r)/r);return n.set(u,o),o},clear:function(){n.clear()}}}(o,3),this.getFn=u,this.isCreated=!1,this.setIndexRecords()}return i(e,[{key:"setSources",value:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[];this.docs=e}},{key:"setIndexRecords",value:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[];this.records=e}},{key:"setKeys",value:function(){var e=this,t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[];this.keys=t,this._keysMap={},t.forEach((function(t,n){e._keysMap[t.id]=n}))}},{key:"create",value:function(){var 
e=this;!this.isCreated&&this.docs.length&&(this.isCreated=!0,d(this.docs[0])?this.docs.forEach((function(t,n){e._addString(t,n)})):this.docs.forEach((function(t,n){e._addObject(t,n)})),this.norm.clear())}},{key:"add",value:function(e){var t=this.size();d(e)?this._addString(e,t):this._addObject(e,t)}},{key:"removeAt",value:function(e){this.records.splice(e,1);for(var t=e,n=this.size();t2&&void 0!==arguments[2]?arguments[2]:{},r=n.getFn,u=void 0===r?M.getFn:r,i=n.fieldNormWeight,o=void 0===i?M.fieldNormWeight:i,a=new x({getFn:u,fieldNormWeight:o});return a.setKeys(e.map(B)),a.setSources(t),a.create(),a}function S(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},n=t.errors,r=void 0===n?0:n,u=t.currentLocation,i=void 0===u?0:u,o=t.expectedLocation,a=void 0===o?0:o,c=t.distance,s=void 0===c?M.distance:c,h=t.ignoreLocation,l=void 0===h?M.ignoreLocation:h,f=r/e.length;if(l)return f;var d=Math.abs(a-i);return s?f+d/s:d?1:f}var _=32;function O(e,t,n){var r=arguments.length>3&&void 0!==arguments[3]?arguments[3]:{},u=r.location,i=void 0===u?M.location:u,o=r.distance,a=void 0===o?M.distance:o,c=r.threshold,s=void 0===c?M.threshold:c,h=r.findAllMatches,l=void 0===h?M.findAllMatches:h,f=r.minMatchCharLength,d=void 0===f?M.minMatchCharLength:f,v=r.includeMatches,g=void 0===v?M.includeMatches:v,A=r.ignoreLocation,y=void 0===A?M.ignoreLocation:A;if(t.length>_)throw new Error("Pattern length exceeds max of ".concat(_,"."));for(var p,m=t.length,C=e.length,F=Math.max(0,Math.min(i,C)),E=s,B=F,D=d>1||g,b=D?Array(C):[];(p=e.indexOf(t,B))>-1;){var k=S(t,{currentLocation:p,expectedLocation:F,distance:a,ignoreLocation:y});if(E=Math.min(k,E),B=p+m,D)for(var w=0;w=W;$-=1){var 
K=$-1,J=n[e.charAt(K)];if(D&&(b[K]=+!!J),T[$]=(T[$+1]<<1|1)&J,I&&(T[$]|=(x[$+1]|x[$])<<1|1|x[$+1]),T[$]&j&&(L=S(t,{errors:I,currentLocation:K,expectedLocation:F,distance:a,ignoreLocation:y}))<=E){if(E=L,(B=K)<=F)break;W=Math.max(1,2*F-B)}}if(S(t,{errors:I+1,currentLocation:F,expectedLocation:F,distance:a,ignoreLocation:y})>E)break;x=T}var R={isMatch:B>=0,score:Math.max(.001,L)};if(D){var U=function(){for(var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[],t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:M.minMatchCharLength,n=[],r=-1,u=-1,i=0,o=e.length;i=t&&n.push([r,u]),r=-1)}return e[i-1]&&i-r>=t&&n.push([r,i-1]),n}(b,d);U.length?g&&(R.indices=U):R.isMatch=!1}return R}function j(e){for(var t={},n=0,r=e.length;n1&&void 0!==arguments[1]?arguments[1]:{},i=u.location,o=void 0===i?M.location:i,a=u.threshold,c=void 0===a?M.threshold:a,s=u.distance,h=void 0===s?M.distance:s,l=u.includeMatches,f=void 0===l?M.includeMatches:l,d=u.findAllMatches,v=void 0===d?M.findAllMatches:d,g=u.minMatchCharLength,A=void 0===g?M.minMatchCharLength:g,y=u.isCaseSensitive,p=void 0===y?M.isCaseSensitive:y,m=u.ignoreDiacritics,C=void 0===m?M.ignoreDiacritics:m,F=u.ignoreLocation,E=void 0===F?M.ignoreLocation:F;if(r(this,e),this.options={location:o,threshold:c,distance:h,includeMatches:f,findAllMatches:v,minMatchCharLength:A,isCaseSensitive:p,ignoreDiacritics:C,ignoreLocation:E},t=p?t:t.toLowerCase(),t=C?I(t):t,this.pattern=t,this.chunks=[],this.pattern.length){var B=function(e,t){n.chunks.push({pattern:e,alphabet:j(e),startIndex:t})},D=this.pattern.length;if(D>_){for(var b=0,k=D%_,w=D-k;b-1&&(n.refIndex=e.idx),t.matches.push(n)}}))}function T(e,t){t.score=e.score}var $=function(){function e(n){var u=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},i=arguments.length>2?arguments[2]:void 0;if(r(this,e),this.options=t(t({},M),u),this.options.useExtendedSearch)throw new Error("Extended search is not available");this._keyStore=new 
E(this.options.keys),this.setCollection(n,i)}return i(e,[{key:"setCollection",value:function(e,t){if(this._docs=e,t&&!(t instanceof x))throw new Error("Incorrect 'index' type");this._myIndex=t||L(this.options.keys,this._docs,{getFn:this.options.getFn,fieldNormWeight:this.options.fieldNormWeight})}},{key:"add",value:function(e){A(e)&&(this._docs.push(e),this._myIndex.add(e))}},{key:"remove",value:function(){for(var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:function(){return!1},t=[],n=0,r=this._docs.length;n1&&void 0!==arguments[1]?arguments[1]:{}).limit,n=void 0===t?-1:t,r=this.options,u=r.includeMatches,i=r.includeScore,o=r.shouldSort,a=r.sortFn,c=r.ignoreFieldNorm,s=d(e)?d(this._docs[0])?this._searchStringList(e):this._searchObjectList(e):this._searchLogical(e);return function(e,t){var n=t.ignoreFieldNorm,r=void 0===n?M.ignoreFieldNorm:n;e.forEach((function(e){var t=1;e.matches.forEach((function(e){var n=e.key,u=e.norm,i=e.score,o=n?n.weight:null;t*=Math.pow(0===i&&o?Number.EPSILON:i,(o||1)*(r?1:u))})),e.score=t}))}(s,{ignoreFieldNorm:c}),o&&s.sort(a),v(n)&&n>-1&&(s=s.slice(0,n)),function(e,t){var n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{},r=n.includeMatches,u=void 0===r?M.includeMatches:r,i=n.includeScore,o=void 0===i?M.includeScore:i,a=[];return u&&a.push(z),o&&a.push(T),e.map((function(e){var n=e.idx,r={item:t[n],refIndex:n};return a.length&&a.forEach((function(t){t(e,r)})),r}))}(s,this._docs,{includeMatches:u,includeScore:i})}},{key:"_searchStringList",value:function(e){var t=W(e,this.options),n=this._myIndex.records,r=[];return n.forEach((function(e){var n=e.v,u=e.i,i=e.n;if(A(n)){var o=t.searchIn(n),a=o.isMatch,c=o.score,s=o.indices;a&&r.push({item:n,idx:u,matches:[{score:c,value:n,norm:i,indices:s}]})}})),r}},{key:"_searchLogical",value:function(e){throw new Error("Logical search is not available")}},{key:"_searchObjectList",value:function(e){var t=this,n=W(e,this.options),r=this._myIndex,u=r.keys,i=r.records,o=[];return 
i.forEach((function(e){var r=e.$,i=e.i;if(A(r)){var c=[];u.forEach((function(e,u){c.push.apply(c,a(t._findMatches({key:e,value:r[u],searcher:n})))})),c.length&&o.push({idx:i,item:r,matches:c})}})),o}},{key:"_findMatches",value:function(e){var t=e.key,n=e.value,r=e.searcher;if(!A(n))return[];var u=[];if(h(n))n.forEach((function(e){var n=e.v,i=e.i,o=e.n;if(A(n)){var a=r.searchIn(n),c=a.isMatch,s=a.score,h=a.indices;c&&u.push({score:s,key:t,value:n,idx:i,norm:o,indices:h})}}));else{var i=n.v,o=n.n,a=r.searchIn(i),c=a.isMatch,s=a.score,l=a.indices;c&&u.push({score:s,key:t,value:i,norm:o,indices:l})}return u}}]),e}();return $.version="7.1.0",$.createIndex=L,$.parseIndex=function(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},n=t.getFn,r=void 0===n?M.getFn:n,u=t.fieldNormWeight,i=void 0===u?M.fieldNormWeight:u,o=e.keys,a=e.records,c=new x({getFn:r,fieldNormWeight:i});return c.setKeys(o),c.setIndexRecords(a),c},$.config=M,$},"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(e="undefined"!=typeof globalThis?globalThis:e||self).Fuse=t(); \ No newline at end of file diff --git a/server/scanner/Scanner.js b/server/scanner/Scanner.js index 1e2751ed..206068cc 100644 --- a/server/scanner/Scanner.js +++ b/server/scanner/Scanner.js @@ -370,7 +370,7 @@ class Scanner { let numEpisodesUpdated = 0 for (const episode of episodesToQuickMatch) { - const episodeMatches = findMatchingEpisodesInFeed(feed, episode.title) + const episodeMatches = findMatchingEpisodesInFeed(feed, episode.title, 0.1) if (episodeMatches?.length) { const wasUpdated = await this.updateEpisodeWithMatch(episode, episodeMatches[0].episode, options) if (wasUpdated) numEpisodesUpdated++ diff --git a/server/utils/podcastUtils.js b/server/utils/podcastUtils.js index 3a1df198..12469160 100644 --- a/server/utils/podcastUtils.js +++ b/server/utils/podcastUtils.js @@ -1,8 +1,9 @@ const axios = require('axios') const ssrfFilter = 
/**
 * Find the episodes of an RSS feed whose title or subtitle fuzzy-match a search title.
 *
 * Matching is done with Fuse.js over each episode's `title` (weight 0.7) and
 * `subtitle` (weight 0.3), ignoring case and diacritics. Matches are returned in
 * the order produced by `fuse.search()` (best match first with Fuse's default sorting).
 *
 * @param {RssPodcast} feed
 * @param {string} searchTitle
 * @param {number} [threshold=0.4] - 0.0 for perfect match, 1.0 for match anything
 * @returns {Array<{ episode: RssPodcastEpisode }>|null} matching episodes (empty when
 *   `searchTitle` is blank or not a string), or null when the feed has no episodes list
 */
module.exports.findMatchingEpisodesInFeed = (feed, searchTitle, threshold = 0.4) => {
  if (!feed?.episodes) {
    return null
  }

  // Fuse lowercases the pattern but does not trim it, so surrounding whitespace would
  // count as edits and can push a real match past a strict threshold. A non-string
  // pattern must never reach fuse.search(): this basic build routes it to the logical
  // search path, which always throws.
  const pattern = typeof searchTitle === 'string' ? searchTitle.trim() : ''
  if (!pattern) {
    return []
  }

  const fuse = new Fuse(feed.episodes, {
    ignoreDiacritics: true,
    threshold,
    keys: [
      { name: 'title', weight: 0.7 }, // prefer match in title
      { name: 'subtitle', weight: 0.3 }
    ]
  })

  return fuse.search(pattern).map((match) => ({ episode: match.item }))
}