mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2025-06-20 17:58:01 +02:00
Use fuse.js for podcast episode search
Replace the Levenshtein distance comparison with the fuse.js fuzzy-searching library. The search now covers each episode's title and subtitle.
This commit is contained in:
parent
e669a8d378
commit
eda7036f70
9
package-lock.json
generated
9
package-lock.json
generated
@ -13,6 +13,7 @@
|
|||||||
"cookie-parser": "^1.4.6",
|
"cookie-parser": "^1.4.6",
|
||||||
"express": "^4.17.1",
|
"express": "^4.17.1",
|
||||||
"express-session": "^1.17.3",
|
"express-session": "^1.17.3",
|
||||||
|
"fuse.js": "^7.1.0",
|
||||||
"graceful-fs": "^4.2.10",
|
"graceful-fs": "^4.2.10",
|
||||||
"htmlparser2": "^8.0.1",
|
"htmlparser2": "^8.0.1",
|
||||||
"lru-cache": "^10.0.3",
|
"lru-cache": "^10.0.3",
|
||||||
@ -2105,6 +2106,14 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/fuse.js": {
|
||||||
|
"version": "7.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.1.0.tgz",
|
||||||
|
"integrity": "sha512-trLf4SzuuUxfusZADLINj+dE8clK1frKdmqiJNb1Es75fmI5oY6X2mxLVUciLLjxqw/xr72Dhy+lER6dGd02FQ==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/gensync": {
|
"node_modules/gensync": {
|
||||||
"version": "1.0.0-beta.2",
|
"version": "1.0.0-beta.2",
|
||||||
"resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz",
|
"resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz",
|
||||||
|
@ -40,6 +40,7 @@
|
|||||||
"cookie-parser": "^1.4.6",
|
"cookie-parser": "^1.4.6",
|
||||||
"express": "^4.17.1",
|
"express": "^4.17.1",
|
||||||
"express-session": "^1.17.3",
|
"express-session": "^1.17.3",
|
||||||
|
"fuse.js": "^7.1.0",
|
||||||
"graceful-fs": "^4.2.10",
|
"graceful-fs": "^4.2.10",
|
||||||
"htmlparser2": "^8.0.1",
|
"htmlparser2": "^8.0.1",
|
||||||
"lru-cache": "^10.0.3",
|
"lru-cache": "^10.0.3",
|
||||||
|
@ -3,6 +3,7 @@ const ssrfFilter = require('ssrf-req-filter')
|
|||||||
const Logger = require('../Logger')
|
const Logger = require('../Logger')
|
||||||
const { xmlToJSON, levenshteinDistance, timestampToSeconds } = require('./index')
|
const { xmlToJSON, levenshteinDistance, timestampToSeconds } = require('./index')
|
||||||
const htmlSanitizer = require('../utils/htmlSanitizer')
|
const htmlSanitizer = require('../utils/htmlSanitizer')
|
||||||
|
const Fuse = require('fuse.js')
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @typedef RssPodcastChapter
|
* @typedef RssPodcastChapter
|
||||||
@ -407,7 +408,7 @@ module.exports.getPodcastFeed = (feedUrl, excludeEpisodeMetadata = false) => {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return array of episodes ordered by closest match (Levenshtein distance of 6 or less)
|
// Return array of episodes ordered by closest match using fuse.js
|
||||||
module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => {
|
module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => {
|
||||||
const feed = await this.getPodcastFeed(feedUrl).catch(() => {
|
const feed = await this.getPodcastFeed(feedUrl).catch(() => {
|
||||||
return null
|
return null
|
||||||
@ -420,32 +421,28 @@ module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => {
|
|||||||
*
|
*
|
||||||
* @param {RssPodcast} feed
|
* @param {RssPodcast} feed
|
||||||
* @param {string} searchTitle
|
* @param {string} searchTitle
|
||||||
* @returns {Array<{ episode: RssPodcastEpisode, levenshtein: number }>}
|
* @returns {Array<{ episode: RssPodcastEpisode }>}
|
||||||
*/
|
*/
|
||||||
module.exports.findMatchingEpisodesInFeed = (feed, searchTitle) => {
|
module.exports.findMatchingEpisodesInFeed = (feed, searchTitle) => {
|
||||||
searchTitle = searchTitle.toLowerCase().trim()
|
|
||||||
if (!feed?.episodes) {
|
if (!feed?.episodes) {
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const fuseOptions = {
|
||||||
|
ignoreDiacritics: true,
|
||||||
|
threshold: 0.4, // default 0.6 return too many matches
|
||||||
|
keys: [
|
||||||
|
{name: 'title', weight: 0.7}, // prefer match in title
|
||||||
|
{name: 'subtitle', weight: 0.3}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
const fuse = new Fuse(feed.episodes, fuseOptions)
|
||||||
|
|
||||||
const matches = []
|
const matches = []
|
||||||
feed.episodes.forEach((ep) => {
|
fuse.search(searchTitle).forEach((match) => {
|
||||||
if (!ep.title) return
|
matches.push({
|
||||||
const epTitle = ep.title.toLowerCase().trim()
|
episode: match.item
|
||||||
if (epTitle === searchTitle) {
|
})
|
||||||
matches.push({
|
|
||||||
episode: ep,
|
|
||||||
levenshtein: 0
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
const levenshtein = levenshteinDistance(searchTitle, epTitle, true)
|
|
||||||
if (levenshtein <= 6 && epTitle.length > levenshtein) {
|
|
||||||
matches.push({
|
|
||||||
episode: ep,
|
|
||||||
levenshtein
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
return matches.sort((a, b) => a.levenshtein - b.levenshtein)
|
return matches
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user