mirror of
https://github.com/advplyr/audiobookshelf.git
synced 2025-06-20 09:48:25 +02:00
Merge pull request #4383 from JKubovy/improve-podcast-episode-search
Use fuse.js for podcast episode search
This commit is contained in:
commit
5025c6a3ea
13
server/libs/fusejs/index.js
Normal file
13
server/libs/fusejs/index.js
Normal file
File diff suppressed because one or more lines are too long
@ -370,7 +370,7 @@ class Scanner {
|
||||
|
||||
let numEpisodesUpdated = 0
|
||||
for (const episode of episodesToQuickMatch) {
|
||||
const episodeMatches = findMatchingEpisodesInFeed(feed, episode.title)
|
||||
const episodeMatches = findMatchingEpisodesInFeed(feed, episode.title, 0.1)
|
||||
if (episodeMatches?.length) {
|
||||
const wasUpdated = await this.updateEpisodeWithMatch(episode, episodeMatches[0].episode, options)
|
||||
if (wasUpdated) numEpisodesUpdated++
|
||||
|
@ -1,8 +1,9 @@
|
||||
const axios = require('axios')
|
||||
const ssrfFilter = require('ssrf-req-filter')
|
||||
const Logger = require('../Logger')
|
||||
const { xmlToJSON, levenshteinDistance, timestampToSeconds } = require('./index')
|
||||
const { xmlToJSON, timestampToSeconds } = require('./index')
|
||||
const htmlSanitizer = require('../utils/htmlSanitizer')
|
||||
const Fuse = require('../libs/fusejs')
|
||||
|
||||
/**
|
||||
* @typedef RssPodcastChapter
|
||||
@ -407,7 +408,7 @@ module.exports.getPodcastFeed = (feedUrl, excludeEpisodeMetadata = false) => {
|
||||
})
|
||||
}
|
||||
|
||||
// Return array of episodes ordered by closest match (Levenshtein distance of 6 or less)
|
||||
// Return array of episodes ordered by closest match using fuse.js
|
||||
module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => {
|
||||
const feed = await this.getPodcastFeed(feedUrl).catch(() => {
|
||||
return null
|
||||
@ -420,32 +421,29 @@ module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => {
|
||||
*
|
||||
* @param {RssPodcast} feed
|
||||
* @param {string} searchTitle
|
||||
* @returns {Array<{ episode: RssPodcastEpisode, levenshtein: number }>}
|
||||
* @param {number} [threshold=0.4] - Fuse.js match threshold: 0.0 requires a perfect match, 1.0 matches anything
|
||||
* @returns {Array<{ episode: RssPodcastEpisode }>|null} matches as returned by the Fuse.js search, or null when the feed has no episodes
|
||||
*/
|
||||
module.exports.findMatchingEpisodesInFeed = (feed, searchTitle) => {
|
||||
searchTitle = searchTitle.toLowerCase().trim()
|
||||
module.exports.findMatchingEpisodesInFeed = (feed, searchTitle, threshold = 0.4) => {
|
||||
if (!feed?.episodes) {
|
||||
return null
|
||||
}
|
||||
|
||||
const fuseOptions = {
|
||||
ignoreDiacritics: true,
|
||||
threshold,
|
||||
keys: [
|
||||
{ name: 'title', weight: 0.7 }, // prefer match in title
|
||||
{ name: 'subtitle', weight: 0.3 }
|
||||
]
|
||||
}
|
||||
const fuse = new Fuse(feed.episodes, fuseOptions)
|
||||
|
||||
const matches = []
|
||||
feed.episodes.forEach((ep) => {
|
||||
if (!ep.title) return
|
||||
const epTitle = ep.title.toLowerCase().trim()
|
||||
if (epTitle === searchTitle) {
|
||||
fuse.search(searchTitle).forEach((match) => {
|
||||
matches.push({
|
||||
episode: ep,
|
||||
levenshtein: 0
|
||||
episode: match.item
|
||||
})
|
||||
} else {
|
||||
const levenshtein = levenshteinDistance(searchTitle, epTitle, true)
|
||||
if (levenshtein <= 6 && epTitle.length > levenshtein) {
|
||||
matches.push({
|
||||
episode: ep,
|
||||
levenshtein
|
||||
})
|
||||
}
|
||||
}
|
||||
})
|
||||
return matches.sort((a, b) => a.levenshtein - b.levenshtein)
|
||||
return matches
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user