audiobookshelf/server/utils/podcastUtils.js

const axios = require('axios')
const ssrfFilter = require('ssrf-req-filter')
const Logger = require('../Logger')
const { xmlToJSON, levenshteinDistance } = require('./index')
const htmlSanitizer = require('../utils/htmlSanitizer')

/**
 * Return the first entry stored under `key`, or null when the key is
 * missing or its value has no (or zero) length.
 *
 * @param {Object} json - object to read from
 * @param {string} key - property name to look up
 * @returns {*} first entry, or null
 */
function extractFirstArrayItem(json, key) {
  const entries = json[key]
  return entries?.length ? entries[0] : null
}

/**
 * Resolve the cover image URL of a channel.
 *
 * The first entry of `channel.image.url` is used when it is present and non-empty;
 * otherwise the `href` attribute of the first `itunes:image` element is used.
 *
 * NOTE(review): if `channel.image` is an array, `channel.image.url` is undefined and
 * the iTunes image is always used - confirm whether that is intended.
 *
 * @param {Object} channel - parsed RSS channel node
 * @returns {string|null} image URL, or null when none is found
 */
function extractImage(channel) {
  const rssImageUrls = channel.image?.url
  if (rssImageUrls?.length) {
    return rssImageUrls[0] || null
  }

  // No usable <image><url>, fall back to the attributes of the first <itunes:image>
  const itunesImages = channel['itunes:image']
  if (!itunesImages?.length || !itunesImages[0]['$']) return null
  return itunesImages[0]['$'].href || null
}

/**
 * Collect the `itunes:category` names of a node as a flat list of strings.
 *
 * Nested categories are resolved recursively and joined to their parent with a colon
 * (`Parent:Child`). A category without a `text` attribute is skipped together with
 * its children. A parent whose children yield nothing is listed by its own name.
 *
 * @param {Object} channel - channel node, or a category node when recursing
 * @returns {string[]} category names in document order
 */
function extractCategories(channel) {
  const categoryNodes = channel['itunes:category']
  if (!categoryNodes?.length) return []

  return categoryNodes.flatMap((node) => {
    const label = node['$']?.text
    if (!label) return []

    const nested = node['itunes:category'] ? extractCategories(node) : []
    return nested.length ? nested.map((child) => `${label}:${child}`) : [label]
  })
}

/**
 * Build the podcast-level metadata object from a parsed RSS channel node.
 *
 * The result always has `image`, `categories`, `feedUrl`, `description`,
 * `descriptionPlain` and `type`, plus `title`, `language`, `explicit`, `author`,
 * `pubDate` and `link` (each null when the channel does not provide it).
 *
 * @param {Object} channel - parsed RSS channel node
 * @returns {Object} podcast metadata
 */
function extractPodcastMetadata(channel) {
  const metadata = {
    image: extractImage(channel),
    categories: extractCategories(channel),
    feedUrl: null,
    description: null,
    descriptionPlain: null,
    type: null
  }

  const atomLinks = channel['atom:link']
  if (channel['itunes:new-feed-url']) {
    metadata.feedUrl = extractFirstArrayItem(channel, 'itunes:new-feed-url')
  } else if (atomLinks?.length) {
    // A channel can carry several atom:link elements (self, hub, next, ...).
    // Only rel="self" points at the feed itself, so prefer it and fall back to the first link.
    const selfLink = Array.isArray(atomLinks) ? atomLinks.find((link) => link?.['$']?.rel === 'self' && link['$'].href) : null
    const feedLink = selfLink || atomLinks[0]
    metadata.feedUrl = feedLink?.['$']?.href || null
  }

  if (channel['description']) {
    // An element parsed with attributes is an object holding its text under `_`
    const firstDescription = extractFirstArrayItem(channel, 'description')
    let rawDescription = ''
    if (typeof firstDescription === 'string') {
      rawDescription = firstDescription
    } else if (typeof firstDescription?.['_'] === 'string') {
      rawDescription = firstDescription['_']
    }
    metadata.description = htmlSanitizer.sanitize(rawDescription)
    metadata.descriptionPlain = htmlSanitizer.stripAllTags(rawDescription)
  }

  const arrayFields = ['title', 'language', 'itunes:explicit', 'itunes:author', 'pubDate', 'link', 'itunes:type']
  arrayFields.forEach((key) => {
    // Drop the namespace prefix, e.g. `itunes:author` is stored as `author`
    const cleanKey = key.split(':').pop()
    let value = extractFirstArrayItem(channel, key)
    if (value?.['_']) value = value['_']
    metadata[cleanKey] = value
  })
  return metadata
}

/**
 * Extract the raw episode fields from a parsed RSS item node.
 *
 * An item without a non-blank enclosure URL is not a usable episode: an error is
 * logged and null is returned. Text elements are accepted both as plain strings and
 * as objects holding their text under `_` (the shape of an element parsed with
 * attributes).
 *
 * @param {Object} item - parsed RSS item node
 * @returns {Object|null} episode data, or null when the item has no enclosure URL
 */
function extractEpisodeData(item) {
  // Text of an element: the string itself, or `_` of an element parsed with attributes
  const textOf = (node) => {
    if (typeof node === 'string') return node
    if (typeof node?._ === 'string') return node._
    return null
  }

  // Episode must have url
  const enclosureAttributes = item.enclosure?.[0]?.['$']
  const enclosureUrl = typeof enclosureAttributes?.url === 'string' ? enclosureAttributes.url.trim() : ''
  if (!enclosureUrl) {
    Logger.error(`[podcastUtils] Invalid podcast episode data`)
    return null
  }

  const episode = {
    enclosure: {
      ...enclosureAttributes,
      url: enclosureUrl
    }
  }

  // Full description with html
  if (item['content:encoded']) {
    const rawDescription = (textOf(extractFirstArrayItem(item, 'content:encoded')) || '').trim()
    episode.description = htmlSanitizer.sanitize(rawDescription)
  }

  // Extract chapters
  const chaptersAttributes = item['podcast:chapters']?.[0]?.['$']
  if (chaptersAttributes?.url) {
    episode.chaptersUrl = chaptersAttributes.url
    episode.chaptersType = chaptersAttributes.type || 'application/json'
  }

  // Supposed to be the plaintext description but not always followed
  if (item['description']) {
    const rawDescription = textOf(extractFirstArrayItem(item, 'description')) || ''
    if (!episode.description) episode.description = htmlSanitizer.sanitize(rawDescription)
    episode.descriptionPlain = htmlSanitizer.stripAllTags(rawDescription)
  }

  if (item['pubDate']) {
    const pubDate = textOf(extractFirstArrayItem(item, 'pubDate'))
    if (pubDate !== null) {
      episode.pubDate = pubDate
    } else {
      Logger.error(`[podcastUtils] Invalid pubDate ${item['pubDate']} for ${episode.enclosure.url}`)
    }
  }

  if (item['guid']) {
    const guid = textOf(extractFirstArrayItem(item, 'guid'))
    if (guid !== null) {
      episode.guid = guid
    } else {
      Logger.error(`[podcastUtils] Invalid guid ${item['guid']} for ${episode.enclosure.url}`)
    }
  }

  const arrayFields = ['title', 'itunes:episodeType', 'itunes:season', 'itunes:episode', 'itunes:author', 'itunes:duration', 'itunes:explicit', 'itunes:subtitle']
  arrayFields.forEach((key) => {
    // Drop the namespace prefix, e.g. `itunes:season` is stored as `season`
    const cleanKey = key.split(':').pop()
    let value = extractFirstArrayItem(item, key)
    if (value?.['_']) value = value['_']
    episode[cleanKey] = value
  })
  return episode
}

/**
 * Normalize extracted episode data into a fixed-shape episode object.
 *
 * Missing text fields become empty strings, missing `guid` / chapter fields become
 * null, and `publishedAt` is the millisecond timestamp parsed from `pubDate`
 * (null when `pubDate` is absent or cannot be parsed as a date).
 *
 * @param {Object} data - episode data as produced by extractEpisodeData
 * @returns {Object} normalized episode
 */
function cleanEpisodeData(data) {
  const textOrEmpty = (value) => value || ''
  const valueOrNull = (value) => value || null

  let publishedAt = null
  if (data.pubDate) {
    const timestamp = new Date(data.pubDate).valueOf()
    if (!Number.isNaN(timestamp)) publishedAt = timestamp
  }

  return {
    title: data.title,
    subtitle: textOrEmpty(data.subtitle),
    description: textOrEmpty(data.description),
    descriptionPlain: textOrEmpty(data.descriptionPlain),
    pubDate: textOrEmpty(data.pubDate),
    episodeType: textOrEmpty(data.episodeType),
    season: textOrEmpty(data.season),
    episode: textOrEmpty(data.episode),
    author: textOrEmpty(data.author),
    duration: textOrEmpty(data.duration),
    explicit: textOrEmpty(data.explicit),
    publishedAt,
    enclosure: data.enclosure,
    guid: valueOrNull(data.guid),
    chaptersUrl: valueOrNull(data.chaptersUrl),
    chaptersType: valueOrNull(data.chaptersType)
  }
}

/**
 * Convert parsed RSS item nodes into normalized episodes.
 * Items that extractEpisodeData rejects (returns a falsy value for) are left out.
 *
 * @param {Object[]} items - parsed RSS item nodes
 * @returns {Object[]} normalized episodes in feed order
 */
function extractPodcastEpisodes(items) {
  return items.reduce((episodes, item) => {
    const episodeData = extractEpisodeData(item)
    if (episodeData) episodes.push(cleanEpisodeData(episodeData))
    return episodes
  }, [])
}

/**
 * Turn the parsed `rss` node of a feed into a podcast object.
 *
 * Logs an error and returns null when the feed has no channel or the channel has
 * no items. Otherwise returns `{ metadata, episodes }`, or `{ metadata, numEpisodes }`
 * when `excludeEpisodeMetadata` is truthy.
 *
 * @param {Object} rssJson - parsed `rss` node
 * @param {boolean} excludeEpisodeMetadata - only count the episodes instead of parsing them
 * @returns {Object|null} podcast object, or null for an invalid feed
 */
function cleanPodcastJson(rssJson, excludeEpisodeMetadata) {
  const channels = rssJson.channel
  if (!channels?.length) {
    Logger.error(`[podcastUtil] Invalid podcast no channel object`)
    return null
  }

  const channel = channels[0]
  const items = channel.item
  if (!items?.length) {
    Logger.error(`[podcastUtil] Invalid podcast no episodes`)
    return null
  }

  const metadata = extractPodcastMetadata(channel)
  if (excludeEpisodeMetadata) {
    return { metadata, numEpisodes: items.length }
  }
  return { metadata, episodes: extractPodcastEpisodes(items) }
}

/**
 * Parse podcast RSS feed XML into a podcast object.
 *
 * Resolves to null when `xml` is empty, when the parsed document has no `rss` root
 * (an error is logged), or when cleanPodcastJson rejects the feed.
 *
 * @param {string} xml - RSS feed XML
 * @param {boolean} [excludeEpisodeMetadata=false] - passed through to cleanPodcastJson
 * @param {boolean} [includeRaw=false] - also return the parsed document as `rawJson`
 * @returns {Promise<{podcast: Object, rawJson?: Object}|null>}
 */
module.exports.parsePodcastRssFeedXml = async (xml, excludeEpisodeMetadata = false, includeRaw = false) => {
  if (!xml) return null

  const json = await xmlToJSON(xml)
  const rss = json?.rss
  if (!rss) {
    Logger.error('[podcastUtils] Invalid XML or RSS feed')
    return null
  }

  const podcast = cleanPodcastJson(rss, excludeEpisodeMetadata)
  if (!podcast) return null

  return includeRaw ? { podcast, rawJson: json } : { podcast }
}

/**
 * Download a podcast RSS feed and parse it into a podcast object.
 * Requests go through the SSRF filter agents unless `global.DisableSsrfRequestFilter` is set.
 *
 * Request and parse errors are logged and resolve to null rather than rejecting.
 *
 * @param {string} feedUrl
 * @param {boolean} [excludeEpisodeMetadata=false]
 * @returns {Promise} resolves to the podcast object, or null on failure
 */
module.exports.getPodcastFeed = (feedUrl, excludeEpisodeMetadata = false) => {
  Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}"`)

  const requestOptions = {
    url: feedUrl,
    method: 'GET',
    timeout: 12000,
    responseType: 'arraybuffer',
    headers: {
      Accept: 'application/rss+xml, application/xhtml+xml, application/xml, */*;q=0.8',
      'User-Agent': 'audiobookshelf (+https://audiobookshelf.org; like iTMS)'
    },
    httpAgent: global.DisableSsrfRequestFilter ? null : ssrfFilter(feedUrl),
    httpsAgent: global.DisableSsrfRequestFilter ? null : ssrfFilter(feedUrl)
  }

  // Decode the response body, parse it and return the podcast object (null when unusable)
  const parseResponse = async (response) => {
    // Support for iso-8859-1 encoded RSS feeds, detected from the Content-Type header.
    //  See: https://github.com/advplyr/audiobookshelf/issues/1489
    const contentType = response.headers?.['content-type'] || '' // e.g. text/xml; charset=iso-8859-1
    const isLatin1 = contentType.toLowerCase().includes('iso-8859-1')
    const xml = isLatin1 ? response.data.toString('latin1') : response.data.toString()

    if (!xml) {
      Logger.error(`[podcastUtils] getPodcastFeed: Invalid podcast feed request response (${feedUrl})`)
      return null
    }

    Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}" success - parsing xml`)
    const payload = await this.parsePodcastRssFeedXml(xml, excludeEpisodeMetadata)
    if (!payload) return null

    // RSS feed may be a private RSS feed, so keep the URL that was actually requested
    const { podcast } = payload
    podcast.metadata.feedUrl = feedUrl
    return podcast
  }

  return axios(requestOptions)
    .then(parseResponse)
    .catch((error) => {
      Logger.error('[podcastUtils] getPodcastFeed Error', error)
      return null
    })
}

/**
 * Fetch the feed at `feedUrl` and look up the episodes whose title matches `searchTitle`.
 * Returns array of episodes ordered by closest match (Levenshtein distance of 6 or less).
 * A failed feed request is passed on as a null feed.
 *
 * @param {string} feedUrl
 * @param {string} searchTitle
 * @returns {Promise} result of findMatchingEpisodesInFeed
 */
module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => {
  const podcastFeed = await this.getPodcastFeed(feedUrl).catch(() => null)
  return this.findMatchingEpisodesInFeed(podcastFeed, searchTitle)
}

/**
 * Find the episodes of a feed whose title matches `searchTitle`.
 *
 * Titles are compared lowercased and trimmed. An exact match has distance 0; any other
 * episode matches when its Levenshtein distance to the search title is 6 or less and
 * smaller than the length of the episode title.
 *
 * @param {Object|null} feed - podcast object with an `episodes` array
 * @param {string} searchTitle
 * @returns {{episode: Object, levenshtein: number}[]|null} matches ordered by closest
 *   match, or null when the feed has no episodes or `searchTitle` is not a string
 */
module.exports.findMatchingEpisodesInFeed = (feed, searchTitle) => {
  if (typeof searchTitle !== 'string' || !feed?.episodes) {
    return null
  }
  const normalizedSearchTitle = searchTitle.toLowerCase().trim()

  const matches = []
  feed.episodes.forEach((ep) => {
    // A parsed title can be a non-string (element with attributes but no text), which cannot be compared
    if (!ep?.title || typeof ep.title !== 'string') return

    const epTitle = ep.title.toLowerCase().trim()
    if (epTitle === normalizedSearchTitle) {
      matches.push({
        episode: ep,
        levenshtein: 0
      })
      return
    }

    const levenshtein = levenshteinDistance(normalizedSearchTitle, epTitle, true)
    // Short titles would otherwise match nearly anything within the distance limit
    if (levenshtein <= 6 && epTitle.length > levenshtein) {
      matches.push({
        episode: ep,
        levenshtein
      })
    }
  })
  return matches.sort((a, b) => a.levenshtein - b.levenshtein)
}
Add:Podcast quickmatch attempts quick matching unmatched episodes #983 2022-09-16 01:35:56 +02:00			`const axios = require('axios')`
Update:API endpoint /podcasts/feed validates rssFeed URL and uses SSRF req filter 2023-12-17 19:00:11 +01:00			`const ssrfFilter = require('ssrf-req-filter')`
			`const Logger = require('../Logger')`
Add:Podcast quickmatch attempts quick matching unmatched episodes #983 2022-09-16 01:35:56 +02:00			`const { xmlToJSON, levenshteinDistance } = require('./index')`
Add:HTML sanitizer lib to support html in podcasts and replace strip html lib 2022-05-28 02:41:40 +02:00			`const htmlSanitizer = require('../utils/htmlSanitizer')`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00
			`function extractFirstArrayItem(json, key) {`
Fix:Podcast episodes store RSS feed guid so they can be matched if the RSS feed changes the episode URL #2207 2023-10-17 00:47:44 +02:00			`if (!json[key]?.length) return null`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`return json[key][0]`
			`}`

			`function extractImage(channel) {`
			`if (!channel.image \|\| !channel.image.url \|\| !channel.image.url.length) {`
			`if (!channel['itunes:image'] \|\| !channel['itunes:image'].length \|\| !channel['itunes:image'][0]['$']) {`
			`return null`
			`}`
			`var itunesImage = channel['itunes:image'][0]['$']`
			`return itunesImage.href \|\| null`
			`}`
			`return channel.image.url[0] \|\| null`
			`}`

			`function extractCategories(channel) {`
			`if (!channel['itunes:category'] \|\| !channel['itunes:category'].length) return []`
			`var categories = channel['itunes:category']`
			`var cleanedCats = []`
			`categories.forEach((cat) => {`
			`if (!cat['$'] \|\| !cat['$'].text) return`
			`var cattext = cat['$'].text`
			`if (cat['itunes:category']) {`
			`var subcats = extractCategories(cat)`
			`if (subcats.length) {`
			cleanedCats = cleanedCats.concat(subcats.map((subcat) => `${cattext}:${subcat}`))
			`} else {`
			`cleanedCats.push(cattext)`
			`}`
			`} else {`
			`cleanedCats.push(cattext)`
			`}`
			`})`
			`return cleanedCats`
			`}`

			`function extractPodcastMetadata(channel) {`
Fix:Podcast RSS feed parse when element has attributes #1650 2023-04-06 00:40:40 +02:00			`const metadata = {`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`image: extractImage(channel),`
Update podcast search page to support manually entering podcast RSS feed 2022-04-13 23:55:48 +02:00			`categories: extractCategories(channel),`
			`feedUrl: null,`
			`description: null,`
Add support to podcast type 2023-02-22 19:22:52 +01:00			`descriptionPlain: null,`
			`type: null`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`}`
Update podcast search page to support manually entering podcast RSS feed 2022-04-13 23:55:48 +02:00
			`if (channel['itunes:new-feed-url']) {`
			`metadata.feedUrl = extractFirstArrayItem(channel, 'itunes:new-feed-url')`
			`} else if (channel['atom:link'] && channel['atom:link'].length && channel['atom:link'][0]['$']) {`
			`metadata.feedUrl = channel['atom:link'][0]['$'].href \|\| null`
			`}`

			`if (channel['description']) {`
Add:HTML sanitizer lib to support html in podcasts and replace strip html lib 2022-05-28 02:41:40 +02:00			`const rawDescription = extractFirstArrayItem(channel, 'description') \|\| ''`
			`metadata.description = htmlSanitizer.sanitize(rawDescription)`
			`metadata.descriptionPlain = htmlSanitizer.stripAllTags(rawDescription)`
Update podcast search page to support manually entering podcast RSS feed 2022-04-13 23:55:48 +02:00			`}`

Fix:Podcast RSS feed parse when element has attributes #1650 2023-04-06 00:40:40 +02:00			`const arrayFields = ['title', 'language', 'itunes:explicit', 'itunes:author', 'pubDate', 'link', 'itunes:type']`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`arrayFields.forEach((key) => {`
Fix:Podcast RSS feed parse when element has attributes #1650 2023-04-06 00:40:40 +02:00			`const cleanKey = key.split(':').pop()`
			`let value = extractFirstArrayItem(channel, key)`
Fix:RSS feed parser for episode metadata tags that have attributes #1996 2023-10-28 23:11:15 +02:00			`if (value?.['_']) value = value['_']`
Fix:Podcast RSS feed parse when element has attributes #1650 2023-04-06 00:40:40 +02:00			`metadata[cleanKey] = value`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`})`
			`return metadata`
			`}`

			`function extractEpisodeData(item) {`
			`// Episode must have url`
Add:Chapters to podcast episodes #1646 2023-04-09 21:32:51 +02:00			`if (!item.enclosure?.[0]?.['$']?.url) {`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			Logger.error(`[podcastUtils] Invalid podcast episode data`)
			`return null`
			`}`
Fix podcast episode playback session duration, use podcast episode plaintext description 2022-04-18 00:52:06 +02:00
Add:Chapters to podcast episodes #1646 2023-04-09 21:32:51 +02:00			`const episode = {`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`enclosure: {`
			`...item.enclosure[0]['$']`
			`}`
			`}`
Fix podcast episode playback session duration, use podcast episode plaintext description 2022-04-18 00:52:06 +02:00
Fix:Crash when podcasts put empty spaces with episode file path in RSS feed #1650 2023-04-18 00:03:58 +02:00			`episode.enclosure.url = episode.enclosure.url.trim()`

Add:Full podcast episode description parsed and viewable in modal #492 2022-05-28 18:38:51 +02:00			`// Full description with html`
			`if (item['content:encoded']) {`
			`const rawDescription = (extractFirstArrayItem(item, 'content:encoded') \|\| '').trim()`
			`episode.description = htmlSanitizer.sanitize(rawDescription)`
			`}`

Add:Chapters to podcast episodes #1646 2023-04-09 21:32:51 +02:00			`// Extract chapters`
			`if (item['podcast:chapters']?.[0]?.['$']?.url) {`
			`episode.chaptersUrl = item['podcast:chapters'][0]['$'].url`
			`episode.chaptersType = item['podcast:chapters'][0]['$'].type \|\| 'application/json'`
			`}`

Add:Full podcast episode description parsed and viewable in modal #492 2022-05-28 18:38:51 +02:00			`// Supposed to be the plaintext description but not always followed`
Fix podcast episode playback session duration, use podcast episode plaintext description 2022-04-18 00:52:06 +02:00			`if (item['description']) {`
Add:HTML sanitizer lib to support html in podcasts and replace strip html lib 2022-05-28 02:41:40 +02:00			`const rawDescription = extractFirstArrayItem(item, 'description') \|\| ''`
Add:Full podcast episode description parsed and viewable in modal #492 2022-05-28 18:38:51 +02:00			`if (!episode.description) episode.description = htmlSanitizer.sanitize(rawDescription)`
Add:HTML sanitizer lib to support html in podcasts and replace strip html lib 2022-05-28 02:41:40 +02:00			`episode.descriptionPlain = htmlSanitizer.stripAllTags(rawDescription)`
Fix podcast episode playback session duration, use podcast episode plaintext description 2022-04-18 00:52:06 +02:00			`}`

Fix:Podcast parsing pubDate from RSS feed #1072 2022-10-16 23:24:05 +02:00			`if (item['pubDate']) {`
Fix:Podcast pubDate parsing #1116 2022-11-06 22:43:17 +01:00			`const pubDate = extractFirstArrayItem(item, 'pubDate')`
			`if (typeof pubDate === 'string') {`
			`episode.pubDate = pubDate`
Fix:Podcast episodes store RSS feed guid so they can be matched if the RSS feed changes the episode URL #2207 2023-10-17 00:47:44 +02:00			`} else if (typeof pubDate?._ === 'string') {`
Fix:Podcast pubDate parsing #1116 2022-11-06 22:43:17 +01:00			`episode.pubDate = pubDate._`
Fix:Podcast parsing pubDate from RSS feed #1072 2022-10-16 23:24:05 +02:00			`} else {`
			Logger.error(`[podcastUtils] Invalid pubDate ${item['pubDate']} for ${episode.enclosure.url}`)
			`}`
			`}`

Fix:Podcast episodes store RSS feed guid so they can be matched if the RSS feed changes the episode URL #2207 2023-10-17 00:47:44 +02:00			`if (item['guid']) {`
			`const guidItem = extractFirstArrayItem(item, 'guid')`
			`if (typeof guidItem === 'string') {`
			`episode.guid = guidItem`
			`} else if (typeof guidItem?._ === 'string') {`
			`episode.guid = guidItem._`
			`} else {`
			Logger.error(`[podcastUtils] Invalid guid ${item['guid']} for ${episode.enclosure.url}`)
			`}`
			`}`

Fix:Podcast RSS feed parse when element has attributes #1650 2023-04-06 00:40:40 +02:00			`const arrayFields = ['title', 'itunes:episodeType', 'itunes:season', 'itunes:episode', 'itunes:author', 'itunes:duration', 'itunes:explicit', 'itunes:subtitle']`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`arrayFields.forEach((key) => {`
Fix:Podcast RSS feed parse when element has attributes #1650 2023-04-06 00:40:40 +02:00			`const cleanKey = key.split(':').pop()`
Fix:RSS feed parser for episode metadata tags that have attributes #1996 2023-10-28 23:11:15 +02:00			`let value = extractFirstArrayItem(item, key)`
			`if (value?.['_']) value = value['_']`
			`episode[cleanKey] = value`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`})`
			`return episode`
			`}`

Fix library check path and set provider, update podcast model and UI 2022-03-19 12:41:54 +01:00			`function cleanEpisodeData(data) {`
Fix:Podcast parsing pubDate from RSS feed #1072 2022-10-16 23:24:05 +02:00			`const pubJsDate = data.pubDate ? new Date(data.pubDate) : null`
			`const publishedAt = pubJsDate && !isNaN(pubJsDate) ? pubJsDate.valueOf() : null`
Fix library check path and set provider, update podcast model and UI 2022-03-19 12:41:54 +01:00			`return {`
			`title: data.title,`
			`subtitle: data.subtitle \|\| '',`
			`description: data.description \|\| '',`
Fix podcast episode playback session duration, use podcast episode plaintext description 2022-04-18 00:52:06 +02:00			`descriptionPlain: data.descriptionPlain \|\| '',`
Fix library check path and set provider, update podcast model and UI 2022-03-19 12:41:54 +01:00			`pubDate: data.pubDate \|\| '',`
			`episodeType: data.episodeType \|\| '',`
Add support for seasonal podcasts Podcasts such as [Command Line Heroes](https://podcasts.apple.com/us/podcast/command-line-heroes/id1319947289) have multiple seasons in which each has it's own , . This seaks to add support for such podcast series. 2022-05-04 16:14:09 +02:00			`season: data.season \|\| '',`
Fix library check path and set provider, update podcast model and UI 2022-03-19 12:41:54 +01:00			`episode: data.episode \|\| '',`
			`author: data.author \|\| '',`
			`duration: data.duration \|\| '',`
			`explicit: data.explicit \|\| '',`
Fix:Podcast parsing pubDate from RSS feed #1072 2022-10-16 23:24:05 +02:00			`publishedAt,`
Add:Chapters to podcast episodes #1646 2023-04-09 21:32:51 +02:00			`enclosure: data.enclosure,`
Fix:Podcast episodes store RSS feed guid so they can be matched if the RSS feed changes the episode URL #2207 2023-10-17 00:47:44 +02:00			`guid: data.guid \|\| null,`
Add:Chapters to podcast episodes #1646 2023-04-09 21:32:51 +02:00			`chaptersUrl: data.chaptersUrl \|\| null,`
			`chaptersType: data.chaptersType \|\| null`
Fix library check path and set provider, update podcast model and UI 2022-03-19 12:41:54 +01:00			`}`
			`}`

Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`function extractPodcastEpisodes(items) {`
Add:Chapters to podcast episodes #1646 2023-04-09 21:32:51 +02:00			`const episodes = []`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`items.forEach((item) => {`
Add:Chapters to podcast episodes #1646 2023-04-09 21:32:51 +02:00			`const extracted = extractEpisodeData(item)`
Fix library check path and set provider, update podcast model and UI 2022-03-19 12:41:54 +01:00			`if (extracted) {`
			`episodes.push(cleanEpisodeData(extracted))`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`}`
			`})`
			`return episodes`
			`}`

Add:OPML Upload for bulk adding podcasts #588 2022-05-29 18:46:45 +02:00			`function cleanPodcastJson(rssJson, excludeEpisodeMetadata) {`
Fix:Podcast episodes store RSS feed guid so they can be matched if the RSS feed changes the episode URL #2207 2023-10-17 00:47:44 +02:00			`if (!rssJson.channel?.length) {`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			Logger.error(`[podcastUtil] Invalid podcast no channel object`)
			`return null`
			`}`
Fix:Podcast episodes store RSS feed guid so they can be matched if the RSS feed changes the episode URL #2207 2023-10-17 00:47:44 +02:00			`const channel = rssJson.channel[0]`
			`if (!channel.item?.length) {`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			Logger.error(`[podcastUtil] Invalid podcast no episodes`)
			`return null`
			`}`
Fix:Podcast episodes store RSS feed guid so they can be matched if the RSS feed changes the episode URL #2207 2023-10-17 00:47:44 +02:00			`const podcast = {`
Add:OPML Upload for bulk adding podcasts #588 2022-05-29 18:46:45 +02:00			`metadata: extractPodcastMetadata(channel)`
			`}`
			`if (!excludeEpisodeMetadata) {`
			`podcast.episodes = extractPodcastEpisodes(channel.item)`
			`} else {`
			`podcast.numEpisodes = channel.item.length`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`}`
			`return podcast`
			`}`

Add:OPML Upload for bulk adding podcasts #588 2022-05-29 18:46:45 +02:00			`module.exports.parsePodcastRssFeedXml = async (xml, excludeEpisodeMetadata = false, includeRaw = false) => {`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`if (!xml) return null`
Fix:Podcast episodes store RSS feed guid so they can be matched if the RSS feed changes the episode URL #2207 2023-10-17 00:47:44 +02:00			`const json = await xmlToJSON(xml)`
			`if (!json?.rss) {`
Add:Podcast RSS feed parser 2022-03-06 01:54:24 +01:00			`Logger.error('[podcastUtils] Invalid XML or RSS feed')`
			`return null`
			`}`
Update podcast search page to support manually entering podcast RSS feed 2022-04-13 23:55:48 +02:00
Add:OPML Upload for bulk adding podcasts #588 2022-05-29 18:46:45 +02:00			`const podcast = cleanPodcastJson(json.rss, excludeEpisodeMetadata)`
Update podcast search page to support manually entering podcast RSS feed 2022-04-13 23:55:48 +02:00			`if (!podcast) return null`

			`if (includeRaw) {`
			`return {`
			`podcast,`
			`rawJson: json`
			`}`
			`} else {`
			`return {`
			`podcast`
			`}`
			`}`
Add:Podcast quickmatch attempts quick matching unmatched episodes #983 2022-09-16 01:35:56 +02:00			`}`

Update:API endpoint /podcasts/feed validates rssFeed URL and uses SSRF req filter 2023-12-17 19:00:11 +01:00			`/**`
			`* Get podcast RSS feed as JSON`
			`* Uses SSRF filter to prevent internal URLs`
Update:Support for ENV variables to disable SSRF request filter (DISABLE_SSRF_REQUEST_FILTER=1) #2549 2024-06-04 00:21:18 +02:00			`*`
			`* @param {string} feedUrl`
Update:API endpoint /podcasts/feed validates rssFeed URL and uses SSRF req filter 2023-12-17 19:00:11 +01:00			`* @param {boolean} [excludeEpisodeMetadata=false]`
			`* @returns {Promise}`
			`*/`
Add:Podcast quickmatch attempts quick matching unmatched episodes #983 2022-09-16 01:35:56 +02:00			`module.exports.getPodcastFeed = (feedUrl, excludeEpisodeMetadata = false) => {`
			Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}"`)
Update:API endpoint /podcasts/feed validates rssFeed URL and uses SSRF req filter 2023-12-17 19:00:11 +01:00
			`return axios({`
			`url: feedUrl,`
			`method: 'GET',`
			`timeout: 12000,`
			`responseType: 'arraybuffer',`
Add user agent string to feed requests 2024-06-23 18:35:37 +02:00			`headers: {`
			`Accept: 'application/rss+xml, application/xhtml+xml, application/xml, /;q=0.8',`
Update User-Agent strings 2024-06-25 00:14:20 +02:00			`'User-Agent': 'audiobookshelf (+https://audiobookshelf.org; like iTMS)'`
Add user agent string to feed requests 2024-06-23 18:35:37 +02:00			`},`
Update:Support for ENV variables to disable SSRF request filter (DISABLE_SSRF_REQUEST_FILTER=1) #2549 2024-06-04 00:21:18 +02:00			`httpAgent: global.DisableSsrfRequestFilter ? null : ssrfFilter(feedUrl),`
			`httpsAgent: global.DisableSsrfRequestFilter ? null : ssrfFilter(feedUrl)`
			`})`
			`.then(async (data) => {`
			`// Adding support for ios-8859-1 encoded RSS feeds.`
			`// See: https://github.com/advplyr/audiobookshelf/issues/1489`
			`const contentType = data.headers?.['content-type'] \|\| '' // e.g. text/xml; charset=iso-8859-1`
			`if (contentType.toLowerCase().includes('iso-8859-1')) {`
			`data.data = data.data.toString('latin1')`
			`} else {`
			`data.data = data.data.toString()`
			`}`
Fix:Handle podcast RSS feeds with iso-8859-1 encoding #1489 2023-02-11 00:07:25 +01:00
Update:Support for ENV variables to disable SSRF request filter (DISABLE_SSRF_REQUEST_FILTER=1) #2549 2024-06-04 00:21:18 +02:00			`if (!data?.data) {`
			Logger.error(`[podcastUtils] getPodcastFeed: Invalid podcast feed request response (${feedUrl})`)
			`return null`
			`}`
			Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}" success - parsing xml`)
			`const payload = await this.parsePodcastRssFeedXml(data.data, excludeEpisodeMetadata)`
			`if (!payload) {`
			`return null`
			`}`
Add:Podcast quickmatch attempts quick matching unmatched episodes #983 2022-09-16 01:35:56 +02:00
Update:Support for ENV variables to disable SSRF request filter (DISABLE_SSRF_REQUEST_FILTER=1) #2549 2024-06-04 00:21:18 +02:00			`// RSS feed may be a private RSS feed`
			`payload.podcast.metadata.feedUrl = feedUrl`
Add:Podcast quickmatch attempts quick matching unmatched episodes #983 2022-09-16 01:35:56 +02:00
Update:Support for ENV variables to disable SSRF request filter (DISABLE_SSRF_REQUEST_FILTER=1) #2549 2024-06-04 00:21:18 +02:00			`return payload.podcast`
			`})`
			`.catch((error) => {`
			`Logger.error('[podcastUtils] getPodcastFeed Error', error)`
			`return null`
			`})`
Add:Podcast quickmatch attempts quick matching unmatched episodes #983 2022-09-16 01:35:56 +02:00			`}`

			`// Return array of episodes ordered by closest match (Levenshtein distance of 6 or less)`
			`module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => {`
			`const feed = await this.getPodcastFeed(feedUrl).catch(() => {`
			`return null`
			`})`

			`return this.findMatchingEpisodesInFeed(feed, searchTitle)`
			`}`

			`module.exports.findMatchingEpisodesInFeed = (feed, searchTitle) => {`
			`searchTitle = searchTitle.toLowerCase().trim()`
Fix:Podcast episodes store RSS feed guid so they can be matched if the RSS feed changes the episode URL #2207 2023-10-17 00:47:44 +02:00			`if (!feed?.episodes) {`
Add:Podcast quickmatch attempts quick matching unmatched episodes #983 2022-09-16 01:35:56 +02:00			`return null`
			`}`

			`const matches = []`
Update:Support for ENV variables to disable SSRF request filter (DISABLE_SSRF_REQUEST_FILTER=1) #2549 2024-06-04 00:21:18 +02:00			`feed.episodes.forEach((ep) => {`
Add:Podcast quickmatch attempts quick matching unmatched episodes #983 2022-09-16 01:35:56 +02:00			`if (!ep.title) return`
			`const epTitle = ep.title.toLowerCase().trim()`
			`if (epTitle === searchTitle) {`
			`matches.push({`
			`episode: ep,`
			`levenshtein: 0`
			`})`
			`} else {`
			`const levenshtein = levenshteinDistance(searchTitle, epTitle, true)`
			`if (levenshtein <= 6 && epTitle.length > levenshtein) {`
			`matches.push({`
			`episode: ep,`
			`levenshtein`
			`})`
			`}`
			`}`
			`})`
			`return matches.sort((a, b) => a.levenshtein - b.levenshtein)`
Add support to podcast type 2023-02-22 19:22:52 +01:00			`}`