Change: scanner uses any .opf file, use description if plain text, use genres #141, Add: language book detail

2025-06-24 19:51:30 +02:00 · 2021-11-09 17:54:28 -06:00 · 2021-11-09 17:54:28 -06:00 · 7141f70aa5
commit 7141f70aa5
parent 3eb0dc9ac3
6 changed files with 76 additions and 37 deletions
--- a/server/Scanner.js
+++ b/server/Scanner.js
@ -194,6 +194,14 @@ class Scanner {
      }
    })

+
+    // Sync other files (all files that are not audio files) - Updates cover path
+    var hasOtherFileUpdates = false
+    var otherFilesUpdated = await existingAudiobook.syncOtherFiles(audiobookData.otherFiles, this.MetadataPath, forceAudioFileScan)
+    if (otherFilesUpdated) {
+      hasOtherFileUpdates = true
+    }
+
    // Rescan audio file metadata
    if (forceAudioFileScan) {
      Logger.info(`[Scanner] Rescanning ${existingAudiobook.audioFiles.length} audio files for "${existingAudiobook.title}"`)
@ -240,7 +248,7 @@ class Scanner {
      return ScanResult.UPDATED
    }

-    var hasUpdates = hasUpdatedIno || hasUpdatedLibraryOrFolder || removedAudioFiles.length || removedAudioTracks.length || newAudioFiles.length || hasUpdatedAudioFiles
+    var hasUpdates = hasOtherFileUpdates || hasUpdatedIno || hasUpdatedLibraryOrFolder || removedAudioFiles.length || removedAudioTracks.length || newAudioFiles.length || hasUpdatedAudioFiles

    // Check that audio tracks are in sequential order with no gaps
    if (existingAudiobook.checkUpdateMissingParts()) {
@ -248,12 +256,6 @@ class Scanner {
      hasUpdates = true
    }

-    // Sync other files (all files that are not audio files) - Updates cover path
-    var otherFilesUpdated = await existingAudiobook.syncOtherFiles(audiobookData.otherFiles, this.MetadataPath, forceAudioFileScan)
-    if (otherFilesUpdated) {
-      hasUpdates = true
-    }
-
    // Syncs path and fullPath
    if (existingAudiobook.syncPaths(audiobookData)) {
      hasUpdates = true
--- a/server/objects/Audiobook.js
+++ b/server/objects/Audiobook.js
@ -502,7 +502,6 @@ class Audiobook {

    var alreadyHasDescTxt = this.otherFiles.find(of => of.filename === 'desc.txt')
    var alreadyHasReaderTxt = this.otherFiles.find(of => of.filename === 'reader.txt')
-    var alreadyHasMetadataOpf = this.otherFiles.find(of => of.filename === 'metadata.opf')

    var newOtherFilePaths = newOtherFiles.map(f => f.path)
    this.otherFiles = this.otherFiles.filter(f => newOtherFilePaths.includes(f.path))
@ -533,21 +532,27 @@ class Audiobook {
        hasUpdates = true
      }
    }
-    var metadataOpf = newOtherFiles.find(file => file.filename === 'metadata.opf' || file.filename === 'metadata.xml')
-    if (metadataOpf && (!alreadyHasMetadataOpf || forceRescan)) {
+
+    var metadataOpf = newOtherFiles.find(file => file.ext === '.opf' || file.filename === 'metadata.xml')
+    if (metadataOpf) {
      var xmlText = await readTextFile(metadataOpf.fullPath)
      if (xmlText) {
        var opfMetadata = await parseOpfMetadataXML(xmlText)
-        Logger.debug(`[Audiobook] Sync Other File ${metadataOpf.filename} parsed:`, opfMetadata)
+        Logger.debug(`[Audiobook] Sync Other File "${metadataOpf.filename}" parsed:`, opfMetadata)
        if (opfMetadata) {
          const bookUpdatePayload = {}
          for (const key in opfMetadata) {
-            if (opfMetadata[key] && !this.book[key]) {
+            // Add genres only if genres are empty
+            if (key === 'genres') {
+              if (opfMetadata.genres.length && !this.book._genres.length) {
+                bookUpdatePayload[key] = opfMetadata.genres
+              }
+            } else if (opfMetadata[key] && !this.book[key]) {
              bookUpdatePayload[key] = opfMetadata[key]
            }
          }
          if (Object.keys(bookUpdatePayload).length) {
-            Logger.debug(`[Audiobook] Using data found in metadata opf/xml`, bookUpdatePayload)
+            Logger.debug(`[Audiobook] Using data found in OPF "${metadataOpf.filename}"`, bookUpdatePayload)
            this.update({ book: bookUpdatePayload })
            hasUpdates = true
          }
@ -778,15 +783,20 @@ class Audiobook {
      bookUpdatePayload.narrator = readerText
    }

-    var metadataOpf = this.otherFiles.find(file => file.filename === 'metadata.opf' || file.filename === 'metadata.xml')
+    var metadataOpf = this.otherFiles.find(file => file.isOPFFile || file.filename === 'metadata.xml')
    if (metadataOpf) {
      var xmlText = await readTextFile(metadataOpf.fullPath)
      if (xmlText) {
        var opfMetadata = await parseOpfMetadataXML(xmlText)
-        Logger.debug(`[Audiobook] "${this.title}" found ${metadataOpf.filename} parsed:`, opfMetadata)
+        Logger.debug(`[Audiobook] "${this.title}" found "${metadataOpf.filename}" parsed:`, opfMetadata)
        if (opfMetadata) {
          for (const key in opfMetadata) {
-            if (opfMetadata[key] && !this.book[key] && !bookUpdatePayload[key]) {
+            // Add genres only if genres are empty
+            if (key === 'genres') {
+              if (opfMetadata.genres.length && !this.book._genres.length) {
+                bookUpdatePayload[key] = opfMetadata.genres
+              }
+            } else if (opfMetadata[key] && !this.book[key] && !bookUpdatePayload[key]) {
              bookUpdatePayload[key] = opfMetadata[key]
            }
          }
--- a/server/objects/AudiobookFile.js
+++ b/server/objects/AudiobookFile.js
@ -13,6 +13,10 @@ class AudiobookFile {
    }
  }

+  get isOPFFile() {
+    return this.ext ? this.ext.toLowerCase() === '.opf' : false
+  }
+
  toJSON() {
    return {
      ino: this.ino || null,
--- a/server/objects/Book.js
+++ b/server/objects/Book.js
@ -16,6 +16,7 @@ class Book {
    this.publisher = null
    this.description = null
    this.isbn = null
+    this.language = null
    this.cover = null
    this.coverFullPath = null
    this.genres = []
@ -38,6 +39,7 @@ class Book {
  get _author() { return this.authorFL || '' }
  get _series() { return this.series || '' }
  get _authorsList() { return this._author.split(', ') }
+  get _genres() { return this.genres || [] }

  get shouldSearchForCover() {
    if (this.authorFL !== this.lastCoverSearchAuthor || this.title !== this.lastCoverSearchTitle || !this.lastCoverSearch) return true
@ -58,6 +60,7 @@ class Book {
    this.publisher = book.publisher
    this.description = book.description
    this.isbn = book.isbn || null
+    this.language = book.language || null
    this.cover = book.cover
    this.coverFullPath = book.coverFullPath || null
    this.genres = book.genres
@ -81,6 +84,7 @@ class Book {
      publisher: this.publisher,
      description: this.description,
      isbn: this.isbn,
+      language: this.language,
      cover: this.cover,
      coverFullPath: this.coverFullPath,
      genres: this.genres,
@ -120,6 +124,7 @@ class Book {
    this.publishYear = data.publishYear || null
    this.description = data.description || null
    this.isbn = data.isbn || null
+    this.language = data.language || null
    this.cover = data.cover || null
    this.coverFullPath = data.coverFullPath || null
    this.genres = data.genres || []
--- a/server/utils/parseOpfMetadata.js
+++ b/server/utils/parseOpfMetadata.js
@ -20,20 +20,23 @@ function fetchCreator(creators, role) {
  return creator ? creator.value : null
 }

+function fetchTagString(metadata, tag) {
+  if (!metadata[tag] || !metadata[tag].length) return null
+  var tag = metadata[tag][0]
+  if (typeof tag !== 'string') return null
+  return tag
+}
+
 function fetchDate(metadata) {
-  if (!metadata['dc:date']) return null
-  var dates = metadata['dc:date']
-  if (!dates.length || typeof dates[0] !== 'string') return null
-  var dateSplit = dates[0].split('-')
+  var date = fetchTagString(metadata, 'dc:date')
+  if (!date) return null
+  var dateSplit = date.split('-')
  if (!dateSplit.length || dateSplit[0].length !== 4 || isNaN(dateSplit[0])) return null
  return dateSplit[0]
 }

 function fetchPublisher(metadata) {
-  if (!metadata['dc:publisher']) return null
-  var publishers = metadata['dc:publisher']
-  if (!publishers.length || typeof publishers[0] !== 'string') return null
-  return publishers[0]
+  return fetchTagString(metadata, 'dc:publisher')
 }

 function fetchISBN(metadata) {
@ -44,22 +47,33 @@ function fetchISBN(metadata) {
 }

 function fetchTitle(metadata) {
-  if (!metadata['dc:title']) return null
-  var titles = metadata['dc:title']
-  if (!titles.length) return null
-  if (typeof titles[0] === 'string') {
-    return titles[0]
-  }
-  if (titles[0]['_']) {
-    return titles[0]['_']
-  }
-  return null
+  return fetchTagString(metadata, 'dc:title')
+}
+
+function fetchDescription(metadata) {
+  var description = fetchTagString(metadata, 'dc:description')
+  if (!description) return null
+  // Check whether the description is HTML or plain text; only plain text is accepted.
+  // Calibre stores < and > as &lt; and &gt;, so unescape them before testing for markup.
+  description = description.replace(/&lt;/g, '<').replace(/&gt;/g, '>')
+  if (description.match(/<!DOCTYPE html>|<\/?\s*[a-z-][^>]*\s*>|(\&(?:[\w\d]+|#\d+|#x[a-f\d]+);)/)) return null
+  return description
+}
+
+function fetchGenres(metadata) {
+  if (!metadata['dc:subject'] || !metadata['dc:subject'].length) return []
+  return metadata['dc:subject'].map(g => typeof g === 'string' ? g : null).filter(g => !!g)
+}
+
+function fetchLanguage(metadata) {
+  return fetchTagString(metadata, 'dc:language')
 }

 module.exports.parseOpfMetadataXML = async (xml) => {
  var json = await xmlToJSON(xml)
  if (!json || !json.package || !json.package.metadata) return null
  var metadata = json.package.metadata
+
  if (Array.isArray(metadata)) {
    if (!metadata.length) return null
    metadata = metadata[0]
@ -72,7 +86,10 @@ module.exports.parseOpfMetadataXML = async (xml) => {
    narrator: fetchCreator(creators, 'nrt'),
    publishYear: fetchDate(metadata),
    publisher: fetchPublisher(metadata),
-    isbn: fetchISBN(metadata)
+    isbn: fetchISBN(metadata),
+    description: fetchDescription(metadata),
+    genres: fetchGenres(metadata),
+    language: fetchLanguage(metadata)
  }
  return data
 }
--- a/server/utils/scandir.js
+++ b/server/utils/scandir.js
@ -130,10 +130,11 @@ function getFileType(ext) {
  var ext_cleaned = ext.toLowerCase()
  if (ext_cleaned.startsWith('.')) ext_cleaned = ext_cleaned.slice(1)
  if (globals.SupportedAudioTypes.includes(ext_cleaned)) return 'audio'
-  if (ext_cleaned === 'nfo') return 'info'
-  if (ext_cleaned === 'txt') return 'text'
  if (globals.SupportedImageTypes.includes(ext_cleaned)) return 'image'
  if (globals.SupportedEbookTypes.includes(ext_cleaned)) return 'ebook'
+  if (ext_cleaned === 'nfo') return 'info'
+  if (ext_cleaned === 'txt') return 'text'
+  if (ext_cleaned === 'opf') return 'opf'
  return 'unknown'
 }