From 0d08aecd56b13695c490c4797eb462b8f6dc794e Mon Sep 17 00:00:00 2001 From: mikiher Date: Wed, 18 Sep 2024 08:28:15 +0300 Subject: [PATCH] Move from libarchive to node-unrar-js for cbr and node-stream-zip for cbz --- package-lock.json | 9 + package.json | 1 + server/utils/comicBookExtractors.js | 196 +++++++++++++++++++++ server/utils/parsers/parseComicMetadata.js | 68 ++----- 4 files changed, 225 insertions(+), 49 deletions(-) create mode 100644 server/utils/comicBookExtractors.js diff --git a/package-lock.json b/package-lock.json index 90493a06..6f0a3587 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,6 +16,7 @@ "graceful-fs": "^4.2.10", "htmlparser2": "^8.0.1", "lru-cache": "^10.0.3", + "node-unrar-js": "^2.0.2", "nodemailer": "^6.9.13", "openid-client": "^5.6.1", "p-throttle": "^4.1.1", @@ -3565,6 +3566,14 @@ "integrity": "sha512-uYr7J37ae/ORWdZeQ1xxMJe3NtdmqMC/JZK+geofDrkLUApKRHPd18/TxtBOJ4A0/+uUIliorNrfYV6s1b02eQ==", "dev": true }, + "node_modules/node-unrar-js": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/node-unrar-js/-/node-unrar-js-2.0.2.tgz", + "integrity": "sha512-hLNmoJzqaKJnod8yiTVGe9hnlNRHotUi0CreSv/8HtfRi/3JnRC8DvsmKfeGGguRjTEulhZK6zXX5PXoVuDZ2w==", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/nodemailer": { "version": "6.9.13", "resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-6.9.13.tgz", diff --git a/package.json b/package.json index 70cf40c2..752b2f8d 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "graceful-fs": "^4.2.10", "htmlparser2": "^8.0.1", "lru-cache": "^10.0.3", + "node-unrar-js": "^2.0.2", "nodemailer": "^6.9.13", "openid-client": "^5.6.1", "p-throttle": "^4.1.1", diff --git a/server/utils/comicBookExtractors.js b/server/utils/comicBookExtractors.js new file mode 100644 index 00000000..3443a570 --- /dev/null +++ b/server/utils/comicBookExtractors.js @@ -0,0 +1,196 @@ +const Path = require('path') +const fs = require('../libs/fsExtra') +const os = require('os') +const Logger = require('../Logger') +const { isWritable } = require('./fileUtils') + +const StreamZip = require('../libs/nodeStreamZip') +const Archive = require('../libs/libarchive/archive') +const unrar = require('node-unrar-js') + +class AbstractComicBookExtractor { + constructor(comicPath) { + this.comicPath = comicPath + } + + async getBuffer() { + if (!(await fs.pathExists(this.comicPath))) { + Logger.error(`[parseComicMetadata] Comic path does not exist "${this.comicPath}"`) + return null + } + try { + return fs.readFile(this.comicPath) + } catch (error) { + Logger.error(`[parseComicMetadata] Failed to read comic at "${this.comicPath}"`, error) + return null + } + } + + async open() { + throw new Error('Not implemented') + } + + async getFilePaths() { + throw new Error('Not implemented') + } + + async extractToFile(filePath, outputFilePath) { + throw new Error('Not implemented') + } + + async extractToBuffer(filePath) { + throw new Error('Not implemented') + } + + close() { + throw new Error('Not implemented') + } +} + +class CbrComicBookExtractor extends AbstractComicBookExtractor { + constructor(comicPath) { + super(comicPath) + this.archive = null + this.tmpDir = null + } + + async open() { + this.tmpDir = global.MetadataPath ? 
Path.join(global.MetadataPath, 'tmp') : os.tmpdir() + await fs.ensureDir(this.tmpDir) + if (!(await isWritable(this.tmpDir))) throw new Error(`[CbrComicBookExtractor] Temp directory "${this.tmpDir}" is not writable`) + this.archive = await unrar.createExtractorFromFile({ filepath: this.comicPath, targetPath: this.tmpDir }) + Logger.debug(`[CbrComicBookExtractor] Opened comic book "${this.comicPath}". Using temp directory "${this.tmpDir}" for extraction.`) + } + + async getFilePaths() { + if (!this.archive) return null + const list = this.archive.getFileList() + const fileHeaders = [...list.fileHeaders] + const filePaths = fileHeaders.filter((fh) => !fh.flags.directory).map((fh) => fh.name) + Logger.debug(`[CbrComicBookExtractor] Found ${filePaths.length} files in comic book "${this.comicPath}"`) + return filePaths + } + + async extractToBuffer(file) { + if (!this.archive) return null + const extracted = this.archive.extract({ files: [file] }) + const files = [...extracted.files] + const filePath = Path.join(this.tmpDir, files[0].fileHeader.name) + const fileData = await fs.readFile(filePath) + await fs.remove(filePath) + Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${fileData.length}`) + return fileData + } + + async extractToFile(file, outputFilePath) { + if (!this.archive) return false + const extracted = this.archive.extract({ files: [file] }) + const files = [...extracted.files] + const fileEntry = files[0] + const extractedFilePath = Path.join(this.tmpDir, fileEntry.fileHeader.name) + await fs.move(extractedFilePath, outputFilePath, { overwrite: true }) + Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`) + return true + } + + close() { + Logger.debug(`[CbrComicBookExtractor] Closed comic book "${this.comicPath}"`) + } +} + +class CbzComicBookExtractor extends AbstractComicBookExtractor { + constructor(comicPath) { + super(comicPath) + this.archive = null + } + + async open() { + const buffer = await this.getBuffer() + this.archive = await Archive.open(buffer) + Logger.debug(`[CbzComicBookExtractor] Opened comic book "${this.comicPath}"`) + } + + async getFilePaths() { + if (!this.archive) return null + const list = await this.archive.getFilesArray() + const fileNames = list.map((fo) => fo.file._path) + Logger.debug(`[CbzComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`) + return fileNames + } + + async extractToBuffer(file) { + if (!this.archive) return null + const extracted = await this.archive.extractSingleFile(file) + Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted?.fileData.length}`) + return extracted?.fileData + } + + async extractToFile(file, outputFilePath) { + const data = await this.extractToBuffer(file) + if (!data) return false + await fs.writeFile(outputFilePath, data) + Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`) + return true + } + + close() { + this.archive?.close() + Logger.debug(`[CbzComicBookExtractor] Closed comic book "${this.comicPath}"`) + } +} + +class CbzStreamZipComicBookExtractor extends AbstractComicBookExtractor { + constructor(comicPath) { + super(comicPath) + this.archive = null + } + + async open() { + this.archive = new StreamZip.async({ file: this.comicPath }) + Logger.debug(`[CbzStreamZipComicBookExtractor] Opened 
comic book "${this.comicPath}"`) + } + + async getFilePaths() { + if (!this.archive) return null + const entries = await this.archive.entries() + const fileNames = Object.keys(entries).filter((entry) => !entries[entry].isDirectory) + Logger.debug(`[CbzStreamZipComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`) + return fileNames + } + + async extractToBuffer(file) { + if (!this.archive) return null + const extracted = await this.archive?.entryData(file) + Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted.length}`) + return extracted + } + + async extractToFile(file, outputFilePath) { + if (!this.archive) return false + try { + await this.archive.extract(file, outputFilePath) + Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`) + return true + } catch (error) { + Logger.error(`[CbzStreamZipComicBookExtractor] Failed to extract file "${file}" to "${outputFilePath}"`, error) + return false + } + } + + close() { + this.archive?.close() + Logger.debug(`[CbzStreamZipComicBookExtractor] Closed comic book "${this.comicPath}"`) + } +} + +function createComicBookExtractor(comicPath) { + const ext = Path.extname(comicPath).toLowerCase() + if (ext === '.cbr') { + return new CbrComicBookExtractor(comicPath) + } else if (ext === '.cbz') { + return new CbzStreamZipComicBookExtractor(comicPath) + } else { + throw new Error(`Unsupported comic book format "${ext}"`) + } +} +module.exports = { createComicBookExtractor } diff --git a/server/utils/parsers/parseComicMetadata.js b/server/utils/parsers/parseComicMetadata.js index d2ba702d..7ed0d1f5 100644 --- a/server/utils/parsers/parseComicMetadata.js +++ b/server/utils/parsers/parseComicMetadata.js @@ -5,24 +5,7 @@ const Logger = require('../../Logger') const Archive = require('../../libs/libarchive/archive') const { xmlToJSON } = require('../index') const parseComicInfoMetadata = require('./parseComicInfoMetadata') - -/** - * - * @param {string} filepath - * @returns {Promise} - */ -async function getComicFileBuffer(filepath) { - if (!(await fs.pathExists(filepath))) { - Logger.error(`[parseComicMetadata] Comic path does not exist "${filepath}"`) - return null - } - try { - return fs.readFile(filepath) - } catch (error) { - Logger.error(`[parseComicMetadata] Failed to read comic at "${filepath}"`, error) - return null - } -} +const { createComicBookExtractor } = require('../comicBookExtractors.js') /** * Extract cover image from comic return true if success @@ -33,22 +16,11 @@ async function getComicFileBuffer(filepath) { * @returns {Promise} */ async function extractCoverImage(comicPath, comicImageFilepath, outputCoverPath) { - const comicFileBuffer = await getComicFileBuffer(comicPath) - if (!comicFileBuffer) return null - let archive = null try { - archive = await Archive.open(comicFileBuffer) - const fileEntry = await archive.extractSingleFile(comicImageFilepath) - - if (!fileEntry?.fileData) { - Logger.error(`[parseComicMetadata] Invalid file entry data for comicPath "${comicPath}"/${comicImageFilepath}`) - return false - } - - await fs.writeFile(outputCoverPath, fileEntry.fileData) - - return true + archive = createComicBookExtractor(comicPath) + await archive.open() + return await archive.extractToFile(comicImageFilepath, outputCoverPath) } catch (error) { Logger.error(`[parseComicMetadata] Failed to extract image "${comicImageFilepath}" from 
comicPath "${comicPath}" into "${outputCoverPath}"`, error) return false @@ -67,30 +39,28 @@ module.exports.extractCoverImage = extractCoverImage */ async function parse(ebookFile) { const comicPath = ebookFile.metadata.path - Logger.debug(`Parsing metadata from comic at "${comicPath}"`) - - const comicFileBuffer = await getComicFileBuffer(comicPath) - if (!comicFileBuffer) return null - + Logger.debug(`[parseComicMetadata] Parsing comic metadata at "${comicPath}"`) let archive = null try { - archive = await Archive.open(comicFileBuffer) + archive = createComicBookExtractor(comicPath) + await archive.open() - const fileObjects = await archive.getFilesArray() + const filePaths = await archive.getFilePaths() - fileObjects.sort((a, b) => { - return a.file.name.localeCompare(b.file.name, undefined, { + // Sort the file paths in a natural order to get the first image + filePaths.sort((a, b) => { + return a.localeCompare(b, undefined, { numeric: true, sensitivity: 'base' }) }) let metadata = null - const comicInfo = fileObjects.find((fo) => fo.file.name === 'ComicInfo.xml') - if (comicInfo) { - const comicInfoEntry = await comicInfo.file.extract() - if (comicInfoEntry?.fileData) { - const comicInfoStr = new TextDecoder().decode(comicInfoEntry.fileData) + const comicInfoPath = filePaths.find((filePath) => filePath === 'ComicInfo.xml') + if (comicInfoPath) { + const comicInfoData = await archive.extractToBuffer(comicInfoPath) + if (comicInfoData) { + const comicInfoStr = new TextDecoder().decode(comicInfoData) const comicInfoJson = await xmlToJSON(comicInfoStr) if (comicInfoJson) { metadata = parseComicInfoMetadata.parse(comicInfoJson) @@ -104,9 +74,9 @@ async function parse(ebookFile) { metadata } - const firstImage = fileObjects.find((fo) => globals.SupportedImageTypes.includes(Path.extname(fo.file.name).toLowerCase().slice(1))) - if (firstImage?.file?._path) { - payload.ebookCoverPath = firstImage.file._path + const firstImagePath = filePaths.find((filePath) => globals.SupportedImageTypes.includes(Path.extname(filePath).toLowerCase().slice(1))) + if (firstImagePath) { + payload.ebookCoverPath = firstImagePath } else { Logger.warn(`[parseComicMetadata] Cover image not found in comic at "${comicPath}"`) }