2023-09-03 00:49:28 +02:00
const sequelize = require ( 'sequelize' )
2023-08-26 23:33:27 +02:00
const Path = require ( 'path' )
const packageJson = require ( '../../package.json' )
const Logger = require ( '../Logger' )
const SocketAuthority = require ( '../SocketAuthority' )
const Database = require ( '../Database' )
const fs = require ( '../libs/fsExtra' )
const fileUtils = require ( '../utils/fileUtils' )
const scanUtils = require ( '../utils/scandir' )
2023-09-04 18:50:55 +02:00
const { LogLevel , ScanResult } = require ( '../utils/constants' )
2023-09-02 01:01:17 +02:00
const libraryFilters = require ( '../utils/queries/libraryFilters' )
2023-09-04 00:51:58 +02:00
const LibraryItemScanner = require ( './LibraryItemScanner' )
2023-08-28 00:19:57 +02:00
const LibraryScan = require ( './LibraryScan' )
2023-08-26 23:33:27 +02:00
const LibraryItemScanData = require ( './LibraryItemScanData' )
/**
 * Scans library folders on disk and reconciles what is found with the library
 * items stored in the database. Handles both full library scans and the
 * incremental file updates reported by the Watcher.
 */
class LibraryScanner {
  constructor() {
    // libraryId => true for running scans that were asked to cancel
    this.cancelLibraryScan = {}
    // Scan emit data objects of the library scans currently running
    this.librariesScanning = []

    // True while a batch of watcher file updates is being processed
    this.scanningFilesChanged = false
    /** @type {import('../Watcher').PendingFileUpdate[][]} */
    this.pendingFileUpdatesToScan = []
  }

  /**
   * @param {string} libraryId
   * @returns {boolean} true if an entry with this id is in librariesScanning
   */
  isLibraryScanning(libraryId) {
    return this.librariesScanning.some((ls) => ls.id === libraryId)
  }

  /**
   * Flag a running library scan for cancellation.
   * Does nothing when the library is not currently being scanned.
   *
   * @param {string} libraryId
   */
  setCancelLibraryScan(libraryId) {
    const libraryScanning = this.librariesScanning.find((ls) => ls.id === libraryId)
    if (!libraryScanning) return
    this.cancelLibraryScan[libraryId] = true
  }

  /**
   * Run a full scan of a library and emit scan_start / scan_complete socket events.
   *
   * @param {import('../objects/Library')} library
   * @param {*} options currently not read by this method
   */
  async scan(library, options = {}) {
    if (this.isLibraryScanning(library.id)) {
      Logger.error(` [Scanner] Already scanning ${library.id} `)
      return
    }

    if (!library.folders.length) {
      Logger.warn(` [Scanner] Library has no folders to scan " ${library.name} " `)
      return
    }

    const libraryScan = new LibraryScan()
    libraryScan.setData(library)
    libraryScan.verbose = true
    this.librariesScanning.push(libraryScan.getScanEmitData)

    SocketAuthority.emitter('scan_start', libraryScan.getScanEmitData)

    Logger.info(` [Scanner] Starting library scan ${libraryScan.id} for ${libraryScan.libraryName} `)

    let canceled = false
    try {
      canceled = await this.scanLibrary(libraryScan)
    } finally {
      // Always release the scanning state, even when the scan throws, so the library
      // can be scanned again. The cancel flag is cleared unconditionally because a
      // cancel requested after the last check would otherwise cancel the next scan.
      delete this.cancelLibraryScan[libraryScan.libraryId]
      this.librariesScanning = this.librariesScanning.filter((ls) => ls.id !== library.id)
    }

    if (canceled) {
      Logger.info(` [Scanner] Library scan canceled for " ${libraryScan.libraryName} " `)
    }

    libraryScan.setComplete()

    Logger.info(` [Scanner] Library scan ${libraryScan.id} completed in ${libraryScan.elapsedTimestamp} | ${libraryScan.resultStats} `)

    // Canceled before anything changed: report completion without results
    if (canceled && !libraryScan.totalResults) {
      const emitData = libraryScan.getScanEmitData
      emitData.results = null
      SocketAuthority.emitter('scan_complete', emitData)
      return
    }

    SocketAuthority.emitter('scan_complete', libraryScan.getScanEmitData)

    if (libraryScan.totalResults) {
      libraryScan.saveLog()
    }
  }

  /**
   * Scan all folders of the library, then update, mark missing and add library items.
   *
   * @param {import('./LibraryScan')} libraryScan
   * @returns {Promise<boolean>} true if scan canceled
   */
  async scanLibrary(libraryScan) {
    // Make sure library filter data is set
    // this is used to check for existing authors & series
    await libraryFilters.getFilterData(libraryScan.library.mediaType, libraryScan.libraryId)

    /** @type {LibraryItemScanData[]} */
    let libraryItemDataFound = []

    // Scan each library folder
    for (const folder of libraryScan.folders) {
      const itemDataFoundInFolder = await this.scanFolder(libraryScan.library, folder)
      libraryScan.addLog(LogLevel.INFO, ` ${itemDataFoundInFolder.length} item data found in folder " ${folder.fullPath} " `)
      libraryItemDataFound = libraryItemDataFound.concat(itemDataFoundInFolder)
    }

    if (this.cancelLibraryScan[libraryScan.libraryId]) return true

    const existingLibraryItems = await Database.libraryItemModel.findAll({
      where: {
        libraryId: libraryScan.libraryId
      }
    })

    if (this.cancelLibraryScan[libraryScan.libraryId]) return true

    const libraryItemIdsMissing = []
    let oldLibraryItemsUpdated = []
    for (const existingLibraryItem of existingLibraryItems) {
      // First try to find matching library item with exact file path
      let libraryItemData = libraryItemDataFound.find((lid) => lid.path === existingLibraryItem.path)
      if (!libraryItemData) {
        // Fallback to finding matching library item with matching inode value
        libraryItemData = libraryItemDataFound.find((lid) => lid.ino === existingLibraryItem.ino)
        if (libraryItemData) {
          libraryScan.addLog(LogLevel.INFO, ` Library item with path " ${existingLibraryItem.path} " was not found, but library item inode " ${existingLibraryItem.ino} " was found at path " ${libraryItemData.path} " `)
        }
      }

      if (!libraryItemData) {
        // Podcast folder can have no episodes and still be valid
        if (libraryScan.libraryMediaType === 'podcast' && (await fs.pathExists(existingLibraryItem.path))) {
          libraryScan.addLog(LogLevel.INFO, ` Library item " ${existingLibraryItem.relPath} " folder exists but has no episodes `)
        } else {
          libraryScan.addLog(LogLevel.WARN, ` Library Item " ${existingLibraryItem.path} " (inode: ${existingLibraryItem.ino} ) is missing `)
          libraryScan.resultsMissing++
          if (!existingLibraryItem.isMissing) {
            libraryItemIdsMissing.push(existingLibraryItem.id)

            // TODO: Temporary while using old model to socket emit
            const oldLibraryItem = await Database.libraryItemModel.getOldById(existingLibraryItem.id)
            oldLibraryItem.isMissing = true
            oldLibraryItem.updatedAt = Date.now()
            oldLibraryItemsUpdated.push(oldLibraryItem)
          }
        }
      } else {
        // Matched scan data is consumed so that only new items remain afterwards
        libraryItemDataFound = libraryItemDataFound.filter((lidf) => lidf !== libraryItemData)
        if (await libraryItemData.checkLibraryItemData(existingLibraryItem, libraryScan)) {
          libraryScan.resultsUpdated++
          if (libraryItemData.hasLibraryFileChanges || libraryItemData.hasPathChange) {
            const libraryItem = await LibraryItemScanner.rescanLibraryItem(existingLibraryItem, libraryItemData, libraryScan.library.settings, libraryScan)
            const oldLibraryItem = Database.libraryItemModel.getOldLibraryItem(libraryItem)
            oldLibraryItemsUpdated.push(oldLibraryItem)
          } else {
            // TODO: Temporary while using old model to socket emit
            const oldLibraryItem = await Database.libraryItemModel.getOldById(existingLibraryItem.id)
            oldLibraryItemsUpdated.push(oldLibraryItem)
          }
        }
      }

      // Emit item updates in chunks of 10 to client
      if (oldLibraryItemsUpdated.length === 10) {
        // TODO: Should only emit to clients where library item is accessible
        SocketAuthority.emitter(
          'items_updated',
          oldLibraryItemsUpdated.map((li) => li.toJSONExpanded())
        )
        oldLibraryItemsUpdated = []
      }

      if (this.cancelLibraryScan[libraryScan.libraryId]) return true
    }

    // Emit item updates to client
    if (oldLibraryItemsUpdated.length) {
      // TODO: Should only emit to clients where library item is accessible
      SocketAuthority.emitter(
        'items_updated',
        oldLibraryItemsUpdated.map((li) => li.toJSONExpanded())
      )
    }

    // Authors and series that were removed from books should be removed if they are now empty
    await LibraryItemScanner.checkAuthorsAndSeriesRemovedFromBooks(libraryScan.libraryId, libraryScan)

    // Update missing library items
    if (libraryItemIdsMissing.length) {
      libraryScan.addLog(LogLevel.INFO, ` Updating ${libraryItemIdsMissing.length} library items missing `)
      await Database.libraryItemModel.update(
        {
          isMissing: true,
          lastScan: Date.now(),
          lastScanVersion: packageJson.version
        },
        {
          where: {
            id: libraryItemIdsMissing
          }
        }
      )
    }

    if (this.cancelLibraryScan[libraryScan.libraryId]) return true

    // Add new library items
    if (libraryItemDataFound.length) {
      let newOldLibraryItems = []
      for (const libraryItemData of libraryItemDataFound) {
        const newLibraryItem = await LibraryItemScanner.scanNewLibraryItem(libraryItemData, libraryScan.library.settings, libraryScan)
        if (newLibraryItem) {
          const oldLibraryItem = Database.libraryItemModel.getOldLibraryItem(newLibraryItem)
          newOldLibraryItems.push(oldLibraryItem)

          libraryScan.resultsAdded++
        }

        // Emit new items in chunks of 10 to client
        if (newOldLibraryItems.length === 10) {
          // TODO: Should only emit to clients where library item is accessible
          SocketAuthority.emitter(
            'items_added',
            newOldLibraryItems.map((li) => li.toJSONExpanded())
          )
          newOldLibraryItems = []
        }

        if (this.cancelLibraryScan[libraryScan.libraryId]) return true
      }

      // Emit new items to client
      if (newOldLibraryItems.length) {
        // TODO: Should only emit to clients where library item is accessible
        SocketAuthority.emitter(
          'items_added',
          newOldLibraryItems.map((li) => li.toJSONExpanded())
        )
      }
    }

    // Explicit boolean so the documented return type also holds when not canceled
    return false
  }

  /**
   * Get scan data for library folder
   *
   * @param {import('../objects/Library')} library
   * @param {import('../objects/Folder')} folder
   * @returns {Promise<LibraryItemScanData[]>} empty when the folder does not exist or has no media groupings
   */
  async scanFolder(library, folder) {
    const folderPath = fileUtils.filePathToPOSIX(folder.fullPath)

    const pathExists = await fs.pathExists(folderPath)
    if (!pathExists) {
      Logger.error(` [scandir] Invalid folder path does not exist " ${folderPath} " `)
      return []
    }

    const fileItems = await fileUtils.recurseFiles(folderPath)
    const libraryItemGrouping = scanUtils.groupFileItemsIntoLibraryItemDirs(library.mediaType, fileItems, library.settings.audiobooksOnly)

    if (!Object.keys(libraryItemGrouping).length) {
      Logger.error(` Root path has no media folders: ${folderPath} `)
      return []
    }

    const items = []
    for (const libraryItemPath in libraryItemGrouping) {
      let isFile = false // item is not in a folder
      let libraryItemData = null
      let fileObjs = []
      // A grouping whose value equals its own key is a single media file
      if (libraryItemPath === libraryItemGrouping[libraryItemPath]) {
        // Media file in root only get title
        libraryItemData = {
          mediaMetadata: {
            title: Path.basename(libraryItemPath, Path.extname(libraryItemPath))
          },
          path: Path.posix.join(folderPath, libraryItemPath),
          relPath: libraryItemPath
        }
        fileObjs = await scanUtils.buildLibraryFile(folderPath, [libraryItemPath])
        isFile = true
      } else {
        libraryItemData = scanUtils.getDataFromMediaDir(library.mediaType, folderPath, libraryItemPath)
        fileObjs = await scanUtils.buildLibraryFile(libraryItemData.path, libraryItemGrouping[libraryItemPath])
      }

      const libraryItemFolderStats = await fileUtils.getFileTimestampsWithIno(libraryItemData.path)

      // Items without an inode value are skipped
      if (!libraryItemFolderStats.ino) {
        Logger.warn(` [LibraryScanner] Library item folder " ${libraryItemData.path} " has no inode value `)
        continue
      }

      items.push(
        new LibraryItemScanData({
          libraryFolderId: folder.id,
          libraryId: folder.libraryId,
          mediaType: library.mediaType,
          ino: libraryItemFolderStats.ino,
          mtimeMs: libraryItemFolderStats.mtimeMs || 0,
          ctimeMs: libraryItemFolderStats.ctimeMs || 0,
          birthtimeMs: libraryItemFolderStats.birthtimeMs || 0,
          path: libraryItemData.path,
          relPath: libraryItemData.relPath,
          isFile,
          mediaMetadata: libraryItemData.mediaMetadata || null,
          libraryFiles: fileObjs
        })
      )
    }
    return items
  }

  /**
   * Scan files changed from Watcher
   * Only one batch is processed at a time; batches arriving meanwhile are queued.
   *
   * @param {import('../Watcher').PendingFileUpdate[]} fileUpdates
   */
  async scanFilesChanged(fileUpdates) {
    if (!fileUpdates?.length) return

    // If already scanning files from watcher then add these updates to queue
    if (this.scanningFilesChanged) {
      this.pendingFileUpdatesToScan.push(fileUpdates)
      Logger.debug(` [LibraryScanner] Already scanning files from watcher - file updates pushed to queue (size ${this.pendingFileUpdatesToScan.length} ) `)
      return
    }
    this.scanningFilesChanged = true

    try {
      // files grouped by folder
      const folderGroups = this.getFileUpdatesGrouped(fileUpdates)

      for (const folderId in folderGroups) {
        const libraryId = folderGroups[folderId].libraryId

        const library = await Database.libraryModel.findByPk(libraryId, {
          include: {
            model: Database.libraryFolderModel,
            where: {
              id: folderId
            }
          }
        })
        if (!library) {
          Logger.error(` [LibraryScanner] Library " ${libraryId} " not found in files changed ${libraryId} `)
          continue
        }
        const folder = library.libraryFolders[0]

        const relFilePaths = folderGroups[folderId].fileUpdates.map((fileUpdate) => fileUpdate.relPath)
        const fileUpdateGroup = scanUtils.groupFilesIntoLibraryItemPaths(library.mediaType, relFilePaths)

        if (!Object.keys(fileUpdateGroup).length) {
          Logger.info(` [LibraryScanner] No important changes to scan for in folder " ${folderId} " `)
          continue
        }
        const folderScanResults = await this.scanFolderUpdates(library, folder, fileUpdateGroup)
        Logger.debug(` [LibraryScanner] Folder scan results `, folderScanResults)

        // If something was updated then reset numIssues filter data for library
        if (Object.values(folderScanResults).some((scanResult) => scanResult !== ScanResult.NOTHING && scanResult !== ScanResult.UPTODATE)) {
          await Database.resetLibraryIssuesFilterData(libraryId)
        }
      }
    } finally {
      // Reset the flag even when a scan step throws, otherwise every later
      // watcher update would only ever be queued and never scanned.
      this.scanningFilesChanged = false

      if (this.pendingFileUpdatesToScan.length) {
        Logger.debug(` [LibraryScanner] File updates finished scanning with more updates in queue ( ${this.pendingFileUpdatesToScan.length} ) `)
        // Intentionally not awaited: the queued batch runs on its own.
        // Failures are logged instead of surfacing as an unhandled rejection.
        this.scanFilesChanged(this.pendingFileUpdatesToScan.shift()).catch((error) => {
          Logger.error('[LibraryScanner] Failed to scan queued file updates', error)
        })
      }
    }
  }

  /**
   * Group array of PendingFileUpdate from Watcher by folder
   *
   * @param {import('../Watcher').PendingFileUpdate[]} fileUpdates
   * @returns {Record<string, { libraryId: string, folderId: string, fileUpdates: import('../Watcher').PendingFileUpdate[] }>}
   */
  getFileUpdatesGrouped(fileUpdates) {
    const folderGroups = {}
    for (const file of fileUpdates) {
      if (folderGroups[file.folderId]) {
        folderGroups[file.folderId].fileUpdates.push(file)
      } else {
        folderGroups[file.folderId] = {
          libraryId: file.libraryId,
          folderId: file.folderId,
          fileUpdates: [file]
        }
      }
    }
    return folderGroups
  }

  /**
   * Scan grouped paths for library folder coming from Watcher
   * Note: fileUpdateGroup is modified in place when groups are remapped in the first pass.
   *
   * @param {import('../models/Library')} library
   * @param {import('../models/LibraryFolder')} folder
   * @param {Record<string, string[]>} fileUpdateGroup
   * @returns {Promise<Record<string, number>>} ScanResult per item directory
   */
  async scanFolderUpdates(library, folder, fileUpdateGroup) {
    // Make sure library filter data is set
    // this is used to check for existing authors & series
    await libraryFilters.getFilterData(library.mediaType, library.id)
    Logger.debug(` [Scanner] Scanning file update groups in folder " ${folder.id} " of library " ${library.name} " `)
    Logger.debug(` [Scanner] scanFolderUpdates fileUpdateGroup `, fileUpdateGroup)

    // First pass - Remove files in parent dirs of items and remap the fileupdate group
    // Test Case: Moving audio files from library item folder to author folder should trigger a re-scan of the item
    // Iterate over a snapshot of the keys because fileUpdateGroup is modified inside the loop
    const updateGroup = { ...fileUpdateGroup }
    for (const itemDir in updateGroup) {
      // Loose equality kept from the original comparison (value may be a string or an array)
      if (itemDir == fileUpdateGroup[itemDir]) continue // Media in root path

      const itemDirNestedFiles = fileUpdateGroup[itemDir].filter((b) => b.includes('/'))
      if (!itemDirNestedFiles.length) continue

      const firstNest = itemDirNestedFiles[0].split('/').shift()
      // Must be a plain "/" join - stray whitespace here produces a path that matches nothing
      const altDir = `${itemDir}/${firstNest}`

      const fullPath = Path.posix.join(fileUtils.filePathToPOSIX(folder.path), itemDir)
      // Both operators live in ONE `path` object. Two separate `path` keys in the same
      // object literal would leave only the last one, silently dropping the Op.not condition.
      // NOTE(review): the prefix has no trailing "/", so a sibling such as "<fullPath> 2"
      // also matches - confirm whether that is wanted.
      const childLibraryItem = await Database.libraryItemModel.findOne({
        attributes: ['id', 'path'],
        where: {
          path: {
            [sequelize.Op.not]: fullPath,
            [sequelize.Op.startsWith]: fullPath
          }
        }
      })
      if (!childLibraryItem) {
        continue
      }

      const altFullPath = Path.posix.join(fileUtils.filePathToPOSIX(folder.path), altDir)
      const altChildLibraryItem = await Database.libraryItemModel.findOne({
        attributes: ['id', 'path'],
        where: {
          path: {
            [sequelize.Op.not]: altFullPath,
            [sequelize.Op.startsWith]: altFullPath
          }
        }
      })
      if (altChildLibraryItem) {
        continue
      }

      // Re-key the group one directory deeper and strip that directory from the file paths
      delete fileUpdateGroup[itemDir]
      fileUpdateGroup[altDir] = itemDirNestedFiles.map((f) => f.split('/').slice(1).join('/'))
      Logger.warn(` [LibraryScanner] Some files were modified in a parent directory of a library item " ${childLibraryItem.path} " - ignoring `)
    }

    // Second pass: Check for new/updated/removed items
    const itemGroupingResults = {}
    for (const itemDir in fileUpdateGroup) {
      const fullPath = Path.posix.join(fileUtils.filePathToPOSIX(folder.path), itemDir)

      // Candidate item paths: the directory itself followed by each of its parent directories
      const itemDirParts = itemDir.split('/').slice(0, -1)
      const potentialChildDirs = [fullPath]
      for (let i = 0; i < itemDirParts.length; i++) {
        potentialChildDirs.push(
          Path.posix.join(
            fileUtils.filePathToPOSIX(folder.path),
            itemDir
              .split('/')
              .slice(0, -1 - i)
              .join('/')
          )
        )
      }

      // Check if book dir group is already an item
      let existingLibraryItem = await Database.libraryItemModel.findOneOld({
        path: potentialChildDirs
      })

      if (!existingLibraryItem) {
        // No match by path, try to find the item by the inode of the directory
        const dirIno = await fileUtils.getIno(fullPath)
        existingLibraryItem = await Database.libraryItemModel.findOneOld({
          ino: dirIno
        })
        if (existingLibraryItem) {
          Logger.debug(` [LibraryScanner] scanFolderUpdates: Library item found by inode value= ${dirIno} . " ${existingLibraryItem.relPath} => ${itemDir} " `)
          // Update library item paths for scan
          existingLibraryItem.path = fullPath
          existingLibraryItem.relPath = itemDir
        }
      }
      if (existingLibraryItem) {
        // Is the item exactly - check if was deleted
        if (existingLibraryItem.path === fullPath) {
          const exists = await fs.pathExists(fullPath)
          if (!exists) {
            Logger.info(` [LibraryScanner] Scanning file update group and library item was deleted " ${existingLibraryItem.media.metadata.title} " - marking as missing `)
            existingLibraryItem.setMissing()
            await Database.updateLibraryItem(existingLibraryItem)
            SocketAuthority.emitter('item_updated', existingLibraryItem.toJSONExpanded())

            itemGroupingResults[itemDir] = ScanResult.REMOVED
            continue
          }
        }

        // Scan library item for updates
        Logger.debug(` [LibraryScanner] Folder update for relative path " ${itemDir} " is in library item " ${existingLibraryItem.media.metadata.title} " - scan for updates `)
        itemGroupingResults[itemDir] = await LibraryItemScanner.scanLibraryItem(existingLibraryItem.id)
        continue
      } else if (library.settings.audiobooksOnly && !fileUpdateGroup[itemDir].some?.(scanUtils.checkFilepathIsAudioFile)) {
        Logger.debug(` [LibraryScanner] Folder update for relative path " ${itemDir} " has no audio files `)
        continue
      }

      // Check if a library item is a subdirectory of this dir
      const childItem = await Database.libraryItemModel.findOne({
        attributes: ['id', 'path'],
        where: {
          path: {
            [sequelize.Op.startsWith]: fullPath + '/'
          }
        }
      })
      if (childItem) {
        Logger.warn(` [LibraryScanner] Files were modified in a parent directory of a library item " ${childItem.path} " - ignoring `)
        itemGroupingResults[itemDir] = ScanResult.NOTHING
        continue
      }

      Logger.debug(` [LibraryScanner] Folder update group must be a new item " ${itemDir} " in library " ${library.name} " `)
      const isSingleMediaItem = itemDir === fileUpdateGroup[itemDir]
      const newLibraryItem = await LibraryItemScanner.scanPotentialNewLibraryItem(fullPath, library, folder, isSingleMediaItem)
      if (newLibraryItem) {
        const oldNewLibraryItem = Database.libraryItemModel.getOldLibraryItem(newLibraryItem)
        SocketAuthority.emitter('item_added', oldNewLibraryItem.toJSONExpanded())
      }
      itemGroupingResults[itemDir] = newLibraryItem ? ScanResult.ADDED : ScanResult.NOTHING
    }

    return itemGroupingResults
  }
}
2023-09-04 00:51:58 +02:00
// Export one LibraryScanner instance, created when this module is evaluated
module . exports = new LibraryScanner ( )