drive: add full directory listing option for speed

* Add -drive-full-list flag to choose between recursive and full directory list
  * Full list is the default - much quicker if there are lots of directories
This commit is contained in:
Nick Craig-Wood 2013-01-23 21:19:26 +00:00
parent 351829e9fe
commit 8404290499

View File

@ -4,8 +4,6 @@ package main
// FIXME drive code is leaking goroutines somehow - reported bug
// https://code.google.com/p/google-api-go-client/issues/detail?id=23
// FIXME use recursive listing not bound to directory for speed?
// FIXME list containers equivalent should list directories?
// FIXME list directory should list to channel for concurrency not
@ -49,7 +47,7 @@ type FsDrive struct {
about *drive.About // information about the drive, including the root
rootId string // Id of the root directory
foundRoot sync.Once // Whether we need to find the root directory or not
dirCache lockedMap // Map of directory path to directory id
dirCache dirCache // Map of directory path to directory id
findDirLock sync.Mutex // Protect findDir from concurrent use
}
@ -64,36 +62,49 @@ type FsObjectDrive struct {
modifiedDate string // RFC3339 time it was last modified
}
// lockedMap is a map with a mutex
type lockedMap struct {
// dirCache caches paths to directory Ids and vice versa
type dirCache struct {
sync.RWMutex
cache map[string]string
cache map[string]string
invCache map[string]string
}
// Make a new locked map
func newLockedMap() lockedMap {
return lockedMap{cache: make(map[string]string)}
func newDirCache() dirCache {
d := dirCache{}
d.Flush()
return d
}
// Get an item from the map
func (m *lockedMap) Get(key string) (value string, ok bool) {
// Gets an Id given a path
func (m *dirCache) Get(path string) (id string, ok bool) {
m.RLock()
value, ok = m.cache[key]
id, ok = m.cache[path]
m.RUnlock()
return
}
// Put an item to the map
func (m *lockedMap) Put(key, value string) {
// GetInv gets a path given an Id
//
// This is the reverse of Get: the lookup is made in invCache, which
// is keyed by directory Id and holds the directory path.  ok is false
// if the Id is not in the cache, in which case path is "".
func (m *dirCache) GetInv(id string) (path string, ok bool) {
	m.RLock()
	path, ok = m.invCache[id]
	m.RUnlock()
	return
}
// Put a path, id into the map
func (m *dirCache) Put(path, id string) {
m.Lock()
m.cache[key] = value
m.cache[path] = id
m.invCache[id] = path
m.Unlock()
}
// Flush the map of all data
func (m *lockedMap) Flush() {
func (m *dirCache) Flush() {
m.Lock()
m.cache = make(map[string]string)
m.invCache = make(map[string]string)
m.Unlock()
}
@ -112,6 +123,7 @@ var (
driveClientSecret = flag.String("drive-client-secret", os.Getenv("GDRIVE_CLIENT_SECRET"), "User name. Defaults to environment var GDRIVE_CLIENT_SECRET.")
driveTokenFile = flag.String("drive-token-file", os.Getenv("GDRIVE_TOKEN_FILE"), "API key (password). Defaults to environment var GDRIVE_TOKEN_FILE.")
driveAuthCode = flag.String("drive-auth-code", "", "Pass in when requested to make the drive token file.")
driveFullList = flag.Bool("drive-full-list", true, "Use a full listing for directory list. More data but usually quicker.")
)
// String converts this FsDrive to a string
@ -145,7 +157,10 @@ type listAllFn func(*drive.File) bool
//
// Search params: https://developers.google.com/drive/search-parameters
func (f *FsDrive) listAll(dirId string, title string, directoriesOnly bool, filesOnly bool, fn listAllFn) (found bool, err error) {
query := fmt.Sprintf("trashed=false and '%s' in parents", dirId)
query := fmt.Sprintf("trashed=false")
if dirId != "" {
query += fmt.Sprintf(" and '%s' in parents", dirId)
}
if title != "" {
// Escaping the backslash isn't documented but seems to work
title = strings.Replace(title, `\`, `\\`, -1)
@ -158,7 +173,8 @@ func (f *FsDrive) listAll(dirId string, title string, directoriesOnly bool, file
if filesOnly {
query += fmt.Sprintf(" and mimeType!='%s'", driveFolderType)
}
list := f.svc.Files.List().Q(query)
// fmt.Printf("listAll Query = %q\n", query)
list := f.svc.Files.List().Q(query).MaxResults(1000)
OUTER:
for {
files, err := list.Do()
@ -226,7 +242,7 @@ func NewFsDrive(path string) (*FsDrive, error) {
if err != nil {
return nil, err
}
f := &FsDrive{root: root, dirCache: newLockedMap()}
f := &FsDrive{root: root, dirCache: newDirCache()}
t := &oauth.Transport{
Config: driveConfig,
@ -262,6 +278,9 @@ func NewFsDrive(path string) (*FsDrive, error) {
// Find the Id of the root directory and the Id of its parent
f.rootId = f.about.RootFolderId
// Put the root directory in
f.dirCache.Put("", f.rootId)
// fmt.Printf("Root id %s", f.rootId)
return f, nil
}
@ -293,7 +312,12 @@ func (f *FsDrive) NewFsObject(remote string) FsObject {
}
// Path should be directory path either "" or "path/"
func (f *FsDrive) listDir(dirId string, path string, out FsObjectsChan) error {
//
// List the directory using a recursive list from the root
//
// This fetches the minimum amount of stuff but does more API calls
// which makes it slow
func (f *FsDrive) listDirRecursive(dirId string, path string, out FsObjectsChan) error {
var subError error
// Make the API request
_, err := f.listAll(dirId, "", false, false, func(item *drive.File) bool {
@ -301,7 +325,7 @@ func (f *FsDrive) listDir(dirId string, path string, out FsObjectsChan) error {
// FIXME should do this in parallel
// use a wg to sync then collect error
if item.MimeType == driveFolderType {
subError = f.listDir(item.Id, path+item.Title+"/", out)
subError = f.listDirRecursive(item.Id, path+item.Title+"/", out)
if subError != nil {
return true
}
@ -324,6 +348,74 @@ func (f *FsDrive) listDir(dirId string, path string, out FsObjectsChan) error {
return nil
}
// listDirFull lists objects using a single flat listing of everything
// (listAll is called with an empty directory Id) and works out each
// item's path locally from its first parent.
//
// This is fast in terms of number of API calls, but slow in terms of
// fetching more data than it needs.
//
// Parent Ids are turned into paths with f.dirCache.GetInv, so only
// items whose parent chain leads to a directory already in the cache
// are output.  Every directory found is added to the cache as it is
// seen.
//
// NOTE: the dirId and path arguments are not used by this function.
//
// Files are sent to out; the error returned is that of the listing.
func (f *FsDrive) listDirFull(dirId string, path string, out FsObjectsChan) error {
	// Items seen before their parent directory, keyed by the parent's Id
	pending := make(map[string][]*drive.File)

	// Declared before it is defined because it calls itself
	var emit func(item *drive.File, parentPath string)

	// emit outputs a file, or records a directory and then outputs
	// anything which was waiting for that directory to turn up
	emit = func(item *drive.File, parentPath string) {
		remote := item.Title
		if parentPath != "" {
			remote = parentPath + "/" + item.Title
		}

		if item.MimeType != driveFolderType {
			// If item has no MD5 sum it isn't stored on drive, so ignore it
			if item.Md5Checksum == "" {
				return
			}
			if o := f.NewFsObjectWithInfo(remote, item); o != nil {
				out <- o
			}
			return
		}

		// A directory - cache it so later items can find their parent
		f.dirCache.Put(remote, item.Id)

		// Now its path is known, output the children that arrived early
		for _, child := range pending[item.Id] {
			emit(child, remote)
		}
		delete(pending, item.Id)
	}

	// Make the API request
	//
	// The callback always returns false - presumably this keeps the
	// listing going to the end; confirm against listAll.
	_, err := f.listAll("", "", false, false, func(item *drive.File) bool {
		// Items without parents can't be placed in the tree so skip them
		if len(item.Parents) == 0 {
			return false
		}

		// Only the first parent is considered
		parentId := item.Parents[0].Id
		if parentPath, known := f.dirCache.GetInv(parentId); known {
			emit(item, parentPath)
		} else {
			// Haven't found the parent yet so park the item until we do
			pending[parentId] = append(pending[parentId], item)
		}
		return false
	})
	if err != nil {
		return err
	}

	// Anything still in pending here had a parent which never appeared
	// in the directory cache; such items are dropped without comment.
	return nil
}
// Splits a path into directory, leaf
//
// Path shouldn't start or end with a /
@ -446,6 +538,8 @@ func (f *FsDrive) findRoot(create bool) error {
f.foundRoot.Do(func() {
f.rootId, err = f.findDir(f.root, create)
f.dirCache.Flush()
// Put the root directory in
f.dirCache.Put("", f.rootId)
})
return err
}
@ -460,7 +554,11 @@ func (f *FsDrive) List() FsObjectsChan {
stats.Error()
log.Printf("Couldn't find root: %s", err)
} else {
err = f.listDir(f.rootId, "", out)
if *driveFullList {
err = f.listDirFull(f.rootId, "", out)
} else {
err = f.listDirRecursive(f.rootId, "", out)
}
if err != nil {
stats.Error()
log.Printf("List failed: %s", err)