diff --git a/backend/all/all.go b/backend/all/all.go index df6a6e45b..36d95ce67 100644 --- a/backend/all/all.go +++ b/backend/all/all.go @@ -14,6 +14,7 @@ import ( _ "github.com/rclone/rclone/backend/combine" _ "github.com/rclone/rclone/backend/compress" _ "github.com/rclone/rclone/backend/crypt" + _ "github.com/rclone/rclone/backend/doi" _ "github.com/rclone/rclone/backend/drive" _ "github.com/rclone/rclone/backend/dropbox" _ "github.com/rclone/rclone/backend/fichier" diff --git a/backend/doi/api/dataversetypes.go b/backend/doi/api/dataversetypes.go new file mode 100644 index 000000000..1df00858c --- /dev/null +++ b/backend/doi/api/dataversetypes.go @@ -0,0 +1,38 @@ +// Type definitions specific to Dataverse + +package api + +// DataverseDatasetResponse is returned by the Dataverse dataset API +type DataverseDatasetResponse struct { + Status string `json:"status"` + Data DataverseDataset `json:"data"` +} + +// DataverseDataset is the representation of a dataset +type DataverseDataset struct { + LatestVersion DataverseDatasetVersion `json:"latestVersion"` +} + +// DataverseDatasetVersion is the representation of a dataset version +type DataverseDatasetVersion struct { + LastUpdateTime string `json:"lastUpdateTime"` + Files []DataverseFile `json:"files"` +} + +// DataverseFile is the representation of a file found in a dataset +type DataverseFile struct { + DirectoryLabel string `json:"directoryLabel"` + DataFile DataverseDataFile `json:"dataFile"` +} + +// DataverseDataFile represents file metadata details +type DataverseDataFile struct { + ID int64 `json:"id"` + Filename string `json:"filename"` + ContentType string `json:"contentType"` + FileSize int64 `json:"filesize"` + OriginalFileFormat string `json:"originalFileFormat"` + OriginalFileSize int64 `json:"originalFileSize"` + OriginalFileName string `json:"originalFileName"` + MD5 string `json:"md5"` +} diff --git a/backend/doi/api/inveniotypes.go b/backend/doi/api/inveniotypes.go new file mode 100644 index 
000000000..bdb866451 --- /dev/null +++ b/backend/doi/api/inveniotypes.go @@ -0,0 +1,33 @@ +// Type definitions specific to InvenioRDM + +package api + +// InvenioRecordResponse is the representation of a record stored in InvenioRDM +type InvenioRecordResponse struct { + Links InvenioRecordResponseLinks `json:"links"` +} + +// InvenioRecordResponseLinks represents a record's links +type InvenioRecordResponseLinks struct { + Self string `json:"self"` +} + +// InvenioFilesResponse is the representation of a record's files +type InvenioFilesResponse struct { + Entries []InvenioFilesResponseEntry `json:"entries"` +} + +// InvenioFilesResponseEntry is the representation of a file entry +type InvenioFilesResponseEntry struct { + Key string `json:"key"` + Checksum string `json:"checksum"` + Size int64 `json:"size"` + Updated string `json:"updated"` + MimeType string `json:"mimetype"` + Links InvenioFilesResponseEntryLinks `json:"links"` +} + +// InvenioFilesResponseEntryLinks represents file links details +type InvenioFilesResponseEntryLinks struct { + Content string `json:"content"` +} diff --git a/backend/doi/api/types.go b/backend/doi/api/types.go new file mode 100644 index 000000000..202cbf6ec --- /dev/null +++ b/backend/doi/api/types.go @@ -0,0 +1,26 @@ +// Package api has general type definitions for doi +package api + +// DoiResolverResponse is returned by the DOI resolver API +// +// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation +type DoiResolverResponse struct { + ResponseCode int `json:"responseCode"` + Handle string `json:"handle"` + Values []DoiResolverResponseValue `json:"values"` +} + +// DoiResolverResponseValue is a single handle record value +type DoiResolverResponseValue struct { + Index int `json:"index"` + Type string `json:"type"` + Data DoiResolverResponseValueData `json:"data"` + TTL int `json:"ttl"` + Timestamp string `json:"timestamp"` +} + +// DoiResolverResponseValueData is the data held in a 
handle value +type DoiResolverResponseValueData struct { + Format string `json:"format"` + Value any `json:"value"` +} diff --git a/backend/doi/dataverse.go b/backend/doi/dataverse.go new file mode 100644 index 000000000..40b4f4884 --- /dev/null +++ b/backend/doi/dataverse.go @@ -0,0 +1,112 @@ +// Implementation for Dataverse + +package doi + +import ( + "context" + "fmt" + "net/http" + "net/url" + "path" + "strings" + "time" + + "github.com/rclone/rclone/backend/doi/api" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/lib/rest" +) + +// Returns true if resolvedURL is likely a DOI hosted on a Dataverse intallation +func activateDataverse(resolvedURL *url.URL) (isActive bool) { + queryValues := resolvedURL.Query() + persistentID := queryValues.Get("persistentId") + return persistentID != "" +} + +// Resolve the main API endpoint for a DOI hosted on a Dataverse installation +func resolveDataverseEndpoint(resolvedURL *url.URL) (provider Provider, endpoint *url.URL, err error) { + queryValues := resolvedURL.Query() + persistentID := queryValues.Get("persistentId") + + query := url.Values{} + query.Add("persistentId", persistentID) + endpointURL := resolvedURL.ResolveReference(&url.URL{Path: "/api/datasets/:persistentId/", RawQuery: query.Encode()}) + + return Dataverse, endpointURL, nil +} + +// dataverseProvider implements the doiProvider interface for Dataverse installations +type dataverseProvider struct { + f *Fs +} + +// ListEntries returns the full list of entries found at the remote, regardless of root +func (dp *dataverseProvider) ListEntries(ctx context.Context) (entries []*Object, err error) { + // Use the cache if populated + cachedEntries, found := dp.f.cache.GetMaybe("files") + if found { + parsedEntries, ok := cachedEntries.([]Object) + if ok { + for _, entry := range parsedEntries { + newEntry := entry + entries = append(entries, &newEntry) + } + return entries, nil + } + } + + filesURL := dp.f.endpoint + var res *http.Response + var result 
api.DataverseDatasetResponse + opts := rest.Opts{ + Method: "GET", + Path: strings.TrimLeft(filesURL.EscapedPath(), "/"), + Parameters: filesURL.Query(), + } + err = dp.f.pacer.Call(func() (bool, error) { + res, err = dp.f.srv.CallJSON(ctx, &opts, nil, &result) + return shouldRetry(ctx, res, err) + }) + if err != nil { + return nil, fmt.Errorf("readDir failed: %w", err) + } + modTime, modTimeErr := time.Parse(time.RFC3339, result.Data.LatestVersion.LastUpdateTime) + if modTimeErr != nil { + fs.Logf(dp.f, "error: could not parse last update time %v", modTimeErr) + modTime = timeUnset + } + for _, file := range result.Data.LatestVersion.Files { + contentURLPath := fmt.Sprintf("/api/access/datafile/%d", file.DataFile.ID) + query := url.Values{} + query.Add("format", "original") + contentURL := dp.f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()}) + entry := &Object{ + fs: dp.f, + remote: path.Join(file.DirectoryLabel, file.DataFile.Filename), + contentURL: contentURL.String(), + size: file.DataFile.FileSize, + modTime: modTime, + md5: file.DataFile.MD5, + contentType: file.DataFile.ContentType, + } + if file.DataFile.OriginalFileName != "" { + entry.remote = path.Join(file.DirectoryLabel, file.DataFile.OriginalFileName) + entry.size = file.DataFile.OriginalFileSize + entry.contentType = file.DataFile.OriginalFileFormat + } + entries = append(entries, entry) + } + // Populate the cache + cacheEntries := []Object{} + for _, entry := range entries { + cacheEntries = append(cacheEntries, *entry) + } + dp.f.cache.Put("files", cacheEntries) + return entries, nil +} + +func newDataverseProvider(f *Fs) doiProvider { + return &dataverseProvider{ + f: f, + } +} diff --git a/backend/doi/doi.go b/backend/doi/doi.go new file mode 100644 index 000000000..00e98c494 --- /dev/null +++ b/backend/doi/doi.go @@ -0,0 +1,649 @@ +// Package doi provides a filesystem interface for digital objects identified by DOIs. 
+//
+// See: https://www.doi.org/the-identifier/what-is-a-doi/
+package doi
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"path"
+	"strings"
+	"time"
+
+	"github.com/rclone/rclone/backend/doi/api"
+	"github.com/rclone/rclone/fs"
+	"github.com/rclone/rclone/fs/config/configmap"
+	"github.com/rclone/rclone/fs/config/configstruct"
+	"github.com/rclone/rclone/fs/fserrors"
+	"github.com/rclone/rclone/fs/fshttp"
+	"github.com/rclone/rclone/fs/hash"
+	"github.com/rclone/rclone/lib/cache"
+	"github.com/rclone/rclone/lib/pacer"
+	"github.com/rclone/rclone/lib/rest"
+)
+
+const (
+	// the URL of the DOI resolver
+	//
+	// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation
+	doiResolverAPIURL = "https://doi.org/api"
+	minSleep          = 10 * time.Millisecond
+	maxSleep          = 2 * time.Second
+	decayConstant     = 2 // bigger for slower decay, exponential
+)
+
+var (
+	errorReadOnly = errors.New("doi remotes are read only")
+	timeUnset     = time.Unix(0, 0)
+)
+
+// init registers the doi backend and its options with rclone
+func init() {
+	fsi := &fs.RegInfo{
+		Name:        "doi",
+		Description: "DOI datasets",
+		NewFs:       NewFs,
+		CommandHelp: commandHelp,
+		Options: []fs.Option{{
+			Name:     "doi",
+			Help:     "The DOI or the doi.org URL.",
+			Required: true,
+		}, {
+			Name: fs.ConfigProvider,
+			Help: `DOI provider.
+
+The DOI provider can be set when rclone does not automatically recognize a supported DOI provider.`,
+			Examples: []fs.OptionExample{
+				{
+					Value: "auto",
+					Help:  "Auto-detect provider",
+				},
+				{
+					Value: string(Zenodo),
+					Help:  "Zenodo",
+				}, {
+					Value: string(Dataverse),
+					Help:  "Dataverse",
+				}, {
+					Value: string(Invenio),
+					Help:  "Invenio",
+				}},
+			Required: false,
+			Advanced: true,
+		}, {
+			Name: "doi_resolver_api_url",
+			Help: `The URL of the DOI resolver API to use.
+
+The DOI resolver can be set for testing or for cases when the canonical DOI resolver API cannot be used.
+ +Defaults to "https://doi.org/api".`, + Required: false, + Advanced: true, + }}, + } + fs.Register(fsi) +} + +// Provider defines the type of provider hosting the DOI +type Provider string + +const ( + // Zenodo provider, see https://zenodo.org + Zenodo Provider = "zenodo" + // Dataverse provider, see https://dataverse.harvard.edu + Dataverse Provider = "dataverse" + // Invenio provider, see https://inveniordm.docs.cern.ch + Invenio Provider = "invenio" +) + +// Options defines the configuration for this backend +type Options struct { + Doi string `config:"doi"` // The DOI, a digital identifier of an object, usually a dataset + Provider string `config:"provider"` // The DOI provider + DoiResolverAPIURL string `config:"doi_resolver_api_url"` // The URL of the DOI resolver API to use. +} + +// Fs stores the interface to the remote HTTP files +type Fs struct { + name string // name of this remote + root string // the path we are working on + provider Provider // the DOI provider + doiProvider doiProvider // the interface used to interact with the DOI provider + features *fs.Features // optional features + opt Options // options for this backend + ci *fs.ConfigInfo // global config + endpoint *url.URL // the main API endpoint for this remote + endpointURL string // endpoint as a string + srv *rest.Client // the connection to the server + pacer *fs.Pacer // pacer for API calls + cache *cache.Cache // a cache for the remote metadata +} + +// Object is a remote object that has been stat'd (so it exists, but is not necessarily open for reading) +type Object struct { + fs *Fs // what this object is part of + remote string // the remote path + contentURL string // the URL where the contents of the file can be downloaded + size int64 // size of the object + modTime time.Time // modification time of the object + contentType string // content type of the object + md5 string // MD5 hash of the object content +} + +// doiProvider is the interface used to list objects in a DOI 
+type doiProvider interface { + // ListEntries returns the full list of entries found at the remote, regardless of root + ListEntries(ctx context.Context) (entries []*Object, err error) +} + +// Parse the input string as a DOI +// Examples: +// 10.1000/182 -> 10.1000/182 +// https://doi.org/10.1000/182 -> 10.1000/182 +// doi:10.1000/182 -> 10.1000/182 +func parseDoi(doi string) string { + doiURL, err := url.Parse(doi) + if err != nil { + return doi + } + if doiURL.Scheme == "doi" { + return strings.TrimLeft(strings.TrimPrefix(doi, "doi:"), "/") + } + if strings.HasSuffix(doiURL.Hostname(), "doi.org") { + return strings.TrimLeft(doiURL.Path, "/") + } + return doi +} + +// Resolve a DOI to a URL +// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation +func resolveDoiURL(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, opt *Options) (doiURL *url.URL, err error) { + resolverURL := opt.DoiResolverAPIURL + if resolverURL == "" { + resolverURL = doiResolverAPIURL + } + + var result api.DoiResolverResponse + params := url.Values{} + params.Add("index", "1") + opts := rest.Opts{ + Method: "GET", + RootURL: resolverURL, + Path: "/handles/" + opt.Doi, + Parameters: params, + } + err = pacer.Call(func() (bool, error) { + res, err := srv.CallJSON(ctx, &opts, nil, &result) + return shouldRetry(ctx, res, err) + }) + if err != nil { + return nil, err + } + + if result.ResponseCode != 1 { + return nil, fmt.Errorf("could not resolve DOI (error code %d)", result.ResponseCode) + } + resolvedURLStr := "" + for _, value := range result.Values { + if value.Type == "URL" && value.Data.Format == "string" { + valueStr, ok := value.Data.Value.(string) + if !ok { + return nil, fmt.Errorf("could not resolve DOI (incorrect response format)") + } + resolvedURLStr = valueStr + } + } + resolvedURL, err := url.Parse(resolvedURLStr) + if err != nil { + return nil, err + } + return resolvedURL, nil +} + +// Resolve the passed configuration into a 
provider and enpoint +func resolveEndpoint(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, opt *Options) (provider Provider, endpoint *url.URL, err error) { + resolvedURL, err := resolveDoiURL(ctx, srv, pacer, opt) + if err != nil { + return "", nil, err + } + + switch opt.Provider { + case string(Dataverse): + return resolveDataverseEndpoint(resolvedURL) + case string(Invenio): + return resolveInvenioEndpoint(ctx, srv, pacer, resolvedURL) + case string(Zenodo): + return resolveZenodoEndpoint(ctx, srv, pacer, resolvedURL, opt.Doi) + } + + hostname := strings.ToLower(resolvedURL.Hostname()) + if hostname == "dataverse.harvard.edu" || activateDataverse(resolvedURL) { + return resolveDataverseEndpoint(resolvedURL) + } + if hostname == "zenodo.org" || strings.HasSuffix(hostname, ".zenodo.org") { + return resolveZenodoEndpoint(ctx, srv, pacer, resolvedURL, opt.Doi) + } + if activateInvenio(ctx, srv, pacer, resolvedURL) { + return resolveInvenioEndpoint(ctx, srv, pacer, resolvedURL) + } + + return "", nil, fmt.Errorf("provider '%s' is not supported", resolvedURL.Hostname()) +} + +// Make the http connection from the passed options +func (f *Fs) httpConnection(ctx context.Context, opt *Options) (isFile bool, err error) { + provider, endpoint, err := resolveEndpoint(ctx, f.srv, f.pacer, opt) + if err != nil { + return false, err + } + + // Update f with the new parameters + f.srv.SetRoot(endpoint.ResolveReference(&url.URL{Path: "/"}).String()) + f.endpoint = endpoint + f.endpointURL = endpoint.String() + f.provider = provider + f.opt.Provider = string(provider) + + switch f.provider { + case Dataverse: + f.doiProvider = newDataverseProvider(f) + case Invenio, Zenodo: + f.doiProvider = newInvenioProvider(f) + default: + return false, fmt.Errorf("provider type '%s' not supported", f.provider) + } + + // Determine if the root is a file + entries, err := f.doiProvider.ListEntries(ctx) + if err != nil { + return false, err + } + for _, entry := range entries { + if 
entry.remote == f.root { + isFile = true + break + } + } + return isFile, nil +} + +// retryErrorCodes is a slice of error codes that we will retry +var retryErrorCodes = []int{ + 429, // Too Many Requests. + 500, // Internal Server Error + 502, // Bad Gateway + 503, // Service Unavailable + 504, // Gateway Timeout + 509, // Bandwidth Limit Exceeded +} + +// shouldRetry returns a boolean as to whether this res and err +// deserve to be retried. It returns the err as a convenience. +func shouldRetry(ctx context.Context, res *http.Response, err error) (bool, error) { + if fserrors.ContextError(ctx, &err) { + return false, err + } + return fserrors.ShouldRetry(err) || fserrors.ShouldRetryHTTP(res, retryErrorCodes), err +} + +// NewFs creates a new Fs object from the name and root. It connects to +// the host specified in the config file. +func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) { + root = strings.Trim(root, "/") + + // Parse config into Options struct + opt := new(Options) + err := configstruct.Set(m, opt) + if err != nil { + return nil, err + } + opt.Doi = parseDoi(opt.Doi) + + client := fshttp.NewClient(ctx) + ci := fs.GetConfig(ctx) + f := &Fs{ + name: name, + root: root, + opt: *opt, + ci: ci, + srv: rest.NewClient(client), + pacer: fs.NewPacer(ctx, pacer.NewDefault(pacer.MinSleep(minSleep), pacer.MaxSleep(maxSleep), pacer.DecayConstant(decayConstant))), + cache: cache.New(), + } + f.features = (&fs.Features{ + CanHaveEmptyDirectories: true, + }).Fill(ctx, f) + + isFile, err := f.httpConnection(ctx, opt) + if err != nil { + return nil, err + } + + if isFile { + // return an error with an fs which points to the parent + newRoot := path.Dir(f.root) + if newRoot == "." 
{
+			newRoot = ""
+		}
+		f.root = newRoot
+		return f, fs.ErrorIsFile
+	}
+
+	return f, nil
+}
+
+// Name returns the configured name of the file system
+func (f *Fs) Name() string {
+	return f.name
+}
+
+// Root returns the root for the filesystem
+func (f *Fs) Root() string {
+	return f.root
+}
+
+// String returns the URL for the filesystem
+func (f *Fs) String() string {
+	return fmt.Sprintf("DOI %s", f.opt.Doi)
+}
+
+// Features returns the optional features of this Fs
+func (f *Fs) Features() *fs.Features {
+	return f.features
+}
+
+// Precision is the remote http file system's modtime precision, which we have no way of knowing. We estimate at 1s
+func (f *Fs) Precision() time.Duration {
+	return time.Second
+}
+
+// Hashes returns hash.MD5 as the providers report an MD5 for each file
+func (f *Fs) Hashes() hash.Set {
+	return hash.Set(hash.MD5)
+}
+
+// Mkdir makes the root directory of the Fs object
+func (f *Fs) Mkdir(ctx context.Context, dir string) error {
+	return errorReadOnly
+}
+
+// Remove a remote http file object
+func (o *Object) Remove(ctx context.Context) error {
+	return errorReadOnly
+}
+
+// Rmdir removes the root directory of the Fs object
+func (f *Fs) Rmdir(ctx context.Context, dir string) error {
+	return errorReadOnly
+}
+
+// NewObject creates a new remote http file object
+func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
+	entries, err := f.doiProvider.ListEntries(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	remoteFullPath := remote
+	if f.root != "" {
+		remoteFullPath = path.Join(f.root, remote)
+	}
+
+	for _, entry := range entries {
+		if entry.Remote() == remoteFullPath {
+			return entry, nil
+		}
+	}
+
+	return nil, fs.ErrorObjectNotFound
+}
+
+// List the objects and directories in dir into entries. The
+// entries can be returned in any order but should be for a
+// complete directory.
+//
+// dir should be "" to list the root, and should not have
+// trailing slashes.
+//
+// This should return ErrDirNotFound if the directory isn't
+// found.
+func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
+	fileEntries, err := f.doiProvider.ListEntries(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("error listing %q: %w", dir, err)
+	}
+
+	fullDir := path.Join(f.root, dir)
+	if fullDir != "" {
+		fullDir += "/"
+	}
+
+	dirPaths := map[string]bool{}
+	for _, entry := range fileEntries {
+		// First, filter out files not in `fullDir`
+		if !strings.HasPrefix(entry.remote, fullDir) {
+			continue
+		}
+		// Then, find entries in subfolders
+		remotePath := entry.remote
+		if fullDir != "" {
+			remotePath = strings.TrimLeft(strings.TrimPrefix(remotePath, fullDir), "/")
+		}
+		parts := strings.SplitN(remotePath, "/", 2)
+		if len(parts) == 1 {
+			// A direct child of dir: emit it as a file entry
+			newEntry := *entry
+			newEntry.remote = path.Join(dir, remotePath)
+			entries = append(entries, &newEntry)
+		} else {
+			// Deeper than one level: record the first path segment as a subdirectory
+			dirPaths[path.Join(dir, parts[0])] = true
+		}
+	}
+
+	// Emit one directory entry per distinct subdirectory found above
+	for dirPath := range dirPaths {
+		entry := fs.NewDir(dirPath, time.Time{})
+		entries = append(entries, entry)
+	}
+
+	return entries, nil
+}
+
+// Put in to the remote path with the modTime given of the given size
+//
+// May create the object even if it returns an error - if so
+// will return the object and the error, otherwise will return
+// nil and the error
+func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
+	return nil, errorReadOnly
+}
+
+// PutStream uploads to the remote path with the modTime given of indeterminate size
+func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
+	return nil, errorReadOnly
+}
+
+// Fs is the filesystem this remote http file object is located within
+func (o *Object) Fs() fs.Info {
+	return o.fs
+}
+
+// String returns the URL to the remote HTTP file
+func (o *Object) String() string {
+	if o == nil {
+		return ""
+	}
+	return o.remote
+}
+
+// Remote the name of the remote HTTP file, relative to the fs root
+func (o *Object) Remote() string {
+	return o.remote
+}
+
+// Hash returns the MD5 of the object content as reported by the provider
+func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) {
+	if t != hash.MD5 {
+		return "", hash.ErrUnsupported
+	}
+	return o.md5, nil
+}
+
+// Size returns the size in bytes of the remote http file
+func (o *Object) Size() int64 {
+	return o.size
+}
+
+// ModTime returns the modification time of the remote http file
+func (o *Object) ModTime(ctx context.Context) time.Time {
+	return o.modTime
+}
+
+// SetModTime sets the modification and access time to the specified time
+//
+// it also updates the info field
+func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
+	return errorReadOnly
+}
+
+// Storable returns whether the remote http file is a regular file (not a directory, symbolic link, block device, character device, named pipe, etc.)
+func (o *Object) Storable() bool {
+	return true
+}
+
+// Open a remote http file object for reading.
Seek is supported
+func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
+	fs.FixRangeOption(options, o.size)
+	opts := rest.Opts{
+		Method:  "GET",
+		RootURL: o.contentURL,
+		Options: options,
+	}
+	var res *http.Response
+	err = o.fs.pacer.Call(func() (bool, error) {
+		res, err = o.fs.srv.Call(ctx, &opts)
+		return shouldRetry(ctx, res, err)
+	})
+	if err != nil {
+		return nil, fmt.Errorf("Open failed: %w", err)
+	}
+
+	// Handle non-compliant redirects
+	if res.Header.Get("Location") != "" {
+		newURL, err := res.Location()
+		if err == nil {
+			// Close the redirect response body before issuing the
+			// follow-up request, otherwise the connection is leaked
+			_ = res.Body.Close()
+			opts.RootURL = newURL.String()
+			err = o.fs.pacer.Call(func() (bool, error) {
+				res, err = o.fs.srv.Call(ctx, &opts)
+				return shouldRetry(ctx, res, err)
+			})
+			if err != nil {
+				return nil, fmt.Errorf("Open failed: %w", err)
+			}
+		}
+	}
+
+	return res.Body, nil
+}
+
+// Update in to the object with the modTime given of the given size
+func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
+	return errorReadOnly
+}
+
+// MimeType of an Object if known, "" otherwise
+func (o *Object) MimeType(ctx context.Context) string {
+	return o.contentType
+}
+
+var commandHelp = []fs.CommandHelp{{
+	Name:  "metadata",
+	Short: "Show metadata about the DOI.",
+	Long: `This command returns a JSON object with some information about the DOI.
+
+    rclone backend metadata doi:
+
+It returns a JSON object representing metadata about the DOI.
+`,
+}, {
+	Name:  "set",
+	Short: "Set command for updating the config parameters.",
+	Long: `This set command can be used to update the config parameters
+for a running doi backend.
+
+Usage Examples:
+
+    rclone backend set doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
+    rclone rc backend/command command=set fs=doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
+    rclone rc backend/command command=set fs=doi: -o doi=NEW_DOI
+
+The option keys are named as they are in the config file.
+ +This rebuilds the connection to the doi backend when it is called with +the new parameters. Only new parameters need be passed as the values +will default to those currently in use. + +It doesn't return anything. +`, +}} + +// Command the backend to run a named command +// +// The command run is name +// args may be used to read arguments from +// opts may be used to read optional arguments from +// +// The result should be capable of being JSON encoded +// If it is a string or a []string it will be shown to the user +// otherwise it will be JSON encoded and shown to the user like that +func (f *Fs) Command(ctx context.Context, name string, arg []string, opt map[string]string) (out interface{}, err error) { + switch name { + case "metadata": + return f.ShowMetadata(ctx) + case "set": + newOpt := f.opt + err := configstruct.Set(configmap.Simple(opt), &newOpt) + if err != nil { + return nil, fmt.Errorf("reading config: %w", err) + } + _, err = f.httpConnection(ctx, &newOpt) + if err != nil { + return nil, fmt.Errorf("updating session: %w", err) + } + f.opt = newOpt + keys := []string{} + for k := range opt { + keys = append(keys, k) + } + fs.Logf(f, "Updated config values: %s", strings.Join(keys, ", ")) + return nil, nil + default: + return nil, fs.ErrorCommandNotFound + } +} + +// ShowMetadata returns some metadata about the corresponding DOI +func (f *Fs) ShowMetadata(ctx context.Context) (metadata interface{}, err error) { + doiURL, err := url.Parse("https://doi.org/" + f.opt.Doi) + if err != nil { + return nil, err + } + + info := map[string]any{} + info["DOI"] = f.opt.Doi + info["URL"] = doiURL.String() + info["metadataURL"] = f.endpointURL + info["provider"] = f.provider + return info, nil +} + +// Check the interfaces are satisfied +var ( + _ fs.Fs = (*Fs)(nil) + _ fs.PutStreamer = (*Fs)(nil) + _ fs.Commander = (*Fs)(nil) + _ fs.Object = (*Object)(nil) + _ fs.MimeTyper = (*Object)(nil) +) diff --git a/backend/doi/doi_internal_test.go 
b/backend/doi/doi_internal_test.go new file mode 100644 index 000000000..22046be09 --- /dev/null +++ b/backend/doi/doi_internal_test.go @@ -0,0 +1,260 @@ +package doi + +import ( + "context" + "crypto/md5" + "encoding/hex" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "net/url" + "sort" + "strings" + "testing" + "time" + + "github.com/rclone/rclone/backend/doi/api" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/config/configmap" + "github.com/rclone/rclone/fs/hash" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var remoteName = "TestDoi" + +func TestParseDoi(t *testing.T) { + // 10.1000/182 -> 10.1000/182 + doi := "10.1000/182" + parsed := parseDoi(doi) + assert.Equal(t, "10.1000/182", parsed) + + // https://doi.org/10.1000/182 -> 10.1000/182 + doi = "https://doi.org/10.1000/182" + parsed = parseDoi(doi) + assert.Equal(t, "10.1000/182", parsed) + + // https://dx.doi.org/10.1000/182 -> 10.1000/182 + doi = "https://dxdoi.org/10.1000/182" + parsed = parseDoi(doi) + assert.Equal(t, "10.1000/182", parsed) + + // doi:10.1000/182 -> 10.1000/182 + doi = "doi:10.1000/182" + parsed = parseDoi(doi) + assert.Equal(t, "10.1000/182", parsed) + + // doi://10.1000/182 -> 10.1000/182 + doi = "doi://10.1000/182" + parsed = parseDoi(doi) + assert.Equal(t, "10.1000/182", parsed) +} + +// prepareMockDoiResolverServer prepares a test server to resolve DOIs +func prepareMockDoiResolverServer(t *testing.T, resolvedURL string) (doiResolverAPIURL string) { + mux := http.NewServeMux() + + // Handle requests for resolving DOIs + mux.HandleFunc("GET /api/handles/{handle...}", func(w http.ResponseWriter, r *http.Request) { + // Check that we are resolving a DOI + handle := strings.TrimPrefix(r.URL.Path, "/api/handles/") + assert.NotEmpty(t, handle) + index := r.URL.Query().Get("index") + assert.Equal(t, "1", index) + + // Return the most basic response + result := api.DoiResolverResponse{ + ResponseCode: 1, + Handle: handle, 
+ Values: []api.DoiResolverResponseValue{ + { + Index: 1, + Type: "URL", + Data: api.DoiResolverResponseValueData{ + Format: "string", + Value: resolvedURL, + }, + }, + }, + } + resultBytes, err := json.Marshal(result) + require.NoError(t, err) + w.Header().Add("Content-Type", "application/json") + _, err = w.Write(resultBytes) + require.NoError(t, err) + }) + + // Make the test server + ts := httptest.NewServer(mux) + + // Close the server at the end of the test + t.Cleanup(ts.Close) + + return ts.URL + "/api" +} + +func md5Sum(text string) string { + hash := md5.Sum([]byte(text)) + return hex.EncodeToString(hash[:]) +} + +// prepareMockZenodoServer prepares a test server that mocks Zenodo.org +func prepareMockZenodoServer(t *testing.T, files map[string]string) *httptest.Server { + mux := http.NewServeMux() + + // Handle requests for a single record + mux.HandleFunc("GET /api/records/{recordID...}", func(w http.ResponseWriter, r *http.Request) { + // Check that we are returning data about a single record + recordID := strings.TrimPrefix(r.URL.Path, "/api/records/") + assert.NotEmpty(t, recordID) + + // Return the most basic response + selfURL, err := url.Parse("http://" + r.Host) + require.NoError(t, err) + selfURL = selfURL.JoinPath(r.URL.String()) + result := api.InvenioRecordResponse{ + Links: api.InvenioRecordResponseLinks{ + Self: selfURL.String(), + }, + } + resultBytes, err := json.Marshal(result) + require.NoError(t, err) + w.Header().Add("Content-Type", "application/json") + _, err = w.Write(resultBytes) + require.NoError(t, err) + }) + // Handle requests for listing files in a record + mux.HandleFunc("GET /api/records/{record}/files", func(w http.ResponseWriter, r *http.Request) { + // Return the most basic response + filesBaseURL, err := url.Parse("http://" + r.Host) + require.NoError(t, err) + filesBaseURL = filesBaseURL.JoinPath("/api/files/") + + entries := []api.InvenioFilesResponseEntry{} + for filename, contents := range files { + entries = 
append(entries, + api.InvenioFilesResponseEntry{ + Key: filename, + Checksum: md5Sum(contents), + Size: int64(len(contents)), + Updated: time.Now().UTC().Format(time.RFC3339), + MimeType: "text/plain; charset=utf-8", + Links: api.InvenioFilesResponseEntryLinks{ + Content: filesBaseURL.JoinPath(filename).String(), + }, + }, + ) + } + + result := api.InvenioFilesResponse{ + Entries: entries, + } + resultBytes, err := json.Marshal(result) + require.NoError(t, err) + w.Header().Add("Content-Type", "application/json") + _, err = w.Write(resultBytes) + require.NoError(t, err) + }) + // Handle requests for file contents + mux.HandleFunc("/api/files/{file}", func(w http.ResponseWriter, r *http.Request) { + // Check that we are returning the contents of a file + filename := strings.TrimPrefix(r.URL.Path, "/api/files/") + assert.NotEmpty(t, filename) + contents, found := files[filename] + if !found { + w.WriteHeader(404) + return + } + + // Return the most basic response + _, err := w.Write([]byte(contents)) + require.NoError(t, err) + }) + + // Make the test server + ts := httptest.NewServer(mux) + + // Close the server at the end of the test + t.Cleanup(ts.Close) + + return ts +} + +func TestZenodoRemote(t *testing.T) { + recordID := "2600782" + doi := "10.5281/zenodo.2600782" + + // The files in the dataset + files := map[string]string{ + "README.md": "This is a dataset.", + "data.txt": "Some data", + } + + ts := prepareMockZenodoServer(t, files) + resolvedURL := ts.URL + "/record/" + recordID + + doiResolverAPIURL := prepareMockDoiResolverServer(t, resolvedURL) + + testConfig := configmap.Simple{ + "type": "doi", + "doi": doi, + "provider": "zenodo", + "doi_resolver_api_url": doiResolverAPIURL, + } + f, err := NewFs(context.Background(), remoteName, "", testConfig) + require.NoError(t, err) + + // Test listing the DOI files + entries, err := f.List(context.Background(), "") + require.NoError(t, err) + + sort.Sort(entries) + + require.Equal(t, len(files), len(entries)) + 
+ e := entries[0] + assert.Equal(t, "README.md", e.Remote()) + assert.Equal(t, int64(18), e.Size()) + _, ok := e.(*Object) + assert.True(t, ok) + + e = entries[1] + assert.Equal(t, "data.txt", e.Remote()) + assert.Equal(t, int64(9), e.Size()) + _, ok = e.(*Object) + assert.True(t, ok) + + // Test reading the DOI files + o, err := f.NewObject(context.Background(), "README.md") + require.NoError(t, err) + assert.Equal(t, int64(18), o.Size()) + md5Hash, err := o.Hash(context.Background(), hash.MD5) + require.NoError(t, err) + assert.Equal(t, "464352b1cab5240e44528a56fda33d9d", md5Hash) + fd, err := o.Open(context.Background()) + require.NoError(t, err) + data, err := io.ReadAll(fd) + require.NoError(t, err) + require.NoError(t, fd.Close()) + assert.Equal(t, []byte(files["README.md"]), data) + do, ok := o.(fs.MimeTyper) + require.True(t, ok) + assert.Equal(t, "text/plain; charset=utf-8", do.MimeType(context.Background())) + + o, err = f.NewObject(context.Background(), "data.txt") + require.NoError(t, err) + assert.Equal(t, int64(9), o.Size()) + md5Hash, err = o.Hash(context.Background(), hash.MD5) + require.NoError(t, err) + assert.Equal(t, "5b82f8bf4df2bfb0e66ccaa7306fd024", md5Hash) + fd, err = o.Open(context.Background()) + require.NoError(t, err) + data, err = io.ReadAll(fd) + require.NoError(t, err) + require.NoError(t, fd.Close()) + assert.Equal(t, []byte(files["data.txt"]), data) + do, ok = o.(fs.MimeTyper) + require.True(t, ok) + assert.Equal(t, "text/plain; charset=utf-8", do.MimeType(context.Background())) +} diff --git a/backend/doi/doi_test.go b/backend/doi/doi_test.go new file mode 100644 index 000000000..1d9dfc094 --- /dev/null +++ b/backend/doi/doi_test.go @@ -0,0 +1,16 @@ +// Test DOI filesystem interface +package doi + +import ( + "testing" + + "github.com/rclone/rclone/fstest/fstests" +) + +// TestIntegration runs integration tests against the remote +func TestIntegration(t *testing.T) { + fstests.Run(t, &fstests.Opt{ + RemoteName: "TestDoi:", + 
NilObject: (*Object)(nil), + }) +} diff --git a/backend/doi/invenio.go b/backend/doi/invenio.go new file mode 100644 index 000000000..1f06f67d5 --- /dev/null +++ b/backend/doi/invenio.go @@ -0,0 +1,164 @@ +// Implementation for InvenioRDM + +package doi + +import ( + "context" + "fmt" + "net/http" + "net/url" + "regexp" + "strings" + "time" + + "github.com/rclone/rclone/backend/doi/api" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/lib/rest" +) + +var invenioRecordRegex = regexp.MustCompile(`\/records?\/(.+)`) + +// Returns true if resolvedURL is likely a DOI hosted on an InvenioRDM installation +func activateInvenio(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL) (isActive bool) { + _, _, err := resolveInvenioEndpoint(ctx, srv, pacer, resolvedURL) + return err == nil +} + +// Resolve the main API endpoint for a DOI hosted on an InvenioRDM installation +func resolveInvenioEndpoint(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL) (provider Provider, endpoint *url.URL, err error) { + var res *http.Response + opts := rest.Opts{ + Method: "GET", + RootURL: resolvedURL.String(), + } + err = pacer.Call(func() (bool, error) { + res, err = srv.Call(ctx, &opts) + return shouldRetry(ctx, res, err) + }) + if err != nil { + return "", nil, err + } + + // First, attempt to grab the API URL from the headers + var linksetURL *url.URL + links := parseLinkHeader(res.Header.Get("Link")) + for _, link := range links { + if link.Rel == "linkset" && link.Type == "application/linkset+json" { + parsed, err := url.Parse(link.Href) + if err == nil { + linksetURL = parsed + break + } + } + } + + if linksetURL != nil { + endpoint, err = checkInvenioAPIURL(ctx, srv, pacer, linksetURL) + if err == nil { + return Invenio, endpoint, nil + } + fs.Logf(nil, "using linkset URL failed: %s", err.Error()) + } + + // If there is no linkset header, try to grab the record ID from the URL + recordID := "" + resURL := res.Request.URL + 
match := invenioRecordRegex.FindStringSubmatch(resURL.EscapedPath()) + if match != nil { + recordID = match[1] + guessedURL := res.Request.URL.ResolveReference(&url.URL{ + Path: "/api/records/" + recordID, + }) + endpoint, err = checkInvenioAPIURL(ctx, srv, pacer, guessedURL) + if err == nil { + return Invenio, endpoint, nil + } + fs.Logf(nil, "guessing the URL failed: %s", err.Error()) + } + + return "", nil, fmt.Errorf("could not resolve the Invenio API endpoint for '%s'", resolvedURL.String()) +} + +func checkInvenioAPIURL(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL) (endpoint *url.URL, err error) { + var result api.InvenioRecordResponse + opts := rest.Opts{ + Method: "GET", + RootURL: resolvedURL.String(), + } + err = pacer.Call(func() (bool, error) { + res, err := srv.CallJSON(ctx, &opts, nil, &result) + return shouldRetry(ctx, res, err) + }) + if err != nil { + return nil, err + } + if result.Links.Self == "" { + return nil, fmt.Errorf("could not parse API response from '%s'", resolvedURL.String()) + } + return url.Parse(result.Links.Self) +} + +// invenioProvider implements the doiProvider interface for InvenioRDM installations +type invenioProvider struct { + f *Fs +} + +// ListEntries returns the full list of entries found at the remote, regardless of root +func (ip *invenioProvider) ListEntries(ctx context.Context) (entries []*Object, err error) { + // Use the cache if populated + cachedEntries, found := ip.f.cache.GetMaybe("files") + if found { + parsedEntries, ok := cachedEntries.([]Object) + if ok { + for _, entry := range parsedEntries { + newEntry := entry + entries = append(entries, &newEntry) + } + return entries, nil + } + } + + filesURL := ip.f.endpoint.JoinPath("files") + var result api.InvenioFilesResponse + opts := rest.Opts{ + Method: "GET", + Path: strings.TrimLeft(filesURL.EscapedPath(), "/"), + } + err = ip.f.pacer.Call(func() (bool, error) { + res, err := ip.f.srv.CallJSON(ctx, &opts, nil, &result) + 
return shouldRetry(ctx, res, err) + }) + if err != nil { + return nil, fmt.Errorf("readDir failed: %w", err) + } + for _, file := range result.Entries { + modTime, modTimeErr := time.Parse(time.RFC3339, file.Updated) + if modTimeErr != nil { + fs.Logf(ip.f, "error: could not parse last update time %v", modTimeErr) + modTime = timeUnset + } + entry := &Object{ + fs: ip.f, + remote: file.Key, + contentURL: file.Links.Content, + size: file.Size, + modTime: modTime, + contentType: file.MimeType, + md5: strings.TrimPrefix(file.Checksum, "md5:"), + } + entries = append(entries, entry) + } + // Populate the cache + cacheEntries := []Object{} + for _, entry := range entries { + cacheEntries = append(cacheEntries, *entry) + } + ip.f.cache.Put("files", cacheEntries) + return entries, nil +} + +func newInvenioProvider(f *Fs) doiProvider { + return &invenioProvider{ + f: f, + } +} diff --git a/backend/doi/link_header.go b/backend/doi/link_header.go new file mode 100644 index 000000000..423ea8d89 --- /dev/null +++ b/backend/doi/link_header.go @@ -0,0 +1,75 @@ +package doi + +import ( + "regexp" + "strings" +) + +var linkRegex = regexp.MustCompile(`^<(.+)>$`) +var valueRegex = regexp.MustCompile(`^"(.+)"$`) + +// headerLink represents a link as presented in HTTP headers +// MDN Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Link +type headerLink struct { + Href string + Rel string + Type string + Extras map[string]string +} + +func parseLinkHeader(header string) (links []headerLink) { + for _, link := range strings.Split(header, ",") { + link = strings.TrimSpace(link) + parsed := parseLink(link) + if parsed != nil { + links = append(links, *parsed) + } + } + return links +} + +func parseLink(link string) (parsedLink *headerLink) { + var parts []string + for _, part := range strings.Split(link, ";") { + parts = append(parts, strings.TrimSpace(part)) + } + + match := linkRegex.FindStringSubmatch(parts[0]) + if match == nil { + return nil + } + + 
result := &headerLink{ + Href: match[1], + Extras: map[string]string{}, + } + + for _, keyValue := range parts[1:] { + parsed := parseKeyValue(keyValue) + if parsed != nil { + key, value := parsed[0], parsed[1] + switch strings.ToLower(key) { + case "rel": + result.Rel = value + case "type": + result.Type = value + default: + result.Extras[key] = value + } + } + } + return result +} + +func parseKeyValue(keyValue string) []string { + parts := strings.SplitN(keyValue, "=", 2) + if parts[0] == "" || len(parts) < 2 { + return nil + } + match := valueRegex.FindStringSubmatch(parts[1]) + if match != nil { + parts[1] = match[1] + return parts + } + return parts +} diff --git a/backend/doi/link_header_internal_test.go b/backend/doi/link_header_internal_test.go new file mode 100644 index 000000000..857256b0e --- /dev/null +++ b/backend/doi/link_header_internal_test.go @@ -0,0 +1,44 @@ +package doi + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseLinkHeader(t *testing.T) { + header := " ; rel=\"linkset\" ; type=\"application/linkset+json\"" + links := parseLinkHeader(header) + expected := headerLink{ + Href: "https://zenodo.org/api/records/15063252", + Rel: "linkset", + Type: "application/linkset+json", + Extras: map[string]string{}, + } + assert.Contains(t, links, expected) + + header = "; rel=\"prev\", ; rel=\"next\", ; rel=\"last\", ; rel=\"first\"" + links = parseLinkHeader(header) + expectedList := []headerLink{{ + Href: "https://api.example.com/issues?page=2", + Rel: "prev", + Type: "", + Extras: map[string]string{}, + }, { + Href: "https://api.example.com/issues?page=4", + Rel: "next", + Type: "", + Extras: map[string]string{}, + }, { + Href: "https://api.example.com/issues?page=10", + Rel: "last", + Type: "", + Extras: map[string]string{}, + }, { + Href: "https://api.example.com/issues?page=1", + Rel: "first", + Type: "", + Extras: map[string]string{}, + }} + assert.Equal(t, links, expectedList) +} diff --git 
a/backend/doi/zenodo.go b/backend/doi/zenodo.go new file mode 100644 index 000000000..21f5c22e6 --- /dev/null +++ b/backend/doi/zenodo.go @@ -0,0 +1,47 @@ +// Implementation for Zenodo + +package doi + +import ( + "context" + "fmt" + "net/url" + "regexp" + + "github.com/rclone/rclone/backend/doi/api" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/lib/rest" +) + +var zenodoRecordRegex = regexp.MustCompile(`zenodo[.](.+)`) + +// Resolve the main API endpoint for a DOI hosted on Zenodo +func resolveZenodoEndpoint(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL, doi string) (provider Provider, endpoint *url.URL, err error) { + match := zenodoRecordRegex.FindStringSubmatch(doi) + if match == nil { + return "", nil, fmt.Errorf("could not derive API endpoint URL from '%s'", resolvedURL.String()) + } + + recordID := match[1] + endpointURL := resolvedURL.ResolveReference(&url.URL{Path: "/api/records/" + recordID}) + + var result api.InvenioRecordResponse + opts := rest.Opts{ + Method: "GET", + RootURL: endpointURL.String(), + } + err = pacer.Call(func() (bool, error) { + res, err := srv.CallJSON(ctx, &opts, nil, &result) + return shouldRetry(ctx, res, err) + }) + if err != nil { + return "", nil, err + } + + endpointURL, err = url.Parse(result.Links.Self) + if err != nil { + return "", nil, err + } + + return Zenodo, endpointURL, nil +} diff --git a/docs/content/doi.md b/docs/content/doi.md new file mode 100644 index 000000000..1d8e20dbe --- /dev/null +++ b/docs/content/doi.md @@ -0,0 +1,187 @@ +--- +title: "DOI" +description: "Rclone docs for DOI" +versionIntroduced: "?" +--- + +# {{< icon "fa fa-building-columns" >}} DOI + +The DOI remote is a read only remote for reading files from digital object identifiers (DOI). 
+ +Currently, the DOI backend supports DOIs hosted with: +- [InvenioRDM](https://inveniosoftware.org/products/rdm/) + - [Zenodo](https://zenodo.org) + - [CaltechDATA](https://data.caltech.edu) + - [Other InvenioRDM repositories](https://inveniosoftware.org/showcase/) +- [Dataverse](https://dataverse.org) + - [Harvard Dataverse](https://dataverse.harvard.edu) + - [Other Dataverse repositories](https://dataverse.org/installations) + +Paths are specified as `remote:path` + +Paths may be as deep as required, e.g. `remote:directory/subdirectory`. + +## Configuration + +Here is an example of how to make a remote called `remote`. First run: + + rclone config + +This will guide you through an interactive setup process: + +``` +No remotes found, make a new one? +n) New remote +s) Set configuration password +q) Quit config +n/s/q> n +Enter name for new remote. +name> remote +Type of storage to configure. +Choose a number from below, or type in your own value +[snip] +XX / DOI datasets + \ (doi) +[snip] +Storage> doi +Option doi. +The DOI or the doi.org URL. +Enter a value. +doi> 10.5281/zenodo.5876941 +Edit advanced config? +y) Yes +n) No (default) +y/n> n +Configuration complete. +Options: +- type: doi +- doi: 10.5281/zenodo.5876941 +Keep this "remote" remote? +y) Yes this is OK (default) +e) Edit this remote +d) Delete this remote +y/e/d> y +``` + +{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/doi/doi.go then run make backenddocs" >}} +### Standard options + +Here are the Standard options specific to doi (DOI datasets). + +#### --doi-doi + +The DOI or the doi.org URL. + +Properties: + +- Config: doi +- Env Var: RCLONE_DOI_DOI +- Type: string +- Required: true + +### Advanced options + +Here are the Advanced options specific to doi (DOI datasets). + +#### --doi-provider + +DOI provider. + +The DOI provider can be set when rclone does not automatically recognize a supported DOI provider. 
+ +Properties: + +- Config: provider +- Env Var: RCLONE_DOI_PROVIDER +- Type: string +- Required: false +- Examples: + - "auto" + - Auto-detect provider + - "zenodo" + - Zenodo + - "dataverse" + - Dataverse + - "invenio" + - Invenio + +#### --doi-doi-resolver-api-url + +The URL of the DOI resolver API to use. + +The DOI resolver can be set for testing or for cases when the canonical DOI resolver API cannot be used. + +Defaults to "https://doi.org/api". + +Properties: + +- Config: doi_resolver_api_url +- Env Var: RCLONE_DOI_DOI_RESOLVER_API_URL +- Type: string +- Required: false + +#### --doi-description + +Description of the remote. + +Properties: + +- Config: description +- Env Var: RCLONE_DOI_DESCRIPTION +- Type: string +- Required: false + +## Backend commands + +Here are the commands specific to the doi backend. + +Run them with + + rclone backend COMMAND remote: + +The help below will explain what arguments each command takes. + +See the [backend](/commands/rclone_backend/) command for more +info on how to pass options and arguments. + +These can be run on a running backend using the rc command +[backend/command](/rc/#backend-command). + +### metadata + +Show metadata about the DOI. + + rclone backend metadata remote: [options] [+] + +This command returns a JSON object with some information about the DOI. + + rclone backend metadata doi: + +It returns a JSON object representing metadata about the DOI. + + +### set + +Set command for updating the config parameters. + + rclone backend set remote: [options] [+] + +This set command can be used to update the config parameters +for a running doi backend. + +Usage Examples: + + rclone backend set doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2] + rclone rc backend/command command=set fs=doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2] + rclone rc backend/command command=set fs=doi: -o doi=NEW_DOI + +The option keys are named as they are in the config file. 
+ +This rebuilds the connection to the doi backend when it is called with +the new parameters. Only new parameters need be passed as the values +will default to those currently in use. + +It doesn't return anything. + + +{{< rem autogenerated options stop >}}