mirror of
https://github.com/rclone/rclone.git
synced 2025-06-23 05:21:50 +02:00
doi: add new doi backend
Add a new backend to support mounting datasets published with a digital object identifier (DOI).
This commit is contained in:
parent
51fd697c7a
commit
3b3096c940
@ -14,6 +14,7 @@ import (
|
|||||||
_ "github.com/rclone/rclone/backend/combine"
|
_ "github.com/rclone/rclone/backend/combine"
|
||||||
_ "github.com/rclone/rclone/backend/compress"
|
_ "github.com/rclone/rclone/backend/compress"
|
||||||
_ "github.com/rclone/rclone/backend/crypt"
|
_ "github.com/rclone/rclone/backend/crypt"
|
||||||
|
_ "github.com/rclone/rclone/backend/doi"
|
||||||
_ "github.com/rclone/rclone/backend/drive"
|
_ "github.com/rclone/rclone/backend/drive"
|
||||||
_ "github.com/rclone/rclone/backend/dropbox"
|
_ "github.com/rclone/rclone/backend/dropbox"
|
||||||
_ "github.com/rclone/rclone/backend/fichier"
|
_ "github.com/rclone/rclone/backend/fichier"
|
||||||
|
38
backend/doi/api/dataversetypes.go
Normal file
38
backend/doi/api/dataversetypes.go
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
// Type definitions specific to Dataverse
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
// DataverseDatasetResponse is returned by the Dataverse dataset API
|
||||||
|
type DataverseDatasetResponse struct {
|
||||||
|
Status string `json:"status"`
|
||||||
|
Data DataverseDataset `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// DataverseDataset is the representation of a dataset
|
||||||
|
type DataverseDataset struct {
|
||||||
|
LatestVersion DataverseDatasetVersion `json:"latestVersion"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// DataverseDatasetVersion is the representation of a dataset version
|
||||||
|
type DataverseDatasetVersion struct {
|
||||||
|
LastUpdateTime string `json:"lastUpdateTime"`
|
||||||
|
Files []DataverseFile `json:"files"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// DataverseFile is the representation of a file found in a dataset
|
||||||
|
type DataverseFile struct {
|
||||||
|
DirectoryLabel string `json:"directoryLabel"`
|
||||||
|
DataFile DataverseDataFile `json:"dataFile"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// DataverseDataFile represents file metadata details
|
||||||
|
type DataverseDataFile struct {
|
||||||
|
ID int64 `json:"id"`
|
||||||
|
Filename string `json:"filename"`
|
||||||
|
ContentType string `json:"contentType"`
|
||||||
|
FileSize int64 `json:"filesize"`
|
||||||
|
OriginalFileFormat string `json:"originalFileFormat"`
|
||||||
|
OriginalFileSize int64 `json:"originalFileSize"`
|
||||||
|
OriginalFileName string `json:"originalFileName"`
|
||||||
|
MD5 string `json:"md5"`
|
||||||
|
}
|
33
backend/doi/api/inveniotypes.go
Normal file
33
backend/doi/api/inveniotypes.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
// Type definitions specific to InvenioRDM
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
// InvenioRecordResponse is the representation of a record stored in InvenioRDM
|
||||||
|
type InvenioRecordResponse struct {
|
||||||
|
Links InvenioRecordResponseLinks `json:"links"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// InvenioRecordResponseLinks represents a record's links
|
||||||
|
type InvenioRecordResponseLinks struct {
|
||||||
|
Self string `json:"self"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// InvenioFilesResponse is the representation of a record's files
|
||||||
|
type InvenioFilesResponse struct {
|
||||||
|
Entries []InvenioFilesResponseEntry `json:"entries"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// InvenioFilesResponseEntry is the representation of a file entry
|
||||||
|
type InvenioFilesResponseEntry struct {
|
||||||
|
Key string `json:"key"`
|
||||||
|
Checksum string `json:"checksum"`
|
||||||
|
Size int64 `json:"size"`
|
||||||
|
Updated string `json:"updated"`
|
||||||
|
MimeType string `json:"mimetype"`
|
||||||
|
Links InvenioFilesResponseEntryLinks `json:"links"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// InvenioFilesResponseEntryLinks represents file links details
|
||||||
|
type InvenioFilesResponseEntryLinks struct {
|
||||||
|
Content string `json:"content"`
|
||||||
|
}
|
26
backend/doi/api/types.go
Normal file
26
backend/doi/api/types.go
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
// Package api has general type definitions for doi
|
||||||
|
package api
|
||||||
|
|
||||||
|
// DoiResolverResponse is returned by the DOI resolver API
|
||||||
|
//
|
||||||
|
// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation
|
||||||
|
type DoiResolverResponse struct {
|
||||||
|
ResponseCode int `json:"responseCode"`
|
||||||
|
Handle string `json:"handle"`
|
||||||
|
Values []DoiResolverResponseValue `json:"values"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// DoiResolverResponseValue is a single handle record value
|
||||||
|
type DoiResolverResponseValue struct {
|
||||||
|
Index int `json:"index"`
|
||||||
|
Type string `json:"type"`
|
||||||
|
Data DoiResolverResponseValueData `json:"data"`
|
||||||
|
TTL int `json:"ttl"`
|
||||||
|
Timestamp string `json:"timestamp"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// DoiResolverResponseValueData is the data held in a handle value
|
||||||
|
type DoiResolverResponseValueData struct {
|
||||||
|
Format string `json:"format"`
|
||||||
|
Value any `json:"value"`
|
||||||
|
}
|
112
backend/doi/dataverse.go
Normal file
112
backend/doi/dataverse.go
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
// Implementation for Dataverse
|
||||||
|
|
||||||
|
package doi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"path"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/rclone/rclone/backend/doi/api"
|
||||||
|
"github.com/rclone/rclone/fs"
|
||||||
|
"github.com/rclone/rclone/lib/rest"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Returns true if resolvedURL is likely a DOI hosted on a Dataverse intallation
|
||||||
|
func activateDataverse(resolvedURL *url.URL) (isActive bool) {
|
||||||
|
queryValues := resolvedURL.Query()
|
||||||
|
persistentID := queryValues.Get("persistentId")
|
||||||
|
return persistentID != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve the main API endpoint for a DOI hosted on a Dataverse installation
|
||||||
|
func resolveDataverseEndpoint(resolvedURL *url.URL) (provider Provider, endpoint *url.URL, err error) {
|
||||||
|
queryValues := resolvedURL.Query()
|
||||||
|
persistentID := queryValues.Get("persistentId")
|
||||||
|
|
||||||
|
query := url.Values{}
|
||||||
|
query.Add("persistentId", persistentID)
|
||||||
|
endpointURL := resolvedURL.ResolveReference(&url.URL{Path: "/api/datasets/:persistentId/", RawQuery: query.Encode()})
|
||||||
|
|
||||||
|
return Dataverse, endpointURL, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// dataverseProvider implements the doiProvider interface for Dataverse installations
|
||||||
|
type dataverseProvider struct {
|
||||||
|
f *Fs
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListEntries returns the full list of entries found at the remote, regardless of root
|
||||||
|
func (dp *dataverseProvider) ListEntries(ctx context.Context) (entries []*Object, err error) {
|
||||||
|
// Use the cache if populated
|
||||||
|
cachedEntries, found := dp.f.cache.GetMaybe("files")
|
||||||
|
if found {
|
||||||
|
parsedEntries, ok := cachedEntries.([]Object)
|
||||||
|
if ok {
|
||||||
|
for _, entry := range parsedEntries {
|
||||||
|
newEntry := entry
|
||||||
|
entries = append(entries, &newEntry)
|
||||||
|
}
|
||||||
|
return entries, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
filesURL := dp.f.endpoint
|
||||||
|
var res *http.Response
|
||||||
|
var result api.DataverseDatasetResponse
|
||||||
|
opts := rest.Opts{
|
||||||
|
Method: "GET",
|
||||||
|
Path: strings.TrimLeft(filesURL.EscapedPath(), "/"),
|
||||||
|
Parameters: filesURL.Query(),
|
||||||
|
}
|
||||||
|
err = dp.f.pacer.Call(func() (bool, error) {
|
||||||
|
res, err = dp.f.srv.CallJSON(ctx, &opts, nil, &result)
|
||||||
|
return shouldRetry(ctx, res, err)
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("readDir failed: %w", err)
|
||||||
|
}
|
||||||
|
modTime, modTimeErr := time.Parse(time.RFC3339, result.Data.LatestVersion.LastUpdateTime)
|
||||||
|
if modTimeErr != nil {
|
||||||
|
fs.Logf(dp.f, "error: could not parse last update time %v", modTimeErr)
|
||||||
|
modTime = timeUnset
|
||||||
|
}
|
||||||
|
for _, file := range result.Data.LatestVersion.Files {
|
||||||
|
contentURLPath := fmt.Sprintf("/api/access/datafile/%d", file.DataFile.ID)
|
||||||
|
query := url.Values{}
|
||||||
|
query.Add("format", "original")
|
||||||
|
contentURL := dp.f.endpoint.ResolveReference(&url.URL{Path: contentURLPath, RawQuery: query.Encode()})
|
||||||
|
entry := &Object{
|
||||||
|
fs: dp.f,
|
||||||
|
remote: path.Join(file.DirectoryLabel, file.DataFile.Filename),
|
||||||
|
contentURL: contentURL.String(),
|
||||||
|
size: file.DataFile.FileSize,
|
||||||
|
modTime: modTime,
|
||||||
|
md5: file.DataFile.MD5,
|
||||||
|
contentType: file.DataFile.ContentType,
|
||||||
|
}
|
||||||
|
if file.DataFile.OriginalFileName != "" {
|
||||||
|
entry.remote = path.Join(file.DirectoryLabel, file.DataFile.OriginalFileName)
|
||||||
|
entry.size = file.DataFile.OriginalFileSize
|
||||||
|
entry.contentType = file.DataFile.OriginalFileFormat
|
||||||
|
}
|
||||||
|
entries = append(entries, entry)
|
||||||
|
}
|
||||||
|
// Populate the cache
|
||||||
|
cacheEntries := []Object{}
|
||||||
|
for _, entry := range entries {
|
||||||
|
cacheEntries = append(cacheEntries, *entry)
|
||||||
|
}
|
||||||
|
dp.f.cache.Put("files", cacheEntries)
|
||||||
|
return entries, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func newDataverseProvider(f *Fs) doiProvider {
|
||||||
|
return &dataverseProvider{
|
||||||
|
f: f,
|
||||||
|
}
|
||||||
|
}
|
649
backend/doi/doi.go
Normal file
649
backend/doi/doi.go
Normal file
@ -0,0 +1,649 @@
|
|||||||
|
// Package doi provides a filesystem interface for digital objects identified by DOIs.
|
||||||
|
//
|
||||||
|
// See: https://www.doi.org/the-identifier/what-is-a-doi/
|
||||||
|
package doi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"path"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/rclone/rclone/backend/doi/api"
|
||||||
|
"github.com/rclone/rclone/fs"
|
||||||
|
"github.com/rclone/rclone/fs/config/configmap"
|
||||||
|
"github.com/rclone/rclone/fs/config/configstruct"
|
||||||
|
"github.com/rclone/rclone/fs/fserrors"
|
||||||
|
"github.com/rclone/rclone/fs/fshttp"
|
||||||
|
"github.com/rclone/rclone/fs/hash"
|
||||||
|
"github.com/rclone/rclone/lib/cache"
|
||||||
|
"github.com/rclone/rclone/lib/pacer"
|
||||||
|
"github.com/rclone/rclone/lib/rest"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// the URL of the DOI resolver
|
||||||
|
//
|
||||||
|
// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation
|
||||||
|
doiResolverAPIURL = "https://doi.org/api"
|
||||||
|
minSleep = 10 * time.Millisecond
|
||||||
|
maxSleep = 2 * time.Second
|
||||||
|
decayConstant = 2 // bigger for slower decay, exponential
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
errorReadOnly = errors.New("doi remotes are read only")
|
||||||
|
timeUnset = time.Unix(0, 0)
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
fsi := &fs.RegInfo{
|
||||||
|
Name: "doi",
|
||||||
|
Description: "DOI datasets",
|
||||||
|
NewFs: NewFs,
|
||||||
|
CommandHelp: commandHelp,
|
||||||
|
Options: []fs.Option{{
|
||||||
|
Name: "doi",
|
||||||
|
Help: "The DOI or the doi.org URL.",
|
||||||
|
Required: true,
|
||||||
|
}, {
|
||||||
|
Name: fs.ConfigProvider,
|
||||||
|
Help: `DOI provider.
|
||||||
|
|
||||||
|
The DOI provider can be set when rclone does not automatically recognize a supported DOI provider.`,
|
||||||
|
Examples: []fs.OptionExample{
|
||||||
|
{
|
||||||
|
Value: "auto",
|
||||||
|
Help: "Auto-detect provider",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: string(Zenodo),
|
||||||
|
Help: "Zenodo",
|
||||||
|
}, {
|
||||||
|
Value: string(Dataverse),
|
||||||
|
Help: "Dataverse",
|
||||||
|
}, {
|
||||||
|
Value: string(Invenio),
|
||||||
|
Help: "Invenio",
|
||||||
|
}},
|
||||||
|
Required: false,
|
||||||
|
Advanced: true,
|
||||||
|
}, {
|
||||||
|
Name: "doi_resolver_api_url",
|
||||||
|
Help: `The URL of the DOI resolver API to use.
|
||||||
|
|
||||||
|
The DOI resolver can be set for testing or for cases when the the canonical DOI resolver API cannot be used.
|
||||||
|
|
||||||
|
Defaults to "https://doi.org/api".`,
|
||||||
|
Required: false,
|
||||||
|
Advanced: true,
|
||||||
|
}},
|
||||||
|
}
|
||||||
|
fs.Register(fsi)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Provider defines the type of provider hosting the DOI
|
||||||
|
type Provider string
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Zenodo provider, see https://zenodo.org
|
||||||
|
Zenodo Provider = "zenodo"
|
||||||
|
// Dataverse provider, see https://dataverse.harvard.edu
|
||||||
|
Dataverse Provider = "dataverse"
|
||||||
|
// Invenio provider, see https://inveniordm.docs.cern.ch
|
||||||
|
Invenio Provider = "invenio"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Options defines the configuration for this backend
|
||||||
|
type Options struct {
|
||||||
|
Doi string `config:"doi"` // The DOI, a digital identifier of an object, usually a dataset
|
||||||
|
Provider string `config:"provider"` // The DOI provider
|
||||||
|
DoiResolverAPIURL string `config:"doi_resolver_api_url"` // The URL of the DOI resolver API to use.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fs stores the interface to the remote HTTP files
|
||||||
|
type Fs struct {
|
||||||
|
name string // name of this remote
|
||||||
|
root string // the path we are working on
|
||||||
|
provider Provider // the DOI provider
|
||||||
|
doiProvider doiProvider // the interface used to interact with the DOI provider
|
||||||
|
features *fs.Features // optional features
|
||||||
|
opt Options // options for this backend
|
||||||
|
ci *fs.ConfigInfo // global config
|
||||||
|
endpoint *url.URL // the main API endpoint for this remote
|
||||||
|
endpointURL string // endpoint as a string
|
||||||
|
srv *rest.Client // the connection to the server
|
||||||
|
pacer *fs.Pacer // pacer for API calls
|
||||||
|
cache *cache.Cache // a cache for the remote metadata
|
||||||
|
}
|
||||||
|
|
||||||
|
// Object is a remote object that has been stat'd (so it exists, but is not necessarily open for reading)
|
||||||
|
type Object struct {
|
||||||
|
fs *Fs // what this object is part of
|
||||||
|
remote string // the remote path
|
||||||
|
contentURL string // the URL where the contents of the file can be downloaded
|
||||||
|
size int64 // size of the object
|
||||||
|
modTime time.Time // modification time of the object
|
||||||
|
contentType string // content type of the object
|
||||||
|
md5 string // MD5 hash of the object content
|
||||||
|
}
|
||||||
|
|
||||||
|
// doiProvider is the interface used to list objects in a DOI
|
||||||
|
type doiProvider interface {
|
||||||
|
// ListEntries returns the full list of entries found at the remote, regardless of root
|
||||||
|
ListEntries(ctx context.Context) (entries []*Object, err error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the input string as a DOI
|
||||||
|
// Examples:
|
||||||
|
// 10.1000/182 -> 10.1000/182
|
||||||
|
// https://doi.org/10.1000/182 -> 10.1000/182
|
||||||
|
// doi:10.1000/182 -> 10.1000/182
|
||||||
|
func parseDoi(doi string) string {
|
||||||
|
doiURL, err := url.Parse(doi)
|
||||||
|
if err != nil {
|
||||||
|
return doi
|
||||||
|
}
|
||||||
|
if doiURL.Scheme == "doi" {
|
||||||
|
return strings.TrimLeft(strings.TrimPrefix(doi, "doi:"), "/")
|
||||||
|
}
|
||||||
|
if strings.HasSuffix(doiURL.Hostname(), "doi.org") {
|
||||||
|
return strings.TrimLeft(doiURL.Path, "/")
|
||||||
|
}
|
||||||
|
return doi
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve a DOI to a URL
|
||||||
|
// Reference: https://www.doi.org/the-identifier/resources/factsheets/doi-resolution-documentation
|
||||||
|
func resolveDoiURL(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, opt *Options) (doiURL *url.URL, err error) {
|
||||||
|
resolverURL := opt.DoiResolverAPIURL
|
||||||
|
if resolverURL == "" {
|
||||||
|
resolverURL = doiResolverAPIURL
|
||||||
|
}
|
||||||
|
|
||||||
|
var result api.DoiResolverResponse
|
||||||
|
params := url.Values{}
|
||||||
|
params.Add("index", "1")
|
||||||
|
opts := rest.Opts{
|
||||||
|
Method: "GET",
|
||||||
|
RootURL: resolverURL,
|
||||||
|
Path: "/handles/" + opt.Doi,
|
||||||
|
Parameters: params,
|
||||||
|
}
|
||||||
|
err = pacer.Call(func() (bool, error) {
|
||||||
|
res, err := srv.CallJSON(ctx, &opts, nil, &result)
|
||||||
|
return shouldRetry(ctx, res, err)
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if result.ResponseCode != 1 {
|
||||||
|
return nil, fmt.Errorf("could not resolve DOI (error code %d)", result.ResponseCode)
|
||||||
|
}
|
||||||
|
resolvedURLStr := ""
|
||||||
|
for _, value := range result.Values {
|
||||||
|
if value.Type == "URL" && value.Data.Format == "string" {
|
||||||
|
valueStr, ok := value.Data.Value.(string)
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("could not resolve DOI (incorrect response format)")
|
||||||
|
}
|
||||||
|
resolvedURLStr = valueStr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resolvedURL, err := url.Parse(resolvedURLStr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return resolvedURL, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve the passed configuration into a provider and enpoint
|
||||||
|
func resolveEndpoint(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, opt *Options) (provider Provider, endpoint *url.URL, err error) {
|
||||||
|
resolvedURL, err := resolveDoiURL(ctx, srv, pacer, opt)
|
||||||
|
if err != nil {
|
||||||
|
return "", nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch opt.Provider {
|
||||||
|
case string(Dataverse):
|
||||||
|
return resolveDataverseEndpoint(resolvedURL)
|
||||||
|
case string(Invenio):
|
||||||
|
return resolveInvenioEndpoint(ctx, srv, pacer, resolvedURL)
|
||||||
|
case string(Zenodo):
|
||||||
|
return resolveZenodoEndpoint(ctx, srv, pacer, resolvedURL, opt.Doi)
|
||||||
|
}
|
||||||
|
|
||||||
|
hostname := strings.ToLower(resolvedURL.Hostname())
|
||||||
|
if hostname == "dataverse.harvard.edu" || activateDataverse(resolvedURL) {
|
||||||
|
return resolveDataverseEndpoint(resolvedURL)
|
||||||
|
}
|
||||||
|
if hostname == "zenodo.org" || strings.HasSuffix(hostname, ".zenodo.org") {
|
||||||
|
return resolveZenodoEndpoint(ctx, srv, pacer, resolvedURL, opt.Doi)
|
||||||
|
}
|
||||||
|
if activateInvenio(ctx, srv, pacer, resolvedURL) {
|
||||||
|
return resolveInvenioEndpoint(ctx, srv, pacer, resolvedURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", nil, fmt.Errorf("provider '%s' is not supported", resolvedURL.Hostname())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make the http connection from the passed options
|
||||||
|
func (f *Fs) httpConnection(ctx context.Context, opt *Options) (isFile bool, err error) {
|
||||||
|
provider, endpoint, err := resolveEndpoint(ctx, f.srv, f.pacer, opt)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update f with the new parameters
|
||||||
|
f.srv.SetRoot(endpoint.ResolveReference(&url.URL{Path: "/"}).String())
|
||||||
|
f.endpoint = endpoint
|
||||||
|
f.endpointURL = endpoint.String()
|
||||||
|
f.provider = provider
|
||||||
|
f.opt.Provider = string(provider)
|
||||||
|
|
||||||
|
switch f.provider {
|
||||||
|
case Dataverse:
|
||||||
|
f.doiProvider = newDataverseProvider(f)
|
||||||
|
case Invenio, Zenodo:
|
||||||
|
f.doiProvider = newInvenioProvider(f)
|
||||||
|
default:
|
||||||
|
return false, fmt.Errorf("provider type '%s' not supported", f.provider)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine if the root is a file
|
||||||
|
entries, err := f.doiProvider.ListEntries(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
for _, entry := range entries {
|
||||||
|
if entry.remote == f.root {
|
||||||
|
isFile = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return isFile, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// retryErrorCodes is a slice of error codes that we will retry
|
||||||
|
var retryErrorCodes = []int{
|
||||||
|
429, // Too Many Requests.
|
||||||
|
500, // Internal Server Error
|
||||||
|
502, // Bad Gateway
|
||||||
|
503, // Service Unavailable
|
||||||
|
504, // Gateway Timeout
|
||||||
|
509, // Bandwidth Limit Exceeded
|
||||||
|
}
|
||||||
|
|
||||||
|
// shouldRetry returns a boolean as to whether this res and err
|
||||||
|
// deserve to be retried. It returns the err as a convenience.
|
||||||
|
func shouldRetry(ctx context.Context, res *http.Response, err error) (bool, error) {
|
||||||
|
if fserrors.ContextError(ctx, &err) {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return fserrors.ShouldRetry(err) || fserrors.ShouldRetryHTTP(res, retryErrorCodes), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFs creates a new Fs object from the name and root. It connects to
|
||||||
|
// the host specified in the config file.
|
||||||
|
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
|
||||||
|
root = strings.Trim(root, "/")
|
||||||
|
|
||||||
|
// Parse config into Options struct
|
||||||
|
opt := new(Options)
|
||||||
|
err := configstruct.Set(m, opt)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
opt.Doi = parseDoi(opt.Doi)
|
||||||
|
|
||||||
|
client := fshttp.NewClient(ctx)
|
||||||
|
ci := fs.GetConfig(ctx)
|
||||||
|
f := &Fs{
|
||||||
|
name: name,
|
||||||
|
root: root,
|
||||||
|
opt: *opt,
|
||||||
|
ci: ci,
|
||||||
|
srv: rest.NewClient(client),
|
||||||
|
pacer: fs.NewPacer(ctx, pacer.NewDefault(pacer.MinSleep(minSleep), pacer.MaxSleep(maxSleep), pacer.DecayConstant(decayConstant))),
|
||||||
|
cache: cache.New(),
|
||||||
|
}
|
||||||
|
f.features = (&fs.Features{
|
||||||
|
CanHaveEmptyDirectories: true,
|
||||||
|
}).Fill(ctx, f)
|
||||||
|
|
||||||
|
isFile, err := f.httpConnection(ctx, opt)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if isFile {
|
||||||
|
// return an error with an fs which points to the parent
|
||||||
|
newRoot := path.Dir(f.root)
|
||||||
|
if newRoot == "." {
|
||||||
|
newRoot = ""
|
||||||
|
}
|
||||||
|
f.root = newRoot
|
||||||
|
return f, fs.ErrorIsFile
|
||||||
|
}
|
||||||
|
|
||||||
|
return f, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the configured name of the file system
|
||||||
|
func (f *Fs) Name() string {
|
||||||
|
return f.name
|
||||||
|
}
|
||||||
|
|
||||||
|
// Root returns the root for the filesystem
|
||||||
|
func (f *Fs) Root() string {
|
||||||
|
return f.root
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the URL for the filesystem
|
||||||
|
func (f *Fs) String() string {
|
||||||
|
return fmt.Sprintf("DOI %s", f.opt.Doi)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Features returns the optional features of this Fs
|
||||||
|
func (f *Fs) Features() *fs.Features {
|
||||||
|
return f.features
|
||||||
|
}
|
||||||
|
|
||||||
|
// Precision is the remote http file system's modtime precision, which we have no way of knowing. We estimate at 1s
|
||||||
|
func (f *Fs) Precision() time.Duration {
|
||||||
|
return time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hashes returns hash.HashNone to indicate remote hashing is unavailable
|
||||||
|
func (f *Fs) Hashes() hash.Set {
|
||||||
|
return hash.Set(hash.MD5)
|
||||||
|
// return hash.Set(hash.None)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mkdir makes the root directory of the Fs object
|
||||||
|
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
|
||||||
|
return errorReadOnly
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove a remote http file object
|
||||||
|
func (o *Object) Remove(ctx context.Context) error {
|
||||||
|
return errorReadOnly
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rmdir removes the root directory of the Fs object
|
||||||
|
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
|
||||||
|
return errorReadOnly
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewObject creates a new remote http file object
|
||||||
|
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
|
||||||
|
entries, err := f.doiProvider.ListEntries(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
remoteFullPath := remote
|
||||||
|
if f.root != "" {
|
||||||
|
remoteFullPath = path.Join(f.root, remote)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range entries {
|
||||||
|
if entry.Remote() == remoteFullPath {
|
||||||
|
return entry, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, fs.ErrorObjectNotFound
|
||||||
|
}
|
||||||
|
|
||||||
|
// List the objects and directories in dir into entries. The
|
||||||
|
// entries can be returned in any order but should be for a
|
||||||
|
// complete directory.
|
||||||
|
//
|
||||||
|
// dir should be "" to list the root, and should not have
|
||||||
|
// trailing slashes.
|
||||||
|
//
|
||||||
|
// This should return ErrDirNotFound if the directory isn't
|
||||||
|
// found.
|
||||||
|
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
||||||
|
fileEntries, err := f.doiProvider.ListEntries(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error listing %q: %w", dir, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fullDir := path.Join(f.root, dir)
|
||||||
|
if fullDir != "" {
|
||||||
|
fullDir += "/"
|
||||||
|
}
|
||||||
|
|
||||||
|
dirPaths := map[string]bool{}
|
||||||
|
for _, entry := range fileEntries {
|
||||||
|
// First, filter out files not in `fullDir`
|
||||||
|
if !strings.HasPrefix(entry.remote, fullDir) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Then, find entries in subfolers
|
||||||
|
remotePath := entry.remote
|
||||||
|
if fullDir != "" {
|
||||||
|
remotePath = strings.TrimLeft(strings.TrimPrefix(remotePath, fullDir), "/")
|
||||||
|
}
|
||||||
|
parts := strings.SplitN(remotePath, "/", 2)
|
||||||
|
if len(parts) == 1 {
|
||||||
|
newEntry := *entry
|
||||||
|
newEntry.remote = path.Join(dir, remotePath)
|
||||||
|
entries = append(entries, &newEntry)
|
||||||
|
} else {
|
||||||
|
dirPaths[path.Join(dir, parts[0])] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for dirPath := range dirPaths {
|
||||||
|
entry := fs.NewDir(dirPath, time.Time{})
|
||||||
|
entries = append(entries, entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
return entries, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put in to the remote path with the modTime given of the given size
|
||||||
|
//
|
||||||
|
// May create the object even if it returns an error - if so
|
||||||
|
// will return the object and the error, otherwise will return
|
||||||
|
// nil and the error
|
||||||
|
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
||||||
|
return nil, errorReadOnly
|
||||||
|
}
|
||||||
|
|
||||||
|
// PutStream uploads to the remote path with the modTime given of indeterminate size
|
||||||
|
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
||||||
|
return nil, errorReadOnly
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fs is the filesystem this remote http file object is located within
|
||||||
|
func (o *Object) Fs() fs.Info {
|
||||||
|
return o.fs
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the URL to the remote HTTP file
|
||||||
|
func (o *Object) String() string {
|
||||||
|
if o == nil {
|
||||||
|
return "<nil>"
|
||||||
|
}
|
||||||
|
return o.remote
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remote the name of the remote HTTP file, relative to the fs root
|
||||||
|
func (o *Object) Remote() string {
|
||||||
|
return o.remote
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hash returns "" since HTTP (in Go or OpenSSH) doesn't support remote calculation of hashes
|
||||||
|
func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) {
|
||||||
|
if t != hash.MD5 {
|
||||||
|
return "", hash.ErrUnsupported
|
||||||
|
}
|
||||||
|
return o.md5, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size returns the size in bytes of the remote http file
|
||||||
|
func (o *Object) Size() int64 {
|
||||||
|
return o.size
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModTime returns the modification time of the remote http file
|
||||||
|
func (o *Object) ModTime(ctx context.Context) time.Time {
|
||||||
|
return o.modTime
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetModTime sets the modification and access time to the specified time
|
||||||
|
//
|
||||||
|
// it also updates the info field
|
||||||
|
func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
|
||||||
|
return errorReadOnly
|
||||||
|
}
|
||||||
|
|
||||||
|
// Storable returns whether the remote http file is a regular file (not a directory, symbolic link, block device, character device, named pipe, etc.)
|
||||||
|
func (o *Object) Storable() bool {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open a remote http file object for reading. Seek is supported
|
||||||
|
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
|
||||||
|
fs.FixRangeOption(options, o.size)
|
||||||
|
opts := rest.Opts{
|
||||||
|
Method: "GET",
|
||||||
|
RootURL: o.contentURL,
|
||||||
|
Options: options,
|
||||||
|
}
|
||||||
|
var res *http.Response
|
||||||
|
err = o.fs.pacer.Call(func() (bool, error) {
|
||||||
|
res, err = o.fs.srv.Call(ctx, &opts)
|
||||||
|
return shouldRetry(ctx, res, err)
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("Open failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle non-compliant redirects
|
||||||
|
if res.Header.Get("Location") != "" {
|
||||||
|
newURL, err := res.Location()
|
||||||
|
if err == nil {
|
||||||
|
opts.RootURL = newURL.String()
|
||||||
|
err = o.fs.pacer.Call(func() (bool, error) {
|
||||||
|
res, err = o.fs.srv.Call(ctx, &opts)
|
||||||
|
return shouldRetry(ctx, res, err)
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("Open failed: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.Body, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update in to the object with the modTime given of the given size
|
||||||
|
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
|
||||||
|
return errorReadOnly
|
||||||
|
}
|
||||||
|
|
||||||
|
// MimeType of an Object if known, "" otherwise
|
||||||
|
func (o *Object) MimeType(ctx context.Context) string {
|
||||||
|
return o.contentType
|
||||||
|
}
|
||||||
|
|
||||||
|
var commandHelp = []fs.CommandHelp{{
|
||||||
|
Name: "metadata",
|
||||||
|
Short: "Show metadata about the DOI.",
|
||||||
|
Long: `This command returns a JSON object with some information about the DOI.
|
||||||
|
|
||||||
|
rclone backend medatadata doi:
|
||||||
|
|
||||||
|
It returns a JSON object representing metadata about the DOI.
|
||||||
|
`,
|
||||||
|
}, {
|
||||||
|
Name: "set",
|
||||||
|
Short: "Set command for updating the config parameters.",
|
||||||
|
Long: `This set command can be used to update the config parameters
|
||||||
|
for a running doi backend.
|
||||||
|
|
||||||
|
Usage Examples:
|
||||||
|
|
||||||
|
rclone backend set doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||||
|
rclone rc backend/command command=set fs=doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||||
|
rclone rc backend/command command=set fs=doi: -o doi=NEW_DOI
|
||||||
|
|
||||||
|
The option keys are named as they are in the config file.
|
||||||
|
|
||||||
|
This rebuilds the connection to the doi backend when it is called with
|
||||||
|
the new parameters. Only new parameters need be passed as the values
|
||||||
|
will default to those currently in use.
|
||||||
|
|
||||||
|
It doesn't return anything.
|
||||||
|
`,
|
||||||
|
}}
|
||||||
|
|
||||||
|
// Command the backend to run a named command
|
||||||
|
//
|
||||||
|
// The command run is name
|
||||||
|
// args may be used to read arguments from
|
||||||
|
// opts may be used to read optional arguments from
|
||||||
|
//
|
||||||
|
// The result should be capable of being JSON encoded
|
||||||
|
// If it is a string or a []string it will be shown to the user
|
||||||
|
// otherwise it will be JSON encoded and shown to the user like that
|
||||||
|
func (f *Fs) Command(ctx context.Context, name string, arg []string, opt map[string]string) (out interface{}, err error) {
|
||||||
|
switch name {
|
||||||
|
case "metadata":
|
||||||
|
return f.ShowMetadata(ctx)
|
||||||
|
case "set":
|
||||||
|
newOpt := f.opt
|
||||||
|
err := configstruct.Set(configmap.Simple(opt), &newOpt)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("reading config: %w", err)
|
||||||
|
}
|
||||||
|
_, err = f.httpConnection(ctx, &newOpt)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("updating session: %w", err)
|
||||||
|
}
|
||||||
|
f.opt = newOpt
|
||||||
|
keys := []string{}
|
||||||
|
for k := range opt {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
fs.Logf(f, "Updated config values: %s", strings.Join(keys, ", "))
|
||||||
|
return nil, nil
|
||||||
|
default:
|
||||||
|
return nil, fs.ErrorCommandNotFound
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ShowMetadata returns some metadata about the corresponding DOI
|
||||||
|
func (f *Fs) ShowMetadata(ctx context.Context) (metadata interface{}, err error) {
|
||||||
|
doiURL, err := url.Parse("https://doi.org/" + f.opt.Doi)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
info := map[string]any{}
|
||||||
|
info["DOI"] = f.opt.Doi
|
||||||
|
info["URL"] = doiURL.String()
|
||||||
|
info["metadataURL"] = f.endpointURL
|
||||||
|
info["provider"] = f.provider
|
||||||
|
return info, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the interfaces are satisfied
//
// These compile-time assertions guarantee that Fs and Object implement
// the optional rclone interfaces the backend advertises.
var (
	_ fs.Fs          = (*Fs)(nil)
	_ fs.PutStreamer = (*Fs)(nil)
	_ fs.Commander   = (*Fs)(nil)
	_ fs.Object      = (*Object)(nil)
	_ fs.MimeTyper   = (*Object)(nil)
)
|
260
backend/doi/doi_internal_test.go
Normal file
260
backend/doi/doi_internal_test.go
Normal file
@ -0,0 +1,260 @@
|
|||||||
|
package doi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/md5"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"net/url"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/rclone/rclone/backend/doi/api"
|
||||||
|
"github.com/rclone/rclone/fs"
|
||||||
|
"github.com/rclone/rclone/fs/config/configmap"
|
||||||
|
"github.com/rclone/rclone/fs/hash"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
var remoteName = "TestDoi"
|
||||||
|
|
||||||
|
func TestParseDoi(t *testing.T) {
|
||||||
|
// 10.1000/182 -> 10.1000/182
|
||||||
|
doi := "10.1000/182"
|
||||||
|
parsed := parseDoi(doi)
|
||||||
|
assert.Equal(t, "10.1000/182", parsed)
|
||||||
|
|
||||||
|
// https://doi.org/10.1000/182 -> 10.1000/182
|
||||||
|
doi = "https://doi.org/10.1000/182"
|
||||||
|
parsed = parseDoi(doi)
|
||||||
|
assert.Equal(t, "10.1000/182", parsed)
|
||||||
|
|
||||||
|
// https://dx.doi.org/10.1000/182 -> 10.1000/182
|
||||||
|
doi = "https://dxdoi.org/10.1000/182"
|
||||||
|
parsed = parseDoi(doi)
|
||||||
|
assert.Equal(t, "10.1000/182", parsed)
|
||||||
|
|
||||||
|
// doi:10.1000/182 -> 10.1000/182
|
||||||
|
doi = "doi:10.1000/182"
|
||||||
|
parsed = parseDoi(doi)
|
||||||
|
assert.Equal(t, "10.1000/182", parsed)
|
||||||
|
|
||||||
|
// doi://10.1000/182 -> 10.1000/182
|
||||||
|
doi = "doi://10.1000/182"
|
||||||
|
parsed = parseDoi(doi)
|
||||||
|
assert.Equal(t, "10.1000/182", parsed)
|
||||||
|
}
|
||||||
|
|
||||||
|
// prepareMockDoiResolverServer prepares a test server to resolve DOIs
//
// It implements just enough of the doi.org handle API
// (GET /api/handles/{handle}?index=1) to report resolvedURL as the
// registered URL for any handle, and returns the base URL of the
// mocked resolver API. The server is shut down automatically when the
// test finishes.
func prepareMockDoiResolverServer(t *testing.T, resolvedURL string) (doiResolverAPIURL string) {
	mux := http.NewServeMux()

	// Handle requests for resolving DOIs
	mux.HandleFunc("GET /api/handles/{handle...}", func(w http.ResponseWriter, r *http.Request) {
		// Check that we are resolving a DOI
		handle := strings.TrimPrefix(r.URL.Path, "/api/handles/")
		assert.NotEmpty(t, handle)
		index := r.URL.Query().Get("index")
		assert.Equal(t, "1", index)

		// Return the most basic response
		result := api.DoiResolverResponse{
			ResponseCode: 1,
			Handle:       handle,
			Values: []api.DoiResolverResponseValue{
				{
					Index: 1,
					Type:  "URL",
					Data: api.DoiResolverResponseValueData{
						Format: "string",
						Value:  resolvedURL,
					},
				},
			},
		}
		resultBytes, err := json.Marshal(result)
		require.NoError(t, err)
		w.Header().Add("Content-Type", "application/json")
		_, err = w.Write(resultBytes)
		require.NoError(t, err)
	})

	// Make the test server
	ts := httptest.NewServer(mux)

	// Close the server at the end of the test
	t.Cleanup(ts.Close)

	return ts.URL + "/api"
}
|
||||||
|
|
||||||
|
// md5Sum returns the hex-encoded MD5 digest of text.
func md5Sum(text string) string {
	hasher := md5.New()
	_, _ = hasher.Write([]byte(text)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
|
||||||
|
|
||||||
|
// prepareMockZenodoServer prepares a test server that mocks Zenodo.org
//
// It implements a minimal subset of the InvenioRDM API — record lookup,
// per-record file listing, and file downloads — all backed by the given
// filename -> contents map. The server is shut down automatically when
// the test finishes.
func prepareMockZenodoServer(t *testing.T, files map[string]string) *httptest.Server {
	mux := http.NewServeMux()

	// Handle requests for a single record
	mux.HandleFunc("GET /api/records/{recordID...}", func(w http.ResponseWriter, r *http.Request) {
		// Check that we are returning data about a single record
		recordID := strings.TrimPrefix(r.URL.Path, "/api/records/")
		assert.NotEmpty(t, recordID)

		// Return the most basic response: a record whose self link
		// points back at this same URL.
		selfURL, err := url.Parse("http://" + r.Host)
		require.NoError(t, err)
		selfURL = selfURL.JoinPath(r.URL.String())
		result := api.InvenioRecordResponse{
			Links: api.InvenioRecordResponseLinks{
				Self: selfURL.String(),
			},
		}
		resultBytes, err := json.Marshal(result)
		require.NoError(t, err)
		w.Header().Add("Content-Type", "application/json")
		_, err = w.Write(resultBytes)
		require.NoError(t, err)
	})
	// Handle requests for listing files in a record
	mux.HandleFunc("GET /api/records/{record}/files", func(w http.ResponseWriter, r *http.Request) {
		// Return the most basic response: one entry per file, each with
		// a content link under /api/files/ on this same server.
		filesBaseURL, err := url.Parse("http://" + r.Host)
		require.NoError(t, err)
		filesBaseURL = filesBaseURL.JoinPath("/api/files/")

		entries := []api.InvenioFilesResponseEntry{}
		for filename, contents := range files {
			entries = append(entries,
				api.InvenioFilesResponseEntry{
					Key:      filename,
					Checksum: md5Sum(contents),
					Size:     int64(len(contents)),
					Updated:  time.Now().UTC().Format(time.RFC3339),
					MimeType: "text/plain; charset=utf-8",
					Links: api.InvenioFilesResponseEntryLinks{
						Content: filesBaseURL.JoinPath(filename).String(),
					},
				},
			)
		}

		result := api.InvenioFilesResponse{
			Entries: entries,
		}
		resultBytes, err := json.Marshal(result)
		require.NoError(t, err)
		w.Header().Add("Content-Type", "application/json")
		_, err = w.Write(resultBytes)
		require.NoError(t, err)
	})
	// Handle requests for file contents
	mux.HandleFunc("/api/files/{file}", func(w http.ResponseWriter, r *http.Request) {
		// Check that we are returning the contents of a file
		filename := strings.TrimPrefix(r.URL.Path, "/api/files/")
		assert.NotEmpty(t, filename)
		contents, found := files[filename]
		if !found {
			// Unknown file: reply 404 Not Found
			w.WriteHeader(404)
			return
		}

		// Return the most basic response
		_, err := w.Write([]byte(contents))
		require.NoError(t, err)
	})

	// Make the test server
	ts := httptest.NewServer(mux)

	// Close the server at the end of the test
	t.Cleanup(ts.Close)

	return ts
}
|
||||||
|
|
||||||
|
// TestZenodoRemote exercises the doi backend end to end against a mock
// Zenodo server: DOI resolution, directory listing, hashes, and reading
// file contents.
func TestZenodoRemote(t *testing.T) {
	recordID := "2600782"
	doi := "10.5281/zenodo.2600782"

	// The files in the dataset
	files := map[string]string{
		"README.md": "This is a dataset.",
		"data.txt":  "Some data",
	}

	ts := prepareMockZenodoServer(t, files)
	resolvedURL := ts.URL + "/record/" + recordID

	// The mock resolver sends the DOI to the mock Zenodo server above
	doiResolverAPIURL := prepareMockDoiResolverServer(t, resolvedURL)

	testConfig := configmap.Simple{
		"type":                 "doi",
		"doi":                  doi,
		"provider":             "zenodo",
		"doi_resolver_api_url": doiResolverAPIURL,
	}
	f, err := NewFs(context.Background(), remoteName, "", testConfig)
	require.NoError(t, err)

	// Test listing the DOI files
	entries, err := f.List(context.Background(), "")
	require.NoError(t, err)

	// Sort for a deterministic order before asserting on indices
	sort.Sort(entries)

	require.Equal(t, len(files), len(entries))

	e := entries[0]
	assert.Equal(t, "README.md", e.Remote())
	assert.Equal(t, int64(18), e.Size())
	_, ok := e.(*Object)
	assert.True(t, ok)

	e = entries[1]
	assert.Equal(t, "data.txt", e.Remote())
	assert.Equal(t, int64(9), e.Size())
	_, ok = e.(*Object)
	assert.True(t, ok)

	// Test reading the DOI files
	o, err := f.NewObject(context.Background(), "README.md")
	require.NoError(t, err)
	assert.Equal(t, int64(18), o.Size())
	md5Hash, err := o.Hash(context.Background(), hash.MD5)
	require.NoError(t, err)
	assert.Equal(t, "464352b1cab5240e44528a56fda33d9d", md5Hash)
	fd, err := o.Open(context.Background())
	require.NoError(t, err)
	data, err := io.ReadAll(fd)
	require.NoError(t, err)
	require.NoError(t, fd.Close())
	assert.Equal(t, []byte(files["README.md"]), data)
	do, ok := o.(fs.MimeTyper)
	require.True(t, ok)
	assert.Equal(t, "text/plain; charset=utf-8", do.MimeType(context.Background()))

	o, err = f.NewObject(context.Background(), "data.txt")
	require.NoError(t, err)
	assert.Equal(t, int64(9), o.Size())
	md5Hash, err = o.Hash(context.Background(), hash.MD5)
	require.NoError(t, err)
	assert.Equal(t, "5b82f8bf4df2bfb0e66ccaa7306fd024", md5Hash)
	fd, err = o.Open(context.Background())
	require.NoError(t, err)
	data, err = io.ReadAll(fd)
	require.NoError(t, err)
	require.NoError(t, fd.Close())
	assert.Equal(t, []byte(files["data.txt"]), data)
	do, ok = o.(fs.MimeTyper)
	require.True(t, ok)
	assert.Equal(t, "text/plain; charset=utf-8", do.MimeType(context.Background()))
}
|
16
backend/doi/doi_test.go
Normal file
16
backend/doi/doi_test.go
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
// Test DOI filesystem interface
|
||||||
|
package doi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/rclone/rclone/fstest/fstests"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestIntegration runs integration tests against the remote
//
// This requires a remote named "TestDoi:" to be configured; the fstest
// framework drives the standard backend test suite against it.
func TestIntegration(t *testing.T) {
	fstests.Run(t, &fstests.Opt{
		RemoteName: "TestDoi:",
		NilObject:  (*Object)(nil),
	})
}
|
164
backend/doi/invenio.go
Normal file
164
backend/doi/invenio.go
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
// Implementation for InvenioRDM
|
||||||
|
|
||||||
|
package doi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/rclone/rclone/backend/doi/api"
|
||||||
|
"github.com/rclone/rclone/fs"
|
||||||
|
"github.com/rclone/rclone/lib/rest"
|
||||||
|
)
|
||||||
|
|
||||||
|
var invenioRecordRegex = regexp.MustCompile(`\/records?\/(.+)`)
|
||||||
|
|
||||||
|
// Returns true if resolvedURL is likely a DOI hosted on an InvenioRDM installation
//
// Detection works by attempting to resolve the InvenioRDM API endpoint:
// if that succeeds, we assume the provider is InvenioRDM.
func activateInvenio(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL) (isActive bool) {
	_, _, err := resolveInvenioEndpoint(ctx, srv, pacer, resolvedURL)
	return err == nil
}
|
||||||
|
|
||||||
|
// Resolve the main API endpoint for a DOI hosted on an InvenioRDM installation
//
// It fetches resolvedURL (following redirects) and then tries two
// strategies in order:
//  1. follow the "linkset" relation advertised in the Link response header
//  2. guess "/api/records/{id}" from the record ID in the final URL
//
// Each candidate is verified with checkInvenioAPIURL before being returned.
func resolveInvenioEndpoint(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL) (provider Provider, endpoint *url.URL, err error) {
	var res *http.Response
	opts := rest.Opts{
		Method:  "GET",
		RootURL: resolvedURL.String(),
	}
	err = pacer.Call(func() (bool, error) {
		res, err = srv.Call(ctx, &opts)
		return shouldRetry(ctx, res, err)
	})
	if err != nil {
		return "", nil, err
	}

	// First, attempt to grab the API URL from the headers
	var linksetURL *url.URL
	links := parseLinkHeader(res.Header.Get("Link"))
	for _, link := range links {
		if link.Rel == "linkset" && link.Type == "application/linkset+json" {
			parsed, err := url.Parse(link.Href)
			if err == nil {
				linksetURL = parsed
				break
			}
		}
	}

	if linksetURL != nil {
		endpoint, err = checkInvenioAPIURL(ctx, srv, pacer, linksetURL)
		if err == nil {
			return Invenio, endpoint, nil
		}
		// Not fatal: fall through to guessing from the URL below
		fs.Logf(nil, "using linkset URL failed: %s", err.Error())
	}

	// If there is no linkset header, try to grab the record ID from the URL
	// (res.Request.URL is the URL after any redirects were followed)
	recordID := ""
	resURL := res.Request.URL
	match := invenioRecordRegex.FindStringSubmatch(resURL.EscapedPath())
	if match != nil {
		recordID = match[1]
		guessedURL := res.Request.URL.ResolveReference(&url.URL{
			Path: "/api/records/" + recordID,
		})
		endpoint, err = checkInvenioAPIURL(ctx, srv, pacer, guessedURL)
		if err == nil {
			return Invenio, endpoint, nil
		}
		fs.Logf(nil, "guessing the URL failed: %s", err.Error())
	}

	return "", nil, fmt.Errorf("could not resolve the Invenio API endpoint for '%s'", resolvedURL.String())
}
|
||||||
|
|
||||||
|
func checkInvenioAPIURL(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL) (endpoint *url.URL, err error) {
|
||||||
|
var result api.InvenioRecordResponse
|
||||||
|
opts := rest.Opts{
|
||||||
|
Method: "GET",
|
||||||
|
RootURL: resolvedURL.String(),
|
||||||
|
}
|
||||||
|
err = pacer.Call(func() (bool, error) {
|
||||||
|
res, err := srv.CallJSON(ctx, &opts, nil, &result)
|
||||||
|
return shouldRetry(ctx, res, err)
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if result.Links.Self == "" {
|
||||||
|
return nil, fmt.Errorf("could not parse API response from '%s'", resolvedURL.String())
|
||||||
|
}
|
||||||
|
return url.Parse(result.Links.Self)
|
||||||
|
}
|
||||||
|
|
||||||
|
// invenioProvider implements the doiProvider interface for InvenioRDM installations
type invenioProvider struct {
	// f is the parent filesystem, used for its HTTP client, pacer,
	// endpoint and listing cache.
	f *Fs
}
|
||||||
|
|
||||||
|
// ListEntries returns the full list of entries found at the remote, regardless of root
//
// Results are cached under the "files" key; cache hits are returned as
// pointers to fresh copies so callers cannot mutate the cached values.
func (ip *invenioProvider) ListEntries(ctx context.Context) (entries []*Object, err error) {
	// Use the cache if populated
	cachedEntries, found := ip.f.cache.GetMaybe("files")
	if found {
		parsedEntries, ok := cachedEntries.([]Object)
		if ok {
			// Copy each cached value before taking its address
			for _, entry := range parsedEntries {
				newEntry := entry
				entries = append(entries, &newEntry)
			}
			return entries, nil
		}
	}

	// Cache miss: fetch the file listing from the record's /files endpoint
	filesURL := ip.f.endpoint.JoinPath("files")
	var result api.InvenioFilesResponse
	opts := rest.Opts{
		Method: "GET",
		Path:   strings.TrimLeft(filesURL.EscapedPath(), "/"),
	}
	err = ip.f.pacer.Call(func() (bool, error) {
		res, err := ip.f.srv.CallJSON(ctx, &opts, nil, &result)
		return shouldRetry(ctx, res, err)
	})
	if err != nil {
		return nil, fmt.Errorf("readDir failed: %w", err)
	}
	for _, file := range result.Entries {
		// A bad timestamp is not fatal: log it and fall back to timeUnset
		modTime, modTimeErr := time.Parse(time.RFC3339, file.Updated)
		if modTimeErr != nil {
			fs.Logf(ip.f, "error: could not parse last update time %v", modTimeErr)
			modTime = timeUnset
		}
		entry := &Object{
			fs:          ip.f,
			remote:      file.Key,
			contentURL:  file.Links.Content,
			size:        file.Size,
			modTime:     modTime,
			contentType: file.MimeType,
			md5:         strings.TrimPrefix(file.Checksum, "md5:"),
		}
		entries = append(entries, entry)
	}
	// Populate the cache (with values, not pointers)
	cacheEntries := []Object{}
	for _, entry := range entries {
		cacheEntries = append(cacheEntries, *entry)
	}
	ip.f.cache.Put("files", cacheEntries)
	return entries, nil
}
|
||||||
|
|
||||||
|
// newInvenioProvider makes a doiProvider backed by the InvenioRDM API
func newInvenioProvider(f *Fs) doiProvider {
	return &invenioProvider{
		f: f,
	}
}
|
75
backend/doi/link_header.go
Normal file
75
backend/doi/link_header.go
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
package doi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// linkRegex matches a URI-Reference wrapped in angle brackets,
// e.g. "<https://example.com>", capturing the URI itself.
var linkRegex = regexp.MustCompile(`^<(.+)>$`)

// valueRegex matches a double-quoted parameter value, capturing the
// contents without the quotes.
var valueRegex = regexp.MustCompile(`^"(.+)"$`)

// headerLink represents a link as presented in HTTP headers
// MDN Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Link
type headerLink struct {
	// Href is the link target URI
	Href string
	// Rel is the "rel" parameter, or "" if absent
	Rel string
	// Type is the "type" parameter, or "" if absent
	Type string
	// Extras holds any other key=value parameters found on the link
	Extras map[string]string
}
|
||||||
|
|
||||||
|
func parseLinkHeader(header string) (links []headerLink) {
|
||||||
|
for _, link := range strings.Split(header, ",") {
|
||||||
|
link = strings.TrimSpace(link)
|
||||||
|
parsed := parseLink(link)
|
||||||
|
if parsed != nil {
|
||||||
|
links = append(links, *parsed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return links
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseLink(link string) (parsedLink *headerLink) {
|
||||||
|
var parts []string
|
||||||
|
for _, part := range strings.Split(link, ";") {
|
||||||
|
parts = append(parts, strings.TrimSpace(part))
|
||||||
|
}
|
||||||
|
|
||||||
|
match := linkRegex.FindStringSubmatch(parts[0])
|
||||||
|
if match == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
result := &headerLink{
|
||||||
|
Href: match[1],
|
||||||
|
Extras: map[string]string{},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, keyValue := range parts[1:] {
|
||||||
|
parsed := parseKeyValue(keyValue)
|
||||||
|
if parsed != nil {
|
||||||
|
key, value := parsed[0], parsed[1]
|
||||||
|
switch strings.ToLower(key) {
|
||||||
|
case "rel":
|
||||||
|
result.Rel = value
|
||||||
|
case "type":
|
||||||
|
result.Type = value
|
||||||
|
default:
|
||||||
|
result.Extras[key] = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseKeyValue(keyValue string) []string {
|
||||||
|
parts := strings.SplitN(keyValue, "=", 2)
|
||||||
|
if parts[0] == "" || len(parts) < 2 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
match := valueRegex.FindStringSubmatch(parts[1])
|
||||||
|
if match != nil {
|
||||||
|
parts[1] = match[1]
|
||||||
|
return parts
|
||||||
|
}
|
||||||
|
return parts
|
||||||
|
}
|
44
backend/doi/link_header_internal_test.go
Normal file
44
backend/doi/link_header_internal_test.go
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
package doi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseLinkHeader(t *testing.T) {
|
||||||
|
header := "<https://zenodo.org/api/records/15063252> ; rel=\"linkset\" ; type=\"application/linkset+json\""
|
||||||
|
links := parseLinkHeader(header)
|
||||||
|
expected := headerLink{
|
||||||
|
Href: "https://zenodo.org/api/records/15063252",
|
||||||
|
Rel: "linkset",
|
||||||
|
Type: "application/linkset+json",
|
||||||
|
Extras: map[string]string{},
|
||||||
|
}
|
||||||
|
assert.Contains(t, links, expected)
|
||||||
|
|
||||||
|
header = "<https://api.example.com/issues?page=2>; rel=\"prev\", <https://api.example.com/issues?page=4>; rel=\"next\", <https://api.example.com/issues?page=10>; rel=\"last\", <https://api.example.com/issues?page=1>; rel=\"first\""
|
||||||
|
links = parseLinkHeader(header)
|
||||||
|
expectedList := []headerLink{{
|
||||||
|
Href: "https://api.example.com/issues?page=2",
|
||||||
|
Rel: "prev",
|
||||||
|
Type: "",
|
||||||
|
Extras: map[string]string{},
|
||||||
|
}, {
|
||||||
|
Href: "https://api.example.com/issues?page=4",
|
||||||
|
Rel: "next",
|
||||||
|
Type: "",
|
||||||
|
Extras: map[string]string{},
|
||||||
|
}, {
|
||||||
|
Href: "https://api.example.com/issues?page=10",
|
||||||
|
Rel: "last",
|
||||||
|
Type: "",
|
||||||
|
Extras: map[string]string{},
|
||||||
|
}, {
|
||||||
|
Href: "https://api.example.com/issues?page=1",
|
||||||
|
Rel: "first",
|
||||||
|
Type: "",
|
||||||
|
Extras: map[string]string{},
|
||||||
|
}}
|
||||||
|
assert.Equal(t, links, expectedList)
|
||||||
|
}
|
47
backend/doi/zenodo.go
Normal file
47
backend/doi/zenodo.go
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
// Implementation for Zenodo
|
||||||
|
|
||||||
|
package doi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"net/url"
|
||||||
|
"regexp"
|
||||||
|
|
||||||
|
"github.com/rclone/rclone/backend/doi/api"
|
||||||
|
"github.com/rclone/rclone/fs"
|
||||||
|
"github.com/rclone/rclone/lib/rest"
|
||||||
|
)
|
||||||
|
|
||||||
|
// zenodoRecordRegex extracts the record ID from the suffix of a Zenodo
// DOI, e.g. "10.5281/zenodo.2600782" yields "2600782".
var zenodoRecordRegex = regexp.MustCompile(`zenodo[.](.+)`)
|
||||||
|
|
||||||
|
// Resolve the main API endpoint for a DOI hosted on Zenodo
//
// The record ID is derived from the DOI suffix (e.g. "zenodo.2600782"),
// then confirmed by fetching the record and following its self link.
func resolveZenodoEndpoint(ctx context.Context, srv *rest.Client, pacer *fs.Pacer, resolvedURL *url.URL, doi string) (provider Provider, endpoint *url.URL, err error) {
	match := zenodoRecordRegex.FindStringSubmatch(doi)
	if match == nil {
		return "", nil, fmt.Errorf("could not derive API endpoint URL from '%s'", resolvedURL.String())
	}

	recordID := match[1]
	endpointURL := resolvedURL.ResolveReference(&url.URL{Path: "/api/records/" + recordID})

	// Fetch the guessed record URL to confirm it exists
	var result api.InvenioRecordResponse
	opts := rest.Opts{
		Method:  "GET",
		RootURL: endpointURL.String(),
	}
	err = pacer.Call(func() (bool, error) {
		res, err := srv.CallJSON(ctx, &opts, nil, &result)
		return shouldRetry(ctx, res, err)
	})
	if err != nil {
		return "", nil, err
	}

	// Prefer the canonical self link reported by the API
	endpointURL, err = url.Parse(result.Links.Self)
	if err != nil {
		return "", nil, err
	}

	return Zenodo, endpointURL, nil
}
|
187
docs/content/doi.md
Normal file
187
docs/content/doi.md
Normal file
@ -0,0 +1,187 @@
|
|||||||
|
---
|
||||||
|
title: "DOI"
|
||||||
|
description: "Rclone docs for DOI"
|
||||||
|
versionIntroduced: "?"
|
||||||
|
---
|
||||||
|
|
||||||
|
# {{< icon "fa fa-building-columns" >}} DOI
|
||||||
|
|
||||||
|
The DOI remote is a read only remote for reading files from digital object identifiers (DOI).
|
||||||
|
|
||||||
|
Currently, the DOI backend supports DOIs hosted with:
|
||||||
|
- [InvenioRDM](https://inveniosoftware.org/products/rdm/)
|
||||||
|
- [Zenodo](https://zenodo.org)
|
||||||
|
- [CaltechDATA](https://data.caltech.edu)
|
||||||
|
- [Other InvenioRDM repositories](https://inveniosoftware.org/showcase/)
|
||||||
|
- [Dataverse](https://dataverse.org)
|
||||||
|
- [Harvard Dataverse](https://dataverse.harvard.edu)
|
||||||
|
- [Other Dataverse repositories](https://dataverse.org/installations)
|
||||||
|
|
||||||
|
Paths are specified as `remote:path`
|
||||||
|
|
||||||
|
Paths may be as deep as required, e.g. `remote:directory/subdirectory`.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Here is an example of how to make a remote called `remote`. First run:
|
||||||
|
|
||||||
|
rclone config
|
||||||
|
|
||||||
|
This will guide you through an interactive setup process:
|
||||||
|
|
||||||
|
```
|
||||||
|
No remotes found, make a new one?
|
||||||
|
n) New remote
|
||||||
|
s) Set configuration password
|
||||||
|
q) Quit config
|
||||||
|
n/s/q> n
|
||||||
|
Enter name for new remote.
|
||||||
|
name> remote
|
||||||
|
Type of storage to configure.
|
||||||
|
Choose a number from below, or type in your own value
|
||||||
|
[snip]
|
||||||
|
XX / DOI datasets
|
||||||
|
\ (doi)
|
||||||
|
[snip]
|
||||||
|
Storage> doi
|
||||||
|
Option doi.
|
||||||
|
The DOI or the doi.org URL.
|
||||||
|
Enter a value.
|
||||||
|
doi> 10.5281/zenodo.5876941
|
||||||
|
Edit advanced config?
|
||||||
|
y) Yes
|
||||||
|
n) No (default)
|
||||||
|
y/n> n
|
||||||
|
Configuration complete.
|
||||||
|
Options:
|
||||||
|
- type: doi
|
||||||
|
- doi: 10.5281/zenodo.5876941
|
||||||
|
Keep this "remote" remote?
|
||||||
|
y) Yes this is OK (default)
|
||||||
|
e) Edit this remote
|
||||||
|
d) Delete this remote
|
||||||
|
y/e/d> y
|
||||||
|
```
|
||||||
|
|
||||||
|
{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/doi/doi.go then run make backenddocs" >}}
|
||||||
|
### Standard options
|
||||||
|
|
||||||
|
Here are the Standard options specific to doi (DOI datasets).
|
||||||
|
|
||||||
|
#### --doi-doi
|
||||||
|
|
||||||
|
The DOI or the doi.org URL.
|
||||||
|
|
||||||
|
Properties:
|
||||||
|
|
||||||
|
- Config: doi
|
||||||
|
- Env Var: RCLONE_DOI_DOI
|
||||||
|
- Type: string
|
||||||
|
- Required: true
|
||||||
|
|
||||||
|
### Advanced options
|
||||||
|
|
||||||
|
Here are the Advanced options specific to doi (DOI datasets).
|
||||||
|
|
||||||
|
#### --doi-provider
|
||||||
|
|
||||||
|
DOI provider.
|
||||||
|
|
||||||
|
The DOI provider can be set when rclone does not automatically recognize a supported DOI provider.
|
||||||
|
|
||||||
|
Properties:
|
||||||
|
|
||||||
|
- Config: provider
|
||||||
|
- Env Var: RCLONE_DOI_PROVIDER
|
||||||
|
- Type: string
|
||||||
|
- Required: false
|
||||||
|
- Examples:
|
||||||
|
- "auto"
|
||||||
|
- Auto-detect provider
|
||||||
|
- "zenodo"
|
||||||
|
- Zenodo
|
||||||
|
- "dataverse"
|
||||||
|
- Dataverse
|
||||||
|
- "invenio"
|
||||||
|
- Invenio
|
||||||
|
|
||||||
|
#### --doi-doi-resolver-api-url
|
||||||
|
|
||||||
|
The URL of the DOI resolver API to use.
|
||||||
|
|
||||||
|
The DOI resolver can be set for testing or for cases when the canonical DOI resolver API cannot be used.
|
||||||
|
|
||||||
|
Defaults to "https://doi.org/api".
|
||||||
|
|
||||||
|
Properties:
|
||||||
|
|
||||||
|
- Config: doi_resolver_api_url
|
||||||
|
- Env Var: RCLONE_DOI_DOI_RESOLVER_API_URL
|
||||||
|
- Type: string
|
||||||
|
- Required: false
|
||||||
|
|
||||||
|
#### --doi-description
|
||||||
|
|
||||||
|
Description of the remote.
|
||||||
|
|
||||||
|
Properties:
|
||||||
|
|
||||||
|
- Config: description
|
||||||
|
- Env Var: RCLONE_DOI_DESCRIPTION
|
||||||
|
- Type: string
|
||||||
|
- Required: false
|
||||||
|
|
||||||
|
## Backend commands
|
||||||
|
|
||||||
|
Here are the commands specific to the doi backend.
|
||||||
|
|
||||||
|
Run them with
|
||||||
|
|
||||||
|
rclone backend COMMAND remote:
|
||||||
|
|
||||||
|
The help below will explain what arguments each command takes.
|
||||||
|
|
||||||
|
See the [backend](/commands/rclone_backend/) command for more
|
||||||
|
info on how to pass options and arguments.
|
||||||
|
|
||||||
|
These can be run on a running backend using the rc command
|
||||||
|
[backend/command](/rc/#backend-command).
|
||||||
|
|
||||||
|
### metadata
|
||||||
|
|
||||||
|
Show metadata about the DOI.
|
||||||
|
|
||||||
|
rclone backend metadata remote: [options] [<arguments>+]
|
||||||
|
|
||||||
|
This command returns a JSON object with some information about the DOI.
|
||||||
|
|
||||||
|
    rclone backend metadata doi:
|
||||||
|
|
||||||
|
It returns a JSON object representing metadata about the DOI.
|
||||||
|
|
||||||
|
|
||||||
|
### set
|
||||||
|
|
||||||
|
Set command for updating the config parameters.
|
||||||
|
|
||||||
|
rclone backend set remote: [options] [<arguments>+]
|
||||||
|
|
||||||
|
This set command can be used to update the config parameters
|
||||||
|
for a running doi backend.
|
||||||
|
|
||||||
|
Usage Examples:
|
||||||
|
|
||||||
|
rclone backend set doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||||
|
rclone rc backend/command command=set fs=doi: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||||
|
rclone rc backend/command command=set fs=doi: -o doi=NEW_DOI
|
||||||
|
|
||||||
|
The option keys are named as they are in the config file.
|
||||||
|
|
||||||
|
This rebuilds the connection to the doi backend when it is called with
|
||||||
|
the new parameters. Only new parameters need be passed as the values
|
||||||
|
will default to those currently in use.
|
||||||
|
|
||||||
|
It doesn't return anything.
|
||||||
|
|
||||||
|
|
||||||
|
{{< rem autogenerated options stop >}}
|
Loading…
x
Reference in New Issue
Block a user