mirror of
https://github.com/rclone/rclone.git
synced 2025-01-08 23:40:29 +01:00
internetarchive: add support for Metadata
This commit is contained in:
parent
b4d847cadd
commit
42dfadfa1b
@ -38,6 +38,84 @@ func init() {
|
||||
Name: "internetarchive",
|
||||
Description: "Internet Archive",
|
||||
NewFs: NewFs,
|
||||
|
||||
MetadataInfo: &fs.MetadataInfo{
|
||||
System: map[string]fs.MetadataHelp{
|
||||
"name": {
|
||||
Help: "Full file path, without the bucket part",
|
||||
Type: "filename",
|
||||
Example: "backend/internetarchive/internetarchive.go",
|
||||
},
|
||||
"source": {
|
||||
Help: "The source of the file",
|
||||
Type: "string",
|
||||
Example: "original",
|
||||
},
|
||||
"mtime": {
|
||||
Help: "Time of last modification, managed by Rclone",
|
||||
Type: "RFC 3339",
|
||||
Example: "2006-01-02T15:04:05.999999999Z",
|
||||
},
|
||||
"size": {
|
||||
Help: "File size in bytes",
|
||||
Type: "decimal number",
|
||||
Example: "123456",
|
||||
},
|
||||
"md5": {
|
||||
Help: "MD5 hash calculated by Internet Archive",
|
||||
Type: "string",
|
||||
Example: "01234567012345670123456701234567",
|
||||
},
|
||||
"crc32": {
|
||||
Help: "CRC32 calculated by Internet Archive",
|
||||
Type: "string",
|
||||
Example: "01234567",
|
||||
},
|
||||
"sha1": {
|
||||
Help: "SHA1 hash calculated by Internet Archive",
|
||||
Type: "string",
|
||||
Example: "0123456701234567012345670123456701234567",
|
||||
},
|
||||
"format": {
|
||||
Help: "Name of format identified by Internet Archive",
|
||||
Type: "string",
|
||||
Example: "Comma-Separated Values",
|
||||
},
|
||||
"old_version": {
|
||||
Help: "Whether the file was replaced and moved by keep-old-version flag",
|
||||
Type: "boolean",
|
||||
Example: "true",
|
||||
},
|
||||
"viruscheck": {
|
||||
Help: "The last time viruscheck process was run for the file (?)",
|
||||
Type: "unixtime",
|
||||
Example: "1654191352",
|
||||
},
|
||||
|
||||
"rclone-ia-mtime": {
|
||||
Help: "Time of last modification, managed by Internet Archive",
|
||||
Type: "RFC 3339",
|
||||
Example: "2006-01-02T15:04:05.999999999Z",
|
||||
},
|
||||
"rclone-mtime": {
|
||||
Help: "Time of last modification, managed by Rclone",
|
||||
Type: "RFC 3339",
|
||||
Example: "2006-01-02T15:04:05.999999999Z",
|
||||
},
|
||||
"rclone-update-track": {
|
||||
Help: "Random value used by Rclone for tracking changes inside Internet Archive",
|
||||
Type: "string",
|
||||
Example: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
|
||||
},
|
||||
},
|
||||
Help: `Metadata fields provided by Internet Archive.
|
||||
If there are multiple values for a key, only the first one is returned.
|
||||
This is a limitation of Rclone, that supports one value per one key.
|
||||
|
||||
Owner is able to add custom keys. Metadata feature grabs all the keys including them.
|
||||
`,
|
||||
},
|
||||
|
||||
Options: []fs.Option{{
|
||||
Name: "access_key_id",
|
||||
Help: "IAS3 Access Key.\n\nLeave blank for anonymous access.\nYou can find one here: https://archive.org/account/s3.php",
|
||||
@ -90,6 +168,14 @@ Only enable if you need to be guaranteed to be reflected after write operations.
|
||||
// maximum size of an item. this is constant across all items
|
||||
const iaItemMaxSize int64 = 1099511627776
|
||||
|
||||
// roMetadataKey is the set of metadata keys that must not be written by
// the user. Only key presence matters; the values are always nil.
//
// "mtime" is intentionally absent from this set: it is a documented
// exception and is handled separately.
var roMetadataKey = map[string]interface{}{
	"name":        nil,
	"source":      nil,
	"size":        nil,
	"md5":         nil,
	"crc32":       nil,
	"sha1":        nil,
	"format":      nil,
	"old_version": nil,
	"viruscheck":  nil,
}
|
||||
|
||||
// Options defines the configuration for this backend
|
||||
type Options struct {
|
||||
AccessKeyID string `config:"access_key_id"`
|
||||
@ -122,6 +208,7 @@ type Object struct {
|
||||
md5 string // md5 hash of the file presented by the server
|
||||
sha1 string // sha1 hash of the file presented by the server
|
||||
crc32 string // crc32 of the file presented by the server
|
||||
rawData json.RawMessage
|
||||
}
|
||||
|
||||
// IAFile represents a subset of an object in MetadataResponse.Files
|
||||
@ -135,6 +222,8 @@ type IAFile struct {
|
||||
Md5 string `json:"md5"`
|
||||
Crc32 string `json:"crc32"`
|
||||
Sha1 string `json:"sha1"`
|
||||
|
||||
rawData json.RawMessage
|
||||
}
|
||||
|
||||
// MetadataResponse represents a subset of the JSON object returned by (frontend)/metadata/
|
||||
@ -143,6 +232,12 @@ type MetadataResponse struct {
|
||||
ItemSize int64 `json:"item_size"`
|
||||
}
|
||||
|
||||
// MetadataResponseRaw is a variant of MetadataResponse in which every
// entry of "files" is kept as undecoded JSON, so that the complete set of
// per-file keys remains available for metadata handling.
type MetadataResponseRaw struct {
	// Files holds each element of the "files" array as raw JSON.
	Files []json.RawMessage `json:"files"`
	// ItemSize is decoded from the "item_size" field.
	ItemSize int64 `json:"item_size"`
}
|
||||
|
||||
// ModMetadataResponse represents response for amending metadata
|
||||
type ModMetadataResponse struct {
|
||||
// https://archive.org/services/docs/api/md-write.html#example
|
||||
@ -226,7 +321,10 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
|
||||
}
|
||||
f.setRoot(root)
|
||||
f.features = (&fs.Features{
|
||||
BucketBased: true,
|
||||
BucketBased: true,
|
||||
ReadMetadata: true,
|
||||
WriteMetadata: true,
|
||||
UserMetadata: true,
|
||||
}).Fill(ctx, f)
|
||||
|
||||
f.srv = rest.NewClient(fshttp.NewClient(ctx))
|
||||
@ -307,18 +405,17 @@ func (o *Object) SetModTime(ctx context.Context, t time.Time) (err error) {
|
||||
}
|
||||
|
||||
// https://archive.org/services/docs/api/md-write.html
|
||||
var patch = []interface{}{
|
||||
// the following code might be useful for modifying metadata of an uploaded file
|
||||
patch := []map[string]string{
|
||||
// we should drop it first to clear all rclone-provided mtimes
|
||||
struct {
|
||||
Op string `json:"op"`
|
||||
Path string `json:"path"`
|
||||
}{"remove", "/rclone-mtime"},
|
||||
struct {
|
||||
Op string `json:"op"`
|
||||
Path string `json:"path"`
|
||||
Value string `json:"value"`
|
||||
}{"add", "/rclone-mtime", t.Format(time.RFC3339Nano)},
|
||||
}
|
||||
{
|
||||
"op": "remove",
|
||||
"path": "/rclone-mtime",
|
||||
}, {
|
||||
"op": "add",
|
||||
"path": "/rclone-mtime",
|
||||
"value": t.Format(time.RFC3339Nano),
|
||||
}}
|
||||
res, err := json.Marshal(patch)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -685,6 +782,23 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
|
||||
headers["Content-Length"] = fmt.Sprintf("%d", size)
|
||||
headers["x-archive-size-hint"] = fmt.Sprintf("%d", size)
|
||||
}
|
||||
var mdata fs.Metadata
|
||||
mdata, err = fs.GetMetadataOptions(ctx, src, options)
|
||||
if err == nil && mdata != nil {
|
||||
for mk, mv := range mdata {
|
||||
mk = strings.ToLower(mk)
|
||||
if strings.HasPrefix(mk, "rclone-") {
|
||||
fs.LogPrintf(fs.LogLevelWarning, o, "reserved metadata key %s is about to set", mk)
|
||||
} else if _, ok := roMetadataKey[mk]; ok {
|
||||
fs.LogPrintf(fs.LogLevelWarning, o, "setting or modifying read-only key %s is requested, skipping", mk)
|
||||
continue
|
||||
} else if mk == "mtime" {
|
||||
// redirect to make it work
|
||||
mk = "rclone-mtime"
|
||||
}
|
||||
headers[fmt.Sprintf("x-amz-filemeta-%s", mk)] = mv
|
||||
}
|
||||
}
|
||||
|
||||
// read the md5sum if available
|
||||
var md5sumHex string
|
||||
@ -762,6 +876,34 @@ func (o *Object) String() string {
|
||||
return o.remote
|
||||
}
|
||||
|
||||
// Metadata returns all file metadata provided by Internet Archive
|
||||
func (o *Object) Metadata(ctx context.Context) (m fs.Metadata, err error) {
|
||||
if o.rawData == nil {
|
||||
return nil, nil
|
||||
}
|
||||
raw := make(map[string]json.RawMessage)
|
||||
err = json.Unmarshal(o.rawData, &raw)
|
||||
if err != nil {
|
||||
// fatal: json parsing failed
|
||||
return
|
||||
}
|
||||
for k, v := range raw {
|
||||
items, err := listOrString(v)
|
||||
if len(items) == 0 || err != nil {
|
||||
// skip: an entry failed to parse
|
||||
continue
|
||||
}
|
||||
m.Set(k, items[0])
|
||||
}
|
||||
// move the old mtime to an another key
|
||||
if v, ok := m["mtime"]; ok {
|
||||
m["rclone-ia-mtime"] = v
|
||||
}
|
||||
// overwrite with a correct mtime
|
||||
m["mtime"] = o.modTime.Format(time.RFC3339Nano)
|
||||
return
|
||||
}
|
||||
|
||||
func (f *Fs) shouldRetry(resp *http.Response, err error) (bool, error) {
|
||||
if resp != nil {
|
||||
for _, e := range retryErrorCodes {
|
||||
@ -788,7 +930,7 @@ func (o *Object) split() (bucket, bucketPath string) {
|
||||
return o.fs.split(o.remote)
|
||||
}
|
||||
|
||||
func (f *Fs) requestMetadata(ctx context.Context, bucket string) (result MetadataResponse, err error) {
|
||||
func (f *Fs) requestMetadata(ctx context.Context, bucket string) (result *MetadataResponse, err error) {
|
||||
var resp *http.Response
|
||||
// make a GET request to (frontend)/metadata/:item/
|
||||
opts := rest.Opts{
|
||||
@ -796,12 +938,15 @@ func (f *Fs) requestMetadata(ctx context.Context, bucket string) (result Metadat
|
||||
Path: path.Join("/metadata/", bucket),
|
||||
}
|
||||
|
||||
var temp MetadataResponseRaw
|
||||
err = f.pacer.Call(func() (bool, error) {
|
||||
resp, err = f.front.CallJSON(ctx, &opts, nil, &result)
|
||||
resp, err = f.front.CallJSON(ctx, &opts, nil, &temp)
|
||||
return f.shouldRetry(resp, err)
|
||||
})
|
||||
|
||||
return result, err
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return temp.unraw()
|
||||
}
|
||||
|
||||
// list up all files/directories without any filters
|
||||
@ -998,6 +1143,7 @@ func makeValidObject(f *Fs, remote string, file IAFile, mtime time.Time, size in
|
||||
md5: file.Md5,
|
||||
crc32: file.Crc32,
|
||||
sha1: file.Sha1,
|
||||
rawData: file.rawData,
|
||||
}
|
||||
}
|
||||
|
||||
@ -1045,6 +1191,23 @@ func (file IAFile) parseMtime() (mtime time.Time) {
|
||||
return mtime
|
||||
}
|
||||
|
||||
func (mrr *MetadataResponseRaw) unraw() (_ *MetadataResponse, err error) {
|
||||
var files []IAFile
|
||||
for _, raw := range mrr.Files {
|
||||
var parsed IAFile
|
||||
err = json.Unmarshal(raw, &parsed)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
parsed.rawData = raw
|
||||
files = append(files, parsed)
|
||||
}
|
||||
return &MetadataResponse{
|
||||
Files: files,
|
||||
ItemSize: mrr.ItemSize,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func compareSize(a, b int64) bool {
|
||||
if a < 0 || b < 0 {
|
||||
// we won't compare if any of them is not known
|
||||
@ -1106,4 +1269,5 @@ var (
|
||||
_ fs.PublicLinker = &Fs{}
|
||||
_ fs.Abouter = &Fs{}
|
||||
_ fs.Object = &Object{}
|
||||
_ fs.Metadataer = &Object{}
|
||||
)
|
||||
|
@ -38,6 +38,33 @@ You can optionally wait for the server's processing to finish, by setting non-ze
|
||||
By making it wait, rclone can do normal file comparison.
|
||||
Make sure to set a large enough value (e.g. `30m0s` for smaller files) as it can take a long time depending on server's queue.
|
||||
|
||||
## About metadata
|
||||
This backend supports setting, updating and reading metadata of each file.
|
||||
The metadata will appear as file metadata on Internet Archive.
|
||||
However, some fields are reserved by both Internet Archive and rclone.
|
||||
|
||||
The following are reserved by Internet Archive:
|
||||
- `name`
|
||||
- `source`
|
||||
- `size`
|
||||
- `md5`
|
||||
- `crc32`
|
||||
- `sha1`
|
||||
- `format`
|
||||
- `old_version`
|
||||
- `viruscheck`
|
||||
|
||||
Trying to set values to these keys is ignored with a warning.
|
||||
Setting `mtime` is the only exception: doing so behaves identically to setting ModTime.
|
||||
|
||||
rclone reserves all the keys starting with `rclone-`. Setting values for these keys produces a warning, but the values are still set as requested.
|
||||
|
||||
If there are multiple values for a key, only the first one is returned.
|
||||
This is a limitation of rclone, that supports one value per one key.
|
||||
This can happen after a server-side copy.
|
||||
|
||||
Reading metadata will also return custom keys (those that are neither standard nor reserved).
|
||||
|
||||
## Configuration
|
||||
|
||||
Here is an example of making an internetarchive configuration.
|
||||
|
@ -33,7 +33,7 @@ Here is an overview of the major features of each cloud storage system.
|
||||
| HiDrive | HiDrive ¹² | R/W | No | No | - | - |
|
||||
| HTTP | - | R | No | No | R | - |
|
||||
| Hubic | MD5 | R/W | No | No | R/W | - |
|
||||
| Internet Archive | MD5, SHA1, CRC32 | R/W ¹¹ | No | No | - | - |
|
||||
| Internet Archive | MD5, SHA1, CRC32 | R/W ¹¹ | No | No | - | RWU |
|
||||
| Jottacloud | MD5 | R/W | Yes | No | R | - |
|
||||
| Koofr | MD5 | - | Yes | No | - | - |
|
||||
| Mail.ru Cloud | Mailru ⁶ | R/W | Yes | No | - | - |
|
||||
|
Loading…
Reference in New Issue
Block a user