mirror of
https://github.com/rclone/rclone.git
synced 2024-11-07 09:04:52 +01:00
h5ai: prepared new h5ai http backend
This commit is contained in:
parent
b6013a5e68
commit
0855d7e230
@ -20,6 +20,7 @@ import (
|
||||
_ "github.com/rclone/rclone/backend/ftp"
|
||||
_ "github.com/rclone/rclone/backend/googlecloudstorage"
|
||||
_ "github.com/rclone/rclone/backend/googlephotos"
|
||||
_ "github.com/rclone/rclone/backend/h5ai"
|
||||
_ "github.com/rclone/rclone/backend/hasher"
|
||||
_ "github.com/rclone/rclone/backend/hdfs"
|
||||
_ "github.com/rclone/rclone/backend/hidrive"
|
||||
|
787
backend/h5ai/h5ai.go
Normal file
787
backend/h5ai/h5ai.go
Normal file
@ -0,0 +1,787 @@
|
||||
// Package h5ai provides a filesystem interface to h5ai servers using net/http
|
||||
//
|
||||
// It treats HTML pages served from the endpoint as directory
|
||||
// listings, and includes any links found as files.
|
||||
package h5ai
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/config/configmap"
|
||||
"github.com/rclone/rclone/fs/config/configstruct"
|
||||
"github.com/rclone/rclone/fs/fshttp"
|
||||
"github.com/rclone/rclone/fs/hash"
|
||||
"github.com/rclone/rclone/lib/rest"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// errorReadOnly is returned by every operation that would modify the remote.
var errorReadOnly = errors.New("h5ai remotes are read only")

// timeUnset is the placeholder modification time (the Unix epoch) used when
// no real modification time is available.
var timeUnset = time.Unix(0, 0)
|
||||
|
||||
func init() {
|
||||
fsi := &fs.RegInfo{
|
||||
Name: "h5ai",
|
||||
Description: "H5AI",
|
||||
NewFs: NewFs,
|
||||
CommandHelp: commandHelp,
|
||||
Options: []fs.Option{{
|
||||
Name: "url",
|
||||
Help: "URL of HTTP host to connect to.\n\nE.g. \"https://example.com\", or \"https://user:pass@example.com\" to use a username and password.",
|
||||
Required: true,
|
||||
}, {
|
||||
Name: "headers",
|
||||
Help: `Set HTTP headers for all transactions.
|
||||
|
||||
Use this to set additional HTTP headers for all transactions.
|
||||
|
||||
The input format is comma separated list of key,value pairs. Standard
|
||||
[CSV encoding](https://godoc.org/encoding/csv) may be used.
|
||||
|
||||
For example, to set a Cookie use 'Cookie,name=value', or '"Cookie","name=value"'.
|
||||
|
||||
You can set multiple headers, e.g. '"Cookie","name=value","Authorization","xxx"'.`,
|
||||
Default: fs.CommaSepList{},
|
||||
Advanced: true,
|
||||
}, {
|
||||
Name: "no_slash",
|
||||
Help: `Set this if the site doesn't end directories with /.
|
||||
|
||||
Use this if your target website does not use / on the end of
|
||||
directories.
|
||||
|
||||
A / on the end of a path is how rclone normally tells the difference
|
||||
between files and directories. If this flag is set, then rclone will
|
||||
treat all files with Content-Type: text/html as directories and read
|
||||
URLs from them rather than downloading them.
|
||||
|
||||
Note that this may cause rclone to confuse genuine HTML files with
|
||||
directories.`,
|
||||
Default: false,
|
||||
Advanced: true,
|
||||
}, {
|
||||
Name: "no_head",
|
||||
Help: `Don't use HEAD requests.
|
||||
|
||||
HEAD requests are mainly used to find file sizes in dir listing.
|
||||
If your site is being very slow to load then you can try this option.
|
||||
Normally rclone does a HEAD request for each potential file in a
|
||||
directory listing to:
|
||||
|
||||
- find its size
|
||||
- check it really exists
|
||||
- check to see if it is a directory
|
||||
|
||||
If you set this option, rclone will not do the HEAD request. This will mean
|
||||
that directory listings are much quicker, but rclone won't have the times or
|
||||
sizes of any files, and some files that don't exist may be in the listing.`,
|
||||
Default: false,
|
||||
Advanced: true,
|
||||
}},
|
||||
}
|
||||
fs.Register(fsi)
|
||||
}
|
||||
|
||||
// Options defines the configuration for this backend
|
||||
type Options struct {
|
||||
Endpoint string `config:"url"`
|
||||
NoSlash bool `config:"no_slash"`
|
||||
NoHead bool `config:"no_head"`
|
||||
Headers fs.CommaSepList `config:"headers"`
|
||||
}
|
||||
|
||||
// Fs stores the interface to the remote HTTP files
|
||||
type Fs struct {
|
||||
name string
|
||||
root string
|
||||
features *fs.Features // optional features
|
||||
opt Options // options for this backend
|
||||
ci *fs.ConfigInfo // global config
|
||||
endpoint *url.URL
|
||||
endpointURL string // endpoint as a string
|
||||
httpClient *http.Client
|
||||
}
|
||||
|
||||
// Object is a remote object that has been stat'd (so it exists, but is not necessarily open for reading)
|
||||
type Object struct {
|
||||
fs *Fs
|
||||
remote string
|
||||
size int64
|
||||
modTime time.Time
|
||||
contentType string
|
||||
}
|
||||
|
||||
// statusError folds the outcome of an HTTP round trip into a single error.
//
// A non-nil err is returned untouched. Otherwise a status outside the 2xx
// range closes the response body and yields an error naming the status.
// For a 2xx response it returns nil and leaves the body open for the caller.
func statusError(res *http.Response, err error) error {
	switch {
	case err != nil:
		return err
	case res.StatusCode >= 200 && res.StatusCode <= 299:
		return nil
	}
	// The caller will not read an error response, so release it here.
	_ = res.Body.Close()
	return fmt.Errorf("HTTP Error: %s", res.Status)
}
|
||||
|
||||
// getFsEndpoint decides if url is to be considered a file or directory,
|
||||
// and returns a proper endpoint url to use for the fs.
|
||||
func getFsEndpoint(ctx context.Context, client *http.Client, url string, opt *Options) (string, bool) {
|
||||
// If url ends with '/' it is already a proper url always assumed to be a directory.
|
||||
if url[len(url)-1] == '/' {
|
||||
return url, false
|
||||
}
|
||||
|
||||
// If url does not end with '/' we send a HEAD request to decide
|
||||
// if it is directory or file, and if directory appends the missing
|
||||
// '/', or if file returns the directory url to parent instead.
|
||||
createFileResult := func() (string, bool) {
|
||||
fs.Debugf(nil, "If path is a directory you must add a trailing '/'")
|
||||
parent, _ := path.Split(url)
|
||||
return parent, true
|
||||
}
|
||||
createDirResult := func() (string, bool) {
|
||||
fs.Debugf(nil, "To avoid the initial HEAD request add a trailing '/' to the path")
|
||||
return url + "/", false
|
||||
}
|
||||
|
||||
// If HEAD requests are not allowed we just have to assume it is a file.
|
||||
if opt.NoHead {
|
||||
fs.Debugf(nil, "Assuming path is a file as --http-no-head is set")
|
||||
return createFileResult()
|
||||
}
|
||||
|
||||
// Use a client which doesn't follow redirects so the server
|
||||
// doesn't redirect http://host/dir to http://host/dir/
|
||||
noRedir := *client
|
||||
noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
||||
return http.ErrUseLastResponse
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
|
||||
if err != nil {
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD request could not be created: %v", err)
|
||||
return createFileResult()
|
||||
}
|
||||
addHeaders(req, opt)
|
||||
res, err := noRedir.Do(req)
|
||||
|
||||
if err != nil {
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD request could not be sent: %v", err)
|
||||
return createFileResult()
|
||||
}
|
||||
if res.StatusCode == http.StatusNotFound {
|
||||
fs.Debugf(nil, "Assuming path is a directory as HEAD response is it does not exist as a file (%s)", res.Status)
|
||||
return createDirResult()
|
||||
}
|
||||
if res.StatusCode == http.StatusMovedPermanently ||
|
||||
res.StatusCode == http.StatusFound ||
|
||||
res.StatusCode == http.StatusSeeOther ||
|
||||
res.StatusCode == http.StatusTemporaryRedirect ||
|
||||
res.StatusCode == http.StatusPermanentRedirect {
|
||||
redir := res.Header.Get("Location")
|
||||
if redir != "" {
|
||||
if redir[len(redir)-1] == '/' {
|
||||
fs.Debugf(nil, "Assuming path is a directory as HEAD response is redirect (%s) to a path that ends with '/': %s", res.Status, redir)
|
||||
return createDirResult()
|
||||
}
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) to a path that does not end with '/': %s", res.Status, redir)
|
||||
return createFileResult()
|
||||
}
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) but no location header", res.Status)
|
||||
return createFileResult()
|
||||
}
|
||||
if res.StatusCode < 200 || res.StatusCode > 299 {
|
||||
// Example is 403 (http.StatusForbidden) for servers not allowing HEAD requests.
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD response is an error (%s)", res.Status)
|
||||
return createFileResult()
|
||||
}
|
||||
|
||||
fs.Debugf(nil, "Assuming path is a file as HEAD response is success (%s)", res.Status)
|
||||
return createFileResult()
|
||||
}
|
||||
|
||||
// Make the http connection with opt
|
||||
func (f *Fs) httpConnection(ctx context.Context, opt *Options) (isFile bool, err error) {
|
||||
if len(opt.Headers)%2 != 0 {
|
||||
return false, errors.New("odd number of headers supplied")
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(opt.Endpoint, "/") {
|
||||
opt.Endpoint += "/"
|
||||
}
|
||||
|
||||
// Parse the endpoint and stick the root onto it
|
||||
base, err := url.Parse(opt.Endpoint)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
u, err := rest.URLJoin(base, rest.URLPathEscape(f.root))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
client := fshttp.NewClient(ctx)
|
||||
|
||||
endpoint, isFile := getFsEndpoint(ctx, client, u.String(), opt)
|
||||
fs.Debugf(nil, "Root: %s", endpoint)
|
||||
u, err = url.Parse(endpoint)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Update f with the new parameters
|
||||
f.httpClient = client
|
||||
f.endpoint = u
|
||||
f.endpointURL = u.String()
|
||||
return isFile, nil
|
||||
}
|
||||
|
||||
// NewFs creates a new Fs object from the name and root. It connects to
|
||||
// the host specified in the config file.
|
||||
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
|
||||
// Parse config into Options struct
|
||||
opt := new(Options)
|
||||
err := configstruct.Set(m, opt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ci := fs.GetConfig(ctx)
|
||||
f := &Fs{
|
||||
name: name,
|
||||
root: root,
|
||||
opt: *opt,
|
||||
ci: ci,
|
||||
}
|
||||
f.features = (&fs.Features{
|
||||
CanHaveEmptyDirectories: true,
|
||||
}).Fill(ctx, f)
|
||||
|
||||
// Make the http connection
|
||||
isFile, err := f.httpConnection(ctx, opt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if isFile {
|
||||
// return an error with an fs which points to the parent
|
||||
return f, fs.ErrorIsFile
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(f.endpointURL, "/") {
|
||||
return nil, errors.New("internal error: url doesn't end with /")
|
||||
}
|
||||
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// Name returns the configured name of the file system
|
||||
func (f *Fs) Name() string {
|
||||
return f.name
|
||||
}
|
||||
|
||||
// Root returns the root for the filesystem
|
||||
func (f *Fs) Root() string {
|
||||
return f.root
|
||||
}
|
||||
|
||||
// String returns the URL for the filesystem
|
||||
func (f *Fs) String() string {
|
||||
return f.endpointURL
|
||||
}
|
||||
|
||||
// Features returns the optional features of this Fs
|
||||
func (f *Fs) Features() *fs.Features {
|
||||
return f.features
|
||||
}
|
||||
|
||||
// Precision is the remote http file system's modtime precision, which we have no way of knowing. We estimate at 1s
|
||||
func (f *Fs) Precision() time.Duration {
|
||||
return time.Second
|
||||
}
|
||||
|
||||
// NewObject creates a new remote http file object
|
||||
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
|
||||
o := &Object{
|
||||
fs: f,
|
||||
remote: remote,
|
||||
}
|
||||
err := o.head(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return o, nil
|
||||
}
|
||||
|
||||
// Join's the remote onto the base URL
|
||||
func (f *Fs) url(remote string) string {
|
||||
return f.endpointURL + rest.URLPathEscape(remote)
|
||||
}
|
||||
|
||||
// Errors returned by parseName, one per reason a link can be rejected.
var (
	// the link could not be resolved against the base URL
	errURLJoinFailed = errors.New("URLJoin failed")
	// the resolved URL has a query part
	errFoundQuestionMark = errors.New("found ? in URL")
	// the link points at a different host
	errHostMismatch = errors.New("host mismatch")
	// the link uses a different scheme
	errSchemeMismatch = errors.New("scheme mismatch")
	// the link does not lie below the base path
	errNotUnderRoot = errors.New("not under root")
	// the link is the base itself
	errNameIsEmpty = errors.New("name is empty")
	// the link is more than one level below the base
	errNameContainsSlash = errors.New("name contains /")
)
|
||||
|
||||
// parseName turns a name as found in the page into a remote path or returns an error
|
||||
func parseName(base *url.URL, name string) (string, error) {
|
||||
// make URL absolute
|
||||
u, err := rest.URLJoin(base, name)
|
||||
if err != nil {
|
||||
return "", errURLJoinFailed
|
||||
}
|
||||
// check it doesn't have URL parameters
|
||||
uStr := u.String()
|
||||
if strings.Contains(uStr, "?") {
|
||||
return "", errFoundQuestionMark
|
||||
}
|
||||
// check that this is going back to the same host and scheme
|
||||
if base.Host != u.Host {
|
||||
return "", errHostMismatch
|
||||
}
|
||||
if base.Scheme != u.Scheme {
|
||||
return "", errSchemeMismatch
|
||||
}
|
||||
// check has path prefix
|
||||
if !strings.HasPrefix(u.Path, base.Path) {
|
||||
return "", errNotUnderRoot
|
||||
}
|
||||
// calculate the name relative to the base
|
||||
name = u.Path[len(base.Path):]
|
||||
// mustn't be empty
|
||||
if name == "" {
|
||||
return "", errNameIsEmpty
|
||||
}
|
||||
// mustn't contain a / - we are looking for a single level directory
|
||||
slash := strings.Index(name, "/")
|
||||
if slash >= 0 && slash != len(name)-1 {
|
||||
return "", errNameContainsSlash
|
||||
}
|
||||
return name, nil
|
||||
}
|
||||
|
||||
// Parse turns HTML for a directory into names
|
||||
// base should be the base URL to resolve any relative names from
|
||||
func parse(base *url.URL, in io.Reader) (names []string, err error) {
|
||||
doc, err := html.Parse(in)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var (
|
||||
walk func(*html.Node)
|
||||
seen = make(map[string]struct{})
|
||||
)
|
||||
walk = func(n *html.Node) {
|
||||
if n.Type == html.ElementNode && n.Data == "a" {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "href" {
|
||||
name, err := parseName(base, a.Val)
|
||||
if err == nil {
|
||||
if _, found := seen[name]; !found {
|
||||
names = append(names, name)
|
||||
seen[name] = struct{}{}
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
walk(c)
|
||||
}
|
||||
}
|
||||
walk(doc)
|
||||
return names, nil
|
||||
}
|
||||
|
||||
// Adds the configured headers to the request if any
|
||||
func addHeaders(req *http.Request, opt *Options) {
|
||||
for i := 0; i < len(opt.Headers); i += 2 {
|
||||
key := opt.Headers[i]
|
||||
value := opt.Headers[i+1]
|
||||
req.Header.Add(key, value)
|
||||
}
|
||||
}
|
||||
|
||||
// Adds the configured headers to the request if any
|
||||
func (f *Fs) addHeaders(req *http.Request) {
|
||||
addHeaders(req, &f.opt)
|
||||
}
|
||||
|
||||
// Read the directory passed in
|
||||
func (f *Fs) readDir(ctx context.Context, dir string) (names []string, err error) {
|
||||
URL := f.url(dir)
|
||||
u, err := url.Parse(URL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to readDir: %w", err)
|
||||
}
|
||||
if !strings.HasSuffix(URL, "/") {
|
||||
return nil, fmt.Errorf("internal error: readDir URL %q didn't end in /", URL)
|
||||
}
|
||||
// Do the request
|
||||
payload := &struct {
|
||||
Action string `json:"action"`
|
||||
Items map[string]string `json:"items"`
|
||||
}{
|
||||
Action: "get",
|
||||
Items: map[string]string{
|
||||
"href": u.Path,
|
||||
"what": "1",
|
||||
},
|
||||
}
|
||||
buf, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("readDir failed: %w", err)
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", URL, bytes.NewReader(buf))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("readDir failed: %w", err)
|
||||
}
|
||||
f.addHeaders(req)
|
||||
res, err := f.httpClient.Do(req)
|
||||
if err == nil {
|
||||
defer fs.CheckClose(res.Body, &err)
|
||||
if res.StatusCode == http.StatusNotFound {
|
||||
return nil, fs.ErrorDirNotFound
|
||||
}
|
||||
}
|
||||
err = statusError(res, err)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to readDir: %w", err)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(res.Body)
|
||||
type Item struct {
|
||||
Href string `json:"href"`
|
||||
}
|
||||
type Items struct {
|
||||
Items []Item `json:"items"`
|
||||
}
|
||||
items := Items{}
|
||||
err = json.Unmarshal(body, &items)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to readDir: %w", err)
|
||||
}
|
||||
for _, v := range items.Items {
|
||||
href, _ := url.QueryUnescape(v.Href)
|
||||
if href != u.Path && strings.HasPrefix(href, u.Path) {
|
||||
names = append(names, strings.TrimPrefix(href, u.Path))
|
||||
}
|
||||
}
|
||||
return names, nil
|
||||
}
|
||||
|
||||
// List the objects and directories in dir into entries. The
|
||||
// entries can be returned in any order but should be for a
|
||||
// complete directory.
|
||||
//
|
||||
// dir should be "" to list the root, and should not have
|
||||
// trailing slashes.
|
||||
//
|
||||
// This should return ErrDirNotFound if the directory isn't
|
||||
// found.
|
||||
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
||||
if !strings.HasSuffix(dir, "/") && dir != "" {
|
||||
dir += "/"
|
||||
}
|
||||
names, err := f.readDir(ctx, dir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error listing %q: %w", dir, err)
|
||||
}
|
||||
var (
|
||||
entriesMu sync.Mutex // to protect entries
|
||||
wg sync.WaitGroup
|
||||
checkers = f.ci.Checkers
|
||||
in = make(chan string, checkers)
|
||||
)
|
||||
add := func(entry fs.DirEntry) {
|
||||
entriesMu.Lock()
|
||||
entries = append(entries, entry)
|
||||
entriesMu.Unlock()
|
||||
}
|
||||
for i := 0; i < checkers; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for remote := range in {
|
||||
file := &Object{
|
||||
fs: f,
|
||||
remote: remote,
|
||||
}
|
||||
switch err := file.head(ctx); err {
|
||||
case nil:
|
||||
add(file)
|
||||
case fs.ErrorNotAFile:
|
||||
// ...found a directory not a file
|
||||
add(fs.NewDir(remote, time.Time{}))
|
||||
default:
|
||||
fs.Debugf(remote, "skipping because of error: %v", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
for _, name := range names {
|
||||
isDir := name[len(name)-1] == '/'
|
||||
name = strings.TrimRight(name, "/")
|
||||
remote := path.Join(dir, name)
|
||||
if isDir {
|
||||
add(fs.NewDir(remote, time.Time{}))
|
||||
} else {
|
||||
in <- remote
|
||||
}
|
||||
}
|
||||
close(in)
|
||||
wg.Wait()
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
// Put in to the remote path with the modTime given of the given size
|
||||
//
|
||||
// May create the object even if it returns an error - if so
|
||||
// will return the object and the error, otherwise will return
|
||||
// nil and the error
|
||||
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
||||
return nil, errorReadOnly
|
||||
}
|
||||
|
||||
// PutStream uploads to the remote path with the modTime given of indeterminate size
|
||||
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
||||
return nil, errorReadOnly
|
||||
}
|
||||
|
||||
// Fs is the filesystem this remote http file object is located within
|
||||
func (o *Object) Fs() fs.Info {
|
||||
return o.fs
|
||||
}
|
||||
|
||||
// String returns the URL to the remote HTTP file
|
||||
func (o *Object) String() string {
|
||||
if o == nil {
|
||||
return "<nil>"
|
||||
}
|
||||
return o.remote
|
||||
}
|
||||
|
||||
// Remote the name of the remote HTTP file, relative to the fs root
|
||||
func (o *Object) Remote() string {
|
||||
return o.remote
|
||||
}
|
||||
|
||||
// Hash returns "" since HTTP (in Go or OpenSSH) doesn't support remote calculation of hashes
|
||||
func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) {
|
||||
return "", hash.ErrUnsupported
|
||||
}
|
||||
|
||||
// Size returns the size in bytes of the remote http file
|
||||
func (o *Object) Size() int64 {
|
||||
return o.size
|
||||
}
|
||||
|
||||
// ModTime returns the modification time of the remote http file
|
||||
func (o *Object) ModTime(ctx context.Context) time.Time {
|
||||
return o.modTime
|
||||
}
|
||||
|
||||
// url returns the native url of the object
|
||||
func (o *Object) url() string {
|
||||
return o.fs.url(o.remote)
|
||||
}
|
||||
|
||||
// head sends a HEAD request to update info fields in the Object
|
||||
func (o *Object) head(ctx context.Context) error {
|
||||
if o.fs.opt.NoHead {
|
||||
o.size = -1
|
||||
o.modTime = timeUnset
|
||||
o.contentType = fs.MimeType(ctx, o)
|
||||
return nil
|
||||
}
|
||||
url := o.url()
|
||||
req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("stat failed: %w", err)
|
||||
}
|
||||
o.fs.addHeaders(req)
|
||||
res, err := o.fs.httpClient.Do(req)
|
||||
if err == nil && res.StatusCode == http.StatusNotFound {
|
||||
return fs.ErrorObjectNotFound
|
||||
}
|
||||
err = statusError(res, err)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to stat: %w", err)
|
||||
}
|
||||
return o.decodeMetadata(ctx, res)
|
||||
}
|
||||
|
||||
// decodeMetadata updates info fields in the Object according to HTTP response headers
|
||||
func (o *Object) decodeMetadata(ctx context.Context, res *http.Response) error {
|
||||
t, err := http.ParseTime(res.Header.Get("Last-Modified"))
|
||||
if err != nil {
|
||||
t = timeUnset
|
||||
}
|
||||
o.modTime = t
|
||||
o.contentType = res.Header.Get("Content-Type")
|
||||
o.size = rest.ParseSizeFromHeaders(res.Header)
|
||||
|
||||
// If NoSlash is set then check ContentType to see if it is a directory
|
||||
if o.fs.opt.NoSlash {
|
||||
mediaType, _, err := mime.ParseMediaType(o.contentType)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse Content-Type: %q: %w", o.contentType, err)
|
||||
}
|
||||
if mediaType == "text/html" {
|
||||
return fs.ErrorNotAFile
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetModTime sets the modification and access time to the specified time
|
||||
//
|
||||
// it also updates the info field
|
||||
func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
|
||||
return errorReadOnly
|
||||
}
|
||||
|
||||
// Storable returns whether the remote http file is a regular file (not a directory, symbolic link, block device, character device, named pipe, etc.)
|
||||
func (o *Object) Storable() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// Open a remote http file object for reading. Seek is supported
|
||||
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
|
||||
url := o.url()
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Open failed: %w", err)
|
||||
}
|
||||
|
||||
// Add optional headers
|
||||
for k, v := range fs.OpenOptionHeaders(options) {
|
||||
req.Header.Add(k, v)
|
||||
}
|
||||
o.fs.addHeaders(req)
|
||||
|
||||
// Do the request
|
||||
res, err := o.fs.httpClient.Do(req)
|
||||
err = statusError(res, err)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Open failed: %w", err)
|
||||
}
|
||||
if err = o.decodeMetadata(ctx, res); err != nil {
|
||||
return nil, fmt.Errorf("decodeMetadata failed: %w", err)
|
||||
}
|
||||
return res.Body, nil
|
||||
}
|
||||
|
||||
// Hashes returns hash.HashNone to indicate remote hashing is unavailable
|
||||
func (f *Fs) Hashes() hash.Set {
|
||||
return hash.Set(hash.None)
|
||||
}
|
||||
|
||||
// Mkdir makes the root directory of the Fs object
|
||||
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
|
||||
return errorReadOnly
|
||||
}
|
||||
|
||||
// Remove a remote http file object
|
||||
func (o *Object) Remove(ctx context.Context) error {
|
||||
return errorReadOnly
|
||||
}
|
||||
|
||||
// Rmdir removes the root directory of the Fs object
|
||||
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
|
||||
return errorReadOnly
|
||||
}
|
||||
|
||||
// Update in to the object with the modTime given of the given size
|
||||
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
|
||||
return errorReadOnly
|
||||
}
|
||||
|
||||
// MimeType of an Object if known, "" otherwise
|
||||
func (o *Object) MimeType(ctx context.Context) string {
|
||||
return o.contentType
|
||||
}
|
||||
|
||||
var commandHelp = []fs.CommandHelp{{
|
||||
Name: "set",
|
||||
Short: "Set command for updating the config parameters.",
|
||||
Long: `This set command can be used to update the config parameters
|
||||
for a running http backend.
|
||||
|
||||
Usage Examples:
|
||||
|
||||
rclone backend set remote: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||
rclone rc backend/command command=set fs=remote: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||
rclone rc backend/command command=set fs=remote: -o url=https://example.com
|
||||
|
||||
The option keys are named as they are in the config file.
|
||||
|
||||
This rebuilds the connection to the http backend when it is called with
|
||||
the new parameters. Only new parameters need be passed as the values
|
||||
will default to those currently in use.
|
||||
|
||||
It doesn't return anything.
|
||||
`,
|
||||
}}
|
||||
|
||||
// Command the backend to run a named command
|
||||
//
|
||||
// The command run is name
|
||||
// args may be used to read arguments from
|
||||
// opts may be used to read optional arguments from
|
||||
//
|
||||
// The result should be capable of being JSON encoded
|
||||
// If it is a string or a []string it will be shown to the user
|
||||
// otherwise it will be JSON encoded and shown to the user like that
|
||||
func (f *Fs) Command(ctx context.Context, name string, arg []string, opt map[string]string) (out interface{}, err error) {
|
||||
switch name {
|
||||
case "set":
|
||||
newOpt := f.opt
|
||||
err := configstruct.Set(configmap.Simple(opt), &newOpt)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading config: %w", err)
|
||||
}
|
||||
_, err = f.httpConnection(ctx, &newOpt)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("updating session: %w", err)
|
||||
}
|
||||
f.opt = newOpt
|
||||
keys := []string{}
|
||||
for k := range opt {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
fs.Logf(f, "Updated config values: %s", strings.Join(keys, ", "))
|
||||
return nil, nil
|
||||
default:
|
||||
return nil, fs.ErrorCommandNotFound
|
||||
}
|
||||
}
|
||||
|
||||
// Check the interfaces are satisfied
|
||||
var (
|
||||
_ fs.Fs = &Fs{}
|
||||
_ fs.PutStreamer = &Fs{}
|
||||
_ fs.Object = &Object{}
|
||||
_ fs.MimeTyper = &Object{}
|
||||
_ fs.Commander = &Fs{}
|
||||
)
|
265
docs/content/h5ai.md
Normal file
265
docs/content/h5ai.md
Normal file
@ -0,0 +1,265 @@
|
||||
---
|
||||
title: "H5AI Remote"
|
||||
description: "Read only remote for H5AI servers"
|
||||
versionIntroduced: "v1.65"
|
||||
---
|
||||
|
||||
# {{< icon "fa fa-globe" >}} H5AI
|
||||
|
||||
The H5AI remote is a read only remote for reading files of a
|
||||
webserver. The webserver should provide file listings which rclone
|
||||
will read and turn into a remote. This has been tested with common
|
||||
webservers such as Apache/Nginx/Caddy and will likely work with file
|
||||
listings from most web servers. (If it doesn't then please file an
|
||||
issue, or send a pull request!)
|
||||
|
||||
Paths are specified as `remote:` or `remote:path`.
|
||||
|
||||
The `remote:` represents the configured [url](#http-url), and any path following
|
||||
it will be resolved relative to this url, according to the URL standard. This
|
||||
means with remote url `https://beta.rclone.org/branch` and path `fix`, the
|
||||
resolved URL will be `https://beta.rclone.org/branch/fix`, while with path
|
||||
`/fix` the resolved URL will be `https://beta.rclone.org/fix` as the absolute
|
||||
path is resolved from the root of the domain.
|
||||
|
||||
If the path following the `remote:` ends with `/` it will be assumed to point
|
||||
to a directory. If the path does not end with `/`, then a HEAD request is sent
|
||||
and the response used to decide if it is treated as a file or a directory
|
||||
(run with `-vv` to see details). When [--http-no-head](#http-no-head) is
|
||||
specified, a path without ending `/` is always assumed to be a file. If rclone
|
||||
incorrectly assumes the path is a file, the solution is to specify the path with
|
||||
ending `/`. When you know the path is a directory, ending it with `/` is always
|
||||
better as it avoids the initial HEAD request.
|
||||
|
||||
To just download a single file it is easier to use
|
||||
[copyurl](/commands/rclone_copyurl/).
|
||||
|
||||
## Configuration
|
||||
|
||||
Here is an example of how to make a remote called `remote`. First
|
||||
run:
|
||||
|
||||
rclone config
|
||||
|
||||
This will guide you through an interactive setup process:
|
||||
|
||||
```
|
||||
No remotes found, make a new one?
|
||||
n) New remote
|
||||
s) Set configuration password
|
||||
q) Quit config
|
||||
n/s/q> n
|
||||
name> remote
|
||||
Type of storage to configure.
|
||||
Choose a number from below, or type in your own value
|
||||
[snip]
|
||||
XX / H5AI
|
||||
\ "h5ai"
|
||||
[snip]
|
||||
Storage> h5ai
|
||||
URL of http host to connect to
|
||||
Choose a number from below, or type in your own value
|
||||
1 / Connect to example.com
|
||||
\ "https://example.com"
|
||||
url> https://beta.rclone.org
|
||||
Remote config
|
||||
--------------------
|
||||
[remote]
|
||||
url = https://beta.rclone.org
|
||||
--------------------
|
||||
y) Yes this is OK
|
||||
e) Edit this remote
|
||||
d) Delete this remote
|
||||
y/e/d> y
|
||||
Current remotes:
|
||||
|
||||
Name Type
|
||||
==== ====
|
||||
remote h5ai
|
||||
|
||||
e) Edit existing remote
|
||||
n) New remote
|
||||
d) Delete remote
|
||||
r) Rename remote
|
||||
c) Copy remote
|
||||
s) Set configuration password
|
||||
q) Quit config
|
||||
e/n/d/r/c/s/q> q
|
||||
```
|
||||
|
||||
This remote is called `remote` and can now be used like this
|
||||
|
||||
See all the top level directories
|
||||
|
||||
rclone lsd remote:
|
||||
|
||||
List the contents of a directory
|
||||
|
||||
rclone ls remote:directory
|
||||
|
||||
Sync the remote `directory` to `/home/local/directory`, deleting any excess files.
|
||||
|
||||
rclone sync --interactive remote:directory /home/local/directory
|
||||
|
||||
### Read only
|
||||
|
||||
This remote is read only - you can't upload files to an HTTP server.
|
||||
|
||||
### Modification times
|
||||
|
||||
Most HTTP servers store time accurate to 1 second.
|
||||
|
||||
### Checksum
|
||||
|
||||
No checksums are stored.
|
||||
|
||||
### Usage without a config file
|
||||
|
||||
Since the h5ai remote only has one config parameter it is easy to use
|
||||
without a config file:
|
||||
|
||||
rclone lsd --h5ai-url https://beta.rclone.org :h5ai:
|
||||
|
||||
or:
|
||||
|
||||
rclone lsd :h5ai,url='https://beta.rclone.org':
|
||||
|
||||
{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/h5ai/h5ai.go then run make backenddocs" >}}
|
||||
### Standard options
|
||||
|
||||
Here are the Standard options specific to http (HTTP).
|
||||
|
||||
#### --http-url
|
||||
|
||||
URL of HTTP host to connect to.
|
||||
|
||||
E.g. "https://example.com", or "https://user:pass@example.com" to use a username and password.
|
||||
|
||||
Properties:
|
||||
|
||||
- Config: url
|
||||
- Env Var: RCLONE_HTTP_URL
|
||||
- Type: string
|
||||
- Required: true
|
||||
|
||||
### Advanced options
|
||||
|
||||
Here are the Advanced options specific to http (HTTP).
|
||||
|
||||
#### --http-headers
|
||||
|
||||
Set HTTP headers for all transactions.
|
||||
|
||||
Use this to set additional HTTP headers for all transactions.
|
||||
|
||||
The input format is comma separated list of key,value pairs. Standard
|
||||
[CSV encoding](https://godoc.org/encoding/csv) may be used.
|
||||
|
||||
For example, to set a Cookie use 'Cookie,name=value', or '"Cookie","name=value"'.
|
||||
|
||||
You can set multiple headers, e.g. '"Cookie","name=value","Authorization","xxx"'.
|
||||
|
||||
Properties:
|
||||
|
||||
- Config: headers
|
||||
- Env Var: RCLONE_HTTP_HEADERS
|
||||
- Type: CommaSepList
|
||||
- Default:
|
||||
|
||||
#### --http-no-slash
|
||||
|
||||
Set this if the site doesn't end directories with /.
|
||||
|
||||
Use this if your target website does not use / on the end of
|
||||
directories.
|
||||
|
||||
A / on the end of a path is how rclone normally tells the difference
|
||||
between files and directories. If this flag is set, then rclone will
|
||||
treat all files with Content-Type: text/html as directories and read
|
||||
URLs from them rather than downloading them.
|
||||
|
||||
Note that this may cause rclone to confuse genuine HTML files with
|
||||
directories.
|
||||
|
||||
Properties:
|
||||
|
||||
- Config: no_slash
|
||||
- Env Var: RCLONE_HTTP_NO_SLASH
|
||||
- Type: bool
|
||||
- Default: false
|
||||
|
||||
#### --http-no-head
|
||||
|
||||
Don't use HEAD requests.
|
||||
|
||||
HEAD requests are mainly used to find file sizes in dir listing.
|
||||
If your site is being very slow to load then you can try this option.
|
||||
Normally rclone does a HEAD request for each potential file in a
|
||||
directory listing to:
|
||||
|
||||
- find its size
|
||||
- check it really exists
|
||||
- check to see if it is a directory
|
||||
|
||||
If you set this option, rclone will not do the HEAD request. This will mean
|
||||
that directory listings are much quicker, but rclone won't have the times or
|
||||
sizes of any files, and some files that don't exist may be in the listing.
|
||||
|
||||
Properties:
|
||||
|
||||
- Config: no_head
|
||||
- Env Var: RCLONE_HTTP_NO_HEAD
|
||||
- Type: bool
|
||||
- Default: false
|
||||
|
||||
## Backend commands
|
||||
|
||||
Here are the commands specific to the http backend.
|
||||
|
||||
Run them with
|
||||
|
||||
rclone backend COMMAND remote:
|
||||
|
||||
The help below will explain what arguments each command takes.
|
||||
|
||||
See the [backend](/commands/rclone_backend/) command for more
|
||||
info on how to pass options and arguments.
|
||||
|
||||
These can be run on a running backend using the rc command
|
||||
[backend/command](/rc/#backend-command).
|
||||
|
||||
### set
|
||||
|
||||
Set command for updating the config parameters.
|
||||
|
||||
rclone backend set remote: [options] [<arguments>+]
|
||||
|
||||
This set command can be used to update the config parameters
|
||||
for a running http backend.
|
||||
|
||||
Usage Examples:
|
||||
|
||||
rclone backend set remote: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||
rclone rc backend/command command=set fs=remote: [-o opt_name=opt_value] [-o opt_name2=opt_value2]
|
||||
rclone rc backend/command command=set fs=remote: -o url=https://example.com
|
||||
|
||||
The option keys are named as they are in the config file.
|
||||
|
||||
This rebuilds the connection to the http backend when it is called with
|
||||
the new parameters. Only new parameters need be passed as the values
|
||||
will default to those currently in use.
|
||||
|
||||
It doesn't return anything.
|
||||
|
||||
|
||||
{{< rem autogenerated options stop >}}
|
||||
|
||||
## Limitations
|
||||
|
||||
`rclone about` is not supported by the h5ai backend. Backends without
|
||||
this capability cannot determine free space for an rclone mount or
|
||||
use policy `mfs` (most free space) as a member of an rclone union
|
||||
remote.
|
||||
|
||||
See [List of backends that do not support rclone about](https://rclone.org/overview/#optional-features) and [rclone about](https://rclone.org/commands/rclone_about/)
|
||||
|
Loading…
Reference in New Issue
Block a user