2017-03-17 09:47:23 +01:00
|
|
|
// Package http provides a filesystem interface using net/http
|
2017-04-22 12:48:32 +02:00
|
|
|
//
|
2017-06-19 18:36:14 +02:00
|
|
|
// It treats HTML pages served from the endpoint as directory
|
2017-04-22 12:48:32 +02:00
|
|
|
// listings, and includes any links found as files.
|
2017-03-17 09:47:23 +01:00
|
|
|
package http
|
|
|
|
|
|
|
|
import (
|
2019-06-17 10:34:30 +02:00
|
|
|
"context"
|
2021-11-04 11:12:57 +01:00
|
|
|
"errors"
|
|
|
|
"fmt"
|
2017-03-17 09:47:23 +01:00
|
|
|
"io"
|
2019-02-08 14:58:47 +01:00
|
|
|
"mime"
|
2017-03-17 09:47:23 +01:00
|
|
|
"net/http"
|
|
|
|
"net/url"
|
|
|
|
"path"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
2019-09-09 22:03:20 +02:00
|
|
|
"sync"
|
2017-03-17 09:47:23 +01:00
|
|
|
"time"
|
|
|
|
|
2019-07-28 19:47:38 +02:00
|
|
|
"github.com/rclone/rclone/fs"
|
|
|
|
"github.com/rclone/rclone/fs/config/configmap"
|
|
|
|
"github.com/rclone/rclone/fs/config/configstruct"
|
|
|
|
"github.com/rclone/rclone/fs/fshttp"
|
|
|
|
"github.com/rclone/rclone/fs/hash"
|
|
|
|
"github.com/rclone/rclone/lib/rest"
|
2017-06-19 16:05:09 +02:00
|
|
|
"golang.org/x/net/html"
|
2017-03-17 09:47:23 +01:00
|
|
|
)
|
|
|
|
|
2017-06-19 18:36:14 +02:00
|
|
|
// errorReadOnly is the error returned by every operation that would modify
// the remote.
var errorReadOnly = errors.New("http remotes are read only")

// timeUnset is the Unix epoch; it stands in for a modification time that is
// not known.
var timeUnset = time.Unix(0, 0)
|
2017-06-19 16:05:09 +02:00
|
|
|
|
2017-03-17 09:47:23 +01:00
|
|
|
func init() {
|
|
|
|
fsi := &fs.RegInfo{
|
|
|
|
Name: "http",
|
|
|
|
Description: "http Connection",
|
|
|
|
NewFs: NewFs,
|
|
|
|
Options: []fs.Option{{
|
2017-06-19 18:36:14 +02:00
|
|
|
Name: "url",
|
2021-08-22 15:11:41 +02:00
|
|
|
Help: "URL of http host to connect to.\n\nE.g. \"https://example.com\", or \"https://user:pass@example.com\" to use a username and password.",
|
2018-05-14 19:06:57 +02:00
|
|
|
Required: true,
|
2019-08-12 16:29:35 +02:00
|
|
|
}, {
|
|
|
|
Name: "headers",
|
2021-08-16 11:30:01 +02:00
|
|
|
Help: `Set HTTP headers for all transactions.
|
2019-08-12 16:29:35 +02:00
|
|
|
|
2021-08-16 11:30:01 +02:00
|
|
|
Use this to set additional HTTP headers for all transactions.
|
2019-08-12 16:29:35 +02:00
|
|
|
|
|
|
|
The input format is comma separated list of key,value pairs. Standard
|
|
|
|
[CSV encoding](https://godoc.org/encoding/csv) may be used.
|
|
|
|
|
2021-11-04 12:50:43 +01:00
|
|
|
For example, to set a Cookie use 'Cookie,name=value', or '"Cookie","name=value"'.
|
2019-08-12 16:29:35 +02:00
|
|
|
|
2022-01-24 15:16:08 +01:00
|
|
|
You can set multiple headers, e.g. '"Cookie","name=value","Authorization","xxx"'.`,
|
2019-08-12 16:29:35 +02:00
|
|
|
Default: fs.CommaSepList{},
|
|
|
|
Advanced: true,
|
2019-02-08 14:58:47 +01:00
|
|
|
}, {
|
|
|
|
Name: "no_slash",
|
2021-08-16 11:30:01 +02:00
|
|
|
Help: `Set this if the site doesn't end directories with /.
|
2019-02-08 14:58:47 +01:00
|
|
|
|
|
|
|
Use this if your target website does not use / on the end of
|
|
|
|
directories.
|
|
|
|
|
|
|
|
A / on the end of a path is how rclone normally tells the difference
|
|
|
|
between files and directories. If this flag is set, then rclone will
|
|
|
|
treat all files with Content-Type: text/html as directories and read
|
|
|
|
URLs from them rather than downloading them.
|
|
|
|
|
|
|
|
Note that this may cause rclone to confuse genuine HTML files with
|
|
|
|
directories.`,
|
|
|
|
Default: false,
|
|
|
|
Advanced: true,
|
2019-09-09 22:13:10 +02:00
|
|
|
}, {
|
|
|
|
Name: "no_head",
|
2022-01-23 23:39:05 +01:00
|
|
|
Help: `Don't use HEAD requests.
|
2019-09-09 22:13:10 +02:00
|
|
|
|
2022-01-23 23:39:05 +01:00
|
|
|
HEAD requests are mainly used to find file sizes in dir listing.
|
2019-09-09 22:13:10 +02:00
|
|
|
If your site is being very slow to load then you can try this option.
|
|
|
|
Normally rclone does a HEAD request for each potential file in a
|
|
|
|
directory listing to:
|
|
|
|
|
|
|
|
- find its size
|
|
|
|
- check it really exists
|
|
|
|
- check to see if it is a directory
|
|
|
|
|
2022-01-24 15:16:08 +01:00
|
|
|
If you set this option, rclone will not do the HEAD request. This will mean
|
|
|
|
that directory listings are much quicker, but rclone won't have the times or
|
|
|
|
sizes of any files, and some files that don't exist may be in the listing.`,
|
2019-09-09 22:13:10 +02:00
|
|
|
Default: false,
|
|
|
|
Advanced: true,
|
2017-03-17 09:47:23 +01:00
|
|
|
}},
|
|
|
|
}
|
|
|
|
fs.Register(fsi)
|
|
|
|
}
|
|
|
|
|
2018-05-14 19:06:57 +02:00
|
|
|
// Options defines the configuration for this backend
|
|
|
|
type Options struct {
|
2019-08-12 16:29:35 +02:00
|
|
|
Endpoint string `config:"url"`
|
|
|
|
NoSlash bool `config:"no_slash"`
|
2019-09-09 22:13:10 +02:00
|
|
|
NoHead bool `config:"no_head"`
|
2019-08-12 16:29:35 +02:00
|
|
|
Headers fs.CommaSepList `config:"headers"`
|
2018-05-14 19:06:57 +02:00
|
|
|
}
|
|
|
|
|
2017-04-22 12:48:32 +02:00
|
|
|
// Fs stores the interface to the remote HTTP files
|
2017-03-17 09:47:23 +01:00
|
|
|
type Fs struct {
|
2017-08-02 14:19:36 +02:00
|
|
|
name string
|
|
|
|
root string
|
2020-11-05 12:33:32 +01:00
|
|
|
features *fs.Features // optional features
|
|
|
|
opt Options // options for this backend
|
|
|
|
ci *fs.ConfigInfo // global config
|
2017-08-02 14:19:36 +02:00
|
|
|
endpoint *url.URL
|
|
|
|
endpointURL string // endpoint as a string
|
|
|
|
httpClient *http.Client
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Object is a remote object that has been stat'd (so it exists, but is not necessarily open for reading)
|
|
|
|
type Object struct {
|
2017-06-19 18:36:14 +02:00
|
|
|
fs *Fs
|
|
|
|
remote string
|
|
|
|
size int64
|
|
|
|
modTime time.Time
|
|
|
|
contentType string
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2017-06-19 18:36:14 +02:00
|
|
|
// statusError folds the result of an HTTP round trip into a single error.
//
// A non-nil err is handed back untouched. Otherwise, a status code outside
// the 200-299 range closes the response body and produces an error built
// from res.Status. A 2xx response yields nil and leaves the body open for
// the caller.
func statusError(res *http.Response, err error) error {
	switch {
	case err != nil:
		return err
	case res.StatusCode >= 200 && res.StatusCode <= 299:
		return nil
	}
	// The status itself is the error being reported, so a failure to close
	// the body is deliberately ignored.
	_ = res.Body.Close()
	return fmt.Errorf("HTTP Error: %s", res.Status)
}
|
|
|
|
|
2022-01-23 23:39:05 +01:00
|
|
|
// getFsEndpoint decides if url is to be considered a file or directory,
|
|
|
|
// and returns a proper endpoint url to use for the fs.
|
|
|
|
func getFsEndpoint(ctx context.Context, client *http.Client, url string, opt *Options) (string, bool) {
|
|
|
|
// If url ends with '/' it is already a proper url always assumed to be a directory.
|
|
|
|
if url[len(url)-1] == '/' {
|
|
|
|
return url, false
|
|
|
|
}
|
|
|
|
|
|
|
|
// If url does not end with '/' we send a HEAD request to decide
|
|
|
|
// if it is directory or file, and if directory appends the missing
|
|
|
|
// '/', or if file returns the directory url to parent instead.
|
|
|
|
createFileResult := func() (string, bool) {
|
|
|
|
fs.Debugf(nil, "If path is a directory you must add a trailing '/'")
|
|
|
|
parent, _ := path.Split(url)
|
|
|
|
return parent, true
|
|
|
|
}
|
|
|
|
createDirResult := func() (string, bool) {
|
|
|
|
fs.Debugf(nil, "To avoid the initial HEAD request add a trailing '/' to the path")
|
|
|
|
return url + "/", false
|
|
|
|
}
|
|
|
|
|
|
|
|
// If HEAD requests are not allowed we just have to assume it is a file.
|
|
|
|
if opt.NoHead {
|
|
|
|
fs.Debugf(nil, "Assuming path is a file as --http-no-head is set")
|
|
|
|
return createFileResult()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use a client which doesn't follow redirects so the server
|
|
|
|
// doesn't redirect http://host/dir to http://host/dir/
|
|
|
|
noRedir := *client
|
|
|
|
noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
|
|
|
return http.ErrUseLastResponse
|
|
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
|
|
|
|
if err != nil {
|
|
|
|
fs.Debugf(nil, "Assuming path is a file as HEAD request could not be created: %v", err)
|
|
|
|
return createFileResult()
|
|
|
|
}
|
|
|
|
addHeaders(req, opt)
|
|
|
|
res, err := noRedir.Do(req)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
fs.Debugf(nil, "Assuming path is a file as HEAD request could not be sent: %v", err)
|
|
|
|
return createFileResult()
|
|
|
|
}
|
|
|
|
if res.StatusCode == http.StatusNotFound {
|
|
|
|
fs.Debugf(nil, "Assuming path is a directory as HEAD response is it does not exist as a file (%s)", res.Status)
|
|
|
|
return createDirResult()
|
|
|
|
}
|
|
|
|
if res.StatusCode == http.StatusMovedPermanently ||
|
|
|
|
res.StatusCode == http.StatusFound ||
|
|
|
|
res.StatusCode == http.StatusSeeOther ||
|
|
|
|
res.StatusCode == http.StatusTemporaryRedirect ||
|
|
|
|
res.StatusCode == http.StatusPermanentRedirect {
|
|
|
|
redir := res.Header.Get("Location")
|
|
|
|
if redir != "" {
|
|
|
|
if redir[len(redir)-1] == '/' {
|
|
|
|
fs.Debugf(nil, "Assuming path is a directory as HEAD response is redirect (%s) to a path that ends with '/': %s", res.Status, redir)
|
|
|
|
return createDirResult()
|
|
|
|
}
|
|
|
|
fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) to a path that does not end with '/': %s", res.Status, redir)
|
|
|
|
return createFileResult()
|
|
|
|
}
|
|
|
|
fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) but no location header", res.Status)
|
|
|
|
return createFileResult()
|
|
|
|
}
|
|
|
|
if res.StatusCode < 200 || res.StatusCode > 299 {
|
|
|
|
// Example is 403 (http.StatusForbidden) for servers not allowing HEAD requests.
|
|
|
|
fs.Debugf(nil, "Assuming path is a file as HEAD response is an error (%s)", res.Status)
|
|
|
|
return createFileResult()
|
|
|
|
}
|
|
|
|
|
|
|
|
fs.Debugf(nil, "Assuming path is a file as HEAD response is success (%s)", res.Status)
|
|
|
|
return createFileResult()
|
|
|
|
}
|
|
|
|
|
2017-03-17 09:47:23 +01:00
|
|
|
// NewFs creates a new Fs object from the name and root. It connects to
|
|
|
|
// the host specified in the config file.
|
2020-11-05 16:18:51 +01:00
|
|
|
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
|
2018-05-14 19:06:57 +02:00
|
|
|
// Parse config into Options struct
|
|
|
|
opt := new(Options)
|
|
|
|
err := configstruct.Set(m, opt)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2019-08-12 16:29:35 +02:00
|
|
|
if len(opt.Headers)%2 != 0 {
|
|
|
|
return nil, errors.New("odd number of headers supplied")
|
|
|
|
}
|
|
|
|
|
2018-05-14 19:06:57 +02:00
|
|
|
if !strings.HasSuffix(opt.Endpoint, "/") {
|
|
|
|
opt.Endpoint += "/"
|
2017-06-19 18:36:14 +02:00
|
|
|
}
|
2017-03-17 09:47:23 +01:00
|
|
|
|
2017-06-19 18:36:14 +02:00
|
|
|
// Parse the endpoint and stick the root onto it
|
2018-05-14 19:06:57 +02:00
|
|
|
base, err := url.Parse(opt.Endpoint)
|
2017-03-17 09:47:23 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2018-01-05 16:55:43 +01:00
|
|
|
u, err := rest.URLJoin(base, rest.URLPathEscape(root))
|
2017-06-19 18:36:14 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2020-11-13 16:24:43 +01:00
|
|
|
client := fshttp.NewClient(ctx)
|
2017-03-17 09:47:23 +01:00
|
|
|
|
2022-01-23 23:39:05 +01:00
|
|
|
endpoint, isFile := getFsEndpoint(ctx, client, u.String(), opt)
|
|
|
|
fs.Debugf(nil, "Root: %s", endpoint)
|
|
|
|
u, err = url.Parse(endpoint)
|
2017-03-17 09:47:23 +01:00
|
|
|
if err != nil {
|
2017-06-19 18:36:14 +02:00
|
|
|
return nil, err
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2017-06-19 18:36:14 +02:00
|
|
|
|
2020-11-05 12:33:32 +01:00
|
|
|
ci := fs.GetConfig(ctx)
|
2017-03-17 09:47:23 +01:00
|
|
|
f := &Fs{
|
2017-08-02 14:19:36 +02:00
|
|
|
name: name,
|
|
|
|
root: root,
|
2018-05-14 19:06:57 +02:00
|
|
|
opt: *opt,
|
2020-11-05 12:33:32 +01:00
|
|
|
ci: ci,
|
2017-08-02 14:19:36 +02:00
|
|
|
httpClient: client,
|
|
|
|
endpoint: u,
|
|
|
|
endpointURL: u.String(),
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2017-08-09 16:27:43 +02:00
|
|
|
f.features = (&fs.Features{
|
|
|
|
CanHaveEmptyDirectories: true,
|
2020-11-05 17:00:40 +01:00
|
|
|
}).Fill(ctx, f)
|
2022-01-23 23:39:05 +01:00
|
|
|
|
2017-06-19 18:36:14 +02:00
|
|
|
if isFile {
|
2022-01-23 23:39:05 +01:00
|
|
|
// return an error with an fs which points to the parent
|
2017-06-19 18:36:14 +02:00
|
|
|
return f, fs.ErrorIsFile
|
|
|
|
}
|
2022-01-23 23:39:05 +01:00
|
|
|
|
2017-08-02 14:19:36 +02:00
|
|
|
if !strings.HasSuffix(f.endpointURL, "/") {
|
|
|
|
return nil, errors.New("internal error: url doesn't end with /")
|
|
|
|
}
|
2022-01-23 23:39:05 +01:00
|
|
|
|
2017-03-17 09:47:23 +01:00
|
|
|
return f, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Name returns the configured name of the file system
|
|
|
|
func (f *Fs) Name() string {
|
|
|
|
return f.name
|
|
|
|
}
|
|
|
|
|
|
|
|
// Root returns the root for the filesystem
|
|
|
|
func (f *Fs) Root() string {
|
|
|
|
return f.root
|
|
|
|
}
|
|
|
|
|
|
|
|
// String returns the URL for the filesystem
|
|
|
|
func (f *Fs) String() string {
|
2017-08-02 14:19:36 +02:00
|
|
|
return f.endpointURL
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Features returns the optional features of this Fs
|
|
|
|
func (f *Fs) Features() *fs.Features {
|
|
|
|
return f.features
|
|
|
|
}
|
|
|
|
|
|
|
|
// Precision is the remote http file system's modtime precision, which we have no way of knowing. We estimate at 1s
|
|
|
|
func (f *Fs) Precision() time.Duration {
|
|
|
|
return time.Second
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewObject creates a new remote http file object
|
2019-06-17 10:34:30 +02:00
|
|
|
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
|
2017-03-17 09:47:23 +01:00
|
|
|
o := &Object{
|
|
|
|
fs: f,
|
|
|
|
remote: remote,
|
|
|
|
}
|
2019-09-04 21:21:10 +02:00
|
|
|
err := o.stat(ctx)
|
2017-03-17 09:47:23 +01:00
|
|
|
if err != nil {
|
2018-12-04 18:40:44 +01:00
|
|
|
return nil, err
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
return o, nil
|
|
|
|
}
|
|
|
|
|
2017-08-02 14:19:36 +02:00
|
|
|
// Join's the remote onto the base URL
|
|
|
|
func (f *Fs) url(remote string) string {
|
2018-01-05 16:55:43 +01:00
|
|
|
return f.endpointURL + rest.URLPathEscape(remote)
|
2017-08-02 14:19:36 +02:00
|
|
|
}
|
|
|
|
|
2018-04-16 20:40:02 +02:00
|
|
|
// parseInt64 interprets s as a base 10 signed 64 bit integer.
// If s cannot be parsed, def is returned in its place.
func parseInt64(s string, def int64) int64 {
	if parsed, err := strconv.ParseInt(s, 10, 64); err == nil {
		return parsed
	}
	return def
}
|
|
|
|
|
2018-02-14 12:26:37 +01:00
|
|
|
// Errors returned by parseName, one for each reason a link can be rejected.
var (
	// the link could not be joined onto the base URL
	errURLJoinFailed = errors.New("URLJoin failed")
	// the resolved URL contains a "?"
	errFoundQuestionMark = errors.New("found ? in URL")
	// the resolved URL has a different host from the base
	errHostMismatch = errors.New("host mismatch")
	// the resolved URL has a different scheme from the base
	errSchemeMismatch = errors.New("scheme mismatch")
	// the resolved path does not start with the base path
	errNotUnderRoot = errors.New("not under root")
	// nothing is left once the base path is removed
	errNameIsEmpty = errors.New("name is empty")
	// the name has a "/" somewhere other than at its end
	errNameContainsSlash = errors.New("name contains /")
)
|
|
|
|
|
|
|
|
// parseName turns a name as found in the page into a remote path or returns an error
|
|
|
|
func parseName(base *url.URL, name string) (string, error) {
|
|
|
|
// make URL absolute
|
2017-10-02 18:37:36 +02:00
|
|
|
u, err := rest.URLJoin(base, name)
|
2017-06-19 18:36:14 +02:00
|
|
|
if err != nil {
|
2018-02-14 12:26:37 +01:00
|
|
|
return "", errURLJoinFailed
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2018-02-14 12:26:37 +01:00
|
|
|
// check it doesn't have URL parameters
|
2017-06-19 18:36:14 +02:00
|
|
|
uStr := u.String()
|
2022-01-23 22:55:37 +01:00
|
|
|
if strings.Contains(uStr, "?") {
|
2018-02-14 12:26:37 +01:00
|
|
|
return "", errFoundQuestionMark
|
|
|
|
}
|
|
|
|
// check that this is going back to the same host and scheme
|
|
|
|
if base.Host != u.Host {
|
|
|
|
return "", errHostMismatch
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2018-02-14 12:26:37 +01:00
|
|
|
if base.Scheme != u.Scheme {
|
|
|
|
return "", errSchemeMismatch
|
2017-06-19 18:36:14 +02:00
|
|
|
}
|
|
|
|
// check has path prefix
|
|
|
|
if !strings.HasPrefix(u.Path, base.Path) {
|
2018-02-14 12:26:37 +01:00
|
|
|
return "", errNotUnderRoot
|
2017-06-19 18:36:14 +02:00
|
|
|
}
|
|
|
|
// calculate the name relative to the base
|
|
|
|
name = u.Path[len(base.Path):]
|
2019-02-07 18:41:17 +01:00
|
|
|
// mustn't be empty
|
2017-06-19 18:36:14 +02:00
|
|
|
if name == "" {
|
2018-02-14 12:26:37 +01:00
|
|
|
return "", errNameIsEmpty
|
2017-06-19 18:36:14 +02:00
|
|
|
}
|
2018-02-14 12:26:37 +01:00
|
|
|
// mustn't contain a / - we are looking for a single level directory
|
2017-06-19 18:36:14 +02:00
|
|
|
slash := strings.Index(name, "/")
|
|
|
|
if slash >= 0 && slash != len(name)-1 {
|
2018-02-14 12:26:37 +01:00
|
|
|
return "", errNameContainsSlash
|
2017-06-19 18:36:14 +02:00
|
|
|
}
|
2018-02-14 12:26:37 +01:00
|
|
|
return name, nil
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2017-06-19 18:36:14 +02:00
|
|
|
// Parse turns HTML for a directory into names
|
|
|
|
// base should be the base URL to resolve any relative names from
|
|
|
|
func parse(base *url.URL, in io.Reader) (names []string, err error) {
|
|
|
|
doc, err := html.Parse(in)
|
2017-03-17 09:47:23 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2019-02-08 14:58:22 +01:00
|
|
|
var (
|
|
|
|
walk func(*html.Node)
|
|
|
|
seen = make(map[string]struct{})
|
|
|
|
)
|
2017-06-19 18:36:14 +02:00
|
|
|
walk = func(n *html.Node) {
|
|
|
|
if n.Type == html.ElementNode && n.Data == "a" {
|
|
|
|
for _, a := range n.Attr {
|
|
|
|
if a.Key == "href" {
|
2018-02-14 12:26:37 +01:00
|
|
|
name, err := parseName(base, a.Val)
|
|
|
|
if err == nil {
|
2019-02-08 14:58:22 +01:00
|
|
|
if _, found := seen[name]; !found {
|
|
|
|
names = append(names, name)
|
|
|
|
seen[name] = struct{}{}
|
|
|
|
}
|
2017-06-19 18:36:14 +02:00
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
|
|
walk(c)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
walk(doc)
|
|
|
|
return names, nil
|
|
|
|
}
|
|
|
|
|
2019-08-12 16:29:35 +02:00
|
|
|
// Adds the configured headers to the request if any
|
|
|
|
func addHeaders(req *http.Request, opt *Options) {
|
|
|
|
for i := 0; i < len(opt.Headers); i += 2 {
|
|
|
|
key := opt.Headers[i]
|
|
|
|
value := opt.Headers[i+1]
|
|
|
|
req.Header.Add(key, value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Adds the configured headers to the request if any
|
|
|
|
func (f *Fs) addHeaders(req *http.Request) {
|
|
|
|
addHeaders(req, &f.opt)
|
|
|
|
}
|
|
|
|
|
2017-06-19 18:36:14 +02:00
|
|
|
// Read the directory passed in
|
2019-09-04 21:21:10 +02:00
|
|
|
func (f *Fs) readDir(ctx context.Context, dir string) (names []string, err error) {
|
2017-08-02 14:19:36 +02:00
|
|
|
URL := f.url(dir)
|
|
|
|
u, err := url.Parse(URL)
|
2017-07-31 00:16:32 +02:00
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("failed to readDir: %w", err)
|
2017-07-31 00:16:32 +02:00
|
|
|
}
|
2017-08-02 14:19:36 +02:00
|
|
|
if !strings.HasSuffix(URL, "/") {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("internal error: readDir URL %q didn't end in /", URL)
|
2017-06-19 18:36:14 +02:00
|
|
|
}
|
2019-08-12 16:29:35 +02:00
|
|
|
// Do the request
|
2021-02-03 18:41:27 +01:00
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", URL, nil)
|
2019-08-12 16:29:35 +02:00
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("readDir failed: %w", err)
|
2019-08-12 16:29:35 +02:00
|
|
|
}
|
|
|
|
f.addHeaders(req)
|
|
|
|
res, err := f.httpClient.Do(req)
|
2019-02-08 18:40:40 +01:00
|
|
|
if err == nil {
|
|
|
|
defer fs.CheckClose(res.Body, &err)
|
|
|
|
if res.StatusCode == http.StatusNotFound {
|
|
|
|
return nil, fs.ErrorDirNotFound
|
|
|
|
}
|
2017-06-19 18:36:14 +02:00
|
|
|
}
|
|
|
|
err = statusError(res, err)
|
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("failed to readDir: %w", err)
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2017-06-19 18:36:14 +02:00
|
|
|
contentType := strings.SplitN(res.Header.Get("Content-Type"), ";", 2)[0]
|
|
|
|
switch contentType {
|
2017-03-17 09:47:23 +01:00
|
|
|
case "text/html":
|
2017-06-19 18:36:14 +02:00
|
|
|
names, err = parse(u, res.Body)
|
2017-03-17 09:47:23 +01:00
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("readDir: %w", err)
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2017-06-19 18:36:14 +02:00
|
|
|
default:
|
2022-01-23 22:54:08 +01:00
|
|
|
return nil, fmt.Errorf("can't parse content type %q", contentType)
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2017-06-19 18:36:14 +02:00
|
|
|
return names, nil
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2017-06-19 16:05:09 +02:00
|
|
|
// List the objects and directories in dir into entries. The
|
|
|
|
// entries can be returned in any order but should be for a
|
|
|
|
// complete directory.
|
|
|
|
//
|
|
|
|
// dir should be "" to list the root, and should not have
|
|
|
|
// trailing slashes.
|
|
|
|
//
|
|
|
|
// This should return ErrDirNotFound if the directory isn't
|
|
|
|
// found.
|
2019-06-17 10:34:30 +02:00
|
|
|
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
2017-06-19 18:36:14 +02:00
|
|
|
if !strings.HasSuffix(dir, "/") && dir != "" {
|
|
|
|
dir += "/"
|
2017-06-19 16:05:09 +02:00
|
|
|
}
|
2019-09-04 21:21:10 +02:00
|
|
|
names, err := f.readDir(ctx, dir)
|
2017-03-17 09:47:23 +01:00
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("error listing %q: %w", dir, err)
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2019-09-09 22:03:20 +02:00
|
|
|
var (
|
|
|
|
entriesMu sync.Mutex // to protect entries
|
|
|
|
wg sync.WaitGroup
|
2020-11-05 12:33:32 +01:00
|
|
|
checkers = f.ci.Checkers
|
|
|
|
in = make(chan string, checkers)
|
2019-09-09 22:03:20 +02:00
|
|
|
)
|
|
|
|
add := func(entry fs.DirEntry) {
|
|
|
|
entriesMu.Lock()
|
|
|
|
entries = append(entries, entry)
|
|
|
|
entriesMu.Unlock()
|
|
|
|
}
|
2020-11-05 12:33:32 +01:00
|
|
|
for i := 0; i < checkers; i++ {
|
2019-09-09 22:03:20 +02:00
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer wg.Done()
|
|
|
|
for remote := range in {
|
|
|
|
file := &Object{
|
|
|
|
fs: f,
|
|
|
|
remote: remote,
|
|
|
|
}
|
2019-09-14 09:48:13 +02:00
|
|
|
switch err := file.stat(ctx); err {
|
2019-09-09 22:03:20 +02:00
|
|
|
case nil:
|
|
|
|
add(file)
|
|
|
|
case fs.ErrorNotAFile:
|
|
|
|
// ...found a directory not a file
|
|
|
|
add(fs.NewDir(remote, timeUnset))
|
|
|
|
default:
|
|
|
|
fs.Debugf(remote, "skipping because of error: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
2017-06-19 18:36:14 +02:00
|
|
|
for _, name := range names {
|
|
|
|
isDir := name[len(name)-1] == '/'
|
|
|
|
name = strings.TrimRight(name, "/")
|
|
|
|
remote := path.Join(dir, name)
|
|
|
|
if isDir {
|
2019-09-09 22:03:20 +02:00
|
|
|
add(fs.NewDir(remote, timeUnset))
|
2017-03-17 09:47:23 +01:00
|
|
|
} else {
|
2019-09-09 22:03:20 +02:00
|
|
|
in <- remote
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
}
|
2019-09-09 22:03:20 +02:00
|
|
|
close(in)
|
|
|
|
wg.Wait()
|
2017-06-19 16:05:09 +02:00
|
|
|
return entries, nil
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2017-06-19 16:05:09 +02:00
|
|
|
// Put in to the remote path with the modTime given of the given size
|
|
|
|
//
|
|
|
|
// May create the object even if it returns an error - if so
|
|
|
|
// will return the object and the error, otherwise will return
|
|
|
|
// nil and the error
|
2019-06-17 10:34:30 +02:00
|
|
|
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
2017-06-19 16:05:09 +02:00
|
|
|
return nil, errorReadOnly
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2017-08-19 12:42:31 +02:00
|
|
|
// PutStream uploads to the remote path with the modTime given of indeterminate size
|
2019-06-17 10:34:30 +02:00
|
|
|
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
2017-08-19 12:42:31 +02:00
|
|
|
return nil, errorReadOnly
|
|
|
|
}
|
|
|
|
|
2017-03-17 09:47:23 +01:00
|
|
|
// Fs is the filesystem this remote http file object is located within
|
|
|
|
func (o *Object) Fs() fs.Info {
|
|
|
|
return o.fs
|
|
|
|
}
|
|
|
|
|
2017-04-22 12:48:32 +02:00
|
|
|
// String returns the URL to the remote HTTP file
|
2017-03-17 09:47:23 +01:00
|
|
|
func (o *Object) String() string {
|
|
|
|
if o == nil {
|
|
|
|
return "<nil>"
|
|
|
|
}
|
|
|
|
return o.remote
|
|
|
|
}
|
|
|
|
|
2017-04-22 12:48:32 +02:00
|
|
|
// Remote the name of the remote HTTP file, relative to the fs root
|
2017-03-17 09:47:23 +01:00
|
|
|
func (o *Object) Remote() string {
|
|
|
|
return o.remote
|
|
|
|
}
|
|
|
|
|
2017-04-22 12:48:32 +02:00
|
|
|
// Hash returns "" since HTTP (in Go or OpenSSH) doesn't support remote calculation of hashes
|
2019-06-17 10:34:30 +02:00
|
|
|
func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) {
|
2018-01-18 21:27:52 +01:00
|
|
|
return "", hash.ErrUnsupported
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Size returns the size in bytes of the remote http file
|
|
|
|
func (o *Object) Size() int64 {
|
2017-06-19 18:36:14 +02:00
|
|
|
return o.size
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// ModTime returns the modification time of the remote http file
|
2019-06-17 10:34:30 +02:00
|
|
|
func (o *Object) ModTime(ctx context.Context) time.Time {
|
2017-06-19 18:36:14 +02:00
|
|
|
return o.modTime
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2017-08-02 14:19:36 +02:00
|
|
|
// url returns the native url of the object
|
|
|
|
func (o *Object) url() string {
|
|
|
|
return o.fs.url(o.remote)
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// stat updates the info field in the Object
|
2019-09-04 21:21:10 +02:00
|
|
|
func (o *Object) stat(ctx context.Context) error {
|
2019-09-09 22:13:10 +02:00
|
|
|
if o.fs.opt.NoHead {
|
|
|
|
o.size = -1
|
|
|
|
o.modTime = timeUnset
|
|
|
|
o.contentType = fs.MimeType(ctx, o)
|
|
|
|
return nil
|
|
|
|
}
|
2017-08-02 14:19:36 +02:00
|
|
|
url := o.url()
|
2021-02-03 18:41:27 +01:00
|
|
|
req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
|
2019-08-12 16:29:35 +02:00
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return fmt.Errorf("stat failed: %w", err)
|
2019-08-12 16:29:35 +02:00
|
|
|
}
|
|
|
|
o.fs.addHeaders(req)
|
|
|
|
res, err := o.fs.httpClient.Do(req)
|
2018-12-04 18:40:44 +01:00
|
|
|
if err == nil && res.StatusCode == http.StatusNotFound {
|
|
|
|
return fs.ErrorObjectNotFound
|
|
|
|
}
|
2017-06-19 18:36:14 +02:00
|
|
|
err = statusError(res, err)
|
2017-03-17 09:47:23 +01:00
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return fmt.Errorf("failed to stat: %w", err)
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
t, err := http.ParseTime(res.Header.Get("Last-Modified"))
|
|
|
|
if err != nil {
|
2017-06-19 18:36:14 +02:00
|
|
|
t = timeUnset
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2018-04-16 20:40:02 +02:00
|
|
|
o.size = parseInt64(res.Header.Get("Content-Length"), -1)
|
2017-06-19 18:36:14 +02:00
|
|
|
o.modTime = t
|
|
|
|
o.contentType = res.Header.Get("Content-Type")
|
2019-02-08 14:58:47 +01:00
|
|
|
// If NoSlash is set then check ContentType to see if it is a directory
|
|
|
|
if o.fs.opt.NoSlash {
|
|
|
|
mediaType, _, err := mime.ParseMediaType(o.contentType)
|
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return fmt.Errorf("failed to parse Content-Type: %q: %w", o.contentType, err)
|
2019-02-08 14:58:47 +01:00
|
|
|
}
|
|
|
|
if mediaType == "text/html" {
|
|
|
|
return fs.ErrorNotAFile
|
|
|
|
}
|
|
|
|
}
|
2017-03-17 09:47:23 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetModTime sets the modification and access time to the specified time
|
|
|
|
//
|
|
|
|
// it also updates the info field
|
2019-06-17 10:34:30 +02:00
|
|
|
func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
|
2017-06-19 16:05:09 +02:00
|
|
|
return errorReadOnly
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2020-10-14 00:07:12 +02:00
|
|
|
// Storable returns whether the remote http file is a regular file (not a directory, symbolic link, block device, character device, named pipe, etc.)
|
2017-03-17 09:47:23 +01:00
|
|
|
func (o *Object) Storable() bool {
|
2017-06-19 18:36:14 +02:00
|
|
|
return true
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Open a remote http file object for reading. Seek is supported
|
2019-06-17 10:34:30 +02:00
|
|
|
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
|
2017-08-02 14:19:36 +02:00
|
|
|
url := o.url()
|
2021-02-03 18:41:27 +01:00
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
2017-03-17 09:47:23 +01:00
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("Open failed: %w", err)
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2017-06-19 18:36:14 +02:00
|
|
|
|
|
|
|
// Add optional headers
|
|
|
|
for k, v := range fs.OpenOptionHeaders(options) {
|
|
|
|
req.Header.Add(k, v)
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2019-08-12 16:29:35 +02:00
|
|
|
o.fs.addHeaders(req)
|
2017-06-19 18:36:14 +02:00
|
|
|
|
|
|
|
// Do the request
|
2017-03-17 09:47:23 +01:00
|
|
|
res, err := o.fs.httpClient.Do(req)
|
2017-06-19 18:36:14 +02:00
|
|
|
err = statusError(res, err)
|
2017-03-17 09:47:23 +01:00
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("Open failed: %w", err)
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
2017-06-19 18:36:14 +02:00
|
|
|
return res.Body, nil
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2018-01-12 17:30:54 +01:00
|
|
|
// Hashes returns hash.HashNone to indicate remote hashing is unavailable
|
|
|
|
func (f *Fs) Hashes() hash.Set {
|
2018-01-18 21:27:52 +01:00
|
|
|
return hash.Set(hash.None)
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Mkdir makes the root directory of the Fs object
|
2019-06-17 10:34:30 +02:00
|
|
|
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
|
2017-06-19 16:05:09 +02:00
|
|
|
return errorReadOnly
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Remove a remote http file object
|
2019-06-17 10:34:30 +02:00
|
|
|
func (o *Object) Remove(ctx context.Context) error {
|
2017-06-19 16:05:09 +02:00
|
|
|
return errorReadOnly
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Rmdir removes the root directory of the Fs object
|
2019-06-17 10:34:30 +02:00
|
|
|
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
|
2017-06-19 16:05:09 +02:00
|
|
|
return errorReadOnly
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2017-06-19 16:05:09 +02:00
|
|
|
// Update in to the object with the modTime given of the given size
|
2019-06-17 10:34:30 +02:00
|
|
|
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
|
2017-06-19 16:05:09 +02:00
|
|
|
return errorReadOnly
|
2017-03-17 09:47:23 +01:00
|
|
|
}
|
|
|
|
|
2017-06-19 18:36:14 +02:00
|
|
|
// MimeType of an Object if known, "" otherwise
|
2019-06-17 10:34:30 +02:00
|
|
|
func (o *Object) MimeType(ctx context.Context) string {
|
2017-06-19 18:36:14 +02:00
|
|
|
return o.contentType
|
|
|
|
}
|
|
|
|
|
2017-03-17 09:47:23 +01:00
|
|
|
// Check the interfaces are satisfied
|
|
|
|
var (
|
2017-08-19 12:42:31 +02:00
|
|
|
_ fs.Fs = &Fs{}
|
|
|
|
_ fs.PutStreamer = &Fs{}
|
|
|
|
_ fs.Object = &Object{}
|
|
|
|
_ fs.MimeTyper = &Object{}
|
2017-03-17 09:47:23 +01:00
|
|
|
)
|