diff --git a/fs/all/all.go b/fs/all/all.go
index 7155d22ba..b5a0c89ff 100644
--- a/fs/all/all.go
+++ b/fs/all/all.go
@@ -9,6 +9,7 @@ import (
 	_ "github.com/ncw/rclone/dropbox"
 	_ "github.com/ncw/rclone/ftp"
 	_ "github.com/ncw/rclone/googlecloudstorage"
+	_ "github.com/ncw/rclone/http"
 	_ "github.com/ncw/rclone/hubic"
 	_ "github.com/ncw/rclone/local"
 	_ "github.com/ncw/rclone/onedrive"
diff --git a/http/http.go b/http/http.go
new file mode 100644
index 000000000..0d8c4e1d3
--- /dev/null
+++ b/http/http.go
@@ -0,0 +1,535 @@
+// Package http provides a filesystem interface using net/http
+
+// +build !plan9
+
+package http
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"path"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"golang.org/x/net/html"
+
+	"github.com/ncw/rclone/fs"
+	"github.com/pkg/errors"
+)
+
+// init registers the "http" backend and its single "endpoint" option
+// with the fs package.
+func init() {
+	fsi := &fs.RegInfo{
+		Name:        "http",
+		Description: "http Connection",
+		NewFs:       NewFs,
+		Options: []fs.Option{{
+			Name:     "endpoint",
+			Help:     "http host to connect to",
+			Optional: false,
+			Examples: []fs.OptionExample{{
+				Value: "example.com",
+				Help:  "Connect to example.com",
+			}},
+		}},
+	}
+	fs.Register(fsi)
+}
+
+// Fs stores the interface to the remote HTTP files
+type Fs struct {
+	name       string       // configured name of the remote, returned by Name
+	root       string       // path below endpoint, returned by Root
+	features   *fs.Features // optional features
+	endpoint   *url.URL     // base URL that all paths are resolved against
+	httpClient *http.Client // client used for every request
+}
+
+// Object is a remote object that has been stat'd (so it exists, but is not necessarily open for reading)
+type Object struct {
+	fs     *Fs         // the Fs this object belongs to
+	remote string      // path of the object
+	info   os.FileInfo // file information for the object
+}
+
+// ObjectReader holds the File interface to a remote http file opened for reading
+type ObjectReader struct {
+	object   *Object
+	httpFile io.ReadCloser
+}
+
+// urlJoin resolves each element of paths in turn, as a relative URL
+// reference, against u and returns the resulting URL as a string.
+//
+// An element that is exactly "/" is skipped: resolving it would jump
+// to the root of the host and discard the path built up so far.
+func urlJoin(u *url.URL, paths ...string) string {
+	r := u
+	for _, p := range paths {
+		if p == "/" {
+			continue
+		}
+		rel, err := url.Parse(p)
+		if err != nil {
+			// url.Parse returns a nil URL on failure and
+			// ResolveReference would panic on it, so treat the
+			// element as a plain path instead.
+			rel = &url.URL{Path: p}
+		}
+		r = r.ResolveReference(rel)
+	}
+	return r.String()
+}
+
+// NewFs creates a new Fs object from the name and root.
+// It connects to
+// the host specified in the config file.
+//
+// The "endpoint" config value is parsed as the base URL and a trailing
+// "/" is appended to a non-empty root.  A HEAD request is sent to the
+// root URL to check the host can be reached; the status of the
+// response is not inspected.
+func NewFs(name, root string) (fs.Fs, error) {
+	endpoint := fs.ConfigFileGet(name, "endpoint")
+
+	u, err := url.Parse(endpoint)
+	if err != nil {
+		return nil, err
+	}
+
+	if !strings.HasSuffix(root, "/") && root != "" {
+		root += "/"
+	}
+
+	client := fs.Config.Client()
+
+	res, err := client.Head(urlJoin(u, root))
+	if err != nil {
+		return nil, errors.Wrap(err, "couldn't connect http")
+	}
+	// Only reachability matters here, but the body must still be
+	// closed so the transport can release the connection.
+	_ = res.Body.Close()
+
+	f := &Fs{
+		name:       name,
+		root:       root,
+		httpClient: client,
+		endpoint:   u,
+	}
+	f.features = (&fs.Features{}).Fill(f)
+	return f, nil
+}
+
+// Name returns the configured name of the file system
+func (f *Fs) Name() string {
+	return f.name
+}
+
+// Root returns the root for the filesystem
+func (f *Fs) Root() string {
+	return f.root
+}
+
+// String returns the URL for the filesystem, i.e. the root resolved
+// against the endpoint
+func (f *Fs) String() string {
+	return urlJoin(f.endpoint, f.root)
+}
+
+// Features returns the optional features of this Fs
+func (f *Fs) Features() *fs.Features {
+	return f.features
+}
+
+// Precision is the remote http file system's modtime precision, which we have no way of knowing.
+// We estimate at 1s
+func (f *Fs) Precision() time.Duration {
+	return time.Second
+}
+
+// NewObject creates a new remote http file object
+//
+// The object is stat'd before it is returned; an error from stat is
+// returned wrapped.
+func (f *Fs) NewObject(remote string) (fs.Object, error) {
+	o := &Object{
+		fs:     f,
+		remote: remote,
+	}
+	err := o.stat()
+	if err != nil {
+		return nil, errors.Wrap(err, "Stat failed")
+	}
+	return o, nil
+}
+
+// dirExists returns true,nil if the directory exists, false, nil if
+// it doesn't or false, err
+//
+// The directory is taken to exist when a HEAD request for it is
+// answered with 200 OK.
+func (f *Fs) dirExists(dir string) (bool, error) {
+	res, err := f.httpClient.Head(urlJoin(f.endpoint, dir))
+	if err != nil {
+		return false, err
+	}
+	// Only the status is of interest, but the body must still be
+	// closed so the transport can release the connection.
+	_ = res.Body.Close()
+	return res.StatusCode == http.StatusOK, nil
+}
+
+// entry describes one link found in a directory listing.  Its methods
+// make it usable as an os.FileInfo.
+type entry struct {
+	name  string      // href with any trailing "/" removed
+	url   string      // href as found in the listing, unescaped
+	size  int64       // size in bytes
+	mode  os.FileMode // permission bits, plus os.ModeDir for directories
+	mtime int64       // modification time in seconds since the Unix epoch
+}
+
+// Name returns the name of the entry
+func (e *entry) Name() string {
+	return e.name
+}
+
+// Size returns the size of the entry in bytes
+func (e *entry) Size() int64 {
+	return e.size
+}
+
+// Mode returns the file mode bits of the entry
+func (e *entry) Mode() os.FileMode {
+	return e.mode
+}
+
+// ModTime returns the modification time of the entry
+func (e *entry) ModTime() time.Time {
+	return time.Unix(e.mtime, 0)
+}
+
+// IsDir returns true if the entry is a directory
+func (e *entry) IsDir() bool {
+	return e.mode&os.ModeDir != 0
+}
+
+// Sys returns nil as there is no underlying data source
+func (e *entry) Sys() interface{} {
+	return nil
+}
+
+// parseInt64 parses s as a base 10 int64, returning 0 if it can't be
+// parsed
+func parseInt64(s string) int64 {
+	n, err := strconv.ParseInt(s, 10, 64)
+	if err != nil {
+		return 0
+	}
+	return n
+}
+
+// parseBool parses s as a boolean, returning false if it can't be
+// parsed
+func parseBool(s string) bool {
+	b, err := strconv.ParseBool(s)
+	if err != nil {
+		return false
+	}
+	return b
+}
+
+// prepareTimeString trims ts and keeps only its first two
+// space-separated fields (the date and the time expected by
+// parseTime), joined by a single space.
+//
+// Input with fewer than two fields, including the empty string, is
+// returned trimmed rather than causing a panic.
+func prepareTimeString(ts string) string {
+	parts := strings.SplitN(strings.Trim(ts, "\t "), " ", 3)
+	if len(parts) > 2 {
+		// drop whatever follows the date and time
+		parts = parts[:2]
+	}
+	return strings.Trim(strings.Join(parts, " "), "\r\n\t ")
+}
+
+// parseTime reads a time of the form "2-Jan-2006 15:04" from the data
+// of n.  It returns the zero time if the data is empty or can't be
+// parsed.
+func parseTime(n *html.Node) (t time.Time) {
+	if ts := prepareTimeString(n.Data); ts != "" {
+		// a parse failure is ignored on purpose and yields the zero time
+		t, _ = time.Parse("2-Jan-2006 15:04", ts)
+	}
+	return t
+}
+
+// CheckClose is a utility function used to check the return from
+// Close in a defer statement.
+func CheckClose(c io.Closer, err *error) {
+	cerr := c.Close()
+	// only report the Close error if nothing has failed already
+	if *err == nil {
+		*err = cerr
+	}
+}
+
+// readDir fetches the URL for path and returns an entry for each
+// <a href="..."> link found in the page.
+//
+// It returns nil, nil if the server doesn't answer with 200 OK.  A
+// response whose Content-Type isn't text/html gives an empty listing.
+// Links ending in "/" are returned as directories, everything else as
+// files.
+func (f *Fs) readDir(path string) (entries []*entry, err error) {
+	res, err := f.httpClient.Get(urlJoin(f.endpoint, path))
+	if err != nil {
+		return nil, err
+	}
+	if res.Body != nil {
+		// Registered before the status check so the body is closed on
+		// every return path.  err is a named result, so a failure to
+		// Close is returned to the caller if nothing else went wrong.
+		defer CheckClose(res.Body, &err)
+	}
+	if res.Body == nil || res.StatusCode != http.StatusOK {
+		return nil, nil
+	}
+	entries = make([]*entry, 0)
+
+	switch strings.SplitN(res.Header.Get("Content-Type"), ";", 2)[0] {
+	case "text/html":
+		doc, perr := html.Parse(res.Body)
+		if perr != nil {
+			return nil, perr
+		}
+		var walk func(*html.Node)
+		walk = func(n *html.Node) {
+			if n.Type == html.ElementNode && n.Data == "a" {
+				for _, a := range n.Attr {
+					if a.Key != "href" {
+						continue
+					}
+					// NOTE(review): QueryUnescape turns "+" into a
+					// space, which looks wrong for a path - confirm
+					name, uerr := url.QueryUnescape(a.Val)
+					if uerr != nil {
+						continue
+					}
+					// ignore links to the parent and to the page
+					// itself; an empty href also means the page itself
+					if name == "" || name == "../" || name == "./" {
+						break
+					}
+					e := &entry{
+						name: strings.TrimRight(name, "/"),
+						url:  name,
+					}
+					if strings.HasSuffix(a.Val, "/") {
+						e.mode = os.FileMode(0555) | os.ModeDir
+					} else {
+						e.mode = os.FileMode(0444)
+					}
+					entries = append(entries, e)
+					break
+				}
+			}
+			for c := n.FirstChild; c != nil; c = c.NextSibling {
+				walk(c)
+			}
+		}
+		walk(doc)
+	}
+	return entries, nil
+}
+
+// list reads the directory dir, given relative to the root, and adds
+// what it finds to out.
+//
+// Subdirectories accepted by out.IncludeDirectory are listed in new
+// goroutines while level is less than out.Level().  wg must have been
+// incremented for this call and is marked done when it returns.  A
+// token is taken from tokens before any work is done and put back
+// afterwards.
+func (f *Fs) list(out fs.ListOpts, dir string, level int, wg *sync.WaitGroup, tokens chan struct{}) {
+	defer wg.Done()
+	// take a token
+	<-tokens
+	// return it when done
+	defer func() {
+		tokens <- struct{}{}
+	}()
+	httpDir := path.Join(f.root, dir)
+	if !strings.HasSuffix(dir, "/") {
+		// path.Join strips the trailing slash
+		httpDir += "/"
+	}
+	infos, err := f.readDir(httpDir)
+	if err != nil {
+		err = errors.Wrapf(err, "error listing %q", dir)
+		fs.Errorf(f, "Listing failed: %v", err)
+		out.SetError(err)
+		return
+	}
+	for _, info := range infos {
+		remote := info.Name()
+		if dir != "" {
+			remote = dir + "/" + remote
+		}
+		if info.IsDir() {
+			if out.IncludeDirectory(remote) {
+				d := &fs.Dir{
+					Name:  remote,
+					When:  info.ModTime(),
+					Bytes: 0,
+					Count: 0,
+				}
+				out.AddDir(d)
+
+				if level < out.Level() {
+					wg.Add(1)
+					go f.list(out, remote, level+1, wg, tokens)
+				}
+			}
+		} else {
+			file := &Object{
+				fs:     f,
+				remote: remote,
+				info:   info,
+			}
+			// leave out anything for which stat fails
+			if err = file.stat(); err != nil {
+				continue
+			}
+			out.Add(file)
+		}
+	}
+}
+
+// List the files and directories starting at