From e62bbf761b3fc8ff8f8551d5bde8c93d0518465b Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Fri, 8 Feb 2019 13:58:47 +0000 Subject: [PATCH] http: add --http-no-slash for websites with directories with no slashes #3053 See: https://forum.rclone.org/t/is-there-a-way-to-log-into-an-htpp-server/8484 --- backend/http/http.go | 39 +++++++++++++++++++++++++++--- backend/http/http_internal_test.go | 26 +++++++++++++++----- backend/http/test/files/two.html | 2 +- 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/backend/http/http.go b/backend/http/http.go index c42049972..549787b2e 100644 --- a/backend/http/http.go +++ b/backend/http/http.go @@ -6,6 +6,7 @@ package http import ( "io" + "mime" "net/http" "net/url" "path" @@ -44,6 +45,22 @@ func init() { Value: "https://user:pass@example.com", Help: "Connect to example.com using a username and password", }}, + }, { + Name: "no_slash", + Help: `Set this if the site doesn't end directories with / + +Use this if your target website does not use / on the end of +directories. + +A / on the end of a path is how rclone normally tells the difference +between files and directories. If this flag is set, then rclone will +treat all files with Content-Type: text/html as directories and read +URLs from them rather than downloading them. + +Note that this may cause rclone to confuse genuine HTML files with +directories.`, + Default: false, + Advanced: true, }}, } fs.Register(fsi) @@ -52,6 +69,7 @@ func init() { // Options defines the configuration for this backend type Options struct { Endpoint string `config:"url"` + NoSlash bool `config:"no_slash"` } // Fs stores the interface to the remote HTTP files @@ -359,11 +377,16 @@ func (f *Fs) List(dir string) (entries fs.DirEntries, err error) { fs: f, remote: remote, } - if err = file.stat(); err != nil { + switch err = file.stat(); err { + case nil: + entries = append(entries, file) + case fs.ErrorNotAFile: + // ...found a directory not a file + dir := fs.NewDir(remote, timeUnset) + entries = append(entries, dir) + default: fs.Debugf(remote, "skipping because of error: %v", err) - continue } - entries = append(entries, file) } } return entries, nil @@ -439,6 +462,16 @@ func (o *Object) stat() error { o.size = parseInt64(res.Header.Get("Content-Length"), -1) o.modTime = t o.contentType = res.Header.Get("Content-Type") + // If NoSlash is set then check ContentType to see if it is a directory + if o.fs.opt.NoSlash { + mediaType, _, err := mime.ParseMediaType(o.contentType) + if err != nil { + return errors.Wrapf(err, "failed to parse Content-Type: %q", o.contentType) + } + if mediaType == "text/html" { + return fs.ErrorNotAFile + } + } return nil } diff --git a/backend/http/http_internal_test.go b/backend/http/http_internal_test.go index 4c1436895..97e00eca0 100644 --- a/backend/http/http_internal_test.go +++ b/backend/http/http_internal_test.go @@ -65,7 +65,7 @@ func prepare(t *testing.T) (fs.Fs, func()) { return f, tidy } -func testListRoot(t *testing.T, f fs.Fs) { +func testListRoot(t *testing.T, f fs.Fs, noSlash bool) { entries, err := f.List("") require.NoError(t, err) @@ -93,15 +93,29 @@ func testListRoot(t *testing.T, f fs.Fs) { e = entries[3] assert.Equal(t, "two.html", e.Remote()) - assert.Equal(t, int64(7), e.Size()) - _, ok = e.(*Object) - assert.True(t, ok) + if noSlash { + assert.Equal(t, int64(-1), e.Size()) + _, ok = e.(fs.Directory) + assert.True(t, ok) + } else { + assert.Equal(t, int64(41), e.Size()) + _, ok = e.(*Object) + assert.True(t, ok) + } } func TestListRoot(t *testing.T) { f, tidy := prepare(t) defer tidy() - testListRoot(t, f) + testListRoot(t, f, false) +} + +func TestListRootNoSlash(t *testing.T) { + f, tidy := prepare(t) + f.(*Fs).opt.NoSlash = true + defer tidy() + + testListRoot(t, f, true) } func TestListSubDir(t *testing.T) { @@ -194,7 +208,7 @@ func TestIsAFileRoot(t *testing.T) { f, err := NewFs(remoteName, "one%.txt", m) assert.Equal(t, err, fs.ErrorIsFile) - testListRoot(t, f) + testListRoot(t, f, false) } func TestIsAFileSubDir(t *testing.T) { diff --git a/backend/http/test/files/two.html b/backend/http/test/files/two.html index 4bc562871..da1fddaa4 100644 --- a/backend/http/test/files/two.html +++ b/backend/http/test/files/two.html @@ -1 +1 @@ -potato +file.txt