copyurl: Add option to honor the HTTP header filename directive.

Implemented --header-filename for use with copyurl.

To set a preferred download filename for HTTP downloads, RFC 6266
specifies a 'filename' directive, available within the 'Content-Disposition'
header. We can parse it with 'mime.ParseMediaType'.

See below for details:
https://httpwg.org/specs/rfc6266.html#disposition.parameter.filename
https://httpwg.org/specs/rfc6266.html#advice.generating

Co-authored-by: buengese <buengese@protonmail.com>
This commit is contained in:
J-P Treen 2021-07-28 17:05:21 +01:00 committed by Nick Craig-Wood
parent f2a15a174f
commit 8b8802a078
4 changed files with 36 additions and 21 deletions

View File

@ -14,16 +14,18 @@ import (
) )
var ( var (
autoFilename = false autoFilename = false
printFilename = false headerFilename = false
stdout = false printFilename = false
noClobber = false stdout = false
noClobber = false
) )
func init() { func init() {
cmd.Root.AddCommand(commandDefinition) cmd.Root.AddCommand(commandDefinition)
cmdFlags := commandDefinition.Flags() cmdFlags := commandDefinition.Flags()
flags.BoolVarP(cmdFlags, &autoFilename, "auto-filename", "a", autoFilename, "Get the file name from the URL and use it for destination file path") flags.BoolVarP(cmdFlags, &autoFilename, "auto-filename", "a", autoFilename, "Get the file name from the URL and use it for destination file path")
flags.BoolVarP(cmdFlags, &headerFilename, "header-filename", "", headerFilename, "Get the file name from the Content-Disposition header")
flags.BoolVarP(cmdFlags, &printFilename, "print-filename", "p", printFilename, "Print the resulting name from --auto-filename") flags.BoolVarP(cmdFlags, &printFilename, "print-filename", "p", printFilename, "Print the resulting name from --auto-filename")
flags.BoolVarP(cmdFlags, &noClobber, "no-clobber", "", noClobber, "Prevent overwriting file with same name") flags.BoolVarP(cmdFlags, &noClobber, "no-clobber", "", noClobber, "Prevent overwriting file with same name")
flags.BoolVarP(cmdFlags, &stdout, "stdout", "", stdout, "Write the output to stdout rather than a file") flags.BoolVarP(cmdFlags, &stdout, "stdout", "", stdout, "Write the output to stdout rather than a file")
@ -36,10 +38,11 @@ var commandDefinition = &cobra.Command{
Download a URL's content and copy it to the destination without saving Download a URL's content and copy it to the destination without saving
it in temporary storage. it in temporary storage.
Setting ` + "`--auto-filename`" + ` will cause the file name to be retrieved from Setting ` + "`--auto-filename`" + ` will attempt to automatically determine the filename from the URL
the URL (after any redirections) and used in the destination (after any redirections) and used in the destination path.
path. With ` + "`--print-filename`" + ` in addition, the resulting file name will With ` + "`--auto-filename-header`" + ` in
be printed. addition, if a specific filename is set in HTTP headers, it will be used instead of the name from the URL.
With ` + "`--print-filename`" + ` in addition, the resulting file name will be printed.
Setting ` + "`--no-clobber`" + ` will prevent overwriting file on the Setting ` + "`--no-clobber`" + ` will prevent overwriting file on the
destination if there is one with the same name. destination if there is one with the same name.
@ -69,7 +72,7 @@ will cause the output to be written to standard output.
if stdout { if stdout {
err = operations.CopyURLToWriter(context.Background(), args[0], os.Stdout) err = operations.CopyURLToWriter(context.Background(), args[0], os.Stdout)
} else { } else {
dst, err = operations.CopyURL(context.Background(), fsdst, dstFileName, args[0], autoFilename, noClobber) dst, err = operations.CopyURL(context.Background(), fsdst, dstFileName, args[0], autoFilename, headerFilename, noClobber)
if printFilename && err == nil && dst != nil { if printFilename && err == nil && dst != nil {
fmt.Println(dst.Remote()) fmt.Println(dst.Remote())
} }

View File

@ -11,6 +11,7 @@ import (
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"mime"
"net/http" "net/http"
"os" "os"
"path" "path"
@ -1757,7 +1758,7 @@ func RcatSize(ctx context.Context, fdst fs.Fs, dstFileName string, in io.ReadClo
type copyURLFunc func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error) type copyURLFunc func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error)
// copyURLFn copies the data from the url to the function supplied // copyURLFn copies the data from the url to the function supplied
func copyURLFn(ctx context.Context, dstFileName string, url string, dstFileNameFromURL bool, fn copyURLFunc) (err error) { func copyURLFn(ctx context.Context, dstFileName string, url string, autoFilename, dstFileNameFromHeader bool, fn copyURLFunc) (err error) {
client := fshttp.NewClient(ctx) client := fshttp.NewClient(ctx)
resp, err := client.Get(url) resp, err := client.Get(url)
if err != nil { if err != nil {
@ -1771,7 +1772,17 @@ func copyURLFn(ctx context.Context, dstFileName string, url string, dstFileNameF
if err != nil { if err != nil {
modTime = time.Now() modTime = time.Now()
} }
if dstFileNameFromURL { if autoFilename {
if dstFileNameFromHeader {
_, params, err := mime.ParseMediaType(resp.Header.Get("Content-Disposition"))
headerFilename := path.Base(strings.Replace(params["filename"], "\\", "/", -1))
if err != nil || headerFilename == "" {
return fmt.Errorf("copyurl failed: filename not found in the Content-Dispoition header")
}
fs.Debugf(headerFilename, "filename found in Content-Disposition header.")
return fn(ctx, headerFilename, resp.Body, resp.ContentLength, modTime)
}
dstFileName = path.Base(resp.Request.URL.Path) dstFileName = path.Base(resp.Request.URL.Path)
if dstFileName == "." || dstFileName == "/" { if dstFileName == "." || dstFileName == "/" {
return fmt.Errorf("CopyURL failed: file name wasn't found in url") return fmt.Errorf("CopyURL failed: file name wasn't found in url")
@ -1782,9 +1793,9 @@ func copyURLFn(ctx context.Context, dstFileName string, url string, dstFileNameF
} }
// CopyURL copies the data from the url to (fdst, dstFileName) // CopyURL copies the data from the url to (fdst, dstFileName)
func CopyURL(ctx context.Context, fdst fs.Fs, dstFileName string, url string, dstFileNameFromURL bool, noClobber bool) (dst fs.Object, err error) { func CopyURL(ctx context.Context, fdst fs.Fs, dstFileName string, url string, autoFilename, dstFileNameFromHeader bool, noClobber bool) (dst fs.Object, err error) {
err = copyURLFn(ctx, dstFileName, url, dstFileNameFromURL, func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error) { err = copyURLFn(ctx, dstFileName, url, autoFilename, dstFileNameFromHeader, func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error) {
if noClobber { if noClobber {
_, err = fdst.NewObject(ctx, dstFileName) _, err = fdst.NewObject(ctx, dstFileName)
if err == nil { if err == nil {
@ -1799,7 +1810,7 @@ func CopyURL(ctx context.Context, fdst fs.Fs, dstFileName string, url string, ds
// CopyURLToWriter copies the data from the url to the io.Writer supplied // CopyURLToWriter copies the data from the url to the io.Writer supplied
func CopyURLToWriter(ctx context.Context, url string, out io.Writer) (err error) { func CopyURLToWriter(ctx context.Context, url string, out io.Writer) (err error) {
return copyURLFn(ctx, "", url, false, func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error) { return copyURLFn(ctx, "", url, false, false, func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error) {
_, err = io.Copy(out, in) _, err = io.Copy(out, in)
return err return err
}) })

View File

@ -739,31 +739,31 @@ func TestCopyURL(t *testing.T) {
ts := httptest.NewServer(handler) ts := httptest.NewServer(handler)
defer ts.Close() defer ts.Close()
o, err := operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, false) o, err := operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, false, false)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(len(contents)), o.Size()) assert.Equal(t, int64(len(contents)), o.Size())
fstest.CheckListingWithPrecision(t, r.Fremote, []fstest.Item{file1}, nil, fs.ModTimeNotSupported) fstest.CheckListingWithPrecision(t, r.Fremote, []fstest.Item{file1}, nil, fs.ModTimeNotSupported)
// Check file clobbering // Check file clobbering
_, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, true) _, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, false, true)
require.Error(t, err) require.Error(t, err)
// Check auto file naming // Check auto file naming
status = 0 status = 0
urlFileName := "filename.txt" urlFileName := "filename.txt"
o, err = operations.CopyURL(ctx, r.Fremote, "", ts.URL+"/"+urlFileName, true, false) o, err = operations.CopyURL(ctx, r.Fremote, "", ts.URL+"/"+urlFileName, true, false, false)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(len(contents)), o.Size()) assert.Equal(t, int64(len(contents)), o.Size())
assert.Equal(t, urlFileName, o.Remote()) assert.Equal(t, urlFileName, o.Remote())
// Check auto file naming when url without file name // Check auto file naming when url without file name
_, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, true, false) _, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, true, false, false)
require.Error(t, err) require.Error(t, err)
// Check an error is returned for a 404 // Check an error is returned for a 404
status = http.StatusNotFound status = http.StatusNotFound
o, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, false) o, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, false, false)
require.Error(t, err) require.Error(t, err)
assert.Contains(t, err.Error(), "Not Found") assert.Contains(t, err.Error(), "Not Found")
assert.Nil(t, o) assert.Nil(t, o)
@ -776,7 +776,7 @@ func TestCopyURL(t *testing.T) {
tss := httptest.NewTLSServer(handler) tss := httptest.NewTLSServer(handler)
defer tss.Close() defer tss.Close()
o, err = operations.CopyURL(ctx, r.Fremote, "file2", tss.URL, false, false) o, err = operations.CopyURL(ctx, r.Fremote, "file2", tss.URL, false, false, false)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(len(contents)), o.Size()) assert.Equal(t, int64(len(contents)), o.Size())
fstest.CheckListingWithPrecision(t, r.Fremote, []fstest.Item{file1, file2, fstest.NewItem(urlFileName, contents, t1)}, nil, fs.ModTimeNotSupported) fstest.CheckListingWithPrecision(t, r.Fremote, []fstest.Item{file1, file2, fstest.NewItem(urlFileName, contents, t1)}, nil, fs.ModTimeNotSupported)

View File

@ -274,8 +274,9 @@ func rcSingleCommand(ctx context.Context, in rc.Params, name string, noRemote bo
} }
autoFilename, _ := in.GetBool("autoFilename") autoFilename, _ := in.GetBool("autoFilename")
noClobber, _ := in.GetBool("noClobber") noClobber, _ := in.GetBool("noClobber")
headerFilename, _ := in.GetBool("headerFilename")
_, err = CopyURL(ctx, f, remote, url, autoFilename, noClobber) _, err = CopyURL(ctx, f, remote, url, autoFilename, headerFilename, noClobber)
return nil, err return nil, err
case "uploadfile": case "uploadfile":