copyurl: Add option to honor the HTTP header filename directive.

Implemented --header-filename for use with copyurl.

For specifically setting preferred download filenames for HTTP requests, RFC 6226
specifies a 'filename' directive, available within 'Content-Disposition'
header. We can handle with 'mime.ParseMediaType'.

See below for details:
https://httpwg.org/specs/rfc6266.html#disposition.parameter.filename
https://httpwg.org/specs/rfc6266.html#advice.generating

Co-authored-by: buengese <buengese@protonmail.com>
This commit is contained in:
J-P Treen 2021-07-28 17:05:21 +01:00 committed by Nick Craig-Wood
parent f2a15a174f
commit 8b8802a078
4 changed files with 36 additions and 21 deletions

View File

@ -14,16 +14,18 @@ import (
)
var (
autoFilename = false
printFilename = false
stdout = false
noClobber = false
autoFilename = false
headerFilename = false
printFilename = false
stdout = false
noClobber = false
)
func init() {
cmd.Root.AddCommand(commandDefinition)
cmdFlags := commandDefinition.Flags()
flags.BoolVarP(cmdFlags, &autoFilename, "auto-filename", "a", autoFilename, "Get the file name from the URL and use it for destination file path")
flags.BoolVarP(cmdFlags, &headerFilename, "header-filename", "", headerFilename, "Get the file name from the Content-Disposition header")
flags.BoolVarP(cmdFlags, &printFilename, "print-filename", "p", printFilename, "Print the resulting name from --auto-filename")
flags.BoolVarP(cmdFlags, &noClobber, "no-clobber", "", noClobber, "Prevent overwriting file with same name")
flags.BoolVarP(cmdFlags, &stdout, "stdout", "", stdout, "Write the output to stdout rather than a file")
@ -36,10 +38,11 @@ var commandDefinition = &cobra.Command{
Download a URL's content and copy it to the destination without saving
it in temporary storage.
Setting ` + "`--auto-filename`" + ` will cause the file name to be retrieved from
the URL (after any redirections) and used in the destination
path. With ` + "`--print-filename`" + ` in addition, the resulting file name will
be printed.
Setting ` + "`--auto-filename`" + ` will attempt to automatically determine the filename from the URL
(after any redirections) and used in the destination path.
With ` + "`--auto-filename-header`" + ` in
addition, if a specific filename is set in HTTP headers, it will be used instead of the name from the URL.
With ` + "`--print-filename`" + ` in addition, the resulting file name will be printed.
Setting ` + "`--no-clobber`" + ` will prevent overwriting file on the
destination if there is one with the same name.
@ -69,7 +72,7 @@ will cause the output to be written to standard output.
if stdout {
err = operations.CopyURLToWriter(context.Background(), args[0], os.Stdout)
} else {
dst, err = operations.CopyURL(context.Background(), fsdst, dstFileName, args[0], autoFilename, noClobber)
dst, err = operations.CopyURL(context.Background(), fsdst, dstFileName, args[0], autoFilename, headerFilename, noClobber)
if printFilename && err == nil && dst != nil {
fmt.Println(dst.Remote())
}

View File

@ -11,6 +11,7 @@ import (
"fmt"
"io"
"io/ioutil"
"mime"
"net/http"
"os"
"path"
@ -1757,7 +1758,7 @@ func RcatSize(ctx context.Context, fdst fs.Fs, dstFileName string, in io.ReadClo
type copyURLFunc func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error)
// copyURLFn copies the data from the url to the function supplied
func copyURLFn(ctx context.Context, dstFileName string, url string, dstFileNameFromURL bool, fn copyURLFunc) (err error) {
func copyURLFn(ctx context.Context, dstFileName string, url string, autoFilename, dstFileNameFromHeader bool, fn copyURLFunc) (err error) {
client := fshttp.NewClient(ctx)
resp, err := client.Get(url)
if err != nil {
@ -1771,7 +1772,17 @@ func copyURLFn(ctx context.Context, dstFileName string, url string, dstFileNameF
if err != nil {
modTime = time.Now()
}
if dstFileNameFromURL {
if autoFilename {
if dstFileNameFromHeader {
_, params, err := mime.ParseMediaType(resp.Header.Get("Content-Disposition"))
headerFilename := path.Base(strings.Replace(params["filename"], "\\", "/", -1))
if err != nil || headerFilename == "" {
return fmt.Errorf("copyurl failed: filename not found in the Content-Dispoition header")
}
fs.Debugf(headerFilename, "filename found in Content-Disposition header.")
return fn(ctx, headerFilename, resp.Body, resp.ContentLength, modTime)
}
dstFileName = path.Base(resp.Request.URL.Path)
if dstFileName == "." || dstFileName == "/" {
return fmt.Errorf("CopyURL failed: file name wasn't found in url")
@ -1782,9 +1793,9 @@ func copyURLFn(ctx context.Context, dstFileName string, url string, dstFileNameF
}
// CopyURL copies the data from the url to (fdst, dstFileName)
func CopyURL(ctx context.Context, fdst fs.Fs, dstFileName string, url string, dstFileNameFromURL bool, noClobber bool) (dst fs.Object, err error) {
func CopyURL(ctx context.Context, fdst fs.Fs, dstFileName string, url string, autoFilename, dstFileNameFromHeader bool, noClobber bool) (dst fs.Object, err error) {
err = copyURLFn(ctx, dstFileName, url, dstFileNameFromURL, func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error) {
err = copyURLFn(ctx, dstFileName, url, autoFilename, dstFileNameFromHeader, func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error) {
if noClobber {
_, err = fdst.NewObject(ctx, dstFileName)
if err == nil {
@ -1799,7 +1810,7 @@ func CopyURL(ctx context.Context, fdst fs.Fs, dstFileName string, url string, ds
// CopyURLToWriter copies the data from the url to the io.Writer supplied
func CopyURLToWriter(ctx context.Context, url string, out io.Writer) (err error) {
return copyURLFn(ctx, "", url, false, func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error) {
return copyURLFn(ctx, "", url, false, false, func(ctx context.Context, dstFileName string, in io.ReadCloser, size int64, modTime time.Time) (err error) {
_, err = io.Copy(out, in)
return err
})

View File

@ -739,31 +739,31 @@ func TestCopyURL(t *testing.T) {
ts := httptest.NewServer(handler)
defer ts.Close()
o, err := operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, false)
o, err := operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, false, false)
require.NoError(t, err)
assert.Equal(t, int64(len(contents)), o.Size())
fstest.CheckListingWithPrecision(t, r.Fremote, []fstest.Item{file1}, nil, fs.ModTimeNotSupported)
// Check file clobbering
_, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, true)
_, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, false, true)
require.Error(t, err)
// Check auto file naming
status = 0
urlFileName := "filename.txt"
o, err = operations.CopyURL(ctx, r.Fremote, "", ts.URL+"/"+urlFileName, true, false)
o, err = operations.CopyURL(ctx, r.Fremote, "", ts.URL+"/"+urlFileName, true, false, false)
require.NoError(t, err)
assert.Equal(t, int64(len(contents)), o.Size())
assert.Equal(t, urlFileName, o.Remote())
// Check auto file naming when url without file name
_, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, true, false)
_, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, true, false, false)
require.Error(t, err)
// Check an error is returned for a 404
status = http.StatusNotFound
o, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, false)
o, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, false, false)
require.Error(t, err)
assert.Contains(t, err.Error(), "Not Found")
assert.Nil(t, o)
@ -776,7 +776,7 @@ func TestCopyURL(t *testing.T) {
tss := httptest.NewTLSServer(handler)
defer tss.Close()
o, err = operations.CopyURL(ctx, r.Fremote, "file2", tss.URL, false, false)
o, err = operations.CopyURL(ctx, r.Fremote, "file2", tss.URL, false, false, false)
require.NoError(t, err)
assert.Equal(t, int64(len(contents)), o.Size())
fstest.CheckListingWithPrecision(t, r.Fremote, []fstest.Item{file1, file2, fstest.NewItem(urlFileName, contents, t1)}, nil, fs.ModTimeNotSupported)

View File

@ -274,8 +274,9 @@ func rcSingleCommand(ctx context.Context, in rc.Params, name string, noRemote bo
}
autoFilename, _ := in.GetBool("autoFilename")
noClobber, _ := in.GetBool("noClobber")
headerFilename, _ := in.GetBool("headerFilename")
_, err = CopyURL(ctx, f, remote, url, autoFilename, noClobber)
_, err = CopyURL(ctx, f, remote, url, autoFilename, headerFilename, noClobber)
return nil, err
case "uploadfile":