diff --git a/backend/http/http.go b/backend/http/http.go index 4c436fb36..93cd3e102 100644 --- a/backend/http/http.go +++ b/backend/http/http.go @@ -200,37 +200,52 @@ func parseInt64(s string) int64 { return n } -// parseName turns a name as found in the page into a remote path or returns false -func parseName(base *url.URL, name string) (string, bool) { +// Errors returned by parseName +var ( + errURLJoinFailed = errors.New("URLJoin failed") + errFoundQuestionMark = errors.New("found ? in URL") + errHostMismatch = errors.New("host mismatch") + errSchemeMismatch = errors.New("scheme mismatch") + errNotUnderRoot = errors.New("not under root") + errNameIsEmpty = errors.New("name is empty") + errNameContainsSlash = errors.New("name contains /") +) + +// parseName turns a name as found in the page into a remote path or returns an error +func parseName(base *url.URL, name string) (string, error) { + // make URL absolute u, err := rest.URLJoin(base, name) if err != nil { - return "", false + return "", errURLJoinFailed } + // check it doesn't have URL parameters uStr := u.String() if strings.Index(uStr, "?") >= 0 { - return "", false + return "", errFoundQuestionMark } - baseStr := base.String() - // check has URL prefix - if !strings.HasPrefix(uStr, baseStr) { - return "", false + // check that this is going back to the same host and scheme + if base.Host != u.Host { + return "", errHostMismatch + } + if base.Scheme != u.Scheme { + return "", errSchemeMismatch } // check has path prefix if !strings.HasPrefix(u.Path, base.Path) { - return "", false + return "", errNotUnderRoot } // calculate the name relative to the base name = u.Path[len(base.Path):] // musn't be empty if name == "" { - return "", false + return "", errNameIsEmpty } - // mustn't contain a / + // mustn't contain a / - we are looking for a single level directory slash := strings.Index(name, "/") if slash >= 0 && slash != len(name)-1 { - return "", false + return "", errNameContainsSlash } - return name, true + return name, nil } // Parse turns HTML for a directory into names @@ -245,8 +260,8 @@ func parse(base *url.URL, in io.Reader) (names []string, err error) { if n.Type == html.ElementNode && n.Data == "a" { for _, a := range n.Attr { if a.Key == "href" { - name, ok := parseName(base, a.Val) - if ok { + name, err := parseName(base, a.Val) + if err == nil { names = append(names, name) } break diff --git a/backend/http/http_internal_test.go b/backend/http/http_internal_test.go index 0cfe325de..a802297b4 100644 --- a/backend/http/http_internal_test.go +++ b/backend/http/http_internal_test.go @@ -207,30 +207,34 @@ func TestIsAFileSubDir(t *testing.T) { func TestParseName(t *testing.T) { for i, test := range []struct { - base string - val string - wantOK bool - want string + base string + val string + wantErr error + want string }{ - {"http://example.com/", "potato", true, "potato"}, - {"http://example.com/dir/", "potato", true, "potato"}, - {"http://example.com/dir/", "../dir/potato", true, "potato"}, - {"http://example.com/dir/", "..", false, ""}, - {"http://example.com/dir/", "http://example.com/", false, ""}, - {"http://example.com/dir/", "http://example.com/dir/", false, ""}, - {"http://example.com/dir/", "http://example.com/dir/potato", true, "potato"}, - {"http://example.com/dir/", "/dir/", false, ""}, - {"http://example.com/dir/", "/dir/potato", true, "potato"}, - {"http://example.com/dir/", "subdir/potato", false, ""}, - {"http://example.com/dir/", "With percent %25.txt", true, "With percent %.txt"}, - {"http://example.com/dir/", "With colon :", false, ""}, - {"http://example.com/dir/", rest.URLPathEscape("With colon :"), true, "With colon :"}, + {"http://example.com/", "potato", nil, "potato"}, + {"http://example.com/dir/", "potato", nil, "potato"}, + {"http://example.com/dir/", "potato?download=true", errFoundQuestionMark, ""}, + {"http://example.com/dir/", "../dir/potato", nil, "potato"}, + {"http://example.com/dir/", "..", errNotUnderRoot, ""}, + {"http://example.com/dir/", "http://example.com/", errNotUnderRoot, ""}, + {"http://example.com/dir/", "http://example.com/dir/", errNameIsEmpty, ""}, + {"http://example.com/dir/", "http://example.com/dir/potato", nil, "potato"}, + {"http://example.com/dir/", "https://example.com/dir/potato", errSchemeMismatch, ""}, + {"http://example.com/dir/", "http://notexample.com/dir/potato", errHostMismatch, ""}, + {"http://example.com/dir/", "/dir/", errNameIsEmpty, ""}, + {"http://example.com/dir/", "/dir/potato", nil, "potato"}, + {"http://example.com/dir/", "subdir/potato", errNameContainsSlash, ""}, + {"http://example.com/dir/", "With percent %25.txt", nil, "With percent %.txt"}, + {"http://example.com/dir/", "With colon :", errURLJoinFailed, ""}, + {"http://example.com/dir/", rest.URLPathEscape("With colon :"), nil, "With colon :"}, + {"http://example.com/Dungeons%20%26%20Dragons/", "/Dungeons%20&%20Dragons/D%26D%20Basic%20%28Holmes%2C%20B%2C%20X%2C%20BECMI%29/", nil, "D&D Basic (Holmes, B, X, BECMI)/"}, } { u, err := url.Parse(test.base) require.NoError(t, err) - got, gotOK := parseName(u, test.val) + got, gotErr := parseName(u, test.val) what := fmt.Sprintf("test %d base=%q, val=%q", i, test.base, test.val) - assert.Equal(t, test.wantOK, gotOK, what) + assert.Equal(t, test.wantErr, gotErr, what) assert.Equal(t, test.want, got, what) } }