diff --git a/internal/providers/sourceproviders/fedorasource/fedorasource.go b/internal/providers/sourceproviders/fedorasource/fedorasource.go index 9029ffc2..c0489e4e 100644 --- a/internal/providers/sourceproviders/fedorasource/fedorasource.go +++ b/internal/providers/sourceproviders/fedorasource/fedorasource.go @@ -10,6 +10,7 @@ import ( "errors" "fmt" "log/slog" + "net/url" "path/filepath" "regexp" "strings" @@ -309,30 +310,80 @@ const ( PlaceholderHash = "$hash" ) +// validateAbsoluteURL parses uri and verifies it is an absolute URL with a +// non-empty scheme and host. The label parameter is used in error messages to +// identify the URL's purpose (e.g. "lookaside", "dist-git"). +func validateAbsoluteURL(uri, label string) error { + u, err := url.Parse(uri) + if err != nil { + return fmt.Errorf("resulting %s URL is not valid:\n%w", label, err) + } + + if u.Scheme == "" || u.Host == "" { + return fmt.Errorf("resulting %s URL %#q is missing scheme or host", label, uri) + } + + return nil +} + // BuildLookasideURL constructs a lookaside cache URL by substituting placeholders in the // URI template with the provided values. Supported placeholders are [PlaceholderPkg], // [PlaceholderFilename], [PlaceholderHashType], and [PlaceholderHash]. // Placeholders not present in the template are simply ignored. // +// Substituted values are URL path-escaped via [url.PathEscape] so that reserved +// characters such as /, ?, #, and % do not alter the URL structure. +// // Returns an error if any of the provided values contain a placeholder string, as this -// would cause ambiguous substitution results depending on replacement order. +// would cause ambiguous substitution results depending on replacement order, or if the +// resulting URL is not valid. func BuildLookasideURL(template, packageName, fileName, hashType, hash string) (string, error) { // allPlaceholders lists all supported lookaside URI template placeholders. allPlaceholders := []string{PlaceholderPkg, PlaceholderFilename, PlaceholderHashType, PlaceholderHash} + // Normalize hashType to lowercase since that is the form actually substituted. + hashType = strings.ToLower(hashType) + for _, v := range []string{packageName, fileName, hashType, hash} { for _, p := range allPlaceholders { if strings.Contains(v, p) { - return "", fmt.Errorf("value %#q contains placeholder %s, which would cause ambiguous substitution", v, p) + return "", fmt.Errorf("value %#q contains placeholder %#q, which would cause ambiguous substitution", v, p) } } } uri := template - uri = strings.ReplaceAll(uri, PlaceholderPkg, packageName) - uri = strings.ReplaceAll(uri, PlaceholderFilename, fileName) - uri = strings.ReplaceAll(uri, PlaceholderHashType, strings.ToLower(hashType)) - uri = strings.ReplaceAll(uri, PlaceholderHash, hash) + uri = strings.ReplaceAll(uri, PlaceholderPkg, url.PathEscape(packageName)) + uri = strings.ReplaceAll(uri, PlaceholderFilename, url.PathEscape(fileName)) + uri = strings.ReplaceAll(uri, PlaceholderHashType, url.PathEscape(hashType)) + uri = strings.ReplaceAll(uri, PlaceholderHash, url.PathEscape(hash)) + + if err := validateAbsoluteURL(uri, "lookaside"); err != nil { + return "", err + } + + return uri, nil +} + +// BuildDistGitURL constructs a dist-git repository URL by substituting the +// [PlaceholderPkg] placeholder in the URI template with the provided package name. +// +// The package name is URL path-escaped via [url.PathEscape] so that reserved +// characters such as /, ?, #, and % do not alter the URL structure. +// +// Returns an error if the package name contains a placeholder string, or if the +// resulting URL is not valid. +func BuildDistGitURL(template, packageName string) (string, error) { + if strings.Contains(packageName, PlaceholderPkg) { + return "", fmt.Errorf("package name %#q contains placeholder %#q, which would cause ambiguous substitution", + packageName, PlaceholderPkg) + } + + uri := strings.ReplaceAll(template, PlaceholderPkg, url.PathEscape(packageName)) + + if err := validateAbsoluteURL(uri, "dist-git"); err != nil { + return "", err + } return uri, nil } diff --git a/internal/providers/sourceproviders/fedorasource/fedorasource_test.go b/internal/providers/sourceproviders/fedorasource/fedorasource_test.go index 2249196a..afa45e72 100644 --- a/internal/providers/sourceproviders/fedorasource/fedorasource_test.go +++ b/internal/providers/sourceproviders/fedorasource/fedorasource_test.go @@ -326,6 +326,78 @@ func TestBuildLookasideURL(t *testing.T) { hash: "abc123", expectedError: "ambiguous substitution", }, + { + name: "filename with slash is path-escaped", + template: "https://example.com/$pkg/$filename/$hashtype/$hash", + pkg: "my-pkg", + filename: "foo/bar", + hashType: "SHA512", + hash: "abc123", + expected: "https://example.com/my-pkg/foo%2Fbar/sha512/abc123", + }, + { + name: "filename with question mark is path-escaped", + template: "https://example.com/$pkg/$filename/$hashtype/$hash", + pkg: "my-pkg", + filename: "file?x=1", + hashType: "SHA512", + hash: "abc123", + expected: "https://example.com/my-pkg/file%3Fx=1/sha512/abc123", + }, + { + name: "filename with hash is path-escaped", + template: "https://example.com/$pkg/$filename/$hashtype/$hash", + pkg: "my-pkg", + filename: "file#frag", + hashType: "SHA512", + hash: "abc123", + expected: "https://example.com/my-pkg/file%23frag/sha512/abc123", + }, + { + name: "filename with malformed percent is path-escaped", + template: "https://example.com/$pkg/$filename/$hashtype/$hash", + pkg: "my-pkg", + filename: "file%zz", + hashType: "SHA512", + hash: "abc123", + expected: "https://example.com/my-pkg/file%25zz/sha512/abc123", + }, + { + name: "packageName with slash is path-escaped", + template: "https://example.com/$pkg/$filename/$hashtype/$hash", + pkg: "foo/bar", + filename: "source.tar.gz", + hashType: "SHA512", + hash: "abc123", + expected: "https://example.com/foo%2Fbar/source.tar.gz/sha512/abc123", + }, + { + name: "packageName with hash is path-escaped", + template: "https://example.com/$pkg/$filename/$hashtype/$hash", + pkg: "foo#bar", + filename: "source.tar.gz", + hashType: "SHA512", + hash: "abc123", + expected: "https://example.com/foo%23bar/source.tar.gz/sha512/abc123", + }, + { + name: "hashType containing uppercase placeholder is caught after lowercasing", + template: "https://example.com/$pkg/$filename/$hashtype/$hash", + pkg: "my-pkg", + filename: "source.tar.gz", + hashType: "$PKG", + hash: "abc123", + expectedError: "ambiguous substitution", + }, + { + name: "template without scheme is rejected", + template: "example.com/$pkg/$filename/$hashtype/$hash", + pkg: "my-pkg", + filename: "source.tar.gz", + hashType: "SHA512", + hash: "abc123", + expectedError: "missing scheme or host", + }, } for _, testCase := range tests { @@ -342,3 +414,68 @@ func TestBuildLookasideURL(t *testing.T) { }) } } + +func TestBuildDistGitURL(t *testing.T) { + tests := []struct { + name string + template string + pkg string + expected string + expectedError string + }{ + { + name: "standard template", + template: "https://src.example.com/rpms/$pkg.git", + pkg: "curl", + expected: "https://src.example.com/rpms/curl.git", + }, + { + name: "packageName containing $pkg placeholder", + template: "https://src.example.com/rpms/$pkg.git", + pkg: "evil-$pkg-name", + expectedError: "ambiguous substitution", + }, + { + name: "packageName with slash is path-escaped", + template: "https://src.example.com/rpms/$pkg.git", + pkg: "foo/bar", + expected: "https://src.example.com/rpms/foo%2Fbar.git", + }, + { + name: "packageName with hash is path-escaped", + template: "https://src.example.com/rpms/$pkg.git", + pkg: "foo#bar", + expected: "https://src.example.com/rpms/foo%23bar.git", + }, + { + name: "packageName with question mark is path-escaped", + template: "https://src.example.com/rpms/$pkg.git", + pkg: "foo?bar", + expected: "https://src.example.com/rpms/foo%3Fbar.git", + }, + { + name: "packageName with malformed percent is path-escaped", + template: "https://src.example.com/rpms/$pkg.git", + pkg: "foo%zz", + expected: "https://src.example.com/rpms/foo%25zz.git", + }, + { + name: "template without scheme is rejected", + template: "example.com/rpms/$pkg.git", + pkg: "curl", + expectedError: "missing scheme or host", + }, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + result, err := BuildDistGitURL(testCase.template, testCase.pkg) + if testCase.expectedError != "" { + assert.ErrorContains(t, err, testCase.expectedError) + } else { + require.NoError(t, err) + assert.Equal(t, testCase.expected, result) + } + }) + } +} diff --git a/internal/providers/sourceproviders/fedorasourceprovider.go b/internal/providers/sourceproviders/fedorasourceprovider.go index d99338a7..00c69ff2 100644 --- a/internal/providers/sourceproviders/fedorasourceprovider.go +++ b/internal/providers/sourceproviders/fedorasourceprovider.go @@ -9,7 +9,6 @@ import ( "fmt" "log/slog" "path/filepath" - "strings" "time" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" @@ -106,7 +105,10 @@ func (g *FedoraSourcesProviderImpl) GetComponent( return errors.New("destination path cannot be empty") } - gitRepoURL := strings.ReplaceAll(g.distroGitBaseURI, "$pkg", upstreamNameToUse) + gitRepoURL, err := fedorasource.BuildDistGitURL(g.distroGitBaseURI, upstreamNameToUse) + if err != nil { + return fmt.Errorf("failed to build dist-git URL for %#q:\n%w", upstreamNameToUse, err) + } slog.Info("Getting component from git repo", "component", componentName, @@ -259,7 +261,10 @@ func (g *FedoraSourcesProviderImpl) ResolveIdentity( upstreamName = component.GetName() } - gitRepoURL := strings.ReplaceAll(g.distroGitBaseURI, "$pkg", upstreamName) + gitRepoURL, err := fedorasource.BuildDistGitURL(g.distroGitBaseURI, upstreamName) + if err != nil { + return "", fmt.Errorf("failed to build dist-git URL for %#q:\n%w", upstreamName, err) + } return g.resolveCommit(ctx, gitRepoURL, upstreamName, component.GetConfig().Spec.UpstreamCommit) }