pkg/fetch/fetch.go (215 lines of code) (raw):
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package fetch contains functions for downloading various content types via HTTP.
package fetch
import (
"archive/tar"
"compress/gzip"
"encoding/json"
"io"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"strings"
gcp "github.com/GoogleCloudPlatform/buildpacks/pkg/gcpbuildpack"
"github.com/google/go-containerregistry/pkg/crane"
"github.com/hashicorp/go-retryablehttp"
)
// gcpUserAgent is required for the Ruby runtime, but used for others for simplicity.
const gcpUserAgent = "GCPBuildpacks"
// Tarball downloads a tarball from a URL and extracts it into the provided directory.
func Tarball(url, dir string, stripComponents int) error {
response, err := doGet(url)
if err != nil {
return err
}
defer response.Body.Close()
return untar(dir, response.Body, stripComponents)
}
// ARVersions downloads list of versions from artifact registry.
var ARVersions = func(url, fallbackURL string, ctx *gcp.Context) ([]string, error) {
versions, err := crane.ListTags(url)
if err != nil || len(versions) == 0 {
ctx.Logf("Failed to list versions from %s. Size of versions is %d. Error is: %v", url, len(versions), err)
ctx.Logf("Attempting to list versions from %s as a fallback", fallbackURL)
versions, err = crane.ListTags(fallbackURL)
}
return versions, err
}
// ARImage downloads tarball from images in artifact registry.
var ARImage = func(url, fallbackURL, dir string, stripComponents int, ctx *gcp.Context) error {
image, err := crane.Pull(url)
if err != nil {
ctx.Logf("Failed to download runtime from %s: %v", url, err)
ctx.Logf("Attempting to download from %s as a fallback", fallbackURL)
image, err = crane.Pull(fallbackURL)
if err != nil {
return err
}
ctx.Logf("Runtime successfully downloaded from %s", fallbackURL)
} else {
ctx.Logf("Runtime successfully downloaded from %s", url)
}
layers, err := image.Layers()
if err != nil {
return err
}
if len(layers) < 1 {
return gcp.InternalErrorf("runtime image has no layer")
}
l := layers[0]
rc, err := l.Compressed()
if err != nil {
return err
}
defer rc.Close()
return untar(dir, rc, stripComponents)
}
// File downloads a file from a URL and writes it to the provided path.
func File(url, outPath string) error {
out, err := os.Create(outPath)
if err != nil {
return err
}
defer out.Close()
response, err := doGet(url)
if err != nil {
return err
}
defer response.Body.Close()
_, err = io.Copy(out, response.Body)
return err
}
// JSON fetches a JSON payload from a URL and unmarshals it into the value pointed to by v.
func JSON(url string, v interface{}) error {
response, err := doGet(url)
if err != nil {
return err
}
defer response.Body.Close()
body, err := ioutil.ReadAll(response.Body)
if err != nil {
return gcp.InternalErrorf("reading response body from %q: %v", url, err)
}
if err := json.Unmarshal(body, v); err != nil {
return gcp.InternalErrorf("decoding response from %q: %v", url, err)
}
return nil
}
// GetURL makes an HTTP GET request to given URL and writes the body to the provided writer.
func GetURL(url string, f io.Writer) error {
response, err := doGet(url)
if err != nil {
return err
}
defer response.Body.Close()
if _, err = io.Copy(f, response.Body); err != nil {
return gcp.InternalErrorf("copying response body: %v", err)
}
return nil
}
// untar extracts a tarball from a reader and writes it to the given directory.
func untar(dir string, r io.Reader, stripComponents int) error {
gzr, err := gzip.NewReader(r)
if err != nil {
return gcp.InternalErrorf("creating gzip reader: %v", err)
}
defer gzr.Close()
madeDir := map[string]bool{}
tr := tar.NewReader(gzr)
for {
header, err := tr.Next()
switch {
case err == io.EOF:
return nil
case err != nil:
return gcp.InternalErrorf("untaring file: %v", err)
case header == nil:
continue
}
target, err := tarDestination(header.Name, dir, header.Typeflag, stripComponents)
if err != nil {
return err
}
switch header.Typeflag {
case tar.TypeDir:
if _, err := os.Stat(target); err != nil {
if err := os.Mkdir(target, os.FileMode(header.Mode)); err != nil {
return gcp.InternalErrorf("creating directory %q: %v", target, err)
}
madeDir[target] = true
}
case tar.TypeReg, tar.TypeRegA:
// Make the directory. This is redundant because it should
// already be made by a directory entry in the tar
// beforehand. Thus, don't check for errors; the next
// write will fail with the same error.
dir := filepath.Dir(target)
if !madeDir[dir] {
if err := os.MkdirAll(dir, 0755); err != nil {
return gcp.InternalErrorf("creating directory %q: %v", target, err)
}
madeDir[dir] = true
}
f, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode))
if err != nil {
return gcp.InternalErrorf("opening file %q: %v", target, err)
}
if _, err := io.Copy(f, tr); err != nil {
return gcp.InternalErrorf("copying file %q: %v", target, err)
}
if err := f.Close(); err != nil {
return gcp.InternalErrorf("closing file %q: %v", target, err)
}
case tar.TypeSymlink:
targetPath := filepath.Join(filepath.Dir(target), header.Linkname)
if !isValidTarDestination(targetPath, dir, header.Typeflag) {
return gcp.InternalErrorf("symlink %q -> %q traverses out of root", target, header.Linkname)
}
if err := os.Symlink(header.Linkname, target); err != nil {
return gcp.InternalErrorf("symlinking %q to %q: %v", target, header.Linkname, err)
}
case tar.TypeLink:
link, err := tarDestination(header.Linkname, dir, header.Typeflag, stripComponents)
if err != nil {
return err
}
if err := os.Link(link, target); err != nil {
return gcp.InternalErrorf("linking %q to %q: %v", target, link, err)
}
default:
return gcp.InternalErrorf("invalid tar entry %v", header)
}
}
}
// tarDestination returns the filepath that a tar entry should be written to when extracted.
func tarDestination(tarPath, rootDir string, tarType byte, stripComponents int) (string, error) {
rootDir = filepath.Clean(rootDir)
path := filepath.Join(rootDir, filepath.Clean(tarPath))
if stripComponents > 0 {
drop := strings.Count(rootDir, string(filepath.Separator)) + stripComponents + 1
parts := strings.Split(path, string(filepath.Separator))
if drop >= len(parts) && tarType == tar.TypeDir {
// This is a stripped away directory, returning rootDir makes this a no-op.
return rootDir, nil
}
if drop >= len(parts) {
// This is a file that would have been dropped if stripped it.
return "", gcp.InternalErrorf("stripped too many components (%v)", stripComponents)
}
path = filepath.Join(rootDir, filepath.Join(parts[drop:]...))
}
// Only allow extraction either directly into the root, or within a subdirectory from the root.
if isValidTarDestination(path, rootDir, tarType) {
return path, nil
}
return "", gcp.InternalErrorf("tar entry %q traverses out of root", tarPath)
}
// isValidTarDestination protects against a path traversal vulnerability by ensuring the final path
// is within the target directory.
func isValidTarDestination(dest, rootDir string, tarType byte) bool {
destDir := dest
if tarType != tar.TypeDir {
destDir = filepath.Dir(dest)
}
return destDir == rootDir ||
strings.HasPrefix(destDir, rootDir+string(filepath.Separator))
}
// doGet performs an HTTP GET request for a URL.
func doGet(url string) (*http.Response, error) {
retryClient := retryablehttp.NewClient()
retryClient.RetryMax = 3
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, gcp.UserErrorf("fetching %s: %v", url, err)
}
req.Header.Set("User-Agent", gcpUserAgent)
response, err := retryClient.StandardClient().Do(req)
if err != nil {
return nil, gcp.UserErrorf("requesting %s: %v", url, err)
}
if response.StatusCode < http.StatusOK || response.StatusCode >= http.StatusMultipleChoices {
defer response.Body.Close()
return nil, gcp.UserErrorf("fetching %s returned HTTP status: %d", url, response.StatusCode)
}
return response, err
}