gce-containers-startup/volumes/volumes.go (411 lines of code) (raw):

// Copyright 2017 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package volumes import ( "bufio" "bytes" "encoding/json" "errors" "flag" "fmt" "log" "os" "strings" "github.com/GoogleCloudPlatform/konlet/gce-containers-startup/metadata" api "github.com/GoogleCloudPlatform/konlet/gce-containers-startup/types" ) const ( ext4FsType string = "ext4" ) var ( mountedVolumesPathPrefixFlag = flag.String("mounted-volumes-path-prefix", "/mnt/disks/gce-containers-mounts", "Path prefix under which mount volumes.") hostProcPathFlag = flag.String("host-proc-path", "/host_proc", "Use nsenter to enter host's mount namespace specified under this path. If left empty, no namespace switch is performed (implying running outside of container.") ) // Environment struct for dependency injection. type Env struct { OsCommandRunner OsCommandRunner MetadataProvider metadata.Provider } type OsCommandRunner interface { Run(...string) (string, error) MkdirAll(path string, perm os.FileMode) error Stat(name string) (os.FileInfo, error) } type VolumeHostPathAndMode struct { // nil hostPath means no backing directory, implying tmpfs mount. hostPath string readOnly bool } type HostPathBindConfiguration struct { HostPath string ContainerPath string ReadOnly bool } type mount struct { device string mountPoint string fsType string options string } type mountOption func(*mount) func newMount(device, mountPoint, fsType string, opts ...mountOption) mount { mnt := mount{device, mountPoint, fsType, ""} for _, opt := range opts { opt(&mnt) } return mnt } func readOnly(ro bool) mountOption { return func(mnt *mount) { var opt string if ro { opt = "ro" } else { opt = "rw" } if mnt.options == "" { mnt.options = opt return } mnt.options += ("," + opt) } } // UnmountExistingVolumes unmounts all volumes mounted under the directory // managed by konlet. The function continues even if some unmounting operations // fail and reports errors, if any, at the end. func (env Env) UnmountExistingVolumes() error { mounts, err := env.existingMounts() if err != nil { return fmt.Errorf("failed to list existing volumes: %v", err) } var buf bytes.Buffer for _, mnt := range mounts { if err := env.unmountDevice(mnt); err != nil { buf.WriteString(fmt.Sprintf("%v\n", err)) continue } log.Printf("Unmounted %s", mnt.mountPoint) } if buf.Len() > 0 { msg := buf.String() return errors.New(msg[:len(msg)-1]) // remove trailing newline } return nil } // existingMounts returns a slice of mount descriptors containing all existing // devices mounted by konlet (they are mounted at paths prefixed by // mountedVolumesPathPrefixFlag). func (env Env) existingMounts() ([]mount, error) { file, err := env.OsCommandRunner.Run(wrapToEnterHostMountNamespace("cat", "/proc/mounts")...) if err != nil { return nil, err } var mounts []mount scanner := bufio.NewScanner(strings.NewReader(file)) for scanner.Scan() { line := scanner.Text() mnt, err := parseMountEntry(line) if err != nil || mnt == nil { continue } if strings.HasPrefix(mnt.mountPoint, *mountedVolumesPathPrefixFlag) { mounts = append(mounts, *mnt) } } return mounts, nil } // parseMountEntry takes a single line of /proc/mounts and returns a pointer to // a struct with corresponding fields and a nil error if parsing succeeded. The // pointer may be nil if the line is a comment. If the format is not as expected // the function returns a nil value and a non-nil error. func parseMountEntry(entry string) (*mount, error) { if strings.HasPrefix(entry, "#") { // The entry is a comment. return nil, nil } parts := strings.Split(entry, " ") if len(parts) < 4 { return nil, fmt.Errorf("invalid format: expected at least 4 space-separated columns") } // There may be 4 escaped characters (space (\040), tab (\011), newline (\012) // and backslash (\134)), so we unescape them if necessary. unescape := strings.NewReplacer( `\040`, "\040", `\011`, "\011", `\012`, "\012", `\134`, "\134", ) return &mount{ device: unescape.Replace(parts[0]), mountPoint: unescape.Replace(parts[1]), fsType: unescape.Replace(parts[2]), options: unescape.Replace(parts[3]), }, nil } // PrepareVolumesAndGetBindings does 3 things: // - Verifies if the container specification passed to it is correct in terms of // volumes it references. // - Creates/mounts/formats all the necessary volumes. // - Outputs the binding map, which will be used by the container runtime to // bind the volumes containers. // // The function takes a container specification struct. It operates in context // of its environment (the receiver), which consist of two parts: // - OsCommandRunner: it executes the commands issued during execution of the // function. // - MetadataProvider: it is the source of additional information coming from // the metadata, used in processing persistent disks. // // The function returns a map, its keys are container names (currently this // should be a single name) and values are slices of hostPath binds. These // corresponds to files and directories that are mounted in a container. // Currently all supported types of volumes (EmptyDir, HostPath and // PersistentDisk) are ultimately handled using a Docker's hostPath bind. // // The caller should not expect the function to be idempotent. Errors are to be // considered non-retryable. func (env Env) PrepareVolumesAndGetBindings(spec api.ContainerSpecStruct) (map[string][]HostPathBindConfiguration, error) { // First, build maps that will allow to verify logical consistency: // - All volumes must be referenced at least once. // - All volume mounts must refer an existing volume. volumesReferencesCount := map[string]int{} volumeMountWantsReadWriteMap := map[string]bool{} for _, apiVolume := range spec.Volumes { volumesReferencesCount[apiVolume.Name] = 0 volumeMountWantsReadWriteMap[apiVolume.Name] = false } for containerIndex, container := range spec.Containers { log.Printf("Found %d volume mounts in container %s declaration.", len(container.VolumeMounts), container.Name) for _, volumeMount := range container.VolumeMounts { if _, present := volumesReferencesCount[volumeMount.Name]; !present { return nil, fmt.Errorf("Invalid container declaration: Volume %s referenced in container %s (index: %d) not found in volume definitions.", volumeMount.Name, container.Name, containerIndex) } else { volumesReferencesCount[volumeMount.Name] += 1 volumeMountWantsReadWriteMap[volumeMount.Name] = volumeMountWantsReadWriteMap[volumeMount.Name] || !volumeMount.ReadOnly } } } for volumeName, referenceCount := range volumesReferencesCount { if referenceCount == 0 { return nil, fmt.Errorf("Invalid container declaration: Volume %s not referenced by any container.", volumeName) } } volumeNameToHostPathMap, volumeNameMapBuildingError := env.buildVolumeNameToHostPathMap(spec.Volumes, volumeMountWantsReadWriteMap) if volumeNameMapBuildingError != nil { return nil, volumeNameMapBuildingError } containerBindingConfigurationMap := map[string][]HostPathBindConfiguration{} for _, container := range spec.Containers { var hostPathBinds []HostPathBindConfiguration for _, volumeMount := range container.VolumeMounts { // It has already been checked that the volume is present. volumeHostPathAndMode, _ := volumeNameToHostPathMap[volumeMount.Name] if volumeHostPathAndMode.readOnly && !volumeMount.ReadOnly { return nil, fmt.Errorf("Container %s: volumeMount %s specifies read-write access, but underlying volume is read-only.", container.Name, volumeMount.Name) } hostPathBinds = append(hostPathBinds, HostPathBindConfiguration{HostPath: volumeHostPathAndMode.hostPath, ContainerPath: volumeMount.MountPath, ReadOnly: volumeMount.ReadOnly}) } containerBindingConfigurationMap[container.Name] = hostPathBinds } return containerBindingConfigurationMap, nil } func (env Env) buildVolumeNameToHostPathMap(apiVolumes []api.Volume, volumeMountWantsReadWriteMap map[string]bool) (map[string]VolumeHostPathAndMode, error) { // For each volume, use the proper handler function to build the volume name -> hostpath+mode map. volumeNameToHostPathMap := map[string]VolumeHostPathAndMode{} diskMetadataReadOnlyMap, err := env.buildDiskMetadataReadOnlyMap() if err != nil { return nil, fmt.Errorf("Failed to build disk read only map from metadata: %s", err) } for _, apiVolume := range apiVolumes { volumeMountWantsReadWrite, found := volumeMountWantsReadWriteMap[apiVolume.Name] if !found { return nil, fmt.Errorf("apiVolume %s not found in the volumeMount RW map. This should not happen.", apiVolume.Name) } // Enforce exactly one volume definition. definitions := 0 var volumeHostPathAndMode VolumeHostPathAndMode var processError error if apiVolume.HostPath != nil { definitions++ volumeHostPathAndMode, processError = env.processHostPathVolume(apiVolume.HostPath) } if apiVolume.EmptyDir != nil { definitions++ volumeHostPathAndMode, processError = env.processEmptyDirVolume(apiVolume.EmptyDir, apiVolume.Name) } if apiVolume.GcePersistentDisk != nil { definitions++ volumeHostPathAndMode, processError = env.processGcePersistentDiskVolume(apiVolume.GcePersistentDisk, volumeMountWantsReadWrite, diskMetadataReadOnlyMap) } if definitions != 1 { return nil, fmt.Errorf("Invalid container declaration: Exactly one volume specification required for volume %s, %d found.", apiVolume.Name, definitions) } if processError != nil { return nil, fmt.Errorf("Volume %s: %s", apiVolume.Name, processError) } else { volumeNameToHostPathMap[apiVolume.Name] = volumeHostPathAndMode } } return volumeNameToHostPathMap, nil } func (env Env) processEmptyDirVolume(volume *api.EmptyDirVolume, volumeName string) (VolumeHostPathAndMode, error) { if volume.Medium != "Memory" { return VolumeHostPathAndMode{}, fmt.Errorf("Unsupported emptyDir volume medium: %s", volume.Medium) } return env.processMemoryBackedEmptyDirVolume(volume, volumeName) } func (env Env) processMemoryBackedEmptyDirVolume(volume *api.EmptyDirVolume, volumeName string) (VolumeHostPathAndMode, error) { tmpfsMountPoint, err := env.createNewMountPath("tmpfs", volumeName) if err != nil { return VolumeHostPathAndMode{}, err } if err := env.mountDevice(newMount("tmpfs", tmpfsMountPoint, "tmpfs", readOnly(false))); err != nil { return VolumeHostPathAndMode{}, err } return VolumeHostPathAndMode{hostPath: tmpfsMountPoint, readOnly: false}, nil } func (env Env) processHostPathVolume(volume *api.HostPathVolume) (VolumeHostPathAndMode, error) { // No checks are done on this level. It is expected that underlying docker // will report errors (if any), at the same time it will take care of // creating missing directores etc. Note that it might still fail due to // large parts of the COS system being read-only. return VolumeHostPathAndMode{hostPath: volume.Path, readOnly: false}, nil } func (env Env) processGcePersistentDiskVolume(volume *api.GcePersistentDiskVolume, volumeMountWantsReadWrite bool, diskMetadataReadOnlyMap map[string]bool) (VolumeHostPathAndMode, error) { if volume.FsType != "" && volume.FsType != ext4FsType { return VolumeHostPathAndMode{}, fmt.Errorf("Unsupported filesystem type: %s", volume.FsType) } chosenFsType := ext4FsType if volume.PdName == "" { return VolumeHostPathAndMode{}, fmt.Errorf("Empty GCE Persistent Disk name!") } attachedReadOnly, diskMetadataFound := diskMetadataReadOnlyMap[volume.PdName] if !diskMetadataFound { return VolumeHostPathAndMode{}, fmt.Errorf("Could not determine if the GCE Persistent Disk %s is attached read-only or read-write.", volume.PdName) } if attachedReadOnly && volumeMountWantsReadWrite { return VolumeHostPathAndMode{}, fmt.Errorf("Volume mount requires read-write access, but the GCE Persistent Disk %s is attached read-only.", volume.PdName) } devicePath, err := resolveGcePersistentDiskDevicePath(volume.PdName) if err != nil { return VolumeHostPathAndMode{}, fmt.Errorf("Could not resolve GCE Persistent Disk device path: %s", err) } if volume.Partition > 0 { devicePath = fmt.Sprintf("%s-part%d", devicePath, volume.Partition) } if err := env.checkDeviceReadable(devicePath); err != nil { return VolumeHostPathAndMode{}, err } if err := env.checkDeviceNotMounted(devicePath); err != nil { return VolumeHostPathAndMode{}, err } if !attachedReadOnly { if err := env.checkFilesystemAndFormatIfNeeded(devicePath, chosenFsType); err != nil { return VolumeHostPathAndMode{}, err } } mountReadOnly := attachedReadOnly || !volumeMountWantsReadWrite deviceMountPoint, err := env.createNewMountPath("gce-persistent-disk", volume.PdName) if err != nil { return VolumeHostPathAndMode{}, err } mnt := newMount(devicePath, deviceMountPoint, chosenFsType, readOnly(mountReadOnly)) if err := env.mountDevice(mnt); err != nil { return VolumeHostPathAndMode{}, err } // Success! return VolumeHostPathAndMode{deviceMountPoint, mountReadOnly}, nil } func (env Env) buildDiskMetadataReadOnlyMap() (map[string]bool, error) { diskMetadataReadOnlyMap := map[string]bool{} diskMetadataJson, err := env.MetadataProvider.RetrieveDisksMetadataAsJson() if err != nil { return nil, fmt.Errorf("Failed to retrieve disk metadata: %s", err) } var parsedMetadata []struct { // Note: there are other fields in the list, but they're irrelevant for our purpose. DeviceName string Mode string } err = json.Unmarshal(diskMetadataJson, &parsedMetadata) if err != nil { return nil, fmt.Errorf("Failed to unmarshal disk metadata JSON: %s", err) } for _, entry := range parsedMetadata { if entry.DeviceName == "" { return nil, fmt.Errorf("Received empty device name in the metadata: %+v", parsedMetadata) } switch entry.Mode { case "READ_WRITE": diskMetadataReadOnlyMap[entry.DeviceName] = false case "READ_ONLY": diskMetadataReadOnlyMap[entry.DeviceName] = true default: return nil, fmt.Errorf("Received unknown device mode from metadata for device %s: %s", entry.DeviceName, entry.Mode) } } return diskMetadataReadOnlyMap, nil } func resolveGcePersistentDiskDevicePath(pdName string) (string, error) { // Currently, only static mapping is supported, as metadata about PD name is not available. return fmt.Sprintf("/dev/disk/by-id/google-%s", pdName), nil } // Generate a name for the new volume mount, based on the volume family (type) // and volume name. Create the directory if necessary, return a path to a // valid directory to mount the volume in, error otherwise. func (env Env) createNewMountPath(volumeFamily string, volumeName string) (string, error) { path := fmt.Sprintf("%s/%ss/%s", *mountedVolumesPathPrefixFlag, volumeFamily, volumeName) log.Printf("Creating directory %s as a mount point for volume %s.", path, volumeName) if err := env.OsCommandRunner.MkdirAll(path, 0755); err != nil { return "", fmt.Errorf("Failed to create directory %s: %s", path, err) } else { return path, nil } } func wrapToEnterHostMountNamespace(origCommandline ...string) []string { if *hostProcPathFlag == "" { return origCommandline } // Change the mount namespace to the host one. Note that we're // not able to access the mounted directory afterwards (without // yet another nsenter call). nsenterCommandline := []string{"nsenter", fmt.Sprintf("--mount=%s/1/ns/mnt", *hostProcPathFlag), "--"} return append(nsenterCommandline, origCommandline...) } // Attempt to mount the device at the specified path. Assumes the device // contains a clean filesystem. func (env Env) mountDevice(mnt mount) error { log.Printf("Attempting to mount device %s at %s.", mnt.device, mnt.mountPoint) mountCommandline := []string{"mount"} if len(mnt.options) > 0 { mountCommandline = append(mountCommandline, "-o", mnt.options) } mountCommandline = append(mountCommandline, "-t", mnt.fsType, mnt.device, mnt.mountPoint) _, err := env.OsCommandRunner.Run(wrapToEnterHostMountNamespace(mountCommandline...)...) if err != nil { return fmt.Errorf("Failed to mount %s at %s: %v", mnt.device, mnt.mountPoint, err) } return nil } // Attempt to unmount the device at the specified path. func (env Env) unmountDevice(mnt mount) error { log.Printf("Attempting to unmount device %s at %s.", mnt.device, mnt.mountPoint) _, err := env.OsCommandRunner.Run(wrapToEnterHostMountNamespace("umount", mnt.mountPoint)...) if err != nil { return fmt.Errorf("Failed to unmount %s at %s: %v", mnt.device, mnt.mountPoint, err) } return nil } func (env Env) checkDeviceReadable(devicePath string) error { fileInfo, err := env.OsCommandRunner.Stat(devicePath) if err != nil { return fmt.Errorf("Device %s access error: %s", devicePath, err) } if fileInfo.Mode()&os.ModeDevice == 0 || fileInfo.Mode()&os.ModeCharDevice != 0 { return fmt.Errorf("Path %s is not a block device.", devicePath) } // TODO: More detailed access checks. return nil } func (env Env) checkFilesystemAndFormatIfNeeded(devicePath string, configuredFsType string) error { // Should be const, but Go can't into map consts. filesystemCheckerMap := map[string][]string{ ext4FsType: []string{"fsck.ext4", "-p"}, } filesystemFormatterMap := map[string][]string{ ext4FsType: []string{"mkfs.ext4"}, } filesystemChecker := filesystemCheckerMap[configuredFsType] filesystemFormatter := filesystemFormatterMap[configuredFsType] if filesystemChecker == nil || filesystemFormatter == nil { return fmt.Errorf("Could not find checker or formatter for filesystem %s.", configuredFsType) } const lsblkFsType string = "FSTYPE" foundFsType, err := env.getSinglePropertyFromDeviceWithLsblk(devicePath, lsblkFsType) if err != nil { return err } // Unfortunately, lsblk(8) doesn't provide a way to tell apart a // nonexistent filesystem (e.g. a fresh drive) from device read problem // - in both cases not reporting any errors and returning an empty // FSTYPE field. Therefore, care must be taken to compensate for this // behaviour. The strategy below is deemed safe, because: // // - If lsblk(8) lacks privileges to read the filesystem and the // decision is put forward to format it, mkfs(8) will fail as well. // - If lsblk(8) had privileges and still didn't detect the filesystem, // it's OK to format it. if foundFsType == "" { // Need to format. log.Printf("Formatting device %s with filesystem %s...", devicePath, configuredFsType) output, err := env.OsCommandRunner.Run(append(filesystemFormatter, devicePath)...) if err != nil { return fmt.Errorf("Failed to format filesystem: %s", err) } else { log.Printf("%s\n", output) } } else if foundFsType == configuredFsType { // Need to fsck. log.Printf("Running filesystem checker on device %s...", devicePath) output, err := env.OsCommandRunner.Run(append(filesystemChecker, devicePath)...) if err != nil { return fmt.Errorf("Filesystem check failed: %s", err) } else { log.Printf("%s\n", output) } } else { return fmt.Errorf("Device %s: found filesystem type %s, expected %s.", devicePath, foundFsType, configuredFsType) } return nil } // Return non-nil error with meaningful message when the device is already mounted. func (env Env) checkDeviceNotMounted(devicePath string) error { const lsblkMountPoint string = "MOUNTPOINT" if mountPoint, err := env.getSinglePropertyFromDeviceWithLsblk(devicePath, lsblkMountPoint); err != nil { return err } else { if mountPoint == "" { return nil } else { return fmt.Errorf("Device %s is already mounted at %s", devicePath, mountPoint) } } } // Use lsblk(8) to get the value of a single property for the device. // // Empty string is returned if property is not present and/or lsblk has // no access to the device. // // Error is returned upon failed command execution or multiline lsblk output, // signalling children devices (likely subpartitions). func (env Env) getSinglePropertyFromDeviceWithLsblk(devicePath string, property string) (string, error) { output, err := env.OsCommandRunner.Run(wrapToEnterHostMountNamespace("lsblk", "-n", "-o", property, devicePath)...) if err != nil { return "", err } if strings.Count(output, "\n") > 1 { // Try to print standard lsblk output to show what's there. debugOutput, debugErr := env.OsCommandRunner.Run(wrapToEnterHostMountNamespace("lsblk")...) if debugErr != nil { return "", fmt.Errorf("Received multiline output, but can't run standard lsblk for debug output: %s", debugErr) } return "", fmt.Errorf("Received multiline output from lsblk. The device likely contains subpartitions:\n%s", debugOutput) } return strings.TrimSpace(output), nil }