integration_test/soak_test/cmd/launcher/main.go (144 lines of code) (raw):
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build integration_test
/*
The launcher command launches a VM and begins a soak test on it.
Specifically, it installs the Ops Agent and a Python program that
logs to a specific file that the Ops Agent is watching.
This command is configured by the following environment variables,
in addition to the ones at the top of gce_testing.go:
LOG_RATE: How many log entries per second to send to the Ops Agent.
LOG_SIZE_IN_BYTES: How many bytes each log entry should be.
TTL: How long to keep the VM alive, expressed as "24h30m" or similar.
DISTRO: The Image Spec to run the tests on, e.g. "debian-cloud:debian-11".
VM_NAME: (Optional) The name of the VM to spawn. If not supplied,
a random name will be generated by gce_testing.go.
For example, after replacing `my_project` with a real project, you
could run it like:
```
PROJECT=my_project \
DISTRO=debian-11 \
ZONES=us-central1-b \
TTL=100m \
LOG_SIZE_IN_BYTES=1000 \
LOG_RATE=1000 \
go run -tags=integration_test .
```
*/
package main
import (
"context"
_ "embed"
"errors"
"fmt"
"log"
"os"
"strings"
"time"
"github.com/GoogleCloudPlatform/opentelemetry-operations-collector/integration_test/gce-testing-internal/gce"
"github.com/GoogleCloudPlatform/ops-agent/integration_test/agents"
)
var (
logSizeInBytes = os.Getenv("LOG_SIZE_IN_BYTES")
logRate = os.Getenv("LOG_RATE")
logPath = "/tmp/tail_file"
logGeneratorPath = "/log_generator.py"
ttl = os.Getenv("TTL")
distro = os.Getenv("DISTRO")
vmName = os.Getenv("VM_NAME")
)
//go:embed log_generator.py
var logGeneratorSource string
func main() {
if err := mainErr(); err != nil {
log.Fatal(err)
}
}
// Pause updates for 35 days to avoid reboots (b/297357060) using:
// https://stackoverflow.com/a/64862952/1188632
func pauseWindowsUpdates(ctx context.Context, logger *log.Logger, vm *gce.VM) (gce.CommandOutput, error) {
return gce.RunRemotely(ctx, logger, vm, `
$ErrorActionPreference = 'Stop'
$now = Get-Date
$pause_start = $now.ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
$pause_end = $now.AddDays(35).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
Set-ItemProperty -Path 'HKLM:\SOFTWARE\Microsoft\WindowsUpdate\UX\Settings' -Name 'PauseUpdatesExpiryTime' -Value $pause_end
Set-ItemProperty -Path 'HKLM:\SOFTWARE\Microsoft\WindowsUpdate\UX\Settings' -Name 'PauseFeatureUpdatesStartTime' -Value $pause_start
Set-ItemProperty -Path 'HKLM:\SOFTWARE\Microsoft\WindowsUpdate\UX\Settings' -Name 'PauseFeatureUpdatesEndTime' -Value $pause_end
Set-ItemProperty -Path 'HKLM:\SOFTWARE\Microsoft\WindowsUpdate\UX\Settings' -Name 'PauseQualityUpdatesStartTime' -Value $pause_start
Set-itemproperty -Path 'HKLM:\SOFTWARE\Microsoft\WindowsUpdate\UX\Settings' -Name 'PauseQualityUpdatesEndTime' -Value $pause_end
New-Item -Path 'HKLM:\SOFTWARE\Policies\Microsoft\Windows\WindowsUpdate\AU' -Force
New-ItemProperty -Path 'HKLM:\SOFTWARE\Policies\Microsoft\Windows\WindowsUpdate\AU' -Name 'NoAutoUpdate' -PropertyType DWORD -Value 1
`)
}
func mainErr() error {
defer gce.CleanupKeysOrDie()
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute)
defer cancel()
// Log to stderr.
logger := log.Default()
if distro == "" {
return errors.New("Env variable DISTRO cannot be empty")
}
if ttl == "" {
return errors.New("Env variable TTL cannot be empty")
}
// Create the VM.
options := gce.VMOptions{
ImageSpec: distro,
TimeToLive: ttl,
Name: vmName,
MachineType: "e2-standard-16",
Metadata: map[string]string{
// This is to avoid Windows updates and reboots (b/295165549), and
// also to avoid throughput blips when the OS Config agent runs
// periodically.
"osconfig-disabled-features": "tasks",
},
ExtraCreateArguments: []string{"--boot-disk-size=4000GB"},
}
vm, err := gce.CreateInstance(ctx, logger, options)
if err != nil {
return err
}
if gce.IsWindows(vm.ImageSpec) {
if _, err := pauseWindowsUpdates(ctx, logger, vm); err != nil {
return err
}
}
debugLogPath := "/tmp/log_generator.log"
// Install the Ops Agent with a config telling it to watch logPath,
// and debugLogPath for debugging.
config := fmt.Sprintf(`logging:
receivers:
mylog_source:
type: files
include_paths:
- %s
generator_debug_logs:
type: files
include_paths:
- %s
exporters:
google:
type: google_cloud_logging
service:
pipelines:
my_pipeline:
receivers:
- mylog_source
- generator_debug_logs
exporters: [google]
`, logPath, debugLogPath)
if err := agents.SetupOpsAgent(ctx, logger, vm, config); err != nil {
return err
}
// Install Python.
// TODO: Consider shipping over a prebuilt binary so that we don't need to
// install Python.
if gce.IsWindows(vm.ImageSpec) {
installPython := `$tempDir = "/tmp"
mkdir $tempDir
$pythonUrl = 'https://www.python.org/ftp/python/3.11.2/python-3.11.2.exe'
$pythonInstallerName = $pythonUrl -replace '.*/'
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
$webClient = New-Object System.Net.WebClient
$webClient.DownloadFile($pythonUrl, "$tempDir\$pythonInstallerName")
$pythonInstallDir = "$env:SystemDrive\Python"
$pythonPath = "$pythonInstallDir\python.exe"
Start-Process "$tempDir\$pythonInstallerName" -Wait -ArgumentList "/quiet TargetDir=$pythonInstallDir InstallAllUsers=1"
`
if _, err := gce.RunRemotely(ctx, logger, vm, installPython); err != nil {
return fmt.Errorf("Could not install Python: %w", err)
}
} else {
if err := agents.InstallPackages(ctx, logger, vm, []string{"python3"}); err != nil {
return err
}
}
// Upload log_generator.py.
if err := gce.UploadContent(ctx, logger, vm, strings.NewReader(logGeneratorSource), logGeneratorPath); err != nil {
return err
}
// Start log_generator.py asynchronously.
var startLogGenerator string
if gce.IsWindows(vm.ImageSpec) {
// The best way I've found to start a process asynchronously. One downside
// is that standard output and standard error are lost.
startLogGenerator = fmt.Sprintf(`Invoke-WmiMethod -ComputerName . -Class Win32_Process -Name Create -ArgumentList "$env:SystemDrive\Python\python.exe %v --log-size-in-bytes=%v --log-rate=%v --log-write-type=file --file-path=%v"`, logGeneratorPath, logSizeInBytes, logRate, logPath)
} else {
startLogGenerator = fmt.Sprintf(`nohup python3 %v \
--log-size-in-bytes="%v" \
--log-rate="%v" \
--log-write-type=file \
--file-path="%v" \
&> %v &
`, logGeneratorPath, logSizeInBytes, logRate, logPath, debugLogPath)
}
if _, err := gce.RunRemotely(ctx, logger, vm, startLogGenerator); err != nil {
return err
}
// Print log_generator log files to debug startup errors.
// These log files are unfortunately not available on Windows.
if !gce.IsWindows(vm.ImageSpec) {
time.Sleep(5 * time.Second)
if _, err := gce.RunRemotely(ctx, logger, vm, "cat "+debugLogPath); err != nil {
return err
}
}
return nil
}