in perfkitbenchmarker/linux_benchmarks/object_storage_service_benchmark.py [0:0]
def Prepare(benchmark_spec):
  """Prepares VMs with the cloud provider tool and the benchmark data file.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Raises:
    ColdDataError: If this benchmark is reading cold data, but the data isn't
      cold enough (as configured by object_storage_read_objects_min_hours).
  """
  # We always want to clean up server-side state when an exception happens.
  benchmark_spec.always_call_cleanup = True

  # Load the objects-to-read file, if one was specified.
  benchmark_spec.read_objects = None
  if FLAGS.object_storage_read_objects_prefix is not None:
    # By globbing, we choose an arbitrary file that is old enough, in case
    # more than one exists.
    search_prefix = '%s-%s*' % (
        FLAGS.object_storage_read_objects_prefix,
        OBJECT_STORAGE_REGION.value,
    )
    read_objects_filenames = glob.glob(search_prefix)
    logging.info(
        'Considering object files %s: %s',
        search_prefix,
        read_objects_filenames,
    )
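    # For example (hypothetical values): with
    # --object_storage_read_objects_prefix=/tmp/cold_objects and region
    # us-east1, this matches files like /tmp/cold_objects-us-east1<suffix>,
    # where <suffix> is whatever the writing run appended.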
    for filename in read_objects_filenames:
      age_hours = _ColdObjectsWrittenFileAgeHours(filename)
      if age_hours and age_hours > FLAGS.object_storage_read_objects_min_hours:
        read_objects_filename = filename
        break
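    # Note: the `else` below belongs to the `for` loop above and runs only if
    # the loop finished without a `break`, i.e. no sufficiently old file was
    # found.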
    else:
      raise ColdDataError(
          'Object data older than %d hours does not exist. Current cold data '
          'files include the following: %s'
          % (
              FLAGS.object_storage_read_objects_min_hours,
              read_objects_filenames,
          )
      )
    with open(read_objects_filename) as read_objects_file:
      # The JSON structure is:
      # {"bucket_name": <bucket_name>,
      #  ... any other provider-specific context needed,
      #  "objects_written": <objects_written_array>}
      benchmark_spec.read_objects = json.loads(read_objects_file.read())
    benchmark_spec.read_objects_filename = read_objects_filename
    benchmark_spec.read_objects_age_hours = age_hours
    # When this benchmark reads these objects, the data will be deleted.
    # Delete the file that specifies the data too.
    if not FLAGS.object_storage_dont_delete_bucket:
      os.remove(read_objects_filename)

    assert benchmark_spec.read_objects is not None, (
        'Failed to read the file specified by '
        '--object_storage_read_objects_prefix'
    )
  # Load the provider and its object storage service.
  providers.LoadProvider(FLAGS.storage)

  # Determine the bucket name.
  if benchmark_spec.read_objects is not None:
    # Using an existing bucket.
    bucket_name = benchmark_spec.read_objects['bucket_name']
    if FLAGS.object_storage_bucket_name is not None:
      logging.warning(
          '--object_storage_bucket_name ignored because '
          '--object_storage_read_objects_prefix was specified'
      )
  else:
    # Use a new bucket (or the name of a specified bucket).
    bucket_name = FLAGS.object_storage_bucket_name or 'pkb%s' % FLAGS.run_uri
    if FLAGS.object_storage_apply_region_suffix_to_bucket_name:
      # Avoid non-alphanumeric characters in the region, as bucket names on
      # some clouds cannot contain non-alphanumeric characters.
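      # For example, a hypothetical region 'us-central1' contributes the
      # suffix 'uscentral1'.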
      bucket_name = '%s%s' % (
          bucket_name,
          re.sub(r'[\W_]', '', OBJECT_STORAGE_REGION.value),
      )

  service = object_storage_service.GetObjectStorageClass(FLAGS.storage)()

  if OBJECT_STORAGE_ZONE.value:
    service.PrepareService(OBJECT_STORAGE_ZONE.value)
    if FLAGS.storage == 'AWS':
      if FLAGS.object_storage_bucket_name:
        # If the user passed a flag, leave it alone and let S3 error out if
        # it's invalid rather than fixing it.
        # TODO(pclay): consider fixing for user instead.
        pass
      else:
        # Required for S3 zonal buckets.
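        # S3 Express One Zone (directory) buckets need a zone-specific name
        # suffix; S3ExpressZonalSuffix() is assumed to return it (on the order
        # of '--<az-id>--x-s3').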
        bucket_name += service.S3ExpressZonalSuffix()
  elif (
      FLAGS.storage == 'Azure'
      and FLAGS.object_storage_read_objects_prefix is not None
  ):
    # The storage provider is Azure and we are reading existing objects, so
    # prepare the ObjectStorageService with the existing storage account and
    # resource group associated with the bucket containing our objects.
    service.PrepareService(
        OBJECT_STORAGE_REGION.value,
        # On Azure, use an existing storage account if we are reading
        # existing objects.
        (
            benchmark_spec.read_objects['azure_storage_account'],
            benchmark_spec.read_objects['azure_resource_group'],
        ),
    )
  elif FLAGS.storage == 'Azure' and FLAGS.object_storage_bucket_name:
    # We are using a bucket that may exist from a previous run. We should use
    # a storage account and resource group for this bucket based on the same
    # name (for consistency).
    service.PrepareService(
        OBJECT_STORAGE_REGION.value,
        # The storage account must not exceed 24 characters.
        (bucket_name[:24], bucket_name + '-resource-group'),
        try_to_create_storage_account_and_resource_group=True,
    )
  else:
    service.PrepareService(OBJECT_STORAGE_REGION.value)
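
  # Set up every VM in parallel; PrepareVM is assumed to install the provider
  # tool and the data file on each VM, per this function's docstring.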
  vms = benchmark_spec.vms
  background_tasks.RunThreaded(lambda vm: PrepareVM(vm, service), vms)

  # Make the bucket.
  if benchmark_spec.read_objects is None:
    # Fail if we cannot create the bucket, as long as the bucket name was not
    # set via a flag. If it was set by a flag, we still try to create the
    # bucket, but do not fail if it already exists. This supports running the
    # benchmark against the same bucket multiple times.
    raise_on_bucket_creation_failure = not FLAGS.object_storage_bucket_name
    if FLAGS.storage == 'GCP' and OBJECT_STORAGE_GCS_MULTIREGION.value:
      # Use a GCS multiregional bucket.
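      # GCS multi-region locations are names such as 'US' or 'EU';
      # DEFAULT_GCS_MULTIREGION is assumed to be one of these.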
      multiregional_service = gcs.GoogleCloudStorageService()
      multiregional_service.PrepareService(
          OBJECT_STORAGE_GCS_MULTIREGION.value or DEFAULT_GCS_MULTIREGION
      )
      multiregional_service.MakeBucket(
          bucket_name, raise_on_failure=raise_on_bucket_creation_failure
      )
    else:
      # Use a regular bucket.
      service.MakeBucket(
          bucket_name, raise_on_failure=raise_on_bucket_creation_failure
      )

  # Save the service and the bucket name for later.
  benchmark_spec.service = service
  benchmark_spec.bucket_name = bucket_name