def Prepare()

in perfkitbenchmarker/linux_benchmarks/object_storage_service_benchmark.py [0:0]


def Prepare(benchmark_spec):
  """Prepare vm with cloud provider tool and prepare vm with data file.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Raises:
    ColdDataError: If this benchmark is reading cold data, but the data isn't
        cold enough (as configured by object_storage_read_objects_min_hours).
  """
  # Always clean up server-side state, even when an exception occurs.
  benchmark_spec.always_call_cleanup = True

  # Load the objects-to-read file, if one was specified.
  benchmark_spec.read_objects = None
  if FLAGS.object_storage_read_objects_prefix is not None:
    # Glob for matching files and pick an arbitrary one that is old enough,
    # in case more than one exists.
    search_prefix = '%s-%s*' % (
        FLAGS.object_storage_read_objects_prefix,
        OBJECT_STORAGE_REGION.value,
    )
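    # For example (hypothetical values), with
    # --object_storage_read_objects_prefix=/tmp/cold_objects and region
    # us-central1, this matches files named /tmp/cold_objects-us-central1*.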
    read_objects_filenames = glob.glob(search_prefix)
    logging.info(
        'Considering object files %s: %s',
        search_prefix,
        read_objects_filenames,
    )
    for filename in read_objects_filenames:
      age_hours = _ColdObjectsWrittenFileAgeHours(filename)
      if age_hours and age_hours > FLAGS.object_storage_read_objects_min_hours:
        read_objects_filename = filename
        break
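    # The else clause of this for loop runs only when no break occurred,
    # i.e. when none of the files were old enough.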
    else:
      raise ColdDataError(
          'Object data older than %d hours does not exist. Current cold data '
          'files include the following: %s'
          % (
              FLAGS.object_storage_read_objects_min_hours,
              read_objects_filenames,
          )
      )

    with open(read_objects_filename) as read_objects_file:
      # The JSON file has the following structure:
      # {"bucket_name": <bucket_name>,
      #  ... any other provider-specific context needed
      #  "objects_written": <objects_written_array>}
      benchmark_spec.read_objects = json.loads(read_objects_file.read())
      benchmark_spec.read_objects_filename = read_objects_filename
      benchmark_spec.read_objects_age_hours = age_hours

    # The benchmark deletes the objects after reading them, so also delete
    # the file that lists them.
    if not FLAGS.object_storage_dont_delete_bucket:
      os.remove(read_objects_filename)

    assert benchmark_spec.read_objects is not None, (
        'Failed to read the file specified by '
        '--object_storage_read_objects_prefix'
    )

  # Load the provider and its object storage service
  providers.LoadProvider(FLAGS.storage)

  # Determine the bucket name.
  if benchmark_spec.read_objects is not None:
    # Using an existing bucket
    bucket_name = benchmark_spec.read_objects['bucket_name']
    if FLAGS.object_storage_bucket_name is not None:
      logging.warning(
          '--object_storage_bucket_name ignored because '
          '--object_storage_read_objects was specified'
      )
  else:
    # Use a new bucket (or the name of a specified bucket).
    bucket_name = FLAGS.object_storage_bucket_name or 'pkb%s' % FLAGS.run_uri
    if FLAGS.object_storage_apply_region_suffix_to_bucket_name:
      # Strip non-alphanumeric characters from the region, since bucket names
      # on some clouds may only contain alphanumeric characters.
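      # For example, a region such as 'us-east-1' (illustrative) becomes
      # 'useast1' after stripping, so it can be appended safely.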
      bucket_name = '%s%s' % (
          bucket_name,
          re.sub(r'[\W_]', '', OBJECT_STORAGE_REGION.value),
      )

  service = object_storage_service.GetObjectStorageClass(FLAGS.storage)()
  if OBJECT_STORAGE_ZONE.value:
    service.PrepareService(OBJECT_STORAGE_ZONE.value)
    if FLAGS.storage == 'AWS':
      if FLAGS.object_storage_bucket_name:
        # If the user passed a flag, leave it alone and let S3 error out
        # if it's invalid rather than fixing it.
        # TODO(pclay): consider fixing for user instead.
        pass
      else:
        # Required for S3 zonal buckets.
        bucket_name += service.S3ExpressZonalSuffix()
  elif (
      FLAGS.storage == 'Azure'
      and FLAGS.object_storage_read_objects_prefix is not None
  ):
    # The storage provider is Azure and we are reading existing objects, so
    # prepare the ObjectStorageService with the existing storage account and
    # resource group associated with the bucket that contains the objects.
    service.PrepareService(
        OBJECT_STORAGE_REGION.value,
        # On Azure, use an existing storage account if we
        # are reading existing objects
        (
            benchmark_spec.read_objects['azure_storage_account'],
            benchmark_spec.read_objects['azure_resource_group'],
        ),
    )
  elif FLAGS.storage == 'Azure' and FLAGS.object_storage_bucket_name:
    # The bucket may already exist from a previous run, so derive the storage
    # account and resource group names from the bucket name for consistency.
    service.PrepareService(
        OBJECT_STORAGE_REGION.value,
        # The storage account must not exceed 24 characters.
        (bucket_name[:24], bucket_name + '-resource-group'),
        try_to_create_storage_account_and_resource_group=True,
    )
  else:
    service.PrepareService(OBJECT_STORAGE_REGION.value)

  vms = benchmark_spec.vms
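  # Prepare each VM in parallel with the cloud provider tool and data files
  # (see PrepareVM).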
  background_tasks.RunThreaded(lambda vm: PrepareVM(vm, service), vms)

  # Make the bucket.
  if benchmark_spec.read_objects is None:
    # Fail if we cannot create the bucket, unless the bucket name was set via
    # a flag. If it was set by a flag, still try to create the bucket, but do
    # not fail if it already exists. This supports running the benchmark
    # against the same bucket multiple times.
    raise_on_bucket_creation_failure = not FLAGS.object_storage_bucket_name
    if FLAGS.storage == 'GCP' and OBJECT_STORAGE_GCS_MULTIREGION.value:
      # Use a GCS multiregional bucket
      multiregional_service = gcs.GoogleCloudStorageService()
      multiregional_service.PrepareService(
          OBJECT_STORAGE_GCS_MULTIREGION.value or DEFAULT_GCS_MULTIREGION
      )
      multiregional_service.MakeBucket(
          bucket_name, raise_on_failure=raise_on_bucket_creation_failure
      )
    else:
      # Use a regular bucket
      service.MakeBucket(
          bucket_name, raise_on_failure=raise_on_bucket_creation_failure
      )

  # Save the service and the bucket name for later
  benchmark_spec.service = service
  benchmark_spec.bucket_name = bucket_name