in gslib/metrics.py [0:0]
def _CollectPerformanceSummaryMetric(self):
"""Aggregates PerformanceSummary info and adds the metric to the list."""
if self.perf_sum_params is None:
return
custom_params = {}
# These parameters need no further processing.
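# _GA_LABEL_MAP translates each human-readable label into the corresponding
# GA custom dimension/metric parameter name used when the metric is reported.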
for attr_name, label in (
('num_processes', 'Num Processes'),
('num_threads', 'Num Threads'),
('num_retryable_service_errors', 'Num Retryable Service Errors'),
('num_retryable_network_errors', 'Num Retryable Network Errors'),
('avg_throughput', 'Average Overall Throughput'),
('num_objects_transferred', 'Number of Files/Objects Transferred'),
('total_bytes_transferred', 'Size of Files/Objects Transferred'),
):
custom_params[_GA_LABEL_MAP[label]] = getattr(self.perf_sum_params,
attr_name)
# Calculate the disk stats again to calculate deltas of time spent on I/O.
if system_util.IS_LINUX:
disk_start = self.perf_sum_params.disk_counters_start
disk_end = system_util.GetDiskCounters()
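# GetDiskCounters returns a dict mapping each disk name to a tuple of
# cumulative I/O counters for that disk.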
# Read and write times are the 5th and 6th elements (indices 4 and 5) of
# each disk's stat tuple.
custom_params[_GA_LABEL_MAP['Disk I/O Time']] = (
sum([stat[4] + stat[5] for stat in disk_end.values()]) -
sum([stat[4] + stat[5] for stat in disk_start.values()]))
# Determine source URL type(s).
if self.perf_sum_params.has_cloud_src:
src_url_type = 'both' if self.perf_sum_params.has_file_src else 'cloud'
else:
src_url_type = 'file'
custom_params[_GA_LABEL_MAP['Source URL Type']] = src_url_type
# Determine the type of parallelism used, if any.
if self.perf_sum_params.uses_fan:
strategy = 'both' if self.perf_sum_params.uses_slice else 'fan'
else:
strategy = 'slice' if self.perf_sum_params.uses_slice else 'none'
custom_params[_GA_LABEL_MAP['Parallelism Strategy']] = strategy
# Determine the fraction of total thread time (idle plus execution) that
# threads spent idle; despite the 'Percent' label, this is reported as a
# value between 0 and 1.
total_time = (self.perf_sum_params.thread_idle_time +
self.perf_sum_params.thread_execution_time)
if total_time:
custom_params[_GA_LABEL_MAP['Thread Idle Time Percent']] = (
float(self.perf_sum_params.thread_idle_time) / float(total_time))
# Determine the slowest and fastest thread throughputs.
if self.perf_sum_params.thread_throughputs:
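# Each entry in thread_throughputs tracks the bytes transferred and time
# spent transferring for one worker thread; GetThroughput returns that
# thread's average throughput in bytes per second.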
throughputs = [
thread.GetThroughput()
for thread in self.perf_sum_params.thread_throughputs.values()
]
custom_params[_GA_LABEL_MAP['Slowest Thread Throughput']] = min(
throughputs)
custom_params[_GA_LABEL_MAP['Fastest Thread Throughput']] = max(
throughputs)
# Determine the provider(s) used.
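# provider_types holds the URL scheme(s) involved in the transfer (e.g. 'gs',
# 's3'); they are reported as a sorted, comma-separated string.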
custom_params[_GA_LABEL_MAP['Provider Types']] = ','.join(
sorted(self.perf_sum_params.provider_types))
# Determine the transfer type(s) used by this command.
# Each entry maps a transfer type to whether its condition was met.
transfer_types = {
'CloudToCloud':
self.perf_sum_params.has_cloud_src
and self.perf_sum_params.has_cloud_dst,
'CloudToFile':
self.perf_sum_params.has_cloud_src
and self.perf_sum_params.has_file_dst,
'DaisyChain':
self.perf_sum_params.is_daisy_chain,
'FileToCloud':
self.perf_sum_params.has_file_src
and self.perf_sum_params.has_cloud_dst,
'FileToFile':
self.perf_sum_params.has_file_src
and self.perf_sum_params.has_file_dst,
}
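# The GA event action is the sorted, comma-separated list of the transfer
# types that occurred, e.g. 'CloudToCloud,DaisyChain' for a daisy-chained
# copy between cloud providers.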
action = ','.join(
sorted([
transfer_type
for transfer_type, cond in six.iteritems(transfer_types)
if cond
]))
# Use the time spent in Apply, rather than the total command execution time,
# for the execution time metric. This aligns more closely with throughput and
# bytes transferred, and the corresponding Command event already reports the
# total time. If PerformanceSummary events are expanded to more commands, this
# may not reflect a single Apply call, since commands like rm may call Apply
# twice. Currently, Apply is timed directly in the RunCommand methods of cp
# and rsync.
apply_execution_time = _GetTimeInMillis(
self.perf_sum_params.total_elapsed_time)
self.CollectGAMetric(category=_GA_PERFSUM_CATEGORY,
action=action,
execution_time=apply_execution_time,
**custom_params)
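# Illustrative sketch (not part of gsutil): the rough shape of the values
# produced by the logic above for a hypothetical `gsutil -m cp` of local
# files to a cloud bucket, assuming fan parallelism only and no sliced
# transfers. The label-to-GA-key translation via _GA_LABEL_MAP is omitted.
#
#   'Source URL Type'       -> 'file'
#   'Parallelism Strategy'  -> 'fan'
#   'Provider Types'        -> 'gs'
#   Event action            -> 'FileToCloud'
#   Execution time          -> time spent in Apply, in milliseconds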