in gslib/metrics.py [0:0]
def _CollectPerformanceSummaryMetric(self):
"""Aggregates PerformanceSummary info and adds the metric to the list."""
if self.perf_sum_params is None:
return
custom_params = {}
# These parameters need no further processing.
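# _GA_LABEL_MAP translates each human-readable label into the corresponding
# GA custom dimension/metric parameter name used when the metric is reported.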
for attr_name, label in (
('num_processes', 'Num Processes'),
('num_threads', 'Num Threads'),
('num_retryable_service_errors', 'Num Retryable Service Errors'),
('num_retryable_network_errors', 'Num Retryable Network Errors'),
('avg_throughput', 'Average Overall Throughput'),
('num_objects_transferred', 'Number of Files/Objects Transferred'),
('total_bytes_transferred', 'Size of Files/Objects Transferred'),
):
custom_params[_GA_LABEL_MAP[label]] = getattr(self.perf_sum_params,
attr_name)
# Calculate the disk stats again to calculate deltas of time spent on I/O.
if system_util.IS_LINUX:
disk_start = self.perf_sum_params.disk_counters_start
disk_end = system_util.GetDiskCounters()
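# GetDiskCounters returns a dict mapping each disk name to a tuple of
# cumulative I/O counters for that disk.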
# Read and write times are the 5th and 6th elements (indices 4 and 5) of
# each disk's stat tuple.
custom_params[_GA_LABEL_MAP['Disk I/O Time']] = (
sum([stat[4] + stat[5] for stat in disk_end.values()]) -
sum([stat[4] + stat[5] for stat in disk_start.values()]))
# Determine source URL type(s).
if self.perf_sum_params.has_cloud_src:
src_url_type = 'both' if self.perf_sum_params.has_file_src else 'cloud'
else:
src_url_type = 'file'
custom_params[_GA_LABEL_MAP['Source URL Type']] = src_url_type
# Determine the type of parallelism used, if any.
if self.perf_sum_params.uses_fan:
strategy = 'both' if self.perf_sum_params.uses_slice else 'fan'
else:
strategy = 'slice' if self.perf_sum_params.uses_slice else 'none'
custom_params[_GA_LABEL_MAP['Parallelism Strategy']] = strategy
# Determine the fraction of total thread time (idle plus execution) that
# threads spent idle; despite the 'Percent' label, this is reported as a
# value between 0 and 1.
total_time = (self.perf_sum_params.thread_idle_time +
self.perf_sum_params.thread_execution_time)
if total_time:
custom_params[_GA_LABEL_MAP['Thread Idle Time Percent']] = (
float(self.perf_sum_params.thread_idle_time) / float(total_time))
# Determine the slowest and fastest thread throughputs.
if self.perf_sum_params.thread_throughputs:
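# Each entry in thread_throughputs tracks the bytes transferred and time
# spent transferring for one worker thread; GetThroughput returns that
# thread's average throughput in bytes per second.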
throughputs = [
thread.GetThroughput()
for thread in self.perf_sum_params.thread_throughputs.values()
]
custom_params[_GA_LABEL_MAP['Slowest Thread Throughput']] = min(
throughputs)
custom_params[_GA_LABEL_MAP['Fastest Thread Throughput']] = max(
throughputs)
# Determine the provider(s) used.
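# provider_types holds the URL scheme(s) involved in the transfer (e.g. 'gs',
# 's3'); they are reported as a sorted, comma-separated string.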
custom_params[_GA_LABEL_MAP['Provider Types']] = ','.join(
sorted(self.perf_sum_params.provider_types))
# Determine the transfer type(s) used by this command.
# Each entry maps a transfer type to whether its condition was met.
transfer_types = {
'CloudToCloud':
self.perf_sum_params.has_cloud_src
and self.perf_sum_params.has_cloud_dst,
'CloudToFile':
self.perf_sum_params.has_cloud_src
and self.perf_sum_params.has_file_dst,
'DaisyChain':
self.perf_sum_params.is_daisy_chain,
'FileToCloud':
self.perf_sum_params.has_file_src
and self.perf_sum_params.has_cloud_dst,
'FileToFile':
self.perf_sum_params.has_file_src
and self.perf_sum_params.has_file_dst,
}
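# The GA event action is the sorted, comma-separated list of the transfer
# types that occurred, e.g. 'CloudToCloud,DaisyChain' for a daisy-chained
# copy between cloud providers.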
action = ','.join(
sorted([
transfer_type
for transfer_type, cond in six.iteritems(transfer_types)
if cond
]))
# Use the time spent in Apply, rather than the total command execution time,
# for the execution time metric. This aligns more closely with throughput and
# bytes transferred, and the corresponding Command event already reports the
# total time. If PerformanceSummary events are expanded to more commands, this
# may not reflect a single Apply call, since commands like rm may call Apply
# twice. Currently, Apply is timed directly in the RunCommand methods of cp
# and rsync.
apply_execution_time = _GetTimeInMillis(
self.perf_sum_params.total_elapsed_time)
self.CollectGAMetric(category=_GA_PERFSUM_CATEGORY,
action=action,
execution_time=apply_execution_time,
**custom_params)
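# Illustrative sketch (not part of gsutil): the rough shape of the values
# produced by the logic above for a hypothetical `gsutil -m cp` of local
# files to a cloud bucket, assuming fan parallelism only and no sliced
# transfers. The label-to-GA-key translation via _GA_LABEL_MAP is omitted.
#
#   'Source URL Type'       -> 'file'
#   'Parallelism Strategy'  -> 'fan'
#   'Provider Types'        -> 'gs'
#   Event action            -> 'FileToCloud'
#   Execution time          -> time spent in Apply, in milliseconds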