perfkitbenchmarker/sample.py
# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A performance sample class."""
import calendar
import collections
import datetime
import math
import time
from typing import Any, Dict, List, NewType
import numpy as np
from perfkitbenchmarker import errors
import pytz
PERCENTILES_LIST = 0.1, 1, 5, 10, 50, 90, 95, 99, 99.9
# Add this flag to the metadata to hide logging to console.
DISABLE_CONSOLE_LOG = 'disable_console_log'
_SAMPLE_FIELDS = 'metric', 'value', 'unit', 'metadata', 'timestamp'
# Metric names for time series
TPM_TIME_SERIES = 'TPM_time_series'
OPS_TIME_SERIES = 'OPS_time_series'
LATENCY_TIME_SERIES = 'Latency_time_series'
# Metadata for time series
VALUES = 'values'
RAMP_UP_ENDS = 'ramp_up_ends'
RAMP_DOWN_STARTS = 'ramp_down_starts'
TIMESTAMPS = 'timestamps'
INTERVAL = 'interval'
TIME_SERIES_METADATA = [
RAMP_UP_ENDS,
RAMP_DOWN_STARTS,
VALUES,
TIMESTAMPS,
INTERVAL,
]
def PercentileCalculator(numbers, percentiles=PERCENTILES_LIST):
"""Computes percentiles, stddev and mean on a set of numbers.
Args:
numbers: A sequence of numbers to compute percentiles for.
    percentiles: If given, a list of percentiles to compute. Can be floats or
      ints.
Returns:
    A dictionary mapping each requested percentile (e.g. 'p50') to its value,
    plus 'average' and 'stddev'.
Raises:
    ValueError: If numbers is empty or if a percentile is outside of
      [0, 100].
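  Example (an illustrative doctest; the inputs are made up and the percentile
  values follow the index-based selection implemented below):
    >>> result = PercentileCalculator([1, 2, 3, 4, 5], percentiles=[50, 90])
    >>> (result['p50'], result['p90'], result['average'])
    (3, 5, 3.0)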
"""
# 'if not numbers' will fail if numbers is an np.Array or pd.Series.
if not len(numbers):
raise ValueError("Can't compute percentiles of empty list.")
numbers_sorted = sorted(numbers)
count = len(numbers_sorted)
total = sum(numbers_sorted)
result = {}
for percentile in percentiles:
float(percentile) # verify type
if percentile < 0.0 or percentile > 100.0:
raise ValueError('Invalid percentile %s' % percentile)
percentile_string = 'p%s' % str(percentile)
index = int(count * float(percentile) / 100.0)
index = min(index, count - 1) # Correction to handle 100th percentile.
result[percentile_string] = numbers_sorted[index]
average = total / float(count)
result['average'] = average
if count > 1:
total_of_squares = sum([(i - average) ** 2 for i in numbers])
result['stddev'] = (total_of_squares / (count - 1)) ** 0.5
else:
result['stddev'] = 0
return result
def GeoMean(iterable):
"""Calculate the geometric mean of a collection of numbers.
Args:
iterable: A sequence of numbers.
Returns:
    The geometric mean of the values.
  Raises:
    ValueError: If the iterable is empty.
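  Example (an illustrative doctest; rounding only keeps the printed value
  stable):
    >>> round(float(GeoMean([1, 2, 4])), 6)
    2.0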
"""
arr = np.fromiter(iterable, dtype='float')
if not arr.size:
raise ValueError("Can't compute geomean of empty list.")
return arr.prod() ** (1 / len(arr))
# The Sample is converted via collections.namedtuple._asdict for publishing
SampleDict = NewType('SampleDict', Dict[str, Any])
class Sample(collections.namedtuple('Sample', _SAMPLE_FIELDS)):
"""A performance sample.
Attributes:
metric: string. Name of the metric within the benchmark.
value: float. Result for 'metric'.
unit: string. Units for 'value'.
metadata: dict. Additional metadata to include with the sample.
timestamp: float. Unix timestamp.
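  Example (an illustrative sketch; the metric name and metadata are made up):
    >>> s = Sample('throughput', '42', 'MB/s', metadata={'threads': 8})
    >>> (s.metric, s.value, s.unit)
    ('throughput', 42.0, 'MB/s')
    >>> sorted(s.asdict())
    ['metadata', 'metric', 'timestamp', 'unit', 'value']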
"""
def __new__(
cls, metric, value, unit, metadata=None, timestamp=None, **kwargs
):
if timestamp is None:
timestamp = time.time()
return super().__new__(
cls,
metric,
float(value or 0.0),
unit,
metadata=metadata or {},
timestamp=timestamp,
**kwargs,
)
def asdict(self) -> Dict[str, Any]: # pylint:disable=invalid-name
"""Converts the Sample to a dictionary."""
return self._asdict()
_Histogram = collections.OrderedDict
def MakeHistogram(
values: List[float], round_bottom: float = 0.0, round_to_sig_fig: int = 3
) -> _Histogram[float, int]:
"""Take a list of float values and returns a ordered dict of values and frequency.
Args:
values: a list of float values
    round_bottom: A float between 0 and 1 giving the fraction of values that
      should be rounded. Any value below this percentile is rounded to the
      precision given by round_to_sig_fig; values at or above this percentile
      keep full precision. For example, 0.95 rounds all values below the 95th
      percentile and keeps full precision for the rest. Defaults to 0, which
      rounds no values; 1 would round all values.
round_to_sig_fig: The number of significant figures kept when rounding
values. 3 by default.
  Returns:
    An ordered dictionary mapping each value to its frequency.
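  Example (an illustrative doctest; converted to a plain dict only to keep the
  printed form stable):
    >>> dict(MakeHistogram([1.0, 1.0, 2.0]))
    {1.0: 2, 2.0: 1}
    >>> dict(MakeHistogram([0.123456, 10.0], round_bottom=1.0))
    {0.123: 1, 10.0: 1}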
"""
histogram = _Histogram()
for iteration, value in enumerate(sorted(values)):
percentile = iteration / len(values)
if percentile < round_bottom:
if value > 0:
rounded_value = round(
value,
round_to_sig_fig - int(math.floor(math.log10(abs(value)))) - 1,
)
else:
rounded_value = 0.0
histogram[rounded_value] = histogram.get(rounded_value, 0) + 1
else:
histogram[value] = histogram.get(value, 0) + 1
return histogram
def _ConvertHistogramToString(histogram: _Histogram[float, int]) -> str:
histogram_label_values = ','.join(
f'"{key}": {value}' for (key, value) in histogram.items()
)
histogram_labels = '{%s}' % histogram_label_values
return histogram_labels
def CreateHistogramSample(
histogram: _Histogram[float, int],
name: str,
subname: str,
units: str,
additional_metadata=None,
metric='',
) -> Sample:
"""Given a histogram of values, create a sample.
Args:
    histogram: an ordered dict mapping values to their frequencies
name: name of histogram
subname: subname of histogram
units: the units of measure used in the sample
additional_metadata: any additional metadata to add
metric: metric in the sample
Returns:
sample: One sample object that reports the histogram passed in.
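  Example (an illustrative doctest; the names and units are made up):
    >>> hist = MakeHistogram([1.0, 1.0, 2.0])
    >>> s = CreateHistogramSample(hist, 'read', 'p1', 'usec')
    >>> s.metadata['histogram']
    '{"1.0": 2,"2.0": 1}'
    >>> (s.metadata['Name'], s.metadata['Subname'])
    ('read', 'p1')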
"""
metadata = {
'histogram': _ConvertHistogramToString(histogram),
'Name': name,
'Subname': subname,
}
if additional_metadata:
metadata.update(additional_metadata)
return Sample(metric, 0, units, metadata)
def CreateTimeSeriesSample(
values: List[Any],
timestamps: List[float],
metric: str,
units: str,
interval: float,
ramp_up_ends=None,
ramp_down_starts=None,
additional_metadata=None,
) -> Sample:
"""Create time series samples.
Given a list of values and the timestamp the values
created at create a time series samples. Each value correspond to the
timestamp that the value is collected. The size of the values and
timestamps have to be equal.
Args:
values: an value orderd based on time series
timestamps: an value orderd based on time series in Epoch micro timestamp
metric: name of time series samples
units: the units of measure of values
interval: interval of the metrics in seconds
ramp_up_ends: The timestamp when ramp up ends in Epoch micro timestamp
ramp_down_starts: The timestamp when ramp down starts in Epoch nano
timestamp
additional_metadata: any additional metadata to add
Returns:
sample: One sample object that reports the time series passed in.
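  Example (an illustrative doctest; the values and timestamps are made up):
    >>> s = CreateTimeSeriesSample(
    ...     [10, 20], [1000.0, 2000.0], OPS_TIME_SERIES, 'ops', interval=1)
    >>> s.metadata[VALUES]
    [10, 20]
    >>> s.metadata[INTERVAL]
    1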
"""
if len(values) != len(timestamps):
    raise errors.Error('Length of values does not match length of timestamps')
metadata = {VALUES: values, TIMESTAMPS: timestamps, INTERVAL: interval}
if additional_metadata:
metadata.update(additional_metadata)
if ramp_up_ends:
metadata[RAMP_UP_ENDS] = ramp_up_ends
if ramp_down_starts:
metadata[RAMP_DOWN_STARTS] = ramp_down_starts
return Sample(metric, 0, units, metadata)
def ConvertDateTimeToUnixMs(date: datetime.datetime):
  """Converts a datetime to a Unix timestamp in milliseconds.
  Note: timetuple() drops sub-second precision, so the result only has
  whole-second resolution.
  """
  # calendar.timegm assumes the time is from UTC.
  # Convert the datetime to UTC timezone first.
  date_utc = date.astimezone(pytz.utc)
  return calendar.timegm(date_utc.timetuple()) * 1000
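# Illustrative usage of ConvertDateTimeToUnixMs (a sketch; the datetime below
# is made up). timetuple() drops microseconds, so sub-second precision is lost:
#
#   dt = datetime.datetime(1970, 1, 1, 0, 0, 1, 500000, tzinfo=pytz.utc)
#   ConvertDateTimeToUnixMs(dt)  # -> 1000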