in contrib/profiler/utils.py [0:0]
def detect_pattern(self, sample_list):
    """Check one batch of samples against the known phenomena and update counters.

    Args:
        sample_list: 2-D array-like of numbers with one row per sampling
            instant. The number of rows is decided by the sampling time and
            the number of columns by the number of GPUs used
            (7 + num_GPU * 4 according to the original author's note). GPU
            columns start at ``GPU_INFO_OFFSET`` and every GPU occupies
            ``INFO_NUM_PER_GPU`` consecutive columns.

    Side effects:
        Calls ``self.add_times(index=k)`` for every check ``k`` that matches
        (only checks with ``k < len(self._phenomena)`` are evaluated), then
        calls ``self.add_total()`` exactly once.

    The checks are:
        0. With two or more GPUs, the per-GPU average memory ratios differ
           by more than 0.15 absolutely, or by more than 15% of the highest
           one. Skipped when the highest average is below 0.01.
        1. First GPU: average usage < 85 and average memory ratio < 0.80.
        2. First GPU: average usage < 85 and average memory ratio >= 0.80.
        3. First GPU: average usage < 85 and the pronounced CPU minima /
           maxima line up with falling / rising GPU usage.

    Raises:
        IndexError: if the batch carries no GPU columns while more than one
            phenomenon is registered (checks 1-3 read the first GPU).
    """
    # ``np.float`` was just an alias of the builtin ``float``; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    samples = np.array(sample_list, dtype=float)
    used_gpu_num = int((samples.shape[1] - GPU_INFO_OFFSET) / INFO_NUM_PER_GPU)
    cpu_usage = samples[:, SAMPLE_INFO.cpu_usage.value]

    gpu_usage = []
    gpu_mem_usage = []
    for gpu in range(used_gpu_num):
        base = GPU_INFO_OFFSET + gpu * INFO_NUM_PER_GPU
        gpu_usage.append(samples[:, base])
        # Ratio of two adjacent columns -- presumably used / total GPU memory;
        # confirm against the sampler's column layout.
        mem_col = base + GPU_MEM_OFFSET
        gpu_mem_usage.append(samples[:, mem_col] / samples[:, mem_col + 1])

    phenomenon_count = len(self._phenomena)

    # Check 0: multiple GPUs, analyze whether each GPU has the same memory usage.
    if phenomenon_count > 0 and used_gpu_num >= 2:
        mem_averages = sorted(np.average(ratio) for ratio in gpu_mem_usage)
        lowest, highest = mem_averages[0], mem_averages[-1]
        # Practically no GPU memory in use at all: nothing meaningful to compare.
        if highest >= 0.01:
            spread = abs(highest - lowest)
            if spread > 0.15 or spread / highest > 0.15:
                self.add_times(index=0)

    # Checks 1-3 only look at the first GPU.
    if phenomenon_count > 1:
        gpu0_usage = gpu_usage[0]
        gpu0_underused = np.average(gpu0_usage) < 85
        gpu0_mem_avg = np.average(gpu_mem_usage[0])

        if gpu0_underused and gpu0_mem_avg < 0.80:
            self.add_times(index=1)

        if phenomenon_count > 2 and gpu0_underused and gpu0_mem_avg >= 0.80:
            self.add_times(index=2)

        if phenomenon_count > 3 and gpu0_underused:
            # Pass the CPU usage through a SlideWindows(10) instance in sample
            # order (the helper is stateful, so the order must be kept).
            # Presumably this yields a moving statistic over the last 10
            # samples -- see SlideWindows for the exact definition.
            slide_windows = SlideWindows(10)
            cpu_slide = [slide_windows.get_data(value) for value in cpu_usage]

            # Values at the 80% / 20% positions of the sorted series act as the
            # thresholds for "pronounced" maxima / minima.
            cpu_sorted = sorted(cpu_slide)
            cpu_high_mark = cpu_sorted[int(len(cpu_sorted) * 0.8)]
            cpu_low_mark = cpu_sorted[int(len(cpu_sorted) * 0.2)]

            # Direction of every GPU usage step: 1 rising, -1 falling, 0 flat.
            gpu_step = [0]
            for previous, current in zip(gpu0_usage[:-1], gpu0_usage[1:]):
                if current > previous:
                    gpu_step.append(1)
                elif current < previous:
                    gpu_step.append(-1)
                else:
                    gpu_step.append(0)
            # The first sample has no predecessor, so it inherits the direction
            # of the second one. With a single sample there is nothing to
            # inherit (the unguarded assignment used to raise IndexError).
            if len(gpu_step) > 1:
                gpu_step[0] = gpu_step[1]

            # A sample belongs to a rising (1) or falling (-1) interval
            # according to the most recent non-flat step; 0 means no movement
            # has been seen yet. Storing the trend per sample gives O(1)
            # lookups in the counting loop below.
            gpu_trend = []
            trend = 0
            for step in gpu_step:
                if step != 0:
                    trend = step
                gpu_trend.append(trend)

            # Count pronounced local CPU extremes by the GPU trend they fall in.
            up_cpu_min = down_cpu_min = 0
            up_cpu_max = down_cpu_max = 0
            for i in range(1, len(cpu_slide) - 1):
                before, here, after = cpu_slide[i - 1], cpu_slide[i], cpu_slide[i + 1]
                if here < before and here < after and here < cpu_low_mark:
                    if gpu_trend[i] == 1:
                        up_cpu_min += 1
                    elif gpu_trend[i] == -1:
                        down_cpu_min += 1
                if here > before and here > after and here > cpu_high_mark:
                    if gpu_trend[i] == 1:
                        up_cpu_max += 1
                    elif gpu_trend[i] == -1:
                        down_cpu_max += 1

            # Both kinds of extremes must have been observed; the check matches
            # when more than 60% of the CPU minima fall in falling-GPU intervals
            # or more than 60% of the CPU maxima fall in rising-GPU intervals.
            min_total = up_cpu_min + down_cpu_min
            max_total = up_cpu_max + down_cpu_max
            if min_total != 0 and max_total != 0:
                if down_cpu_min / min_total > 0.6 or up_cpu_max / max_total > 0.6:
                    self.add_times(index=3)

    self.add_total()