# custom/report_metrics.py
def _coerce(val):
    """Return *val* as int if possible, else float, else the raw string."""
    for cast in (int, float):
        try:
            return cast(val)
        except ValueError:  # not representable in this numeric type
            pass
    return val


def _parse_settings(filename):
    """Parse run settings encoded in *filename*'s basename.

    The basename (extension stripped) is split on ``__``; the first two
    fields are identifiers, the remaining fields are ``key_value`` pairs
    (the key may itself contain underscores — the value is the last
    ``_``-separated token). Numeric values are coerced to int/float.
    """
    stem = '.'.join(os.path.basename(filename).split('.')[:-1])
    setting_dict = {}
    for kv in stem.split('__')[2:]:
        parts = kv.split('_')
        key = '_'.join(parts[:-1])
        setting_dict[key] = _coerce(parts[-1])
    return setting_dict


def get_metric(filename, filetype):
    """Compute metrics for one output file of a generation run.

    Args:
        filename: path to the data file; its basename encodes run settings
            (see ``_parse_settings``), including 'spl' (split name) and,
            for token files, 'pfx'/'cmpl' (prefix/completion lengths).
        filetype: one of 'completions', 'targets',
            'single_token_predictions', or 'metrics'.

    Returns:
        ``(setting_dict, result)`` — the parsed settings and the metric
        dict for the requested filetype.

    Raises:
        ValueError: if *filetype* is not one of the recognized kinds.
        KeyError: if the filename does not encode a 'spl' setting.
    """
    setting_dict = _parse_settings(filename)
    split = setting_dict['spl']
    prefix_length = setting_dict.get('pfx')
    completion_length = setting_dict.get('cmpl')

    if filetype == 'completions':
        ngram_metrics = Metrics()
        actual_completions = []
        flat_completions = []
        with open(filename, 'r') as f:
            for line in f:
                tokens = line.split()
                # Each line must be exactly prefix + completion tokens.
                assert len(tokens) == (prefix_length + completion_length)
                completion = tokens[prefix_length:]
                actual_completions.append(completion)
                flat_completions.extend(completion)
        ngram_metrics.update(actual_completions)
        result = merge_dicts(
            ngram_metrics.report(kind=f'{split}'),
            {f'{split}/num_uniq_compl': len(set(flat_completions))})
    elif filetype == 'targets':
        targets_ngram_metrics = Metrics()
        targets_completions = []  # Slice targets to have same length as completions.
        targets_flat_completions = []
        with open(filename, 'r') as f:
            for line in f:
                tokens = line.split()
                targets_flat_completions.extend(tokens)
                # Chop each target line into completion_length-sized chunks,
                # discarding any trailing remainder shorter than a chunk.
                targets_completions.extend(
                    tokens[i * completion_length:(i + 1) * completion_length]
                    for i in range(len(tokens) // completion_length))
        targets_ngram_metrics.update(targets_completions)
        result = merge_dicts(
            targets_ngram_metrics.report(kind=f'{split}_human'),
            {f'{split}/num_uniq_target': len(set(targets_flat_completions))})
    elif filetype == 'single_token_predictions':
        singlepred_flat = []
        with open(filename, 'r') as f:
            for line in f:
                singlepred_flat.extend(line.split())
        result = {f'{split}/num_uniq_singletok': len(set(singlepred_flat))}
    elif filetype == 'metrics':
        # Metrics files are pickled dicts produced by this project; only
        # load trusted, locally-generated files (pickle is not safe on
        # untrusted input).
        with open(filename, 'rb') as f:
            result = pickle.load(f)
    else:
        # Previously an unknown filetype crashed with UnboundLocalError on
        # `result`; fail with an explicit, diagnosable error instead.
        raise ValueError(f'unknown filetype: {filetype}')

    return setting_dict, result