aiops/ContrastiveLearningLogClustering/utils/evaluation.py
import pandas as pd
from sklearn import metrics


def clustering_evaluate(log_type, cluster_assignment, clustered_sentences):
    label_true = []
    if log_type in ("Flink", "ODPS"):
        df_log = pd.read_csv(log_type.lower() + '_test.csv')
        # Drop unlabeled lines (label_id == -1) before evaluation.
        df_log = df_log[df_log['label_id'] != -1]
        label_count = df_log['label_id'].value_counts()
        event_amount = len(label_count)
        cluster_amount = len(clustered_sentences)
        print('event amount: ', event_amount)
        print('cluster amount: ', cluster_amount)
        for _, line in df_log.iterrows():
            label_true.append(line['label_id'])
    else:
        df_log_structured = pd.read_csv("./logs/" + log_type + "/" + log_type + "_2k.log_structured.csv")
        label_count = df_log_structured['EventId'].value_counts()
        event_amount = len(label_count)
        cluster_amount = len(clustered_sentences)
        print('event amount: ', event_amount)
        print('cluster amount: ', cluster_amount)
        for _, line in df_log_structured.iterrows():
            # EventIds look like 'E12'; map them to 0-based integer labels.
            label_true.append(int(line['EventId'][1:]) - 1)
    rand_index = metrics.rand_score(label_true, cluster_assignment)
    homogeneity = metrics.homogeneity_score(label_true, cluster_assignment)
    completeness = metrics.completeness_score(label_true, cluster_assignment)
    # v = (1 + beta) * homogeneity * completeness / (beta * homogeneity + completeness)
    v_measure = metrics.v_measure_score(label_true, cluster_assignment, beta=1.0)
    adj_rand_index = metrics.adjusted_rand_score(label_true, cluster_assignment)
    normalized_mi = metrics.normalized_mutual_info_score(label_true, cluster_assignment)
# print("rand_index: ",rand_index)
# print('homogeneity score: ',homogeneity)
# print('completeness score: ',completeness)
# print('v measure score: ',v_measure)
    print('ARI: ', adj_rand_index)
    print('NMI: ', normalized_mi)
    # Parsing accuracy (PA): a predicted cluster counts as correct only if it
    # contains exactly the same set of log lines as one ground-truth event.
    series_parsedlog = pd.Series(cluster_assignment)
    series_groundtruth = pd.Series(label_true)
    series_parsedlog_valuecounts = series_parsedlog.value_counts()
    accurate_events = 0  # number of log lines that fall in exactly-matched clusters
    for parsed_eventId in series_parsedlog_valuecounts.index:
        logIds = series_parsedlog[series_parsedlog == parsed_eventId].index
        series_groundtruth_logId_valuecounts = series_groundtruth[logIds].value_counts()
        if series_groundtruth_logId_valuecounts.size == 1:
            groundtruth_eventId = series_groundtruth_logId_valuecounts.index[0]
            # Exact match: the cluster and the ground-truth event cover the same lines.
            if logIds.size == series_groundtruth[series_groundtruth == groundtruth_eventId].size:
                accurate_events += logIds.size
    parsing_accuracy = float(accurate_events) / series_groundtruth.size
    print("parsing accuracy: ", parsing_accuracy)
    score = {
        'rand index': rand_index,
        'parsing accuracy': parsing_accuracy,
        'homogeneity': homogeneity,
        'completeness': completeness,
        'v measure': v_measure,
        'ARI': adj_rand_index,
        'NMI': normalized_mi,
    }
    return score, event_amount, cluster_amount
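

# A minimal usage sketch (an assumption, not part of the original module): the
# cluster assignment would normally come from the upstream contrastive-learning
# clustering step, and the 'HDFS' dataset name assumes
# ./logs/HDFS/HDFS_2k.log_structured.csv exists with 2000 labeled lines.
if __name__ == "__main__":
    # One cluster id per log line, aligned with the row order of the CSV.
    example_assignment = [0] * 1000 + [1] * 1000
    # Clustered raw messages; only its length (the cluster count) is used here.
    example_clusters = [["cluster 0 lines"], ["cluster 1 lines"]]
    score, n_events, n_clusters = clustering_evaluate(
        "HDFS", example_assignment, example_clusters
    )
    print(score)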