def clustering_evaluate()

in aiops/ContrastiveLearningLogClustering/utils/evaluation.py


import pandas as pd
from sklearn import metrics


def clustering_evaluate(log_type, cluster_assignment, clustered_sentences):
    
    if log_type in ("Flink", "ODPS"):
        # Ground truth comes from a labeled test CSV; rows with label_id == -1
        # are unlabeled and are excluded from the evaluation.
        df_log = pd.read_csv(log_type.lower() + '_test.csv')
        df_log = df_log[df_log['label_id'] != -1]
        label_count = df_log['label_id'].value_counts()
        event_amount = len(label_count)
        cluster_amount = len(clustered_sentences)
        print('event amount: ', event_amount)
        print('cluster amount: ', cluster_amount)

        label_true = df_log['label_id'].tolist()
    else:
        # Ground truth comes from the structured 2k log file, where each line
        # carries an EventId of the form 'E<k>'; map it to the integer k - 1.
        df_log_structured = pd.read_csv("./logs/" + log_type + "/" + log_type + "_2k.log_structured.csv")
        label_count = df_log_structured['EventId'].value_counts()
        event_amount = len(label_count)
        cluster_amount = len(clustered_sentences)
        print('event amount: ', event_amount)
        print('cluster amount: ', cluster_amount)

        label_true = [int(event_id[1:]) - 1 for event_id in df_log_structured['EventId']]

    rand_index = metrics.rand_score(label_true, cluster_assignment)
    homogeneity = metrics.homogeneity_score(label_true, cluster_assignment)
    completeness = metrics.completeness_score(label_true, cluster_assignment)
    # v = (1 + beta) * homogeneity * completeness / (beta * homogeneity + completeness)
    v_measure = metrics.v_measure_score(label_true, cluster_assignment, beta=1)
    adj_rand_index = metrics.adjusted_rand_score(label_true, cluster_assignment)
    normalized_mi = metrics.normalized_mutual_info_score(label_true, cluster_assignment)
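    # Rand index counts pairwise agreements; ARI corrects it for chance; NMI is
    # mutual information normalized to [0, 1]; with beta=1 the V-measure is the
    # harmonic mean of homogeneity and completeness.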
    
    print('ARI: ', adj_rand_index)
    print('NMI: ', normalized_mi)

    # Pair predicted cluster ids with ground-truth labels, index-aligned.
    series_parsedlog = pd.Series(cluster_assignment)
    series_groundtruth = pd.Series(label_true)
    series_parsedlog_valuecounts = series_parsedlog.value_counts()

    # Parsing (grouping) accuracy: a line counts as correctly parsed only if its
    # predicted cluster contains exactly the same set of lines as its
    # ground-truth event, i.e. the two groups match one-to-one.
    accurate_events = 0
    for parsed_eventId in series_parsedlog_valuecounts.index:
        logIds = series_parsedlog[series_parsedlog == parsed_eventId].index
        series_groundtruth_logId_valuecounts = series_groundtruth[logIds].value_counts()
        if series_groundtruth_logId_valuecounts.size == 1:
            groundtruth_eventId = series_groundtruth_logId_valuecounts.index[0]
            if logIds.size == series_groundtruth[series_groundtruth == groundtruth_eventId].size:
                accurate_events += logIds.size

    parsing_accuracy = float(accurate_events) / series_groundtruth.size
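    # Example: with ground truth [0, 0, 1] and clustering [5, 5, 5], the single
    # predicted cluster mixes two events, so accurate_events = 0 and
    # parsing_accuracy = 0.0; a perfect clustering scores 1.0.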
    
    print("parsing accuracy: ", parsing_accuracy)

    score = {
        'rand index': rand_index,
        'parsing accuracy': parsing_accuracy,
        'homogeneity': homogeneity,
        'completeness': completeness,
        'v measure': v_measure,
        'ARI': adj_rand_index,
        'NMI': normalized_mi,
    }

    return score, event_amount, cluster_amount
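
A minimal smoke test, assuming the function is importable from utils/evaluation.py; the "Demo" log type, file contents, and cluster ids below are hypothetical and exist only to exercise the else branch, which reads ./logs/<log_type>/<log_type>_2k.log_structured.csv:

import os
import pandas as pd

from utils.evaluation import clustering_evaluate  # assumed import path

# Write a tiny structured log file in the layout the else branch expects.
os.makedirs("./logs/Demo", exist_ok=True)
pd.DataFrame({"EventId": ["E1", "E1", "E2", "E2", "E3"]}).to_csv(
    "./logs/Demo/Demo_2k.log_structured.csv", index=False)

cluster_assignment = [0, 0, 1, 1, 2]                    # one cluster id per log line
clustered_sentences = [["a", "b"], ["c", "d"], ["e"]]   # only its length is used

score, event_amount, cluster_amount = clustering_evaluate(
    "Demo", cluster_assignment, clustered_sentences)
print(score)  # a perfect clustering scores 1.0 on every metric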