aiops/ContrastiveLearningLogClustering/utils/evaluation.py (134 lines of code) (raw):

from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
import torch
import torch.nn.functional as F
import time
import pandas as pd
from sentence_transformers import SentenceTransformer


def generate_embeddings(model, corpus):
    # Mean pooling - take the average of all token embeddings
    def mean_pooling(model_output, attention_mask):
        token_embeddings = model_output.last_hidden_state  # first element of model_output contains all token embeddings
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

    # Encode text
    # def encode(texts):
    #     # Tokenize sentences
    #     encoded_input = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    #     # encoded_input.cuda()
    #     # Compute token embeddings
    #     with torch.no_grad():
    #         model_output = model(**encoded_input, return_dict=True)
    #     # Perform pooling
    #     embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
    #     # Normalize embeddings
    #     embeddings = F.normalize(embeddings, p=2, dim=1)
    #     return embeddings

    # embedder = SentenceTransformer(model_name)
    # print(embedder)
    lower_corpus = [s.lower() for s in corpus]
    # Generate the log embeddings
    start_time = int(time.time())
    # corpus_embeddings = embedder.encode(lower_corpus)
    corpus_embeddings = model.encode(lower_corpus, normalize_embeddings=True)
    # Normalize the embeddings to unit length
    # corpus_embeddings = corpus_embeddings / np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)
    return corpus_embeddings


def embeddings_clustering(corpus, corpus_embeddings, distance_threshold=0.1):
    def compute_dot_similarity(a):
        score = a.dot(a.transpose(1, 0))  # (b,n) * (n,b)
        return score  # (b,b)

    # Single-linkage agglomerative clustering on cosine distance; note that
    # scikit-learn >= 1.4 renamed the `affinity` parameter to `metric`.
    clustering_model = AgglomerativeClustering(n_clusters=None, affinity='cosine',
                                               linkage='single', distance_threshold=distance_threshold)
    clustering_model.fit(corpus_embeddings)
    cluster_assignment = clustering_model.labels_

    # Group the corpus sentences by their assigned cluster id
    clustered_sentences = {}
    for sentence_id, cluster_id in enumerate(cluster_assignment):
        if cluster_id not in clustered_sentences:
            clustered_sentences[cluster_id] = []
        clustered_sentences[cluster_id].append(corpus[sentence_id])
    return clustered_sentences, cluster_assignment

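# Example usage (a hypothetical sketch; the checkpoint name below is an
# assumption, any SentenceTransformer model works):
#   model = SentenceTransformer('all-MiniLM-L6-v2')
#   corpus = ['Connection reset by peer', 'Connection closed by remote host']
#   embeddings = generate_embeddings(model, corpus)
#   clusters, assignment = embeddings_clustering(corpus, embeddings, distance_threshold=0.1)
#   # clusters maps cluster id -> list of member log messages
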
def clustering_evaluate(log_type, cluster_assignment, clustered_sentences):
    label_true = []
    if log_type == "Flink" or log_type == 'ODPS':
        # Industry datasets ship as <log_type>_test.csv with a label_id column;
        # rows with label_id == -1 are unlabeled and dropped.
        df_log = pd.read_csv(log_type.lower() + '_test.csv')
        # df_groundtruth = df_log_structured['EventId']
        df_log = df_log[df_log['label_id'] != -1]
        label_count = df_log['label_id'].value_counts()
        event_amount = len(label_count)
        cluster_amount = len(clustered_sentences)
        print('event amount: ', event_amount)
        print('cluster amount: ', cluster_amount)
        for idx, line in df_log.iterrows():
            label = line['label_id']
            label_true.append(label)
    else:
        # Public datasets follow the LogHub layout with EventId labels like 'E12'
        df_log_structured = pd.read_csv("./logs/" + log_type + "/" + log_type + "_2k.log_structured.csv")
        # df_groundtruth = df_log_structured['EventId']
        label_count = df_log_structured['EventId'].value_counts()
        event_amount = len(label_count)
        cluster_amount = len(clustered_sentences)
        print('event amount: ', event_amount)
        print('cluster amount: ', cluster_amount)
        for idx, line in df_log_structured.iterrows():
            label = line['EventId']
            label_true.append(int(label[1:]) - 1)  # 'E12' -> 11

    rand_index = metrics.rand_score(label_true, cluster_assignment)
    homogeneity = metrics.homogeneity_score(label_true, cluster_assignment)
    completeness = metrics.completeness_score(label_true, cluster_assignment)
    # v = (1 + beta) * homogeneity * completeness / (beta * homogeneity + completeness)
    v_measure = metrics.v_measure_score(label_true, cluster_assignment, beta=1)
    adj_rand_index = metrics.adjusted_rand_score(label_true, cluster_assignment)
    normalized_mi = metrics.normalized_mutual_info_score(label_true, cluster_assignment)
    # print("rand_index: ", rand_index)
    # print('homogeneity score: ', homogeneity)
    # print('completeness score: ', completeness)
    # print('v measure score: ', v_measure)
    print('ARI', adj_rand_index)
    print('NMI', normalized_mi)

    label_groundtrue = label_true
    # for idx, line in df_log_structured.iterrows():
    #     label_groundtrue.append(int(line['EventId'][1:]) - 1)
    series_parsedlog = pd.Series(cluster_assignment)
    series_groundtruth = pd.Series(label_groundtrue)
    series_parsedlog_valuecounts = series_parsedlog.value_counts()
    # series_groundtruth_valuecounts = series_groundtruth.value_counts()
    accurate_pairs = 0
    accurate_events = 0
    # determine how many lines are correctly parsed
    for parsed_eventId in series_parsedlog_valuecounts.index:
        logIds = series_parsedlog[series_parsedlog == parsed_eventId].index
        series_groundtruth_logId_valuecounts = series_groundtruth[logIds].value_counts()
        error_eventIds = (parsed_eventId, series_groundtruth_logId_valuecounts.index.tolist())
        error = True
        if series_groundtruth_logId_valuecounts.size == 1:
            groundtruth_eventId = series_groundtruth_logId_valuecounts.index[0]
            if logIds.size == series_groundtruth[series_groundtruth == groundtruth_eventId].size:
                # A cluster only counts if it matches one ground-truth event exactly
                accurate_events += logIds.size
                error = False
    parsing_accuracy = float(accurate_events) / series_groundtruth.size
    print("parsing accuracy: ", parsing_accuracy)
    # F1_measure = metrics.f1_score(label_true, label_pre, average='micro')
    # print("F1 score: ", F1_measure)

    score = {}
    score['rand index'] = rand_index
    score['parsing accuracy'] = parsing_accuracy
    score['homogeneity'] = homogeneity
    score['completeness'] = completeness
    score['v measure'] = v_measure
    score['ARI'] = adj_rand_index
    score['NMI'] = normalized_mi
    return score, event_amount, cluster_amount

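# Worked example of the parsing-accuracy (grouping-accuracy) rule above, using
# made-up labels: a predicted cluster counts as correct only if it matches one
# ground-truth event exactly (same members, nothing missing, nothing extra).
#   ground truth: [E1, E1, E2, E2, E2]
#   prediction:   [ 0,  0,  1,  1,  2]
# Cluster 0 equals event E1 exactly -> its 2 lines count as accurate.
# Clusters 1 and 2 split E2 -> none of those 3 lines count.
# parsing accuracy = 2 / 5 = 0.4
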
def clustering_evaluate_industry(file_name, cluster_assignment, clustered_sentences):
    # Same evaluation as above, but for an arbitrary labeled CSV with a
    # label_id column (label_id == -1 marks unlabeled rows).
    df_log = pd.read_csv(file_name)
    # df_groundtruth = df_log_structured['EventId']
    df_log = df_log[df_log['label_id'] != -1]
    label_count = df_log['label_id'].value_counts()
    event_amount = len(label_count)
    cluster_amount = len(clustered_sentences)
    print('event amount: ', event_amount)
    print('cluster amount: ', cluster_amount)
    label_true = []
    for idx, line in df_log.iterrows():
        label = line['label_id']
        label_true.append(label)

    rand_index = metrics.rand_score(label_true, cluster_assignment)
    homogeneity = metrics.homogeneity_score(label_true, cluster_assignment)
    completeness = metrics.completeness_score(label_true, cluster_assignment)
    # v = (1 + beta) * homogeneity * completeness / (beta * homogeneity + completeness)
    v_measure = metrics.v_measure_score(label_true, cluster_assignment, beta=1)
    adj_rand_index = metrics.adjusted_rand_score(label_true, cluster_assignment)
    normalized_mi = metrics.normalized_mutual_info_score(label_true, cluster_assignment)
    # print("rand_index: ", rand_index)
    # print('homogeneity score: ', homogeneity)
    # print('completeness score: ', completeness)
    # print('v measure score: ', v_measure)
    print('ARI', adj_rand_index)
    print('NMI', normalized_mi)

    label_groundtrue = label_true
    series_parsedlog = pd.Series(cluster_assignment)
    series_groundtruth = pd.Series(label_groundtrue)
    series_parsedlog_valuecounts = series_parsedlog.value_counts()
    # series_groundtruth_valuecounts = series_groundtruth.value_counts()
    accurate_pairs = 0
    accurate_events = 0
    # determine how many lines are correctly parsed
    for parsed_eventId in series_parsedlog_valuecounts.index:
        logIds = series_parsedlog[series_parsedlog == parsed_eventId].index
        series_groundtruth_logId_valuecounts = series_groundtruth[logIds].value_counts()
        error_eventIds = (parsed_eventId, series_groundtruth_logId_valuecounts.index.tolist())
        error = True
        if series_groundtruth_logId_valuecounts.size == 1:
            groundtruth_eventId = series_groundtruth_logId_valuecounts.index[0]
            if logIds.size == series_groundtruth[series_groundtruth == groundtruth_eventId].size:
                accurate_events += logIds.size
                error = False
    parsing_accuracy = float(accurate_events) / series_groundtruth.size
    print("parsing accuracy: ", parsing_accuracy)
    # F1_measure = metrics.f1_score(label_true, label_pre, average='micro')
    # print("F1 score: ", F1_measure)

    score = {}
    score['rand index'] = rand_index
    score['parsing accuracy'] = parsing_accuracy
    score['homogeneity'] = homogeneity
    score['completeness'] = completeness
    score['v measure'] = v_measure
    score['ARI'] = adj_rand_index
    score['NMI'] = normalized_mi
    return score, event_amount, cluster_amount
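

# A minimal end-to-end driver (a hypothetical sketch, not part of the original
# module): the checkpoint name and the choice of 'HDFS' are assumptions, and it
# assumes a LogHub-style structured CSV whose 'Content' column holds the raw
# log messages.
if __name__ == '__main__':
    log_type = 'HDFS'  # any dataset under ./logs/<log_type>/<log_type>_2k.log_structured.csv
    df = pd.read_csv("./logs/" + log_type + "/" + log_type + "_2k.log_structured.csv")
    corpus = df['Content'].tolist()
    model = SentenceTransformer('all-MiniLM-L6-v2')  # assumed checkpoint
    corpus_embeddings = generate_embeddings(model, corpus)
    clustered_sentences, cluster_assignment = embeddings_clustering(
        corpus, corpus_embeddings, distance_threshold=0.1)
    score, event_amount, cluster_amount = clustering_evaluate(
        log_type, cluster_assignment, clustered_sentences)
    print(score)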