in src/jobs/util/key_document_finder.py [0:0]
def get_keywords_and_topics(self):
    """Pick the highest-weighted words of every cluster as its keywords.

    Each row of ``self.weighted_important_word_matrix`` is handled as one
    cluster and each column as one entry of ``self.vectorizer_words``.
    For every row the columns are ranked by descending weight, and words
    whose weight is strictly greater than 0.05 are collected, best first,
    up to at most ``MAX_WORDS_PER_TOPIC`` words.

    Nothing is returned; the results are stored on ``self``:

    * ``sorted_indices`` -- per-row column indices, highest weight first.
    * ``sorted_scores``  -- the weights reordered to match ``sorted_indices``.
    * ``keyword_list``   -- one list of selected words per row.
    * ``topic_info_list`` -- the same words joined with ``", "`` per row
      (an empty string when no word passes the threshold).
    """
    score_matrix = self.weighted_important_word_matrix
    num_clusters = score_matrix.shape[0]
    num_words = score_matrix.shape[1]

    # Negating the matrix turns argsort's ascending order into a
    # descending-by-weight ranking along the word axis.
    self.sorted_indices = np.argsort(-score_matrix, 1)
    self.sorted_scores = np.take_along_axis(score_matrix, self.sorted_indices, axis=1)

    # Words at or below this weight are never reported as keywords.
    min_score = 0.05

    self.topic_info_list = []
    self.keyword_list = []
    for cluster in range(num_clusters):
        topic_words = []
        for rank in range(num_words):
            # Enforce the cap *before* appending so a topic never holds more
            # than MAX_WORDS_PER_TOPIC words (checking ``>`` after the append
            # let one extra word through).
            if len(topic_words) >= MAX_WORDS_PER_TOPIC:
                break
            # The row is in descending order (NaNs sort last), so the first
            # weight that fails the threshold means every later one fails
            # too -- no need to scan the rest of the vocabulary.
            if not self.sorted_scores[cluster, rank] > min_score:
                break
            topic_words.append(self.vectorizer_words[self.sorted_indices[cluster, rank]])
        self.topic_info_list.append(", ".join(topic_words))
        self.keyword_list.append(topic_words)