in retail/recommendation-system/bqml-scann/index_builder/builder/indexer.py [0:0]
def load_embeddings(embedding_files_pattern):
    """Load item embeddings from CSV-style text files and L2-normalize them.

    Every non-blank line is split on commas: the first field is kept as the
    item token and the remaining fields are parsed as floats to form the
    embedding vector.

    Args:
        embedding_files_pattern: Glob pattern passed to ``tf.io.gfile.glob``
            to locate the embedding files.

    Returns:
        A ``(tokens, embeddings)`` tuple. ``tokens`` is a list with the first
        field of every parsed line; ``embeddings`` is the result of
        ``np.array`` over the normalized vectors, in the same order as
        ``tokens`` (a 2-D array when every line has the same number of
        values).

    Raises:
        ValueError: If an embedding field cannot be parsed as a float.
    """
    embedding_list = []
    tokens = []
    embed_files = tf.io.gfile.glob(embedding_files_pattern)
    print(f'{len(embed_files)} embedding files are found.')

    for file_idx, embed_file in enumerate(embed_files):
        print(f'Loading embeddings in file {file_idx+1} of {len(embed_files)}...')
        with tf.io.gfile.GFile(embed_file, 'r') as file_reader:
            # Iterate the reader directly so the file is streamed instead of
            # being materialized as a list of lines first.
            for line in file_reader:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline at end of file)
                    # carry no record; parsing them would add an empty
                    # embedding and make the final array ragged.
                    continue
                item_id, *values = line.split(',')
                embedding = np.array([float(v) for v in values])
                norm = np.linalg.norm(embedding)
                # An all-zero vector has no direction; keep it as zeros
                # rather than dividing by zero and filling it with NaN.
                if norm > 0:
                    embedding = embedding / norm
                embedding_list.append(embedding)
                tokens.append(item_id)

    print(f'{len(embedding_list)} embeddings are loaded.')
    return tokens, np.array(embedding_list)