def load_embeddings()

in retail/recommendation-system/bqml-scann/index_builder/builder/indexer.py [0:0]


def load_embeddings(embedding_files_pattern):
  """Loads item embeddings from comma-separated text files and L2-normalizes them.

  Every file matched by `embedding_files_pattern` is read line by line. Each
  line is split on ','; the first field is kept as the item ID and the
  remaining fields are parsed as floats to form the embedding vector.
  Blank (whitespace-only) lines are skipped.

  Args:
    embedding_files_pattern: Glob pattern passed to `tf.io.gfile.glob` to
      locate the embedding files.

  Returns:
    A `(tokens, embeddings)` tuple. `tokens` is a list of item ID strings.
    `embeddings` is the result of `np.array` over the normalized vectors, in
    the same order as `tokens` (one row per token when every line carries the
    same number of values).

  Raises:
    ValueError: If a value field cannot be parsed as a float.
  """
  embedding_list = []
  tokens = []
  embed_files = tf.io.gfile.glob(embedding_files_pattern)
  print(f'{len(embed_files)} embedding files are found.')

  for file_idx, embed_file in enumerate(embed_files):
    print(f'Loading embeddings in file {file_idx+1} of {len(embed_files)}...')
    with tf.io.gfile.GFile(embed_file, 'r') as file_reader:
      # Stream the file instead of materializing every line with readlines().
      for line in file_reader:
        # A blank line (e.g. a trailing newline at end of file) would produce
        # an empty vector and make the final array ragged.
        if not line.strip():
          continue
        item_id, *values = line.split(',')
        embedding = np.array([float(v) for v in values])
        norm = np.linalg.norm(embedding)
        # Dividing an all-zero vector by its norm would yield NaNs; leave such
        # vectors as zeros instead.
        if norm > 0:
          embedding = embedding / norm
        embedding_list.append(embedding)
        tokens.append(item_id)

    # Cumulative count, reported after each file as a progress indicator.
    print(f'{len(embedding_list)} embeddings are loaded.')

  return tokens, np.array(embedding_list)