def build_index()

in retail/recommendation-system/bqml-scann/tfx_pipeline/scann_indexer.py [0:0]


def build_index(embeddings, num_leaves):
  """Build and return a ScaNN index over `embeddings`.

  Args:
    embeddings: Object exposing `.shape`; `shape[0]` is taken as the number
      of embeddings and is also passed as the tree training sample size.
    num_leaves: Leaf count for the tree stage. A falsy value (e.g. None or 0)
      is replaced by the integer part of the square root of `shape[0]`.

  Returns:
    The object returned by `build()` on the configured ScaNN builder.
  """
  data_size = embeddings.shape[0]
  # Fall back to a square-root heuristic when no leaf count is supplied.
  num_leaves = num_leaves or int(math.sqrt(data_size))
  logging.info(f'Indexing {data_size} embeddings with {num_leaves} leaves.')

  logging.info('Start building the ScaNN index...')
  # Configure the builder one stage at a time: tree -> score_ah -> reorder.
  index_config = scann.scann_ops.builder(embeddings, NUM_NEIGHBOURS, METRIC)
  index_config = index_config.tree(
      num_leaves=num_leaves,
      num_leaves_to_search=NUM_LEAVES_TO_SEARCH,
      training_sample_size=data_size)
  index_config = index_config.score_ah(
      DIMENSIONS_PER_BLOCK,
      anisotropic_quantization_threshold=ANISOTROPIC_QUANTIZATION_THRESHOLD)
  index_config = index_config.reorder(REORDER_NUM_NEIGHBOURS)

  built_index = index_config.build()
  logging.info('ScaNN index is built.')
  return built_index