in retail/recommendation-system/bqml-scann/tfx_pipeline/scann_indexer.py [0:0]
def build_index(embeddings, num_leaves):
    """Build a ScaNN index over the given embeddings.

    Args:
        embeddings: Object exposing ``.shape``; ``shape[0]`` is taken as the
            number of embeddings to index (presumably a 2-D matrix of
            row vectors -- confirm against the caller).
        num_leaves: Number of tree leaves to partition the data into. When
            falsy (``None`` or ``0``), the truncated square root of the
            number of embeddings is used instead.

    Returns:
        The object produced by the ScaNN builder's ``build()`` call.
    """
    data_size = embeddings.shape[0]

    # Fall back to sqrt(N) leaves when the caller did not choose a value.
    num_leaves = num_leaves or int(math.sqrt(data_size))

    logging.info(f'Indexing {data_size} embeddings with {num_leaves} leaves.')
    logging.info('Start building the ScaNN index...')

    # Configure the builder one stage at a time: partitioning tree,
    # asymmetric-hashing scorer, then the reordering step.
    index_builder = scann.scann_ops.builder(embeddings, NUM_NEIGHBOURS, METRIC)
    index_builder = index_builder.tree(
        num_leaves=num_leaves,
        num_leaves_to_search=NUM_LEAVES_TO_SEARCH,
        # The full dataset size is passed as the training sample size.
        training_sample_size=data_size,
    )
    index_builder = index_builder.score_ah(
        DIMENSIONS_PER_BLOCK,
        anisotropic_quantization_threshold=ANISOTROPIC_QUANTIZATION_THRESHOLD,
    )
    index_builder = index_builder.reorder(REORDER_NUM_NEIGHBOURS)

    scann_index = index_builder.build()
    logging.info('ScaNN index is built.')
    return scann_index