in filtering/deduplication/add_dedup_info.py [0:0]
def get_doc_id(pos, pos2id, pos2id_list):
    """
    Gets id of the datapoint at position.

    The lookup finds the right-most entry of ``pos2id_list`` that is less
    than or equal to ``pos`` and returns the id stored under that entry in
    ``pos2id``.

    Args:
        pos: Position to resolve.
        pos2id: Mapping from each entry of ``pos2id_list`` to an id.
        pos2id_list: Sequence of boundary positions. It must be sorted in
            ascending order for the binary search to be meaningful
            (presumably these are the start offsets of each datapoint --
            confirm against the caller).

    Returns:
        The id mapped to the closest boundary at or before ``pos``.

    Raises:
        IndexError: If ``pos2id_list`` is empty or ``pos`` lies before its
            first entry, i.e. no boundary precedes ``pos``.
        KeyError: If the selected boundary is missing from ``pos2id``.
    """
    # Number of boundaries that are <= pos; the owning boundary, if any,
    # is therefore the one at index ``insert_at - 1``.
    insert_at = bisect_right(pos2id_list, pos)
    if insert_at == 0:
        # Without this guard ``pos2id_list[-1]`` would wrap around and
        # silently return the id of the *last* datapoint.
        raise IndexError(
            f"position {pos!r} precedes every entry of pos2id_list"
        )
    return pos2id[pos2id_list[insert_at - 1]]