in reference/src/main/python/similar.py [0:0]
def get_completions2(query_record, candidate_records):
    """Build pruned completion records from pairs and triples of candidates.

    Pass 1 ("2-way"): for each candidate index ``i`` the later indices
    ``j > i`` are scanned. A ``j`` qualifies when
    ``find_similarity_score_features`` on the two entries' ``[2]`` slots
    exceeds ``config.THRESHOLD1`` and ``find_similarity_score_features_un``
    on their ``[0]`` slots exceeds ``config.THRESHOLD2`` times the same
    function applied to their ``[2]`` slots. Among qualifying ``j`` the one
    with the strictly largest ``[0]``-slot score is kept, and
    ``prune_last_jd`` is called to produce the pruned record for ``i``.

    Pass 2 ("3-way"): every pair found in pass 1 is extended with a third
    index ``k`` (different from both ``i`` and ``j``) using the same two
    thresholds with the ``*_set`` / ``*_set_un`` scoring functions; the
    pair's pruned record is pruned again against the winning ``k``.

    Side effects: prints the pass banners, prints ``ast_to_code`` of each
    pruned record's ``"ast"`` entry, and both logs and prints a summary line.

    Args:
        query_record: Passed unchanged as the first element of the list
            handed to ``prune_last_jd``.
        candidate_records: Sequence indexed by integer position; each entry
            is read at slot ``[0]`` and slot ``[2]`` (presumably the record
            and its feature representation -- confirm against the caller).

    Returns:
        A list holding the 3-way tuples ``(pruned_record, i, j, k)`` first,
        followed by the 2-way tuples ``(pruned_record, i, j)``.
    """
    total = len(candidate_records)
    n_clusters = 0
    # Stays at 0: the fallback that would record an unmatched candidate as a
    # "single" is disabled, so nothing ever increments this counter.
    n_uniques = 0

    print("2-way")
    pair_results = []
    for i in range(total):
        anchor = candidate_records[i]
        best_j, best_score = None, 0
        for j in range(i + 1, total):
            other = candidate_records[j]
            feature_score = find_similarity_score_features(anchor[2], other[2])
            if not (feature_score > config.THRESHOLD1):
                continue
            # Feature-level score sets the bar the record-level score must clear.
            feature_floor = find_similarity_score_features_un(anchor[2], other[2])
            record_score = find_similarity_score_features_un(anchor[0], other[0])
            clears_floor = record_score > config.THRESHOLD2 * feature_floor
            if clears_floor and record_score > best_score:
                best_j, best_score = j, record_score
        if best_j is None:
            continue
        pruned = prune_last_jd(
            [query_record, candidate_records[best_j][0]], anchor[0]
        )
        pair_results.append((pruned, i, best_j))
        print(ast_to_code(pruned["ast"]))
        n_clusters += 1

    print("3-way")
    triple_results = []
    for pair_record, i, j in pair_results:
        if i == j:
            continue
        first, second = candidate_records[i], candidate_records[j]
        best_k, best_score = None, 0
        for k in range(total):
            if k == i or k == j:
                continue
            third = candidate_records[k]
            feature_score = find_similarity_score_features_set(
                [first[2], second[2], third[2]]
            )
            if not (feature_score > config.THRESHOLD1):
                continue
            feature_floor = find_similarity_score_features_set_un(
                [first[2], second[2], third[2]]
            )
            record_score = find_similarity_score_features_set_un(
                [first[0], second[0], third[0]]
            )
            clears_floor = record_score > config.THRESHOLD2 * feature_floor
            if clears_floor and record_score > best_score:
                best_k, best_score = k, record_score
        if best_k is None:
            continue
        # The pair's already-pruned record is pruned again against the third member.
        pruned = prune_last_jd(
            [query_record, candidate_records[best_k][0]], pair_record
        )
        n_clusters += 1
        print(ast_to_code(pruned["ast"]))
        triple_results.append((pruned, i, j, best_k))

    # The "#completions" figure counts the 2-way results only.
    summary = f"(# similars, #clusters, #singles, #completions) = ({len(candidate_records)}, {n_clusters}, {n_uniques}, {len(pair_results)})"
    logging.info(summary)
    print(summary)
    return triple_results + pair_results