def privatize_example()

in source/sagemaker/src/package/data_privatization/data_privatization.py [0:0]

16 lines of code
5 McCabe index (conditional complexity)


def privatize_example(example, local_vocab, local_embedding_dims, local_epsilon):
    """Return one quoted CSV-style row whose text is ``example`` with every word replaced.

    The Annoy index file ``index.ann`` (resolved through ``SparkFiles.get``) is
    loaded on each call. ``clean_example(example)`` supplies the phrases; each
    one is stripped, and phrases that are empty after stripping are dropped.
    Every remaining phrase is split on single spaces and each token is passed
    through ``replace_word``. The replaced tokens of all phrases are joined
    with single spaces into one review string.

    Args:
        example: Record handed to ``clean_example``; its ``sentiment``
            attribute is appended to the output row.
        local_vocab: Forwarded unchanged to ``replace_word``.
        local_embedding_dims: Dimensionality used to construct the
            ``AnnoyIndex``; also forwarded to ``replace_word``.
        local_epsilon: Forwarded unchanged to ``replace_word``.

    Returns:
        A string of the form ``"<privatized text>",<sentiment>``.
    """
    # Function-scope import, kept as in the original.
    # NOTE(review): presumably so the module can be imported where annoy is
    # only available on the Spark workers -- confirm.
    from annoy import AnnoyIndex

    # Open the nearest-neighbour index shipped alongside the job.
    ann_index = AnnoyIndex(local_embedding_dims, 'euclidean')
    ann_index.load(SparkFiles.get("index.ann"))

    # Collect every non-blank phrase up front, before any word is replaced.
    phrases = []
    for raw_phrase in clean_example(example):
        trimmed = raw_phrase.strip()
        if trimmed:
            phrases.append(trimmed)

    # Replace word by word, accumulating one flat list across all phrases.
    # Splitting on ' ' (not on arbitrary whitespace) means consecutive spaces
    # produce empty tokens, which are passed to replace_word as well.
    replaced_words = []
    for phrase in phrases:
        replaced_words.extend(
            replace_word(
                token, local_vocab, local_epsilon, ann_index, local_embedding_dims)
            for token in phrase.split(' ')
        )

    # str.join requires every replace_word result to be a string.
    review_text = " ".join(replaced_words)

    # NOTE(review): double quotes inside review_text are not escaped here.
    return "\"{}\",{}".format(review_text, example.sentiment)