def get_redaction_intervals()

in infra-as-code/modules/ingest-pipeline/cf-audio-redaction/audio_redaction.py [0:0]


    def get_redaction_intervals(self, json_file):
        """Collect the transcript word entries that match DLP findings.

        Args:
            json_file: Mapping with two keys read by this method:
                ``'words'`` - a list of dicts, each holding at least a
                ``'word'`` string; ``'dlp'`` - an iterable of strings, each
                being a single word or a space-separated phrase.

        Returns:
            A list of the matching dicts taken from ``json_file['words']``
            (the same objects, not copies), ordered by DLP entry and then by
            position in the transcript. A word matched by several DLP
            entries appears once per matching entry.

        Side effects:
            Matched entries are tagged in place with an ``'index'`` key
            holding their position in ``json_file['words']``. For a
            multi-word phrase only the first word of each match is tagged.
        """
        words = json_file['words']
        word_list = [word_item['word'] for word_item in words]
        redaction_intervals = []

        for dlp_entry in json_file['dlp']:
            # str.split(' ') always yields at least one element, so
            # phrase[0] below is safe even for an empty DLP entry.
            phrase = dlp_entry.split(' ')
            span = len(phrase)

            if span >= 2:
                # Slide a window of the phrase length over the transcript.
                # Each start position is examined exactly once, so a phrase
                # whose first word recurs inside it (e.g. "no no thanks")
                # no longer contributes the same match several times.
                for start in range(len(words) - span + 1):
                    if word_list[start:start + span] == phrase:
                        # NOTE(review): only the first word of the match is
                        # tagged with 'index', unlike the single-word branch
                        # which tags every hit - kept as-is; confirm callers
                        # do not need it on the remaining words.
                        words[start]['index'] = start
                        redaction_intervals.extend(words[start:start + span])
            else:
                target = phrase[0]
                for position, element in enumerate(words):
                    if element['word'] == target:
                        element['index'] = position
                        redaction_intervals.append(element)

        return redaction_intervals