def _RedactTranscript()

in utils/import_conversations_v2.py [0:0]


def _RedactTranscript(transcript_response, project_id,
                      impersonated_service_account):
  """Redacts the text of every entry in a transcript using Cloud DLP.

  The transcript entries are sent to DLP as a four-column table
  (start_timestamp_usec, text, role, user_id). Only the `text` column is
  transformed: findings of the configured info types are masked with '*'.

  Args:
      transcript_response: JSON document (as accepted by `json.loads`) with
        an 'entries' list. Every entry must provide the keys
        'start_timestamp_usec', 'text', 'role' and 'user_id'.
      project_id: The project ID (not number) to use for redaction.
      impersonated_service_account: The service account to impersonate.

  Returns:
      The parsed transcript as a dict whose 'entries' list has been rebuilt
      from the DLP response. Each rebuilt entry holds exactly the four
      columns above ('start_timestamp_usec' as str, 'user_id' as int); any
      other keys on the original entries are not carried over. If the
      transcript has no entries it is returned unchanged and DLP is not
      called.

  Raises:
      KeyError: If 'entries' or one of the required entry keys is missing.
  """
  transcript_dict = json.loads(transcript_response)
  entry_list = transcript_dict['entries']
  if not entry_list:
    # Nothing to redact; avoid indexing into an empty list and a pointless
    # DLP round trip.
    return transcript_dict

  # The header order must match the order in which row values are emitted
  # below. Deriving headers from the first entry's keys would misalign the
  # table whenever the entry's key order differs or it carries extra keys.
  columns = ('start_timestamp_usec', 'text', 'role', 'user_id')
  headers = [{'name': column} for column in columns]
  rows = [
      {
          'values': [
              {'string_value': str(element['start_timestamp_usec'])},
              {'string_value': element['text']},
              {'string_value': element['role']},
              {'string_value': str(element['user_id'])},
          ]
      }
      for element in entry_list
  ]
  item = {'table': {'headers': headers, 'rows': rows}}

  info_types = [
      {'name': name}
      for name in (
          'AGE',
          'CREDIT_CARD_NUMBER',
          'CREDIT_CARD_TRACK_NUMBER',
          'DOMAIN_NAME',
          'EMAIL_ADDRESS',
          'FEMALE_NAME',
          'MALE_NAME',
          'FIRST_NAME',
          'GENDER',
          'GENERIC_ID',
          'IP_ADDRESS',
          'LAST_NAME',
          'LOCATION',
          'PERSON_NAME',
          'PHONE_NUMBER',
          'STREET_ADDRESS',
      )
  ]
  deidentify_config = {
      'record_transformations': {
          'field_transformations': [{
              # Only the free-form text column is masked; the other columns
              # pass through DLP untouched.
              'fields': [{'name': 'text'}],
              'info_type_transformations': {
                  'transformations': [{
                      'primitive_transformation': {
                          'character_mask_config': {'masking_character': '*'}
                      },
                      'info_types': info_types,
                  }]
              },
          }]
      }
  }
  inspect_config = {'info_types': info_types}

  dlp = google.cloud.dlp_v2.DlpServiceClient(
      credentials=_GetClientCredentials(impersonated_service_account),
  )
  response = dlp.deidentify_content(
      request={
          'parent': f'projects/{project_id}',
          'deidentify_config': deidentify_config,
          'inspect_config': inspect_config,
          'item': item,
      }
  )

  redacted_entries = []
  for row in response.item.table.rows:
    # Keep only cells that carry a string value, in column order.
    cells = [col.string_value for col in row.values if 'string_value' in col]
    redacted_entries.append({
        'start_timestamp_usec': str(cells[0]),
        'text': cells[1],
        'role': cells[2],
        'user_id': int(cells[3]),
    })

  transcript_dict['entries'] = redacted_entries
  return transcript_dict