in utils/import_conversations_v2.py [0:0]
def _RedactTranscript(transcript_response, project_id,
                      impersonated_service_account):
    """Redacts sensitive info types from the text of a transcript.

    The transcript entries are sent to Cloud DLP as a four-column table
    (start_timestamp_usec, text, role, user_id). Findings of the configured
    info types in the `text` column are transformed with a character mask
    that uses '*' as the masking character.

    Args:
      transcript_response: JSON string of the transcription response. It must
        contain an 'entries' list whose items have the keys
        'start_timestamp_usec', 'text', 'role' and 'user_id'.
      project_id: The project ID (not number) to use for redaction.
      impersonated_service_account: The service account to impersonate.

    Returns:
      The parsed transcript as a dict whose 'entries' list is rebuilt from the
      DLP response. Each rebuilt entry holds only the four fields above, with
      'start_timestamp_usec' as a str and 'user_id' as an int. When there are
      no entries the parsed transcript is returned unchanged and DLP is not
      called.
    """
    transcript_dict = json.loads(transcript_response)
    entry_list = transcript_dict['entries']
    if not entry_list:
        # Nothing to redact: skip credential creation and the DLP round trip.
        return transcript_dict

    dlp = google.cloud.dlp_v2.DlpServiceClient(
        credentials=_GetClientCredentials(impersonated_service_account),
    )

    # The header order must match the order in which row values are emitted
    # below. Deriving headers from the first entry's keys would mislabel
    # columns whenever the JSON key order differs or extra keys are present,
    # and the 'text' field transformation would then target the wrong column.
    columns = ('start_timestamp_usec', 'text', 'role', 'user_id')
    headers = [{'name': column} for column in columns]
    rows = [
        {
            'values': [
                {'string_value': str(element['start_timestamp_usec'])},
                {'string_value': element['text']},
                {'string_value': element['role']},
                {'string_value': str(element['user_id'])},
            ]
        }
        for element in entry_list
    ]
    items = {'table': {'headers': headers, 'rows': rows}}

    info_types = [
        'AGE',
        'CREDIT_CARD_NUMBER',
        'CREDIT_CARD_TRACK_NUMBER',
        'DOMAIN_NAME',
        'EMAIL_ADDRESS',
        'FEMALE_NAME',
        'MALE_NAME',
        'FIRST_NAME',
        'GENDER',
        'GENERIC_ID',
        'IP_ADDRESS',
        'LAST_NAME',
        'LOCATION',
        'PERSON_NAME',
        'PHONE_NUMBER',
        'STREET_ADDRESS',
    ]
    info_type_names = [{'name': info_type} for info_type in info_types]

    # Only the 'text' column is transformed; the other columns are sent along
    # so that each redacted row can be turned back into a transcript entry.
    deidentify_config = {
        'record_transformations': {
            'field_transformations': [{
                'fields': [{'name': 'text'}],
                'info_type_transformations': {
                    'transformations': [{
                        'primitive_transformation': {
                            'character_mask_config': {
                                'masking_character': '*'
                            }
                        },
                        'info_types': info_type_names,
                    }]
                },
            }]
        }
    }
    inspect_config = {'info_types': info_type_names}

    response = dlp.deidentify_content(
        request={
            'parent': f'projects/{project_id}',
            'deidentify_config': deidentify_config,
            'inspect_config': inspect_config,
            'item': items,
        }
    )

    redacted_entries = []
    for row in response.item.table.rows:
        values = [
            col.string_value for col in row.values if 'string_value' in col
        ]
        redacted_entries.append({
            'start_timestamp_usec': str(values[0]),
            'text': values[1],
            'role': values[2],
            'user_id': int(values[3]),
        })
    transcript_dict['entries'] = redacted_entries
    return transcript_dict