# utils/import_conversations_v2.py
def main():
    """Import conversations into CCAI Insights from audio or transcript files.

    Reads CLI flags via ``_ParseArgs()``. If an audio source is supplied
    (``--source_local_audio_path`` or ``--source_audio_gcs_bucket``), the
    audio files are staged/listed in GCS, already-processed files are
    skipped, and the remainder are imported. Otherwise, voice or chat
    transcripts are read from the given GCS bucket, already-ingested
    transcripts are filtered out, and the rest are imported. Optionally
    triggers analysis on the newly created conversations.

    Side effects: uploads/reads GCS objects, calls the Insights API, prints
    progress to stdout, and pickles the transcript URI list to a local file
    named ``transcripts_uris``.
    """
    pargs = _ParseArgs()
    project_id = pargs.project_id
    impersonated_service_account = pargs.impersonated_service_account
    insights_endpoint = pargs.insights_endpoint
    api_version = pargs.insights_api_version
    # NOTE: boolean-ish flags arrive as the strings 'True'/'False', hence the
    # string comparisons further down — TODO confirm against _ParseArgs.
    should_redact = pargs.redact
    agent_id = pargs.agent_id
    agent_channel = int(pargs.agent_channel)
    xml_bucket = pargs.xml_gcs_bucket
    analyze_conv = pargs.analyze
    folder_name = pargs.folder_name
    if pargs.source_local_audio_path or pargs.source_audio_gcs_bucket:
        # Inputs are audio files.
        dest_bucket = pargs.dest_gcs_bucket
        if pargs.source_local_audio_path:
            # Single local file: stage it in the destination bucket first.
            source_local_audio_path = pargs.source_local_audio_path
            source_audio_base_name = os.path.basename(source_local_audio_path)
            _UploadFileToGcs(
                dest_bucket,
                source_local_audio_path,
                source_audio_base_name,
                project_id,
                impersonated_service_account,
            )
            audio_uris = [_GetGcsUri(dest_bucket, source_audio_base_name)]
        elif pargs.source_audio_gcs_bucket:
            audio_uris = _GetGcsUris(
                pargs.source_audio_gcs_bucket,
                project_id,
                impersonated_service_account,
            )
        else:
            # Unreachable given the outer condition; kept as a safe default.
            audio_uris = []
        encoding = pargs.encoding
        language_code = pargs.language_code
        sample_rate_hertz = pargs.sample_rate_hertz
        # Object names (extension stripped) already present in the destination
        # bucket mark audio files handled by a previous run.
        processed_audio_files = _GetGcsUris(
            dest_bucket, project_id, impersonated_service_account, uri=False
        )
        processed_audio_files = [
            name.split('.')[0] for name in processed_audio_files
        ]
        # Remove already-processed files from the work list.
        audio_uris = _RemoveProcessedFiles(audio_uris, processed_audio_files)
        if not audio_uris:
            print('No audio file to process')
            conversation_names = []
        else:
            conversation_names = _ImportConversationsFromAudio(
                audio_uris,
                encoding,
                language_code,
                sample_rate_hertz,
                project_id,
                dest_bucket,
                insights_endpoint,
                api_version,
                should_redact,
                agent_id,
                impersonated_service_account,
                agent_channel,
                xml_bucket,
            )
    else:
        # Inputs are transcript files.
        if pargs.source_voice_transcript_gcs_bucket:
            medium = 1  # presumably the Insights medium enum for voice — TODO confirm
            print(pargs.source_voice_transcript_gcs_bucket)
            transcript_bucket = pargs.source_voice_transcript_gcs_bucket
        elif pargs.source_chat_transcript_gcs_bucket:
            print(pargs.source_chat_transcript_gcs_bucket)
            transcript_bucket = pargs.source_chat_transcript_gcs_bucket
            medium = 2  # presumably the Insights medium enum for chat — TODO confirm
        else:
            print('Provide at least one bucket for (Audio/Chat)')
            return
        transcript_metadata_flag = pargs.transcript_metadata_flag
        transcript_uris = _GetGcsUris(transcript_bucket, project_id,
                                      impersonated_service_account, folder_name)
        # Filter out transcripts that were already ingested in earlier runs.
        processed_transcripts_uris = _GetProcessedTranscripts(project_id)
        if should_redact == 'True':
            # Processed redacted transcripts are recorded under their
            # redacted-folder URI, so compare against that form.
            unprocessed_transcript_uris = []
            for transcript_uri in transcript_uris:
                redacted_uri = _AddRedactedFolderToGcsUri(transcript_uri)
                if redacted_uri not in processed_transcripts_uris:
                    unprocessed_transcript_uris.append(transcript_uri)
        else:
            unprocessed_transcript_uris = [
                i for i in transcript_uris if i not in processed_transcripts_uris
            ]
        print('Total transcripts', len(unprocessed_transcript_uris))
        # NOTE(review): snapshots the *unfiltered* URI list — looks intentional
        # (audit/recovery); confirm consumers before changing what is dumped.
        with open('transcripts_uris', 'wb') as fp:
            pickle.dump(transcript_uris, fp)
        # Bug fix: guard on the *filtered* list. Previously `transcript_uris`
        # was tested here, so a run in which every transcript had already been
        # processed still attempted an import with an empty list and the
        # 'No transcript to ingest' branch was unreachable in that case.
        if not unprocessed_transcript_uris:
            print('No transcript to ingest')
            conversation_names = []
        else:
            conversation_names = _ImportConversationsFromTranscript(
                unprocessed_transcript_uris, project_id, medium, insights_endpoint,
                api_version, should_redact, agent_id,
                impersonated_service_account, agent_channel, xml_bucket,
                transcript_bucket, transcript_metadata_flag)
    print(
        'Created `{}` conversation IDs: {}'.format(
            len(conversation_names), conversation_names
        )
    )
    if analyze_conv == 'True':
        print('Starting analysis for conversations.')
        _AnalyzeConversations(
            conversation_names,
            insights_endpoint,
            api_version,
            impersonated_service_account,
        )