in infra-as-code/modules/ingest-pipeline/cf-transcript-correction/lib.py [0:0]
def __init__(
    self,
    project_id,
    location_id,
    model_name,
    transcript_bucket_id,
    transcript_file_name,
    formatted_audio_file_name,
    formatted_audio_bucket_id,
    ingest_record_bucket_id,
    original_file_name,
    client_specific_constraints,
    client_specific_context,
    few_shot_examples
):
    """Load the transcript and prepare the correction prompt and schema.

    Construction performs I/O: it obtains credentials through
    ``self.get_credentials()``, creates a ``storage.Client`` and downloads
    the transcript through ``self.download_from_gcs`` before the prompt
    is built. Any exception raised by those calls propagates to the caller.

    Args:
        project_id: Project passed to ``storage.Client``.
        location_id: Stored on the instance; not used in the constructor.
        model_name: Stored on the instance; not used in the constructor.
        transcript_bucket_id: Bucket handed to ``download_from_gcs``.
        transcript_file_name: Object name handed to ``download_from_gcs``;
            also printed in the start-up log line.
        formatted_audio_file_name: Stored on the instance; not used in the
            constructor.
        formatted_audio_bucket_id: Stored on the instance; not used in the
            constructor.
        ingest_record_bucket_id: Forwarded to ``RecordKeeper``.
        original_file_name: Forwarded to ``RecordKeeper`` and recorded under
            ``event_dict['original_file_name']``.
        client_specific_constraints: Interpolated into the ``<CONSTRAINTS>``
            section of the prompt.
        client_specific_context: Interpolated into the ``<CONTEXT>`` section
            of the prompt.
        few_shot_examples: Interpolated into the ``<FEW_SHOT_EXAMPLES>``
            section of the prompt.
    """
    self.project_id = project_id
    self.location_id = location_id
    self.model_name = model_name
    self.formatted_audio_bucket_id = formatted_audio_bucket_id
    self.formatted_audio_file_name = formatted_audio_file_name
    self.transcript_bucket_id = transcript_bucket_id
    self.transcript_file_name = transcript_file_name
    # Starts empty; presumably filled with the model output later -- confirm
    # against the rest of the class.
    self.gemini_transcript = ""

    creds = self.get_credentials()
    self.storage_client = storage.Client(project=self.project_id, credentials=creds)
    self.original_transcript = self.download_from_gcs(self.transcript_bucket_id, self.transcript_file_name)
    self.transcript = self.extract_transcripts(self.original_transcript)
    self.record_keeper = RecordKeeper(ingest_record_bucket_id, original_file_name, self.storage_client)

    # Nothing above creates ``event_dict`` on this instance. Give the
    # instance its own dict (seeded from a class-level value when one
    # exists) so the write below cannot raise AttributeError and cannot
    # mutate state shared between instances.
    if 'event_dict' not in vars(self):
        self.event_dict = dict(getattr(self, 'event_dict', None) or {})
    self.event_dict['original_file_name'] = original_file_name

    self.client_specific_constraints = client_specific_constraints
    self.client_specific_context = client_specific_context
    self.few_shot_examples = few_shot_examples

    # NOTE(review): the instruction list below numbers two steps "3.".
    # The prompt text is runtime behaviour, so it is left untouched here.
    self.prompt = f"""
<OBJECTIVE_AND_PERSONA>
You are an expert audio transcription editor. Your primary goal is to correct errors in transcripts while preserving the original JSON format. You have a strong understanding of the provided terminology and are familiar with common transcription challenges.
</OBJECTIVE_AND_PERSONA>
<INSTRUCTIONS>
You will receive an audio file and its corresponding transcript in JSON format. Your job is to:
1. Carefully listen to the entire audio file.
2. Review the entire transcript provided.
3. Compare, Identify and correct any discrepancies between the audio and the transcript. Pay close attention to:
* **Key Terms:** Ensure accuracy in transcribing all key terms, names, and phrases specific to the client.
* **Speaker Misattribution:** Correctly identify and label different speakers.
* **General Errors:** Fix misspellings, grammatical errors, and any other inaccuracies.
* **Keep the fillers:** Keep any fillers used. Example: "Mhm"
3. Preserve the original JSON structure, including all key-value pairs, nesting, and formatting. Only the text content within the transcript should be modified, keep the same amount of objects in the input transcript.
</INSTRUCTIONS>
<CONSTRAINTS>
Dos and don'ts for the following aspects:
{self.client_specific_constraints}
</CONSTRAINTS>
<CONTEXT>
{self.client_specific_context}
</CONTEXT>
<FEW_SHOT_EXAMPLES>
Example of transcripts with correct terminology:
{self.few_shot_examples}
</FEW_SHOT_EXAMPLES>
<INPUTS>
Transcript: {self.transcript}
</INPUTS>
<OUTPUTS>
Return the transcript with the correct response_schema
</OUTPUTS>
Remember that before you answer, you must check to see if the answer complies with your mission. If not, you must respond, "I am not able to answer this question"
"""

    # Schema for the structured response: an array of objects, each
    # requiring an integer index, the transcript text and an integer
    # channel tag.
    self.response_schema = {
        "type": "array",
        "items": {
            "type": "object",
            "properties": {
                "index": {
                    "type": "integer",
                    "description": "Index position of the transcript"
                },
                "transcript": {
                    "type": "string",
                    "description": "The transcript text"
                },
                "channelTag": {
                    "type": "integer",
                    "description": "Channel identifier"
                }
            },
            "required": ["index", "transcript", "channelTag"]
        }
    }

    print(f'Starting transcript fix on: {self.transcript_file_name}')
    # NOTE(review): the prompt embeds the extracted transcript, so this
    # line writes transcript content to the logs.
    print(f'Using prompt: {self.prompt}')