in chunking/chunkers/transcription_chunker.py [0:0]
def get_chunks(self):
    """Chunk a VTT transcription and attach an LLM-generated summary to each chunk.

    Extracts the plain text from the VTT file, requests a single summary of
    the whole transcript from the AOAI client, then splits the text into
    chunks. Chunks whose token estimate exceeds ``self.max_chunk_size`` are
    truncated to fit. The one shared summary is used both as the embedding
    text and as the ``summary`` field of every chunk.

    Returns:
        list[dict]: One chunk dict per text chunk, as built by
        ``self._create_chunk``; chunk ids are 1-based.
    """
    logging.info("[transcription_chunker][%s] Running get_chunks.", self.filename)

    # Extract the raw text from the vtt file.
    text = self._vtt_process()
    # Lazy %-style args: the preview slice is only formatted if DEBUG is enabled.
    logging.debug(
        "[transcription_chunker][%s] transcription text: %s", self.filename, text[:100]
    )

    # Summarize the whole transcript once; the summary is shared by all chunks.
    prompt = f"Provide clearly elaborated summary along with the keypoints and values mentioned for the transcript of a conversation: {text} "
    summary = self.aoai_client.get_completion(prompt)

    chunks = []
    # enumerate(..., start=1) replaces the manual chunk_id counter.
    for chunk_id, text_chunk in enumerate(self._chunk_document_content(text), start=1):
        chunk_size = self.token_estimator.estimate_tokens(text_chunk)
        if chunk_size > self.max_chunk_size:
            logging.debug(
                "[transcription_chunker][%s] truncating %s size chunk to fit within %s tokens",
                self.filename,
                chunk_size,
                self.max_chunk_size,
            )
            text_chunk = self._truncate_chunk(text_chunk)
        chunks.append(
            self._create_chunk(
                chunk_id=chunk_id,
                content=text_chunk,
                embedding_text=summary,
                summary=summary,
            )
        )
    return chunks