def invoke_model_with_response_stream()

in skills/contextual-embeddings/contextual-rag-lambda-function/inference_adapter.py


    def invoke_model_with_response_stream(self, prompt, max_tokens=1000):
        """Stream a completion from the model, yielding text chunks as they arrive."""
        # Build the request body in the Anthropic Messages API format expected by Bedrock.
        request_body = json.dumps({
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": max_tokens,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "temperature": 0.0,
        })

        # Invoke the model with a streaming response
        try:
            response = self.bedrock_runtime.invoke_model_with_response_stream(
                modelId=self.model_id,
                contentType='application/json',
                accept='application/json',
                body=request_body
            )

            # Each event in the stream carries a JSON chunk. Yield the incremental
            # text deltas, and stop once the model reports a stop_reason.
            for event in response.get('body'):
                chunk = json.loads(event['chunk']['bytes'].decode())
                if chunk['type'] == 'content_block_delta':
                    yield chunk['delta']['text']
                elif chunk['type'] == 'message_delta':
                    if 'stop_reason' in chunk['delta']:
                        break

        except ClientError as e:
            print(f"An error occurred: {e}")
            # Yield None so the consumer can detect the failure mid-stream.
            yield None
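
For illustration, here is a minimal sketch of the scaffolding this method assumes: a class holding a `bedrock-runtime` boto3 client and a `model_id`, plus module-level imports of `json` and `ClientError`. The `InferenceAdapter` class name, the region, and the model ID below are assumptions for the example, not taken from the source file; pasting the method above into the class body makes the sketch runnable.

    # Hypothetical scaffold and usage for the streaming method above.
    import json

    import boto3
    from botocore.exceptions import ClientError


    class InferenceAdapter:
        def __init__(self, model_id, region_name='us-east-1'):
            self.model_id = model_id
            # Client for the Bedrock runtime API, which exposes
            # invoke_model_with_response_stream.
            self.bedrock_runtime = boto3.client('bedrock-runtime',
                                                region_name=region_name)

        # ... invoke_model_with_response_stream defined as shown above ...


    adapter = InferenceAdapter('anthropic.claude-3-haiku-20240307-v1:0')
    for text in adapter.invoke_model_with_response_stream('Summarize this document.'):
        if text is None:
            break  # the generator yields None after a ClientError
        print(text, end='', flush=True)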