skills/contextual-embeddings/contextual-rag-lambda-function/inference_adapter.py (40 lines of code) (raw):

import json
import os

import boto3
from botocore.exceptions import ClientError


class InferenceAdapter:
    """Thin wrapper around the Bedrock Runtime client for streaming
    Anthropic Claude completions.

    Yields text deltas as they arrive so callers can render partial
    output (e.g. for contextual-chunk generation in a RAG pipeline).
    """

    def __init__(self):
        # Resolve the region from the Lambda runtime environment
        # (AWS_REGION is always set in Lambda); fall back to us-east-1
        # for local runs. Previously hard-coded despite `import os`.
        self.bedrock_runtime = boto3.client(
            service_name='bedrock-runtime',
            region_name=os.environ.get('AWS_REGION', 'us-east-1'),
        )
        self.model_id = 'anthropic.claude-3-haiku-20240307-v1:0'

    def invoke_model_with_response_stream(self, prompt, max_tokens=1000):
        """Stream a Claude completion for *prompt*.

        Args:
            prompt: User-turn content sent as a single message.
            max_tokens: Generation cap passed through to the model.

        Yields:
            str: Incremental text deltas from the model. The generator
            ends when the model reports a stop_reason, the stream is
            exhausted, or a ClientError occurs (logged, not raised).
        """
        request_body = json.dumps({
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": max_tokens,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            # Deterministic output is desirable for contextual embeddings.
            "temperature": 0.0,
        })

        try:
            response = self.bedrock_runtime.invoke_model_with_response_stream(
                modelId=self.model_id,
                contentType='application/json',
                accept='application/json',
                body=request_body
            )
            # Guard against an unexpectedly absent body.
            for event in response.get('body') or []:
                chunk = json.loads(event['chunk']['bytes'].decode())
                if chunk['type'] == 'content_block_delta':
                    yield chunk['delta']['text']
                elif chunk['type'] == 'message_delta':
                    # Model signaled it is done generating.
                    if 'stop_reason' in chunk['delta']:
                        break
        except ClientError as e:
            # Log and end the stream cleanly. The original code yielded
            # None here, which breaks consumers that join/concatenate
            # the yielded text (TypeError on a non-str item).
            print(f"An error occurred: {e}")