def lambda_handler()

in skills/contextual-embeddings/contextual-rag-lambda-function/lambda_function.py


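# Note: json, logger, S3Adapter, InferenceAdapter, and contextual_retrieval_prompt
# are module-level dependencies of lambda_function.py and are not shown in this excerpt.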
def lambda_handler(event, context):
    logger.debug('input={}'.format(json.dumps(event)))

    s3_adapter = S3Adapter()
    inference_adapter = InferenceAdapter()

    # Extract relevant information from the input event
    input_files = event.get('inputFiles')
    input_bucket = event.get('bucketName')

    if not all([input_files, input_bucket]):
        raise ValueError("Missing required input parameters")

    output_files = []
    for input_file in input_files:

        processed_batches = []
        for batch in input_file.get('contentBatches', []):

            # Get the S3 key for this batch of chunks
            input_key = batch.get('key')

            if not input_key:
                raise ValueError("Missing key in content batch")

            # Read the batch file from S3
            file_content = s3_adapter.read_from_s3(bucket_name=input_bucket, file_name=input_key)
            logger.debug('fileContents={}'.format(file_content.get('fileContents')))

            # Combine all chunks to rebuild the content of the original file.
            # Alternatively, the original file could be read and its text extracted directly.
            original_document_content = ''.join(content.get('contentBody', '') for content in file_content.get('fileContents') if content)

            # Process one chunk at a time; the output mirrors the input schema,
            # with the generated context prepended to each chunk body
            chunked_content = {
                'fileContents': []
            }
            for content in file_content.get('fileContents'):
                content_body = content.get('contentBody', '')
                content_type = content.get('contentType', '')
                content_metadata = content.get('contentMetadata', {})

                # Generate situating context for this chunk by prompting the model
                # with the full document content and the chunk itself
                prompt = contextual_retrieval_prompt.format(doc_content=original_document_content, chunk_content=content_body)
                response_stream = inference_adapter.invoke_model_with_response_stream(prompt)
                chunk_context = ''.join(chunk for chunk in response_stream if chunk)

                # Prepend the generated context to the chunk and add it to the output file content
                chunked_content['fileContents'].append({
                    "contentBody": chunk_context + "\n\n" + content_body,
                    "contentType": content_type,
                    "contentMetadata": content_metadata,
                })

            output_key = f"Output/{input_key}"

            # Write the context-enriched chunks for this batch back to S3
            s3_adapter.write_output_to_s3(input_bucket, output_key, chunked_content)

            # Record the location of the processed batch
            processed_batches.append({ "key": output_key })

        output_files.append({
            "originalFileLocation": input_file.get('originalFileLocation'),
            "fileMetadata": {},
            "contentBatches": processed_batches
        })

    return {
        "outputFiles": output_files
    }
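
For illustration, a minimal local invocation sketch is shown below. The event shape mirrors the fields the handler reads (bucketName, inputFiles, contentBatches, key); the bucket name and object keys are hypothetical placeholders, and actually running it requires the helper adapters plus access to the referenced S3 objects and model.

if __name__ == "__main__":
    # Hypothetical event; bucket and keys are placeholders. originalFileLocation is
    # passed through to the output unchanged, so any value works for illustration.
    sample_event = {
        "bucketName": "example-intermediate-bucket",
        "inputFiles": [
            {
                "originalFileLocation": "s3://example-source-bucket/docs/report.pdf",
                "contentBatches": [
                    {"key": "Input/docs/report.pdf/batch-0.json"}
                ]
            }
        ]
    }
    print(json.dumps(lambda_handler(sample_event, None), indent=2))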