def input_fn()

in notebooks/src/code/inference.py [0:0]


def _parse_request_s3_spec(raw_spec, field_name: str):
    """Build an S3ObjectSpec from a request field, re-raising with a request-level message

    Args:
        raw_spec: The raw (already JSON-decoded) value of the request field.
        field_name: Name of the request field, used only in the error message.

    Raises:
        ValueError: If S3ObjectSpec rejects `raw_spec` with a ValueError.
    """
    try:
        return S3ObjectSpec(raw_spec)
    except ValueError as e:
        raise ValueError(
            f"Invalid Request.{field_name}: If provided, must be an object with 'URI' or 'Bucket' "
            "and 'Key'"
        ) from e


def input_fn(input_bytes, content_type: str):
    """Deserialize and pre-process model request JSON

    Requests must be of type application/json. See module-level docstring for API details.

    The document is taken from (in priority order): the S3 object named by a non-empty
    'S3Input' field, the 'Content' field, or otherwise the request root object itself. In the
    last case the control fields ('S3Input', 'S3Output', 'TargetPageNum', 'TargetPageOnly') are
    stripped from the document, whatever their values.

    Args:
        input_bytes: Raw request body (anything `json.loads` accepts).
        content_type: Request content type; must be exactly "application/json".

    Returns:
        dict with keys:
            "doc_json": The decoded document.
            "page_num": Value of 'TargetPageNum', or None if absent.
            "s3_output": An S3ObjectSpec built from 'S3Output' if that field is non-empty,
                otherwise the raw (empty/None) value.
            "target_page_only": Value of 'TargetPageOnly', or None if absent.

    Raises:
        ValueError: If the content type is not application/json, the body is not valid JSON
            (json.JSONDecodeError is a ValueError subclass), the body is not a JSON object, or
            'S3Input'/'S3Output' are rejected by S3ObjectSpec.
    """
    logger.info("Received request of type:%s", content_type)
    if content_type != "application/json":
        raise ValueError("Content type must be application/json")

    req_json = json.loads(input_bytes)
    if not isinstance(req_json, dict):
        # Fail with a clear validation error instead of an AttributeError on `.get` below.
        raise ValueError("Request body must be a JSON object")

    s3_input = req_json.get("S3Input")
    if s3_input:
        s3_input = _parse_request_s3_spec(s3_input, "S3Input")
        logger.info("Fetching S3Input from s3://%s/%s", s3_input.bucket, s3_input.key)
        doc_json = json.loads(
            s3client.get_object(Bucket=s3_input.bucket, Key=s3_input.key)["Body"].read()
        )
        req_root_is_doc = False
    elif "Content" in req_json:
        doc_json = req_json["Content"]
        req_root_is_doc = False
    else:
        # No explicit document location: the request object itself is the document.
        doc_json = req_json
        req_root_is_doc = True

    # Read every control field *before* stripping, since doc_json may be req_json itself.
    s3_output = req_json.get("S3Output")
    if s3_output:
        s3_output = _parse_request_s3_spec(s3_output, "S3Output")
    page_num = req_json.get("TargetPageNum")
    target_page_only = req_json.get("TargetPageOnly")

    if req_root_is_doc:
        # Remove control fields even when they were sent as null/empty, so they never leak into
        # the document handed on for inference.
        for control_key in ("S3Input", "S3Output", "TargetPageNum", "TargetPageOnly"):
            doc_json.pop(control_key, None)

    return {
        "doc_json": doc_json,
        "page_num": page_num,
        "s3_output": s3_output,
        "target_page_only": target_page_only,
    }