def chat()

in 3_llmops-aifoundry/3_2_prototyping/chat-context/phi35_finetuned.py [0:0]


def chat(question: str, context: str, connection: CustomConnection) -> str:
    """Ask the model endpoint behind ``connection`` to answer ``question``.

    The instructions, the supporting ``context`` and the ``question`` are all
    sent as ``user`` messages in a single JSON POST request.

    Args:
        question: The question to answer.
        context: Reference text the model is told to use when replying.
        connection: Connection object exposing ``endpoint`` (the URL to POST
            to) and ``key`` (the bearer token for that endpoint).

    Returns:
        The value stored under ``"result"`` in the endpoint's JSON response.
        When the request fails (network error, timeout, HTTP error status or
        a body that is not JSON) the failure is printed and ``None`` is
        returned instead, despite the ``str`` annotation.

    Raises:
        ValueError: If ``connection.key`` is empty.
        KeyError: If the JSON response has no ``"result"`` entry.
    """
    # Upper bound (seconds) so a stalled endpoint cannot block the caller
    # forever; requests waits indefinitely when no timeout is given.
    request_timeout_seconds = 120

    # Use the primary/secondary key, AMLToken, or Microsoft Entra ID token for
    # the endpoint. Checked first so that we fail before doing any other work.
    api_key = connection.key
    if not api_key:
        raise ValueError("A key should be provided to invoke the endpoint")

    # More information can be found here:
    # https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-phi-3?WT.mc_id=aiml-137032-kinfeylo&tabs=phi-3-5&pivots=programming-language-rest
    # Phi-3.5-Mini-Instruct, Phi-3.5-MoE-Instruct, Phi-3-mini-4k-Instruct,
    # Phi-3-mini-128k-Instruct, Phi-3-small-8k-Instruct,
    # Phi-3-small-128k-Instruct and Phi-3-medium-128k-Instruct don't support
    # system messages (role="system"). When you use the Azure AI model
    # inference API, system messages are translated to user messages, which
    # is the closest capability available. That is why the instructions below
    # are sent with role="user". Verify that the model actually follows them.
    # The maximum value for "max_new_tokens" is 4096.
    data = {
        "input_data": [
            {"role": "user", "content": "You are an AI assistant who helps people find information. As the assistant, you answer questions not long, simple, short. "},
            {"role": "user", "content": "Use the following context to reply to the customer:"},
            {"role": "user", "content": context},
            {"role": "user", "content": question},
        ],
        "params": {
            "temperature": 0.2,
            "max_new_tokens": 256,
            "do_sample": True,
            "return_full_text": False,
        },
    }

    headers = {'Content-Type': 'application/json', 'Authorization': ('Bearer ' + api_key)}

    try:
        # json=data lets requests serialize the payload; no manual encoding.
        response = requests.post(
            connection.endpoint,
            json=data,
            headers=headers,
            timeout=request_timeout_seconds,
        )
        response.raise_for_status()
        return response.json()['result']
    except requests.exceptions.HTTPError as error:
        failed_response = error.response
        # Compare against None explicitly: a Response with an error status
        # is falsy, so a plain truthiness test would skip the diagnostics.
        if failed_response is None:
            print(error)
        else:
            print("The request failed with status code: " + str(failed_response.status_code))

            # Print the headers - they include the request ID and the
            # timestamp, which are useful for debugging the failure.
            print(failed_response.headers)
            print(failed_response.text)
    except requests.exceptions.RequestException as error:
        # Connection problems, timeouts, invalid JSON bodies, ...
        print(error)

    # Best effort: failures are reported above and signalled with None.
    return None