tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/e2e-genai-langchain.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"outputPrepend"
]
},
"outputs": [],
"source": [
"!pip install \"ray==2.8.1\"\n",
"!pip install \"ray[serve]\" \n",
"!pip install requests \n",
"!pip install transformers \n",
"!pip install langchain\n",
"!pip install torch"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"from langchain.chains import LLMChain\n",
"\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage\n",
"from langchain.llms import OpenAI\n",
"from langchain.chains import LLMChain\n",
"from langchain import PromptTemplate\n",
"\n",
"\n",
"\n",
"template1 = \"\"\"Give me a fact about {topic}. \"\"\"\n",
"template2 = \"Translate to french: {fact}\"\n",
"\n",
"# create the prompt\n",
"prompt = PromptTemplate(\n",
" input_variables=[\"topic\"],\n",
" template=template1,\n",
")\n",
"\n",
"# create the second prompt\n",
"second_prompt = PromptTemplate(\n",
" input_variables=[\"fact\"],\n",
" template=template2,\n",
")\n",
"\n",
"def create_chains (llm):\n",
" # create two chains \n",
" fact_chain = LLMChain(llm=llm, prompt=prompt)\n",
" translate_chain = LLMChain(llm=llm, prompt=second_prompt)\n",
"\n",
" return fact_chain, translate_chain"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import HuggingFacePipeline\n",
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoConfig, pipeline\n",
"\n",
"model_id = 'google/flan-t5-small'\n",
"\n",
"config = AutoConfig.from_pretrained(model_id)\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(model_id, config=config)\n",
"\n",
"_pipeline = pipeline('text2text-generation',\n",
" model=model,\n",
" tokenizer=tokenizer,\n",
" max_length = 512\n",
" )\n",
"\n",
"llm = HuggingFacePipeline(pipeline = _pipeline)\n",
"\n",
"fact_chain, translate_chain = create_chains(llm)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Run the chain specifying only the input variable for the first chain.\n",
"fact = fact_chain.run(\"birds\")\n",
"translation = translate_chain.run(fact)\n",
"print (fact)\n",
"print (translation)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import ray\n",
"\n",
"# initialize ray\n",
"ray.init(\n",
" address=\"ray://ray-cluster-kuberay-head-svc:10001\",\n",
" runtime_env={\n",
" \"pip\": [\n",
" \"transformers>=4.26.0\",\n",
" \"langchain\",\n",
" \"requests\",\n",
" \"torch\"\n",
" ]\n",
" }\n",
")"
]
},
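{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check: assuming ray.init() above connected successfully,\n",
"# list the resources Ray sees on the remote cluster.\n",
"print(ray.cluster_resources())"
]
},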
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from ray import serve\n",
"from starlette.requests import Request\n",
"from langchain.llms import HuggingFacePipeline\n",
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoConfig, pipeline\n",
"\n",
"\n",
"@serve.deployment(ray_actor_options={\"num_gpus\": 1})\n",
"class DeployLLM:\n",
" def __init__(self):\n",
" model_id = 'google/flan-t5-small'\n",
" config = AutoConfig.from_pretrained(model_id)\n",
" tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
" model = AutoModelForSeq2SeqLM.from_pretrained(model_id, config=config)\n",
" _pipeline = pipeline('text2text-generation',\n",
" model=model,\n",
" tokenizer=tokenizer,\n",
" max_length = 512\n",
" )\n",
"\n",
" llm = HuggingFacePipeline(pipeline = _pipeline)\n",
" self.fact_chain, self.translate_chain = create_chains(llm)\n",
"\n",
" def _run_chain(self, text: str):\n",
" fact = self.fact_chain.run(text)\n",
" translation = self.translate_chain.run(fact)\n",
" return fact, translation\n",
"\n",
" async def __call__(self, request: Request):\n",
" # 1. Parse the request\n",
" text = request.query_params[\"text\"]\n",
" # 2. Run the chain\n",
" fact, translation = self._run_chain(text)\n",
" # 3. Return the response\n",
" return [fact, translation]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Bind the model to deployment\n",
"deployment = DeployLLM.bind()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"serve.run(deployment, host=\"0.0.0.0\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"query = \"bunny\"\n",
"response = requests.post(f'http://ray-cluster-kuberay-head-svc:8000/?text={query}')\n",
"print(response.text)"
]
}
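,
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional cleanup: serve.shutdown() tears down the deployed application and\n",
"# ray.shutdown() closes the Ray Client connection opened by ray.init() above.\n",
"serve.shutdown()\n",
"ray.shutdown()"
]
}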
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 4
}