tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/e2e-genai-langchain.ipynb (223 lines of code) (raw):

{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "tags": [ "outputPrepend" ] }, "outputs": [], "source": [ "!pip install \"ray==2.8.1\"\n", "!pip install \"ray[serve]\" \n", "!pip install requests \n", "!pip install transformers \n", "!pip install langchain\n", "!pip install torch" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from langchain.prompts import PromptTemplate\n", "from langchain.chains import LLMChain\n", "\n", "from langchain.prompts.chat import (\n", " ChatPromptTemplate,\n", " SystemMessagePromptTemplate,\n", " AIMessagePromptTemplate,\n", " HumanMessagePromptTemplate,\n", ")\n", "from langchain.schema import AIMessage, HumanMessage, SystemMessage\n", "from langchain.llms import OpenAI\n", "from langchain.chains import LLMChain\n", "from langchain import PromptTemplate\n", "\n", "\n", "\n", "template1 = \"\"\"Give me a fact about {topic}. \"\"\"\n", "template2 = \"Translate to french: {fact}\"\n", "\n", "# create the prompt\n", "prompt = PromptTemplate(\n", " input_variables=[\"topic\"],\n", " template=template1,\n", ")\n", "\n", "# create the second prompt\n", "second_prompt = PromptTemplate(\n", " input_variables=[\"fact\"],\n", " template=template2,\n", ")\n", "\n", "def create_chains (llm):\n", " # create two chains \n", " fact_chain = LLMChain(llm=llm, prompt=prompt)\n", " translate_chain = LLMChain(llm=llm, prompt=second_prompt)\n", "\n", " return fact_chain, translate_chain" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from langchain.llms import HuggingFacePipeline\n", "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoConfig, pipeline\n", "\n", "model_id = 'google/flan-t5-small'\n", "\n", "config = AutoConfig.from_pretrained(model_id)\n", "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", "model = AutoModelForSeq2SeqLM.from_pretrained(model_id, config=config)\n", "\n", "_pipeline = pipeline('text2text-generation',\n", " model=model,\n", " tokenizer=tokenizer,\n", " max_length = 512\n", " )\n", "\n", "llm = HuggingFacePipeline(pipeline = _pipeline)\n", "\n", "fact_chain, translate_chain = create_chains(llm)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Run the chain specifying only the input variable for the first chain.\n", "fact = fact_chain.run(\"birds\")\n", "translation = translate_chain.run(fact)\n", "print (fact)\n", "print (translation)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import ray\n", "\n", "# initialize ray\n", "ray.init(\n", " address=\"ray://ray-cluster-kuberay-head-svc:10001\",\n", " runtime_env={\n", " \"pip\": [\n", " \"transformers>=4.26.0\",\n", " \"langchain\",\n", " \"requests\",\n", " \"torch\"\n", " ]\n", " }\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from ray import serve\n", "from starlette.requests import Request\n", "from langchain.llms import HuggingFacePipeline\n", "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoConfig, pipeline\n", "\n", "\n", "@serve.deployment(ray_actor_options={\"num_gpus\": 1})\n", "class DeployLLM:\n", " def __init__(self):\n", " model_id = 'google/flan-t5-small'\n", " config = AutoConfig.from_pretrained(model_id)\n", " tokenizer = AutoTokenizer.from_pretrained(model_id)\n", " model = AutoModelForSeq2SeqLM.from_pretrained(model_id, config=config)\n", " _pipeline = 
# %% Define the Ray Serve deployment that wraps both chains
from ray import serve
from starlette.requests import Request
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoConfig, pipeline


@serve.deployment(ray_actor_options={"num_gpus": 1})
class DeployLLM:
    def __init__(self):
        model_id = "google/flan-t5-small"
        config = AutoConfig.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_id, config=config)
        _pipeline = pipeline(
            "text2text-generation",
            model=model,
            tokenizer=tokenizer,
            max_length=512,
        )
        llm = HuggingFacePipeline(pipeline=_pipeline)
        # create_chains (and the prompts it closes over) are serialized to
        # the cluster together with this class.
        self.fact_chain, self.translate_chain = create_chains(llm)

    def _run_chain(self, text: str):
        fact = self.fact_chain.run(text)
        translation = self.translate_chain.run(fact)
        return fact, translation

    async def __call__(self, request: Request):
        # 1. Parse the request
        text = request.query_params["text"]
        # 2. Run the chain
        fact, translation = self._run_chain(text)
        # 3. Return the response
        return [fact, translation]

# %% Bind the model to the deployment (builds the graph; nothing runs yet)
deployment = DeployLLM.bind()

# %% Deploy to the cluster and expose it over HTTP
serve.run(deployment, host="0.0.0.0")

# %% Query the deployed endpoint
import requests

query = "bunny"
response = requests.post(f"http://ray-cluster-kuberay-head-svc:8000/?text={query}")
print(response.text)

# Notebook metadata: Python 3 (ipykernel), Python 3.10.11; nbformat 4.4.
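# %% (Optional) Tear down when finished — a minimal sketch, not part of the
# original notebook. serve.shutdown() removes the Serve application from the
# cluster, and ray.shutdown() disconnects this client session; both are
# standard Ray APIs.
serve.shutdown()
ray.shutdown()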