supporting-blog-content/homecraft-vertex/pages/homecraft_finetuned.py

import os

import streamlit as st
from elasticsearch import Elasticsearch
import vertexai

# WATCHOUT! For the fine-tuning feature you must import from vertexai.preview instead of plain vertexai
from vertexai.preview.language_models import TextGenerationModel

# This page shows the integration with a fine-tuned text-bison model via Vertex AI.
# The code is presented for demo purposes and should not be used in production:
# you may encounter exceptions which are not handled.

# Required environment variables:
# gcp_project_id - Google Cloud project ID
# cloud_id - Elastic Cloud deployment ID
# cloud_user - Elasticsearch cluster user
# cloud_pass - Elasticsearch user password
projid = os.environ["gcp_project_id"]  # read for reference; the project is set explicitly in vertexai.init() below
cid = os.environ["cloud_id"]
cp = os.environ["cloud_pass"]
cu = os.environ["cloud_user"]

parameters = {"temperature": 0.5, "max_output_tokens": 606, "top_p": 0.8, "top_k": 40}

vertexai.init(project="1059491012611", location="us-central1")

# Reference our custom fine-tuned model instead of the base text-bison model
model = TextGenerationModel.from_pretrained("text-bison@001")
model = model.get_tuned_model(
    "projects/1059491012611/locations/us-central1/models/5745671733780676608"
)


# Connect to the Elastic Cloud cluster
def es_connect(cid, user, passwd):
    es = Elasticsearch(cloud_id=cid, http_auth=(user, passwd))
    return es


# Search the Elasticsearch product index and return details on relevant products
def search_products(query_text):
    # Elasticsearch query (BM25) and kNN configuration for hybrid search
    query = {
        "bool": {
            "must": [{"match": {"title": {"query": query_text, "boost": 1}}}],
            "filter": [{"exists": {"field": "title-vector"}}],
        }
    }

    knn = {
        "field": "title-vector",
        "k": 1,
        "num_candidates": 20,
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "sentence-transformers__all-distilroberta-v1",
                "model_text": query_text,
            }
        },
        "boost": 24,  # weight the kNN (semantic) score more heavily than BM25
    }

    fields = [
        "title",
        "description",
        "url",
        "availability",
        "price",
        "brand",
        "product_id",
    ]

    index = "home-depot-product-catalog-vector"
    resp = es.search(
        index=index, query=query, knn=knn, fields=fields, size=5, source=False
    )

    doc_list = resp["hits"]["hits"]
    body = resp["hits"]["hits"]
    url = ""
    for doc in doc_list:
        # body = body + doc['fields']['description'][0]
        url = url + "\n\n" + doc["fields"]["url"][0]

    return body, url


# Search the Elasticsearch index of crawled docs and return body and URL
def search_docs(query_text):
    # Elasticsearch query (BM25) and kNN configuration for hybrid search
    query = {
        "bool": {
            "must": [{"match": {"title": {"query": query_text, "boost": 1}}}],
            "filter": [{"exists": {"field": "title-vector"}}],
        }
    }

    knn = {
        "field": "title-vector",
        "k": 1,
        "num_candidates": 20,
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "sentence-transformers__all-distilroberta-v1",
                "model_text": query_text,
            }
        },
        "boost": 24,
    }

    fields = ["title", "body_content", "url"]
    index = "search-homecraft-ikea"
    resp = es.search(
        index=index, query=query, knn=knn, fields=fields, size=1, source=False
    )

    body = resp["hits"]["hits"][0]["fields"]["body_content"][0]
    url = resp["hits"]["hits"][0]["fields"]["url"][0]

    return body, url


# Trim a text to at most max_tokens whitespace-separated tokens
def truncate_text(text, max_tokens):
    tokens = text.split()
    if len(tokens) <= max_tokens:
        return text
    return " ".join(tokens[:max_tokens])


# Generate a response from the fine-tuned Vertex AI model based on the given prompt
def vertexAI(prompt):
    # Truncate the prompt content to fit within the model's context length
    # truncated_prompt = truncate_text(prompt, max_context_tokens - max_tokens - safety_margin)
    response = model.predict(prompt, **parameters)
    return response.text


# image = Image.open('homecraft_logo.jpg')
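
# ---------------------------------------------------------------------------
# Optional sketch (not part of the original page): a minimal way to exercise
# the hybrid search helpers above outside Streamlit, assuming the same Elastic
# Cloud environment variables are set. The HOMECRAFT_DEBUG_SEARCH flag and the
# sample query are illustrative only, and the block is guarded so it never
# runs when the page is served normally.
if os.environ.get("HOMECRAFT_DEBUG_SEARCH"):
    es = es_connect(cid, cu, cp)  # the search helpers rely on this module-level client
    sample_body, sample_url = search_docs("what is the return policy?")
    print(sample_url)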
st.image("https://i.imgur.com/cdjafe0.png", caption=None)
st.title("HomeCraft Search Bar")

# Main chat form
with st.form("chat_form"):
    query = st.text_input("You: ")
    submit_button = st.form_submit_button("Send")

# Expected refusal phrase, used to detect when the model cannot answer
negResponse = "I'm unable to answer the question based on the information I have from Homecraft dataset."

# Generate and display the response on form submission
if submit_button:
    es = es_connect(cid, cu, cp)
    # Retrieval results are fetched here but not injected into the prompt:
    # this page relies on the fine-tuned model to answer from the question alone
    resp_products, url_products = search_products(query)
    resp_docs, url_docs = search_docs(query)
    prompt = f"question: {query}"
    answer = vertexAI(prompt)

    if negResponse in answer:
        st.write(f"Search Assistant: \n\n{answer.strip()}")
    else:
        st.write(f"Search Assistant: {answer.strip()}\n\n")
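

# ---------------------------------------------------------------------------
# Hedged sketch (assumption, not wired into the page above): resp_docs and
# resp_products are retrieved but never passed to the fine-tuned model, which
# answers from the question alone. If you wanted to additionally ground the
# answer in the retrieved content (a common RAG pattern), a prompt along these
# lines could be built and passed to vertexAI() instead. The function name and
# the 2000-token budget are illustrative, not part of the original app.
def build_grounded_prompt(question, context, max_context_tokens=2000):
    # Keep the retrieved context within a rough token budget
    context = truncate_text(context, max_context_tokens)
    return (
        "Answer this question using only the provided context. "
        f"If the answer is not in the context, reply: {negResponse}\n\n"
        f"Context: {context}\n\n"
        f"Question: {question}"
    )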