# ideas/asst-v2-py/search-files.py
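"""Search uploaded files with an OpenAI Assistants v2 file_search assistant.

Expands the command-line arguments as glob patterns, uploads the matching
files to a vector store, attaches the store to an assistant, then asks the
assistant a question about the uploaded documents and streams the answer.

Works against either the OpenAI platform (OPENAI_API_KEY, OPENAI_MODEL_NAME)
or Azure OpenAI (the AZURE_OPENAI_* variables); ASSISTANT_ID,
VECTOR_STORE_ID, VECTOR_STORE_NAME, and DEBUG are optional.
"""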

import glob
import logging
import os
import sys

from openai import AssistantEventHandler, OpenAI
from openai.types.beta.threads import Text, TextDelta
from typing_extensions import override

message = 'using only the uploaded documents, how do you create an assistant using the OpenAI API?'
default_vector_store_name = 'my_vector_store'

ASSISTANT_ID = os.getenv('ASSISTANT_ID', None)
VECTOR_STORE_ID = os.getenv('VECTOR_STORE_ID', None)
VECTOR_STORE_NAME = os.getenv('VECTOR_STORE_NAME', default_vector_store_name)
DEBUG = os.getenv('DEBUG', None)

if len(sys.argv) < 2:
    print("Usage: python3 search-files.py FILE1 [FILE2 ...]")
    print(" OR:   python3 search-files.py \"**/*.txt\"")
    sys.exit(1)

# DEBUG=file:PATH logs to PATH; any other truthy value logs to stderr
if DEBUG:
    if DEBUG.startswith('file:'):
        logging.basicConfig(filename=DEBUG[5:], level=logging.DEBUG,
                            format='%(asctime)s - %(levelname)s - %(message)s')
    else:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s - %(levelname)s - %(message)s')

# Expand each argument as a glob pattern; '**' patterns search recursively
files = []
for pattern in sys.argv[1:]:
    files += glob.glob(pattern, recursive=True) if "**" in pattern else glob.glob(pattern)

if not files:
    print("No files found.")
    sys.exit(0)

print(f'Found {len(files)} file(s):\n')
for file in files:
    print(f'  {file}')

#-----------------------
# NOTE: Never deploy your API key in client-side environments like browsers or mobile apps
# SEE: https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety

# Get the required environment variables, and form the base URL for the Azure OpenAI Assistants API
AZURE_OPENAI_API_KEY = os.getenv('AZURE_OPENAI_API_KEY', '<insert your Azure OpenAI API key here>')
AZURE_OPENAI_API_VERSION = os.getenv('AZURE_OPENAI_API_VERSION', '<insert your Azure OpenAI API version here>')
AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT', '<insert your Azure OpenAI endpoint here>')
AZURE_OPENAI_CHAT_DEPLOYMENT = os.getenv('AZURE_OPENAI_CHAT_DEPLOYMENT', '<insert your Azure OpenAI chat deployment here>')
AZURE_OPENAI_BASE_URL = f'{AZURE_OPENAI_ENDPOINT.rstrip("/")}/openai'

# Get the required environment variables for the OpenAI Platform API
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '<insert your OpenAI API key here>')
OPENAI_MODEL_NAME = os.getenv('OPENAI_MODEL_NAME', '<insert your OpenAI model name here>')
OPENAI_ORG_ID = os.getenv('OPENAI_ORG_ID', None)

# Check if the required environment variables are set
azureOk = \
    AZURE_OPENAI_API_KEY is not None and not AZURE_OPENAI_API_KEY.startswith('<insert') and \
    AZURE_OPENAI_API_VERSION is not None and not AZURE_OPENAI_API_VERSION.startswith('<insert') and \
    AZURE_OPENAI_CHAT_DEPLOYMENT is not None and not AZURE_OPENAI_CHAT_DEPLOYMENT.startswith('<insert') and \
    AZURE_OPENAI_ENDPOINT is not None and not AZURE_OPENAI_ENDPOINT.startswith('<insert')
oaiOk = \
    OPENAI_API_KEY is not None and not OPENAI_API_KEY.startswith('<insert') and \
    OPENAI_MODEL_NAME is not None and not OPENAI_MODEL_NAME.startswith('<insert')
ok = azureOk or oaiOk

# Print setup help for both providers, then exit
if not ok:
    print('To use OpenAI, set the following environment variables:\n' +
          '\n  ASSISTANT_ID' +
          '\n  OPENAI_API_KEY' +
          '\n  OPENAI_MODEL_NAME' +
          '\n  OPENAI_ORG_ID (optional)' +
          '\n  VECTOR_STORE_ID (optional)')
    print('\nYou can easily obtain some of these values by visiting these links:\n' +
          '\n  https://platform.openai.com/api-keys' +
          '\n  https://platform.openai.com/settings/organization/general' +
          '\n  https://platform.openai.com/playground/assistants' +
          '\n' +
          '\n Then, do one of the following:\n' +
          '\n  ai dev shell' +
          '\n  python main.py' +
          '\n' +
          '\n  or' +
          '\n' +
          '\n  ai dev shell --run "python main.py"')
    print('\nTo use Azure OpenAI, set the following environment variables:\n' +
          '\n  ASSISTANT_ID' +
          '\n  AZURE_OPENAI_API_KEY' +
          '\n  AZURE_OPENAI_API_VERSION' +
          '\n  AZURE_OPENAI_CHAT_DEPLOYMENT' +
          '\n  AZURE_OPENAI_ENDPOINT')
    print('\nYou can easily do that using the Azure AI CLI by doing one of the following:\n' +
          '\n  ai init' +
          '\n  ai dev shell' +
          '\n  python main.py' +
          '\n' +
          '\n  or' +
          '\n' +
          '\n  ai init' +
          '\n  ai dev shell --run "python main.py"')
    os._exit(1)
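# Both providers are driven through the same OpenAI client below. For Azure,
# the endpoint becomes the client's base_url, the API version is added as an
# 'api-version' query parameter, and the key travels in an 'api-key' request
# header, which is the shape the Azure OpenAI REST endpoints expect.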
# Create the OpenAI client
if azureOk:
    print('\nUsing Azure OpenAI (w/ API Key)...')
    client = OpenAI(
        api_key=AZURE_OPENAI_API_KEY,
        base_url=AZURE_OPENAI_BASE_URL,
        default_query={'api-version': AZURE_OPENAI_API_VERSION},
        default_headers={'api-key': AZURE_OPENAI_API_KEY}
    )
else:
    print('\nUsing OpenAI...')
    client = OpenAI(
        api_key=OPENAI_API_KEY,
        organization=OPENAI_ORG_ID
    )

# --- Get or create Assistant ---
if ASSISTANT_ID is None:
    print('\nCreating assistant...')
    assistant = client.beta.assistants.create(
        instructions="You are a search assistant. You search documents that have been previously uploaded.",
        model=AZURE_OPENAI_CHAT_DEPLOYMENT if azureOk else OPENAI_MODEL_NAME,
        tools=[{"type": "file_search"}]
    )
else:
    print('\nRetrieving assistant...')
    assistant = client.beta.assistants.retrieve(ASSISTANT_ID)

assistant_id = assistant.id
print('------------------')
print(f'Assistant: {assistant.id}')
print(f'Name: {assistant.name}')
print(f'Model: {assistant.model}')
if assistant.tools:
    print(f'Tools: {assistant.tools}')
print(f'\nInstructions:\n{assistant.instructions}')
print('------------------\n')

# --- Get or create Vector Store ---
# tool_resources is a typed object in the v1 SDK, so check for an attached
# vector store with attribute access rather than dict lookups
if VECTOR_STORE_ID is None:
    has_vector_store = assistant.tool_resources is not None and \
        assistant.tool_resources.file_search is not None and \
        bool(assistant.tool_resources.file_search.vector_store_ids)
    if has_vector_store:
        print('\nRetrieving vector store...')
        vector_store = client.beta.vector_stores.retrieve(assistant.tool_resources.file_search.vector_store_ids[0])
    else:
        print('\nCreating vector store...')
        vector_store = client.beta.vector_stores.create(name=VECTOR_STORE_NAME)
else:
    has_vector_store = True
    print('\nRetrieving vector store...')
    vector_store = client.beta.vector_stores.retrieve(VECTOR_STORE_ID)

vector_store_id = vector_store.id
print('------------------')
print(f'Vector Store: {vector_store.id}')
print(f'Name: {vector_store.name}')
print('------------------\n')

# --- Upload files to Vector Store ---
file_streams = [open(path, "rb") for path in files]
if file_streams:
    print('\nUploading files to vector store...')
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id,
        files=file_streams
    )
    for stream in file_streams:
        stream.close()
    print('------------------')
    print(f'File Batch: {file_batch.id}')
    print(f'Status: {file_batch.status}')
    print(f'File(s): {file_batch.file_counts}')

# --- Update the assistant to use the vector store ---
needs_update = not has_vector_store
if needs_update:
    print('\nUpdating assistant...')
    assistant = client.beta.assistants.update(
        assistant_id=assistant.id,
        tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    )
    print('------------------')
    print(f'Assistant: {assistant.id}')
    print(f'Name: {assistant.name}')
    print(f'Model: {assistant.model}')
    if assistant.tools:
        print(f'Tools: {assistant.tools}')
    print(f'\nInstructions:\n{assistant.instructions}')
    print('------------------\n')

# --- Create a thread and send a message ---
print('\nCreating thread...')
thread = client.beta.threads.create()
thread_id = thread.id

print(f'\nSending message...\n\nuser: {message}\n')
message = client.beta.threads.messages.create(
    thread_id=thread_id,
    role="user",
    content=message
)
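# Streaming uses the SDK's AssistantEventHandler callback pattern: each
# override below reacts to one event type as the run streams back. Text deltas
# are printed as they arrive, inline citation markers are rewritten as
# bracketed indexes, and on_message_done resolves each cited file id to its
# filename for the citation list printed at the end.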
class EventHandler(AssistantEventHandler):
    @override
    def on_event(self, event) -> None:
        logging.debug(f"\n-----\nEVENT > {event}\n-----")
        super().on_event(event)

    @override
    def on_text_created(self, text: Text) -> None:
        print("\nassistant > ", end="", flush=True)

    @override
    def on_text_delta(self, delta: TextDelta, snapshot: Text):
        # delta.value is Optional; guard against annotation-only deltas
        content = delta.value or ""
        if delta.annotations:
            for annotation in delta.annotations:
                content = content.replace(annotation.text, f"[{annotation.index}]")
        print(content, end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call):
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    @override
    def on_message_done(self, message) -> None:
        # Resolve each file citation to its filename and print the citation list
        message_content = message.content[0].text
        annotations = message_content.annotations
        citations = []
        for index, annotation in enumerate(annotations):
            if file_citation := getattr(annotation, "file_citation", None):
                cited_file = client.files.retrieve(file_citation.file_id)
                citations.append(f"[{index}] {cited_file.filename}")
        print("\n\n" + "\n".join(citations))

# --- Stream the assistant's responses ---
print('\nAssistant: ', end='', flush=True)
with client.beta.threads.runs.stream(
    thread_id=thread_id,
    assistant_id=assistant_id,
    event_handler=EventHandler()
) as stream:
    stream.until_done()
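# Nothing is deleted at the end of the run: the assistant and vector store
# persist, so the ids printed above can be exported as ASSISTANT_ID and
# VECTOR_STORE_ID to reuse them on subsequent runs.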