doc/code/memory/9_exporting_data.ipynb (230 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "id": "0", "metadata": {}, "source": [ "# 9. Exporting Data Example\n", "\n", "This notebook shows different ways to export data from memory. This first example exports all conversations from local DuckDB memory with their respective score values in a JSON format. The data can currently be exported as either a JSON file or a CSV file, which will be saved under `DB_DATA_PATH` (the `dbdata` folder within PyRIT). The CSV export is commented out below. In this example, all conversations are exported, but by using other export functions from `memory_interface`, we can export by specific labels and other methods." ] }, { "cell_type": "code", "execution_count": null, "id": "1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "f8ceb37b-ebd5-4efd-83b8-f9e579801db7\n", "None: user: Hi, chat bot! This is my initial prompt.\n", "None: assistant: Nice to meet you! This is my response.\n", "None: user: Wonderful! This is my second prompt to the chat bot!\n", "Exported conversation with scores to JSON: C:\\Users\\nichikan\\source\\repos\\PyRIT-internal\\PyRIT\\dbdata\\conversation_and_scores_json_example.json\n" ] } ], "source": [ "from uuid import uuid4\n", "\n", "from pyrit.common import DUCK_DB, initialize_pyrit\n", "from pyrit.common.path import DB_DATA_PATH\n", "from pyrit.memory import CentralMemory\n", "from pyrit.models import PromptRequestPiece, PromptRequestResponse\n", "\n", "initialize_pyrit(memory_db_type=DUCK_DB)\n", "\n", "conversation_id = str(uuid4())\n", "\n", "print(conversation_id)\n", "\n", "message_list = [\n", " PromptRequestPiece(\n", " role=\"user\", original_value=\"Hi, chat bot! This is my initial prompt.\", conversation_id=conversation_id\n", " ),\n", " PromptRequestPiece(\n", " role=\"assistant\", original_value=\"Nice to meet you! This is my response.\", conversation_id=conversation_id\n", " ),\n", " PromptRequestPiece(\n", " role=\"user\",\n", " original_value=\"Wonderful! 
This is my second prompt to the chat bot!\",\n", " conversation_id=conversation_id,\n", " ),\n", "]\n", "\n", "duckdb_memory = CentralMemory.get_memory_instance()\n", "duckdb_memory.add_request_response_to_memory(request=PromptRequestResponse([message_list[0]]))\n", "duckdb_memory.add_request_response_to_memory(request=PromptRequestResponse([message_list[1]]))\n", "duckdb_memory.add_request_response_to_memory(request=PromptRequestResponse([message_list[2]]))\n", "\n", "entries = duckdb_memory.get_conversation(conversation_id=conversation_id)\n", "\n", "for entry in entries:\n", " print(entry)\n", "\n", "# Define file path for export\n", "json_file_path = DB_DATA_PATH / \"conversation_and_scores_json_example.json\"\n", "# csv_file_path = DB_DATA_PATH / \"conversation_and_scores_csv_example.csv\"\n", "\n", "# Export the data to a JSON file\n", "conversation_with_scores = duckdb_memory.export_conversations(file_path=json_file_path, export_type=\"json\")\n", "print(f\"Exported conversation with scores to JSON: {json_file_path}\")\n", "\n", "# Export the data to a CSV file\n", "# conversation_with_scores = duckdb_memory.export_conversations(file_path=csv_file_path, export_type=\"csv\")\n", "# print(f\"Exported conversation with scores to CSV: {csv_file_path}\")\n", "\n", "# Cleanup memory resources\n", "duckdb_memory.dispose_engine()" ] }, { "cell_type": "markdown", "id": "2", "metadata": {}, "source": [ "You can also use the exported JSON or CSV files to import the data as a pandas DataFrame. This can be useful for various data manipulation and analysis tasks." 
] }, { "cell_type": "code", "execution_count": null, "id": "3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>id</th>\n", " <th>role</th>\n", " <th>conversation_id</th>\n", " <th>sequence</th>\n", " <th>timestamp</th>\n", " <th>labels</th>\n", " <th>prompt_metadata</th>\n", " <th>converter_identifiers</th>\n", " <th>prompt_target_identifier</th>\n", " <th>orchestrator_identifier</th>\n", " <th>...</th>\n", " <th>original_value_data_type</th>\n", " <th>original_value</th>\n", " <th>original_value_sha256</th>\n", " <th>converted_value_data_type</th>\n", " <th>converted_value</th>\n", " <th>converted_value_sha256</th>\n", " <th>response_error</th>\n", " <th>originator</th>\n", " <th>original_prompt_id</th>\n", " <th>scores</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>598e333a-ff30-403c-bda7-53b18e06e173</td>\n", " <td>user</td>\n", " <td>f8ceb37b-ebd5-4efd-83b8-f9e579801db7</td>\n", " <td>0</td>\n", " <td>2025-01-07 15:11:26.206293</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>text</td>\n", " <td>Hi, chat bot! This is my initial prompt.</td>\n", " <td>NaN</td>\n", " <td>text</td>\n", " <td>Hi, chat bot! 
This is my initial prompt.</td>\n", " <td>NaN</td>\n", " <td>none</td>\n", " <td>undefined</td>\n", " <td>598e333a-ff30-403c-bda7-53b18e06e173</td>\n", " <td>[]</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>1 rows × 21 columns</p>\n", "</div>" ], "text/plain": [ " id role \\\n", "0 598e333a-ff30-403c-bda7-53b18e06e173 user \n", "\n", " conversation_id sequence timestamp \\\n", "0 f8ceb37b-ebd5-4efd-83b8-f9e579801db7 0 2025-01-07 15:11:26.206293 \n", "\n", " labels prompt_metadata converter_identifiers prompt_target_identifier \\\n", "0 NaN NaN NaN NaN \n", "\n", " orchestrator_identifier ... original_value_data_type \\\n", "0 NaN ... text \n", "\n", " original_value original_value_sha256 \\\n", "0 Hi, chat bot! This is my initial prompt. NaN \n", "\n", " converted_value_data_type converted_value \\\n", "0 text Hi, chat bot! This is my initial prompt. \n", "\n", " converted_value_sha256 response_error originator \\\n", "0 NaN none undefined \n", "\n", " original_prompt_id scores \n", "0 598e333a-ff30-403c-bda7-53b18e06e173 [] \n", "\n", "[1 rows x 21 columns]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd # type: ignore\n", "\n", "df = pd.read_json(json_file_path)\n", "df.head(1)" ] } ], "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }