databao/duckdb/react_tools.py (53 lines of code) (raw):
import json
from typing import Any
import pandas as pd
from duckdb import DuckDBPyConnection
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.tools import tool
from langgraph.graph.state import CompiledStateGraph
from langgraph.prebuilt import create_react_agent
from pydantic import BaseModel
from databao.duckdb.utils import describe_duckdb_schema
class AgentResponse(BaseModel):
"""Response model for ReAct DuckDB agent."""
sql: str
explanation: str
def execute_duckdb_sql(sql: str, con: DuckDBPyConnection, *, limit: int | None = None) -> pd.DataFrame:
# Use duckdb's Relation API to inject a LIMIT clause
rel = con.sql(sql) # A lazy Relation
# TODO Do we want to forbid non-SELECT statements?
# Non-Select queries (CREATE TABLE, etc.) are executed immediately and return None
if rel is None:
return pd.DataFrame()
if limit is not None:
rel = rel.limit(limit)
return rel.df() # Execute and return DataFrame
def make_duckdb_tool(con: DuckDBPyConnection) -> Any:
"""
Create a DuckDB SQL execution tool for LangChain executors.
Args:
con: DuckDB connection to execute queries against.
Returns:
A LangChain tool that executes SQL queries.
"""
@tool("execute_sql")
def execute_sql(sql: str, limit: int = 10) -> str:
"""
Execute any SQL against DuckDB.
Args:
sql: The SQL statement to execute (single statement).
limit: Optional row cap for result-returning statements (10 by default).
Returns:
JSON string: { "columns": [...], "rows": str, "limit": int, "note": str }
"""
try:
df = execute_duckdb_sql(sql, con, limit=limit)
payload = {
"columns": list(df.columns),
"rows": df.to_string(index=False),
"limit": limit,
"note": "Query executed successfully",
}
return json.dumps(payload)
except Exception as e:
payload = {
"columns": [],
"rows": [],
"limit": limit,
"note": f"SQL error: {type(e).__name__}: {e}",
}
return json.dumps(payload)
return execute_sql
def make_react_duckdb_agent(con: DuckDBPyConnection, llm: BaseChatModel) -> CompiledStateGraph[Any]:
"""
Create a ReAct agent configured to work with DuckDB.
Args:
con: DuckDB connection to execute queries against.
llm: Language model to use for the agent.
Returns:
A compiled LangGraph ReAct agent.
"""
schema_text = describe_duckdb_schema(con)
# TODO move to .jinja (and fix indendation)
SYSTEM_PROMPT = f"""You are a careful data analyst using the ReAct pattern with tools.
Use the `execute_sql` tool to run exactly one DuckDB SQL statement when needed.
Guidelines:
- Translate the NL question to ONE DuckDB SQL statement.
- Use provided schema.
- You can fetch extra details about schema/tables/columns if needed using SQL queries.
- After running, write a concise, user-friendly explanation.
- Do NOT write any tables/lists to the output.
- Always include the exact SQL you ran.
- Always use the full table name in query with db name and schema name.
Available schema:
{schema_text}
"""
# LangGraph prebuilt ReAct agent
execute_sql_tool = make_duckdb_tool(con)
tools = [execute_sql_tool]
agent = create_react_agent(
llm,
tools=tools,
prompt=SYSTEM_PROMPT,
response_format=AgentResponse,
)
return agent