ManagedkdbInsights/boto/r_query_welcomedb.ipynb (269 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"id": "b059d8b4-3abd-4654-8bf7-d0f8b5143730",
"metadata": {},
"source": [
"# Query Data with R\n",
"This notebook shows how to connect to a FinSpace Managed kdb cluster using the [rkdb](https://github.com/KxSystems/rkdb) library.\n",
"\n",
"## Setup\n",
"Update [env.r](env.r) with you environment's information (github has an empty example).\n",
"\n",
"## References\n",
"- [rkdb](https://github.com/KxSystems/rkdb) library (github)\n",
"- [paws](https://cran.r-project.org/web/packages/paws/index.html) library (CRAN) \n",
"- [nanotime](https://cran.r-project.org/web/packages/nanotime/index.html) library (CRAN) "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "9fab5941-5fc1-420c-ba21-b78e7af11751",
"metadata": {},
"outputs": [],
"source": [
"#' Test if package is already installed\n",
"#' \n",
"#' @param mypkg package name to check\n",
"#' @returns TRUE: if package is installed, else FALSE\n",
"#'\n",
"is.installed <- function(mypkg) {\n",
" is.element(mypkg, installed.packages()[,1]) \n",
"}\n",
"\n",
"#' Install package if not installed\n",
"#' \n",
"#' @param x package name to confirm installation\n",
"#' @returns NULL\n",
"#'\n",
"pkgTest <- function(x) {\n",
" if ( !is.installed(x) ) {\n",
" install.packages(x, dep=TRUE)\n",
" } \n",
" return(NULL)\n",
"}\n",
"\n",
"#' Get a connection to a FinSpace managed kdb cluster\n",
"#' \n",
"#' @param environment_id FinSpace environment ID\n",
"#' @param cluster_name FinSpace cluster name to connect to\n",
"#' @param user_name FinSpace user connecting to cluster \n",
"#' @returns A connection to the requested cluster\n",
"#'\n",
"get_connection <- function(environment_id, cluster_name, user_name) {\n",
" # create finspace service client (uses default credentials)\n",
" finspace <- paws::finspace()\n",
" \n",
" # Call service to get information about the FinSpace user\n",
" user_resp <- finspace$get_kx_user(\n",
" environmentId = environment_id,\n",
" userName = user_name\n",
" )\n",
" \n",
" # get the connection string to the cluster for the finspace user, passing the userARN\n",
" conn_str <- finspace$get_kx_connection_string(\n",
" environmentId = environment_id, \n",
" userArn = user_resp$userArn, \n",
" clusterName = cluster_name\n",
" )\n",
" \n",
" # Break into parts for connection function call\n",
" split_df <- str_split(conn_str$signedConnectionString, \":\")\n",
" \n",
" # split the connection string\n",
" conn_parts <- split_df[[1]]\n",
" \n",
" # get out the bits\n",
" host<-gsub('/', '', conn_parts[3])\n",
" port<-conn_parts[4]\n",
" user<-conn_parts[5]\n",
" pass<-conn_parts[6]\n",
" \n",
" # username password are packed together for connection call\n",
" userpass<-paste(user, pass, sep=\":\")\n",
"\n",
" return(open_connection(host, port, userpass))\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9eead7ab-69ce-4ac6-81ea-1b5afc08b54d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# ensure needed packages are installed\n",
"for (p in c(\"paws\", \"nanotime\", \"stringr\", \"devtools\") ) {\n",
" pkgTest(p)\n",
"}\n",
"\n",
"# rkdb is not on CRAN, installing different (uses devtools)\n",
"if ( !is.installed(\"rkdb\") ) {\n",
" devtools::install_github('KxSystems/rkdb')\n",
"} "
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f18ca78a-660c-40fb-911b-69a873dafd2f",
"metadata": {},
"outputs": [],
"source": [
"library(rkdb)\n",
"library(nanotime)\n",
"library(stringr)\n",
"\n",
"# Source FinSpace environment information\n",
"source(\"env.r\")\n",
"\n",
"# Cluster to connect to, created by welcome notebook: welcome.ipynb\n",
"CLUSTER_NAME<-\"HDB_welcomedb\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4da309b6-8b4b-4ae3-beed-1fd23a090b0b",
"metadata": {},
"outputs": [],
"source": [
"# connect to cluster\n",
"conn <- get_connection(ENV_ID, CLUSTER_NAME, KDB_USERNAME)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "09f2ff5e-c20e-407c-a3a3-3b9ce08d634a",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"'example'"
],
"text/latex": [
"'example'"
],
"text/markdown": [
"'example'"
],
"text/plain": [
"[1] \"example\""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"## Available Tables in FinSpace Cluster\n",
"execute(conn, 'tables[]')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "eb639f4d-a4c6-4274-8401-4ab0a59dcdbf",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"dataframe\">\n",
"<caption>A data.frame: 5 × 4</caption>\n",
"<thead>\n",
"\t<tr><th scope=col>date</th><th scope=col>sym</th><th scope=col>time</th><th scope=col>number</th></tr>\n",
"\t<tr><th scope=col><date></th><th scope=col><chr></th><th scope=col><nanotime></th><th scope=col><int64></th></tr>\n",
"</thead>\n",
"<tbody>\n",
"\t<tr><td>2024-11-16</td><td>aaa</td><td>2024-11-16T14:44:14.737+00:00</td><td> 53231</td></tr>\n",
"\t<tr><td>2024-11-16</td><td>aaa</td><td>2024-11-16T14:44:14.737+00:00</td><td>153560</td></tr>\n",
"\t<tr><td>2024-11-16</td><td>aaa</td><td>2024-11-16T14:44:14.737+00:00</td><td>449428</td></tr>\n",
"\t<tr><td>2024-11-16</td><td>aaa</td><td>2024-11-16T14:44:14.737+00:00</td><td>631966</td></tr>\n",
"\t<tr><td>2024-11-16</td><td>aaa</td><td>2024-11-16T14:44:14.737+00:00</td><td>941566</td></tr>\n",
"</tbody>\n",
"</table>\n"
],
"text/latex": [
"A data.frame: 5 × 4\n",
"\\begin{tabular}{llll}\n",
" date & sym & time & number\\\\\n",
" <date> & <chr> & <nanotime> & <int64>\\\\\n",
"\\hline\n",
"\t 2024-11-16 & aaa & 2024-11-16T14:44:14.737+00:00 & 53231\\\\\n",
"\t 2024-11-16 & aaa & 2024-11-16T14:44:14.737+00:00 & 153560\\\\\n",
"\t 2024-11-16 & aaa & 2024-11-16T14:44:14.737+00:00 & 449428\\\\\n",
"\t 2024-11-16 & aaa & 2024-11-16T14:44:14.737+00:00 & 631966\\\\\n",
"\t 2024-11-16 & aaa & 2024-11-16T14:44:14.737+00:00 & 941566\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 5 × 4\n",
"\n",
"| date <date> | sym <chr> | time <nanotime> | number <int64> |\n",
"|---|---|---|---|\n",
"| 2024-11-16 | aaa | 2024-11-16T14:44:14.737+00:00 | 53231 |\n",
"| 2024-11-16 | aaa | 2024-11-16T14:44:14.737+00:00 | 153560 |\n",
"| 2024-11-16 | aaa | 2024-11-16T14:44:14.737+00:00 | 449428 |\n",
"| 2024-11-16 | aaa | 2024-11-16T14:44:14.737+00:00 | 631966 |\n",
"| 2024-11-16 | aaa | 2024-11-16T14:44:14.737+00:00 | 941566 |\n",
"\n"
],
"text/plain": [
" date sym time number\n",
"1 2024-11-16 aaa 2024-11-16T14:44:14.737+00:00 53231\n",
"2 2024-11-16 aaa 2024-11-16T14:44:14.737+00:00 153560\n",
"3 2024-11-16 aaa 2024-11-16T14:44:14.737+00:00 449428\n",
"4 2024-11-16 aaa 2024-11-16T14:44:14.737+00:00 631966\n",
"5 2024-11-16 aaa 2024-11-16T14:44:14.737+00:00 941566"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"## Sample Records from Quote table\n",
"\n",
"execute(conn, 'select from example where date=min date, i<5')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aab2349a-dded-408f-bd44-fbfdc1e801bd",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "4.4.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}