# Create Cluster: HDB
This notebook creates (starts) an HDB cluster for the database named in `DB_NAME`.

In [None]:
import datetime
import json
import os

import boto3
import pandas as pd
import pykx as kx

from managed_kx import *
from env import *

# --- Names -------------------------------------------------------------
# Database the cluster is attached to, and the cluster name derived from it.
DB_NAME = "welcomedb"
CLUSTER_NAME = f"HDB_{DB_NAME}"

# Value of the 's' command-line argument handed to the cluster (kept as a string).
SEC_THREADS = '4'

# --- Code package ------------------------------------------------------
# CODEBASE is the base name of the code zip; S3_CODE_PATH is the key prefix
# used for that zip in CODE_CONFIG below.
CODEBASE = "code"
S3_CODE_PATH = "code"

CODE_CONFIG = {
    's3Bucket': S3_BUCKET,
    's3Key': f'{S3_CODE_PATH}/{CODEBASE}.zip',
}

# --- Capacity, cache and database --------------------------------------
CAPACITY_CONFIG = {
    'nodeCount': 3,
    'nodeType': 'kx.s.xlarge',
}

# Database paths placed in the cache; '/' is the only entry.
DB_PATHS = ['/']

CACHE_CONFIG = [
    {'type': 'CACHE_1000', 'size': 1200},
]

# The cacheType here matches the 'type' declared in CACHE_CONFIG.
DATABASE_CONFIG = [
    {
        'databaseName': DB_NAME,
        'cacheConfigurations': [
            {'dbPaths': DB_PATHS, 'cacheType': 'CACHE_1000'},
        ],
    },
]

# --- Start-up ----------------------------------------------------------
INIT_SCRIPT = 'init.q'
CMD_ARGS = [
    {'key': 's', 'value': SEC_THREADS},
    {'key': 'dbname', 'value': DB_NAME},
]

# --- Networking --------------------------------------------------------
# VPC_ID, SECURITY_GROUPS and SUBNET_IDS are not defined in this notebook;
# they come in through the star imports above.
VPC_CONFIG = {
    'vpcId': VPC_ID,
    'securityGroupIds': SECURITY_GROUPS,
    'subnetIds': SUBNET_IDS,
    'ipAddressType': 'IP_V4',
}


In [None]:
# Start a boto3 session with the default credential lookup, then build the
# FinSpace service client that the remaining cells use.
session = boto3.Session()
client = session.client('finspace')

## Check Database

In [None]:
# List the changesets of DB_NAME (oldest first) and display the change
# requests recorded for each one.
note_str = ""

c_set_list = []

try:
    c_set_list = client.list_kx_changesets(environmentId=ENV_ID, databaseName=DB_NAME)['kxChangesets']
except Exception as err:
    # Best effort: carry on with an empty list, but surface the reason.
    # Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate so the cell can still be interrupted.
    note_str = f"<<Could not get changesets>> ({err})"

print(100*"=")
print(f"Database: {DB_NAME}, Changesets: {len(c_set_list)} {note_str}")
print(100*"=")

# sort by create time
c_set_list = sorted(c_set_list, key=lambda d: d['createdTimestamp'])

for c in c_set_list:
    c_set_id = c['changesetId']
    print(f"Changeset ({c['status']}): {c_set_id}: Created: {c['createdTimestamp']}")

    # The list call does not supply the change requests used here; fetch
    # them per changeset.
    c_rqs = client.get_kx_changeset(environmentId=ENV_ID, databaseName=DB_NAME, changesetId=c_set_id)['changeRequests']

    # Render the change requests as a table without the index column.
    chs_pdf = pd.DataFrame.from_dict(c_rqs).style.hide(axis='index')
    display(chs_pdf)

## Create Cluster

In [None]:
# Package the code directory and stage it in S3 at the key that CODE_CONFIG
# points the cluster to.

# Zip the *contents* of CODEBASE into {CODEBASE}.zip beside the directory.
# '&&' (rather than ';') stops zip from running in the notebook directory
# when the cd fails.
zip_status = os.system(f"cd {CODEBASE} && zip -r -X ../{CODEBASE}.zip . -x '*.ipynb_checkpoints*'")
if zip_status != 0:
    raise RuntimeError(f"Could not zip '{CODEBASE}' (exit status {zip_status})")

# Shell script that copies the zip to S3.
# 'set -e' aborts at the first failing command, so a failed copy is not
# masked by the exit status of the listing that follows it.
cp = "set -e\n"

# Hand explicit credentials to the AWS CLI only when they are configured.
if AWS_ACCESS_KEY_ID is not None:
    cp += f"""
export AWS_ACCESS_KEY_ID={AWS_ACCESS_KEY_ID}
export AWS_SECRET_ACCESS_KEY={AWS_SECRET_ACCESS_KEY}
"""
    # Skip a missing session token; exporting it would set the literal "None".
    if AWS_SESSION_TOKEN is not None:
        cp += f"export AWS_SESSION_TOKEN={AWS_SESSION_TOKEN}\n"

# Upload under S3_CODE_PATH so the object lands where CODE_CONFIG['s3Key'] expects it.
cp += f"""
aws s3 cp  --exclude .DS_Store {CODEBASE}.zip s3://{S3_BUCKET}/{S3_CODE_PATH}/{CODEBASE}.zip
aws s3 ls s3://{S3_BUCKET}/{S3_CODE_PATH}/
"""

# execute the S3 copy; the script is kept out of the error text because it
# may contain credentials
cp_status = os.system(cp)
if cp_status != 0:
    raise RuntimeError(f"Could not copy {CODEBASE}.zip to S3 (exit status {cp_status})")

In [None]:
# Request creation of the HDB cluster; every setting comes from the
# configuration constants defined earlier or from the star imports.
resp = client.create_kx_cluster(
    # identity
    environmentId=ENV_ID,
    clusterName=CLUSTER_NAME,
    clusterType='HDB',
    clusterDescription="Created with create_cluster_HDB notebook",
    releaseLabel='1.0',
    # sizing, data and cache
    capacityConfiguration=CAPACITY_CONFIG,
    databases=DATABASE_CONFIG,
    cacheStorageConfigurations=CACHE_CONFIG,
    # code package and start-up
    code=CODE_CONFIG,
    initializationScript=INIT_SCRIPT,
    commandLineArguments=CMD_ARGS,
    # placement and networking
    azMode=AZ_MODE,
    availabilityZoneId=AZ_ID,
    vpcConfiguration=VPC_CONFIG,
)

In [None]:
# Wait on the cluster via the helper brought in by the star imports
# (presumably it polls until creation settles -- confirm in its source).
wait_for_cluster_status(client, environmentId=ENV_ID, clusterName=CLUSTER_NAME, show_wait=True)

# Blank line, then the completion marker.
print("\n** DONE **")

In [None]:
# Obtain a connection string for the cluster on behalf of KDB_USERNAME.
conn_str = get_kx_connection_string(
    client,
    environmentId=ENV_ID,
    clusterName=CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# Split it into the four values the %%q cell below takes as arguments.
(host, port, username, password) = parse_connection_string(conn_str)


In [None]:
%%q --host $host --port $port --user $username --pass $password
tables[]

In [None]:
# Fetch the clusters of this environment through the get_clusters helper and
# show the result (presumably a DataFrame -- confirm in the helper's source).
cdf = get_clusters(client, environmentId=ENV_ID)

display(cdf)

In [None]:
# Stamp the notebook with the local time of this run.
print(f"Last Run: {datetime.datetime.now()}")