in sdk/aws_orbit_sdk/emr.py [0:0]
def get_team_clusters(cluster_id: Optional[str] = None) -> Dict[str, Dict[str, str]]:
    """
    Find the EMR clusters that belong to the user's team space and return cluster-level details
    (status, hardware and software configuration, VPC settings, and so on) for each of them.

    A cluster is reported only when its tags mark it as an Orbit cluster (``ORBIT_PRODUCT_KEY`` equal to
    ``ORBIT_PRODUCT_NAME``) and its ``ORBIT_ENV`` / ``AWS_ORBIT_TEAM_SPACE`` tags match the
    ``AWS_ORBIT_ENV`` / ``AWS_ORBIT_TEAM_SPACE`` values returned by ``get_properties()``.
    Clusters missing any of these tags are skipped.

    Parameters
    ----------
    cluster_id : str, optional
        The unique EMR cluster ID to get information about. When omitted, every cluster in the
        STARTING, BOOTSTRAPPING, RUNNING or WAITING state is examined. When given, only that cluster
        is examined and no state filter is applied.

    Returns
    -------
    clusters_info : dict
        A dictionary keyed by cluster ID. Each value is a dictionary with the keys ``cluster_id``,
        ``livy_url``, ``ip``, ``Name``, ``State``, ``info`` (the raw ``describe_cluster`` response),
        ``dashboard_link`` (currently a placeholder) and ``instances``. Empty when no cluster matches.

    Raises
    ------
    Exception
        If the ``list_clusters`` response has no ``Clusters`` entry or the ``describe_cluster``
        response has no ``Cluster`` entry.

    Example
    -------
    >>> import aws_orbit_sdk.emr as sparkConnection
    >>> sparkConnection.get_team_clusters()
    """
    emr = boto3.client("emr")
    props = get_properties()

    if cluster_id is None:
        cluster_ids = []
        list_kwargs = {"ClusterStates": ["STARTING", "BOOTSTRAPPING", "RUNNING", "WAITING"]}
        # list_clusters is paginated; keep following the Marker so clusters beyond the first page are not lost.
        while True:
            page = emr.list_clusters(**list_kwargs)
            if "Clusters" not in page:
                raise Exception("Error calling list_clusters()")
            cluster_ids.extend(summary["Id"] for summary in page["Clusters"])
            marker = page.get("Marker")
            if not marker:
                break
            list_kwargs["Marker"] = marker
        if not cluster_ids:
            logger.info("no emr clusters found for team space")
            return {}
    else:
        cluster_ids = [cluster_id]

    clusters_info = {}
    for clstr_id in cluster_ids:
        cluster_info = emr.describe_cluster(ClusterId=clstr_id)
        if "Cluster" not in cluster_info:
            raise Exception("Error calling describe_cluster()")
        cluster = cluster_info["Cluster"]

        tags = {tag["Key"]: tag["Value"] for tag in cluster.get("Tags", [])}
        # Skip clusters that were not created by Orbit.
        if ORBIT_PRODUCT_KEY not in tags or tags[ORBIT_PRODUCT_KEY] != ORBIT_PRODUCT_NAME:
            continue
        # Skip clusters of another environment / team; a missing tag counts as "not ours" instead of crashing.
        if ORBIT_ENV not in tags or tags[ORBIT_ENV] != props["AWS_ORBIT_ENV"]:
            continue
        if AWS_ORBIT_TEAM_SPACE not in tags or tags[AWS_ORBIT_TEAM_SPACE] != props["AWS_ORBIT_TEAM_SPACE"]:
            continue

        cluster_nodes_info = get_cluster_info(clstr_id)
        ip = _get_cluster_ip(emr, clstr_id, False)

        clusters_info[clstr_id] = {
            "cluster_id": clstr_id,
            "livy_url": f"http://{ip}:8998",
            "ip": ip,
            # Name and state come from describe_cluster so they are also available when the caller
            # passed an explicit cluster_id (no list_clusters summary exists in that case).
            "Name": cluster["Name"],
            "State": cluster["Status"]["State"],
            "info": cluster_info,
            "dashboard_link": "http://tbd",
            "instances": cluster_nodes_info,
        }
    return clusters_info