def get_team_clusters()

in sdk/aws_orbit_sdk/emr.py [0:0]


def get_team_clusters(cluster_id: Optional[str] = None) -> Dict[str, Dict[str, str]]:
    """
    Finds the active EMR clusters that belong to the user's team space and returns cluster-level details
    (name, state, master IP, Livy URL, the raw ``describe_cluster`` response and instance information)
    for each of them.

    Parameters
    ----------
    cluster_id : str, optional
        The unique EMR cluster ID to get information about. When omitted, every cluster in the
        STARTING, BOOTSTRAPPING, RUNNING or WAITING state is inspected. When given, only that cluster
        is inspected, regardless of its state.

    Returns
    -------
    clusters_info : dict
        A dictionary keyed by cluster ID. Each value is a dictionary with the keys ``cluster_id``,
        ``livy_url``, ``ip``, ``Name``, ``State``, ``info``, ``dashboard_link`` and ``instances``.
        Clusters that are not tagged for this product, environment and team space are left out, so
        the result may be empty.

    Raises
    ------
    Exception
        If the ``list_clusters`` or ``describe_cluster`` response lacks its expected top-level key.

    Example
    -------
    >>> import aws.utils.notebooks.spark.emr as sparkConnection
    >>> sparkConnection.get_team_clusters()
    """

    emr = boto3.client("emr")
    props = get_properties()

    if cluster_id is None:
        active_states = ["STARTING", "BOOTSTRAPPING", "RUNNING", "WAITING"]
        clusters = []
        marker = None
        # list_clusters is paginated: keep following the returned Marker so that clusters beyond
        # the first page are not silently dropped.
        while True:
            if marker is None:
                page = emr.list_clusters(ClusterStates=active_states)
            else:
                page = emr.list_clusters(ClusterStates=active_states, Marker=marker)
            if "Clusters" not in page:
                raise Exception("Error calling list_clusters()")
            clusters.extend(page["Clusters"])
            marker = page.get("Marker")
            if not marker:
                break
        if not clusters:
            logger.info("no emr clusters found for team space")
            return {}
    else:
        # Only the ID is known here; everything else is read from describe_cluster() below.
        clusters = [{"Id": cluster_id}]

    clusters_info = {}
    for cluster in clusters:
        clstr_id = cluster["Id"]
        cluster_info = emr.describe_cluster(ClusterId=clstr_id)
        if "Cluster" not in cluster_info:
            raise Exception("Error calling describe_cluster()")
        description = cluster_info["Cluster"]

        tags = {tag["Key"]: tag["Value"] for tag in description.get("Tags", [])}

        # Skip clusters that were not created by this product.
        if ORBIT_PRODUCT_KEY not in tags or tags[ORBIT_PRODUCT_KEY] != ORBIT_PRODUCT_NAME:
            continue

        # Skip clusters of another environment or team space. A missing tag counts as a mismatch
        # instead of raising KeyError and aborting the whole listing.
        if ORBIT_ENV not in tags or tags[ORBIT_ENV] != props["AWS_ORBIT_ENV"]:
            continue
        if AWS_ORBIT_TEAM_SPACE not in tags or tags[AWS_ORBIT_TEAM_SPACE] != props["AWS_ORBIT_TEAM_SPACE"]:
            continue

        cluster_nodes_info = get_cluster_info(clstr_id)
        ip = _get_cluster_ip(emr, clstr_id, False)

        clusters_info[clstr_id] = {
            "cluster_id": clstr_id,
            "livy_url": f"http://{ip}:8998",
            "ip": ip,
            # Name and State come from the describe_cluster response so that the lookup also works
            # when the caller supplied cluster_id (the stub entry above only carries "Id").
            "Name": description["Name"],
            "State": description["Status"]["State"],
            "info": cluster_info,
            "dashboard_link": "http://tbd",
            "instances": cluster_nodes_info,
        }

    return clusters_info