in src/ClusterBootstrap/az_tools.py [0:0]
def gen_cluster_config(output_file_name, output_file=True, no_az=False):
    """Assemble the cluster configuration dict for an Azure deployment.

    Gathers infra/worker/NFS/mysqlserver/elasticsearch node information from
    the module-level ``config`` plus Azure VM queries, and builds the ``cc``
    dict consumed by the downstream deployment templates.

    Args:
        output_file_name: Path of the YAML file written when ``output_file``
            is true.
        output_file: When true, pretty-print ``cc`` and dump it to
            ``output_file_name`` as YAML.
        no_az: When true, enumerate VMs locally (``get_vm_list_by_enum``)
            instead of querying the resource group via the ``az`` CLI.

    Returns:
        dict: the assembled cluster configuration ``cc``.

    Raises:
        Exception: Azure file share combined with the ``no_az`` restriction.
        AssertionError: NFS suffix/count sanity checks fail.
        ValueError: an NFS ``filesharename`` is outside the server's
            data-disk mount path.
    """
    if config["priority"] == "low":
        # Low-priority (spot) clusters register DNS CNAMEs/private IPs via a
        # generated helper script; the operator must apply the records manually.
        utils.render_template("./template/dns/cname_and_private_ips.sh.template",
                              "scripts/cname_and_ips.sh", config)
        utils.exec_cmd_local("chmod +x scripts/cname_and_ips.sh; bash scripts/cname_and_ips.sh")
        print("\nPlease copy the commands in dns_add_commands and register the DNS records \n")

    # No infra nodes => emit only the SQL-related portion of the config.
    bSQLOnly = (config["azure_cluster"]["infra_node_num"] <= 0)

    if useAzureFileshare() and not no_az:
        # theoretically it could be supported, but would require storage account to be created first in nested template and then
        # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-template-functions-resource#listkeys
        # could be used to access storage keys - these could be assigned as variable which gets passed into main deployment template
        # NOTE(review): the message says "not supported with no_az" but this
        # raises when no_az is False — the condition looks inverted; confirm
        # intent before changing it.
        raise Exception("Azure file share not currently supported with no_az")

    if useAzureFileshare():
        # Pull the storage-account connection string and extract the SMB
        # account name and key from it.
        cmd = """
        az storage account show-connection-string \
            -n %s \
            -g %s \
            --query 'connectionString' \
            -o tsv
        """ % (config["azure_cluster"]["storage_account_name"],
               config["azure_cluster"]["resource_group"])
        output = utils.exec_cmd_local(cmd)
        # Connection string format: ...;AccountName=<name>;...;AccountKey=<key>
        # (raw strings: the original '\=' was an invalid escape sequence).
        reoutput = re.search(r'AccountKey=.*$', output)
        file_share_key = None
        if reoutput is not None:
            file_share_key = reoutput.group(0).replace("AccountKey=", "")
        reoutput = re.search(r'AccountName=.*;', output)
        file_share_account_name = None
        if reoutput is not None:
            # [:-1] drops the trailing ';' matched by the pattern.
            file_share_account_name = reoutput.group(0).replace("AccountName=", "")[:-1]

    cc = {}
    cc["cluster_name"] = config["azure_cluster"]["cluster_name"]
    if not bSQLOnly:
        # Every infra node also runs etcd.
        cc["etcd_node_num"] = config["azure_cluster"]["infra_node_num"]
    if useSqlAzure():
        cc["sqlserver-hostname"] = "tcp:%s.database.windows.net" % config[
            "azure_cluster"]["sql_server_name"]
        cc["sqlserver-username"] = config["azure_cluster"]["sql_admin_name"]
        cc["sqlserver-password"] = config["azure_cluster"]["sql_admin_password"]
        cc["sqlserver-database"] = config["azure_cluster"]["sql_database_name"]
    if not bSQLOnly:
        cc["admin_username"] = config["cloud_config_nsg_rules"]["default_admin_username"]
        if useAzureFileshare():
            cc["workFolderAccessPoint"] = "file://%s.file.core.windows.net/%s/work/" % (
                config["azure_cluster"]["storage_account_name"],
                config["azure_cluster"]["file_share_name"])
            cc["dataFolderAccessPoint"] = "file://%s.file.core.windows.net/%s/storage/" % (
                config["azure_cluster"]["storage_account_name"],
                config["azure_cluster"]["file_share_name"])
            cc["smbUsername"] = file_share_account_name
            cc["smbUserPassword"] = file_share_key
    cc["useclusterfile"] = True
    cc["deploydockerETCD"] = False
    cc["platform-scripts"] = "ubuntu"
    # Random 16-hex-char password for basic auth: "<pwd>,admin,1000".
    cc["basic_auth"] = "%s,admin,1000" % uuid.uuid4().hex[:16]

    domain_mapping = {
        "regular": "%s.cloudapp.azure.com" % config["azure_cluster"]["azure_location"],
        "low": config.get("network_domain",
                          config["azure_cluster"]["default_low_priority_domain"]),
    }
    if not bSQLOnly:
        cc["network"] = {"domain": domain_mapping[config["priority"]]}
        cc["machines"] = {}
        for i in range(int(config["azure_cluster"]["infra_node_num"])):
            vmname = "{}-infra{:02d}".format(
                config["azure_cluster"]["cluster_name"], i + 1).lower()
            cc["machines"][vmname] = {"role": "infrastructure",
                                      "private-ip": get_vm_ip(i, "infra")}

    # Generate the workers in machines.
    if not no_az:
        vm_list = get_vm_list_by_grp()
    else:
        vm_list = get_vm_list_by_enum()
    vm_ip_names = get_vm_private_ip()
    # Python 2 to 3 migration: az query results arrive as bytes; decode to str.
    vm_list = [{k.decode(): v.decode() for k, v in itm.items()} for itm in vm_list]
    vm_ip_names = [{k.decode(): [vi.decode() for vi in v] if isinstance(v, list) else v.decode()
                    for k, v in itm.items()} for itm in vm_ip_names]
    vm_ip_names = sorted(vm_ip_names, key=lambda x: x['name'])

    worker_machines = []
    if config["priority"] == "low":
        # Spot workers come from a pre-generated hostname->FQDN map file
        # rather than from an Azure query; entries here are bare hostnames.
        with open("hostname_fqdn_map", "r") as rf:
            for line in rf:
                worker_machines.append(line.split()[0])
        for vmname in worker_machines:
            cc["machines"][vmname.lower()] = {
                "role": "worker",
                "node-group": config["azure_cluster"]["worker_vm_size"]}
    elif config["priority"] == "regular":
        for vm in vm_list:
            if "-worker" in vm["name"]:
                worker_machines.append(vm)
        for vm in worker_machines:
            vmname = vm["name"]
            if isNewlyScaledMachine(vmname):
                cc["machines"][vmname.lower()] = {
                    "role": "worker", "scaled": True, "node-group": vm["vmSize"]}
            else:
                cc["machines"][vmname.lower()] = {
                    "role": "worker", "node-group": vm["vmSize"]}

    # Add mysqlserver nodes
    for vm in vm_list:
        if "-mysqlserver" in vm["name"]:
            cc["machines"][vm["name"].lower()] = {
                "role": "mysqlserver",
                "node-group": vm["vmSize"]}
    # Add elasticsearch nodes
    for vm in vm_list:
        if "-elasticsearch" in vm["name"]:
            cc["machines"][vm["name"].lower()] = {
                "role": "elasticsearch",
                "node-group": vm["vmSize"]}
    # Add nfs nodes
    for vm in vm_list:
        if "-nfs" in vm["name"]:
            cc["machines"][vm["name"].lower()] = {
                "role": "nfs",
                "node-group": vm["vmSize"]}

    # Dilemma : Before the servers got created, you don't know their name,
    # cannot specify which server does a mountpoint config group belongs to
    if int(config["azure_cluster"]["nfs_node_num"]) > 0:
        nfs_names2ip = {rec['name']: rec['privateIP'][0]
                        for rec in vm_ip_names if "-nfs" in rec['name']}
    else:
        # No dedicated NFS nodes: infra nodes double as NFS servers.
        nfs_names2ip = {rec['name']: rec['privateIP'][0]
                        for rec in vm_ip_names if "infra" in rec['name']}

    if not bSQLOnly:
        cc["nfs_disk_mnt"] = {}
        suffixed_name_2path = {
            "{}-nfs-{}".format(config["cluster_name"], vm["suffix"]): vm["data_disk_mnt_path"]
            for vm in config["azure_cluster"]["nfs_vm"] if "suffix" in vm}
        for svr_name, svr_ip in nfs_names2ip.items():
            pth = suffixed_name_2path.get(svr_name, config["azure_cluster"]["nfs_data_disk_path"])
            role = "nfs" if "-nfs" in svr_name else "infra"
            cc["nfs_disk_mnt"][svr_name] = {
                "path": pth, "role": role, "ip": svr_ip, "fileshares": []}
        cc["mountpoints"] = {}
        if useAzureFileshare():
            # Fix: the original assigned into cc["mountpoints"]["rootshare"]
            # without creating the nested dict first, which raised KeyError.
            rootshare = {
                "type": "azurefileshare",
                "accountname": config["azure_cluster"]["storage_account_name"],
                "filesharename": config["azure_cluster"]["file_share_name"],
                "mountpoints": "",
            }
            if file_share_key is not None:
                rootshare["accesskey"] = file_share_key
            cc["mountpoints"]["rootshare"] = rootshare
        else:
            nfs_vm_suffixes2dpath = {
                vm["suffix"]: vm["data_disk_mnt_path"]
                for vm in config["azure_cluster"]["nfs_vm"] if "suffix" in vm}
            used_nfs_suffix = {nfs_cnf["server_suffix"]
                               for nfs_cnf in config["nfs_mnt_setup"]
                               if "server_suffix" in nfs_cnf}
            # Fix: original used "assert X and 'msg'", which never reports
            # the message; the correct form is "assert X, 'msg'".
            assert used_nfs_suffix - set(nfs_vm_suffixes2dpath.keys()) == set(), \
                "suffix not in nfs_suffixes list!"
            assert len(nfs_names2ip) >= len(config["azure_cluster"]["nfs_vm"]), \
                "More NFS config items than #. of NFS server"
            suffix2used_nfs = {
                suffix: "{}-nfs-{}".format(config["cluster_name"], suffix).lower()
                for suffix in used_nfs_suffix}
            fullynamed_nfs = {nfs_cnf["server_name"]
                              for nfs_cnf in config["nfs_mnt_setup"]
                              if "server_name" in nfs_cnf}
            # add private IP for fully named NFS to nfs_names2ip map
            for nfs_vm in fullynamed_nfs:
                nfs_vm_ip = None
                for vm_ip_name in vm_ip_names:
                    if vm_ip_name["name"] == nfs_vm:
                        nfs_vm_ip = vm_ip_name["privateIP"][0]
                        break
                assert nfs_vm_ip is not None, "Fully named NFS %s doesn't exist!" % nfs_vm
                nfs_names2ip[nfs_vm] = nfs_vm_ip
            # unused, either node without name suffix or those with suffix but
            # not specified in any nfs_svr_setup item
            named_by_suffix = set(suffix2used_nfs.values())
            unused_nfs = sorted(s for s in nfs_names2ip
                                if s not in named_by_suffix and s not in fullynamed_nfs)
            unused_ID_cnt = 0
            for nfs_cnf in config["nfs_mnt_setup"]:
                if "server_name" in nfs_cnf:
                    server_name = nfs_cnf["server_name"]
                    mnt_parent_path = None
                elif "server_suffix" in nfs_cnf:
                    server_name = suffix2used_nfs[nfs_cnf["server_suffix"]]
                    mnt_parent_path = nfs_vm_suffixes2dpath[nfs_cnf["server_suffix"]]
                else:
                    # Config item names no server: take the next unused one.
                    server_name = unused_nfs[unused_ID_cnt]
                    unused_ID_cnt += 1
                    mnt_parent_path = config["azure_cluster"]["nfs_data_disk_path"]
                server_ip = nfs_names2ip[server_name]
                for mntname, mntcnf in nfs_cnf["mnt_point"].items():
                    # A suffixed/anonymous server's shares must live under its
                    # data-disk path; fully named servers skip the check.
                    if not (mnt_parent_path is None
                            or mntcnf["filesharename"].startswith(mnt_parent_path)):
                        print("Error: Wrong filesharename {}! Mount path is {} !".format(
                            mntcnf["filesharename"], mnt_parent_path))
                        raise ValueError
                    if mntname in cc["mountpoints"]:
                        print("Warning, duplicated mountpoints item name {}, skipping".format(mntname))
                        continue
                    if server_name not in fullynamed_nfs:
                        cc["nfs_disk_mnt"][server_name]["fileshares"].append(
                            mntcnf["filesharename"])
                    cc["mountpoints"][mntname] = mntcnf
                    cc["mountpoints"][mntname]["type"] = "nfs"
                    cc["mountpoints"][mntname]["server"] = server_ip
                    cc["mountpoints"][mntname]["servername"] = server_name

    # Count workers per SKU.
    cntr = {}
    for mc in worker_machines:
        # Fix: for "low" priority, worker_machines holds bare hostname strings
        # (no "vmSize" key) — all spot workers use the configured worker size.
        if isinstance(mc, str):
            vm_sz = config["azure_cluster"]["worker_vm_size"]
        else:
            vm_sz = mc["vmSize"]
        cntr[vm_sz] = cntr.get(vm_sz, 0) + 1
    cc["worker_sku_cnt"] = cntr
    if "sku_mapping" in config:
        cc["sku_mapping"] = config["sku_mapping"]
        for sku in cc["worker_sku_cnt"]:
            # this means that the cluster deployed with this pipeline cannot be heterogeneous
            cc["gpu_type"] = cc["sku_mapping"].get(sku, {}).get('gpu-type', "None")
            break

    if output_file:
        print(yaml.dump(cc, default_flow_style=False))
        with open(output_file_name, 'w') as outfile:
            yaml.dump(cc, outfile, default_flow_style=False)
    return cc