in src/ClusterBootstrap/az_tools.py [0:0]
def gen_cluster_config(output_file_name, output_file=True, no_az=False):
    """Assemble the cluster configuration dict for an Azure deployment.

    Gathers infra/worker/NFS/mysqlserver/elasticsearch node information from
    the module-level ``config`` plus Azure VM queries, and builds the ``cc``
    dict consumed by the downstream deployment templates.

    Args:
        output_file_name: Path of the YAML file written when ``output_file``
            is true.
        output_file: When true, pretty-print ``cc`` and dump it to
            ``output_file_name`` as YAML.
        no_az: When true, enumerate VMs locally (``get_vm_list_by_enum``)
            instead of querying the resource group via the ``az`` CLI.

    Returns:
        dict: the assembled cluster configuration ``cc``.

    Raises:
        Exception: Azure file share combined with the ``no_az`` restriction.
        AssertionError: NFS suffix/count sanity checks fail.
        ValueError: an NFS ``filesharename`` is outside the server's
            data-disk mount path.
    """
    if config["priority"] == "low":
        # Low-priority (spot) clusters register DNS CNAMEs/private IPs via a
        # generated helper script; the operator must apply the records manually.
        utils.render_template("./template/dns/cname_and_private_ips.sh.template",
                              "scripts/cname_and_ips.sh", config)
        utils.exec_cmd_local("chmod +x scripts/cname_and_ips.sh; bash scripts/cname_and_ips.sh")
        print("\nPlease copy the commands in dns_add_commands and register the DNS records \n")

    # No infra nodes => emit only the SQL-related portion of the config.
    bSQLOnly = (config["azure_cluster"]["infra_node_num"] <= 0)

    if useAzureFileshare() and not no_az:
        # theoretically it could be supported, but would require storage account to be created first in nested template and then
        # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-template-functions-resource#listkeys
        # could be used to access storage keys - these could be assigned as variable which gets passed into main deployment template
        # NOTE(review): the message says "not supported with no_az" but this
        # raises when no_az is False — the condition looks inverted; confirm
        # intent before changing it.
        raise Exception("Azure file share not currently supported with no_az")

    if useAzureFileshare():
        # Pull the storage-account connection string and extract the SMB
        # account name and key from it.
        cmd = """
        az storage account show-connection-string \
            -n %s \
            -g %s \
            --query 'connectionString' \
            -o tsv
        """ % (config["azure_cluster"]["storage_account_name"],
               config["azure_cluster"]["resource_group"])
        output = utils.exec_cmd_local(cmd)
        # Connection string format: ...;AccountName=<name>;...;AccountKey=<key>
        # (raw strings: the original '\=' was an invalid escape sequence).
        reoutput = re.search(r'AccountKey=.*$', output)
        file_share_key = None
        if reoutput is not None:
            file_share_key = reoutput.group(0).replace("AccountKey=", "")
        reoutput = re.search(r'AccountName=.*;', output)
        file_share_account_name = None
        if reoutput is not None:
            # [:-1] drops the trailing ';' matched by the pattern.
            file_share_account_name = reoutput.group(0).replace("AccountName=", "")[:-1]

    cc = {}
    cc["cluster_name"] = config["azure_cluster"]["cluster_name"]
    if not bSQLOnly:
        # Every infra node also runs etcd.
        cc["etcd_node_num"] = config["azure_cluster"]["infra_node_num"]
    if useSqlAzure():
        cc["sqlserver-hostname"] = "tcp:%s.database.windows.net" % config[
            "azure_cluster"]["sql_server_name"]
        cc["sqlserver-username"] = config["azure_cluster"]["sql_admin_name"]
        cc["sqlserver-password"] = config["azure_cluster"]["sql_admin_password"]
        cc["sqlserver-database"] = config["azure_cluster"]["sql_database_name"]
    if not bSQLOnly:
        cc["admin_username"] = config["cloud_config_nsg_rules"]["default_admin_username"]
        if useAzureFileshare():
            cc["workFolderAccessPoint"] = "file://%s.file.core.windows.net/%s/work/" % (
                config["azure_cluster"]["storage_account_name"],
                config["azure_cluster"]["file_share_name"])
            cc["dataFolderAccessPoint"] = "file://%s.file.core.windows.net/%s/storage/" % (
                config["azure_cluster"]["storage_account_name"],
                config["azure_cluster"]["file_share_name"])
            cc["smbUsername"] = file_share_account_name
            cc["smbUserPassword"] = file_share_key
    cc["useclusterfile"] = True
    cc["deploydockerETCD"] = False
    cc["platform-scripts"] = "ubuntu"
    # Random 16-hex-char password for basic auth: "<pwd>,admin,1000".
    cc["basic_auth"] = "%s,admin,1000" % uuid.uuid4().hex[:16]

    domain_mapping = {
        "regular": "%s.cloudapp.azure.com" % config["azure_cluster"]["azure_location"],
        "low": config.get("network_domain",
                          config["azure_cluster"]["default_low_priority_domain"]),
    }
    if not bSQLOnly:
        cc["network"] = {"domain": domain_mapping[config["priority"]]}
        cc["machines"] = {}
        for i in range(int(config["azure_cluster"]["infra_node_num"])):
            vmname = "{}-infra{:02d}".format(
                config["azure_cluster"]["cluster_name"], i + 1).lower()
            cc["machines"][vmname] = {"role": "infrastructure",
                                      "private-ip": get_vm_ip(i, "infra")}

    # Generate the workers in machines.
    if not no_az:
        vm_list = get_vm_list_by_grp()
    else:
        vm_list = get_vm_list_by_enum()
    vm_ip_names = get_vm_private_ip()
    # Python 2 to 3 migration: az query results arrive as bytes; decode to str.
    vm_list = [{k.decode(): v.decode() for k, v in itm.items()} for itm in vm_list]
    vm_ip_names = [{k.decode(): [vi.decode() for vi in v] if isinstance(v, list) else v.decode()
                    for k, v in itm.items()} for itm in vm_ip_names]
    vm_ip_names = sorted(vm_ip_names, key=lambda x: x['name'])

    worker_machines = []
    if config["priority"] == "low":
        # Spot workers come from a pre-generated hostname->FQDN map file
        # rather than from an Azure query; entries here are bare hostnames.
        with open("hostname_fqdn_map", "r") as rf:
            for line in rf:
                worker_machines.append(line.split()[0])
        for vmname in worker_machines:
            cc["machines"][vmname.lower()] = {
                "role": "worker",
                "node-group": config["azure_cluster"]["worker_vm_size"]}
    elif config["priority"] == "regular":
        for vm in vm_list:
            if "-worker" in vm["name"]:
                worker_machines.append(vm)
        for vm in worker_machines:
            vmname = vm["name"]
            if isNewlyScaledMachine(vmname):
                cc["machines"][vmname.lower()] = {
                    "role": "worker", "scaled": True, "node-group": vm["vmSize"]}
            else:
                cc["machines"][vmname.lower()] = {
                    "role": "worker", "node-group": vm["vmSize"]}

    # Add mysqlserver nodes
    for vm in vm_list:
        if "-mysqlserver" in vm["name"]:
            cc["machines"][vm["name"].lower()] = {
                "role": "mysqlserver",
                "node-group": vm["vmSize"]}
    # Add elasticsearch nodes
    for vm in vm_list:
        if "-elasticsearch" in vm["name"]:
            cc["machines"][vm["name"].lower()] = {
                "role": "elasticsearch",
                "node-group": vm["vmSize"]}
    # Add nfs nodes
    for vm in vm_list:
        if "-nfs" in vm["name"]:
            cc["machines"][vm["name"].lower()] = {
                "role": "nfs",
                "node-group": vm["vmSize"]}

    # Dilemma : Before the servers got created, you don't know their name,
    # cannot specify which server does a mountpoint config group belongs to
    if int(config["azure_cluster"]["nfs_node_num"]) > 0:
        nfs_names2ip = {rec['name']: rec['privateIP'][0]
                        for rec in vm_ip_names if "-nfs" in rec['name']}
    else:
        # No dedicated NFS nodes: infra nodes double as NFS servers.
        nfs_names2ip = {rec['name']: rec['privateIP'][0]
                        for rec in vm_ip_names if "infra" in rec['name']}

    if not bSQLOnly:
        cc["nfs_disk_mnt"] = {}
        suffixed_name_2path = {
            "{}-nfs-{}".format(config["cluster_name"], vm["suffix"]): vm["data_disk_mnt_path"]
            for vm in config["azure_cluster"]["nfs_vm"] if "suffix" in vm}
        for svr_name, svr_ip in nfs_names2ip.items():
            pth = suffixed_name_2path.get(svr_name, config["azure_cluster"]["nfs_data_disk_path"])
            role = "nfs" if "-nfs" in svr_name else "infra"
            cc["nfs_disk_mnt"][svr_name] = {
                "path": pth, "role": role, "ip": svr_ip, "fileshares": []}
        cc["mountpoints"] = {}
        if useAzureFileshare():
            # Fix: the original assigned into cc["mountpoints"]["rootshare"]
            # without creating the nested dict first, which raised KeyError.
            rootshare = {
                "type": "azurefileshare",
                "accountname": config["azure_cluster"]["storage_account_name"],
                "filesharename": config["azure_cluster"]["file_share_name"],
                "mountpoints": "",
            }
            if file_share_key is not None:
                rootshare["accesskey"] = file_share_key
            cc["mountpoints"]["rootshare"] = rootshare
        else:
            nfs_vm_suffixes2dpath = {
                vm["suffix"]: vm["data_disk_mnt_path"]
                for vm in config["azure_cluster"]["nfs_vm"] if "suffix" in vm}
            used_nfs_suffix = {nfs_cnf["server_suffix"]
                               for nfs_cnf in config["nfs_mnt_setup"]
                               if "server_suffix" in nfs_cnf}
            # Fix: original used "assert X and 'msg'", which never reports
            # the message; the correct form is "assert X, 'msg'".
            assert used_nfs_suffix - set(nfs_vm_suffixes2dpath.keys()) == set(), \
                "suffix not in nfs_suffixes list!"
            assert len(nfs_names2ip) >= len(config["azure_cluster"]["nfs_vm"]), \
                "More NFS config items than #. of NFS server"
            suffix2used_nfs = {
                suffix: "{}-nfs-{}".format(config["cluster_name"], suffix).lower()
                for suffix in used_nfs_suffix}
            fullynamed_nfs = {nfs_cnf["server_name"]
                              for nfs_cnf in config["nfs_mnt_setup"]
                              if "server_name" in nfs_cnf}
            # add private IP for fully named NFS to nfs_names2ip map
            for nfs_vm in fullynamed_nfs:
                nfs_vm_ip = None
                for vm_ip_name in vm_ip_names:
                    if vm_ip_name["name"] == nfs_vm:
                        nfs_vm_ip = vm_ip_name["privateIP"][0]
                        break
                assert nfs_vm_ip is not None, "Fully named NFS %s doesn't exist!" % nfs_vm
                nfs_names2ip[nfs_vm] = nfs_vm_ip
            # unused, either node without name suffix or those with suffix but
            # not specified in any nfs_svr_setup item
            named_by_suffix = set(suffix2used_nfs.values())
            unused_nfs = sorted(s for s in nfs_names2ip
                                if s not in named_by_suffix and s not in fullynamed_nfs)
            unused_ID_cnt = 0
            for nfs_cnf in config["nfs_mnt_setup"]:
                if "server_name" in nfs_cnf:
                    server_name = nfs_cnf["server_name"]
                    mnt_parent_path = None
                elif "server_suffix" in nfs_cnf:
                    server_name = suffix2used_nfs[nfs_cnf["server_suffix"]]
                    mnt_parent_path = nfs_vm_suffixes2dpath[nfs_cnf["server_suffix"]]
                else:
                    # Config item names no server: take the next unused one.
                    server_name = unused_nfs[unused_ID_cnt]
                    unused_ID_cnt += 1
                    mnt_parent_path = config["azure_cluster"]["nfs_data_disk_path"]
                server_ip = nfs_names2ip[server_name]
                for mntname, mntcnf in nfs_cnf["mnt_point"].items():
                    # A suffixed/anonymous server's shares must live under its
                    # data-disk path; fully named servers skip the check.
                    if not (mnt_parent_path is None
                            or mntcnf["filesharename"].startswith(mnt_parent_path)):
                        print("Error: Wrong filesharename {}! Mount path is {} !".format(
                            mntcnf["filesharename"], mnt_parent_path))
                        raise ValueError
                    if mntname in cc["mountpoints"]:
                        print("Warning, duplicated mountpoints item name {}, skipping".format(mntname))
                        continue
                    if server_name not in fullynamed_nfs:
                        cc["nfs_disk_mnt"][server_name]["fileshares"].append(
                            mntcnf["filesharename"])
                    cc["mountpoints"][mntname] = mntcnf
                    cc["mountpoints"][mntname]["type"] = "nfs"
                    cc["mountpoints"][mntname]["server"] = server_ip
                    cc["mountpoints"][mntname]["servername"] = server_name

    # Count workers per SKU.
    cntr = {}
    for mc in worker_machines:
        # Fix: for "low" priority, worker_machines holds bare hostname strings
        # (no "vmSize" key) — all spot workers use the configured worker size.
        if isinstance(mc, str):
            vm_sz = config["azure_cluster"]["worker_vm_size"]
        else:
            vm_sz = mc["vmSize"]
        cntr[vm_sz] = cntr.get(vm_sz, 0) + 1
    cc["worker_sku_cnt"] = cntr
    if "sku_mapping" in config:
        cc["sku_mapping"] = config["sku_mapping"]
        for sku in cc["worker_sku_cnt"]:
            # this means that the cluster deployed with this pipeline cannot be heterogeneous
            cc["gpu_type"] = cc["sku_mapping"].get(sku, {}).get('gpu-type', "None")
            break

    if output_file:
        print(yaml.dump(cc, default_flow_style=False))
        with open(output_file_name, 'w') as outfile:
            yaml.dump(cc, outfile, default_flow_style=False)
    return cc