modules/python/clusterloader2/network-load/network_load.py (219 lines of code) (raw):
import json
import os
import argparse
from datetime import datetime, timezone
from clusterloader2.utils import str2bool, parse_xml_to_json, run_cl2_command, get_measurement
# Default number of nodes grouped into one namespace.
DEFAULT_NODES_PER_NAMESPACE = 100

# Lower bound for a per-pod CPU request, in millicores.
CPU_REQUEST_LIMIT_MILLI = 1

# TODO: Remove the "aks" entries below once CL2 updates its provider name to "azure".

# Per-provider daemonset count (presumably daemonset pods per node -- confirm
# against the cluster setup before relying on it).
DAEMONSETS_PER_NODE = {
    "aws": 2,
    "azure": 6,
    "aks": 6,
}

# Per-provider fraction of a node's CPU that is treated as usable for workload
# pods; clouds reserve different amounts.
# AWS:   https://github.com/awslabs/amazon-eks-ami/blob/main/templates/al2/runtime/bootstrap.sh#L290
# Azure: https://learn.microsoft.com/en-us/azure/aks/node-resource-reservations#cpu-reservations
CPU_CAPACITY = {
    "aws": 0.94,
    "azure": 0.87,
    "aks": 0.87,
}
def configure_clusterloader2(
        override_file,
        operation_timeout,
        provider,
        deployment_recreation_count,
        cpu_per_node,
        node_count,
        fortio_servers_per_node,
        fortio_clients_per_node,
        fortio_client_queries_per_second,
        fortio_client_connections,
        fortio_namespaces,
        fortio_deployments_per_namespace,
        apply_fqdn_cnp):
    """Write the ClusterLoader2 override file for the network-load test.

    The file is written as one ``KEY: value`` line per override, then read
    back and printed to stdout so the effective overrides appear in the log.

    Args:
        override_file: Path of the override file to create or overwrite.
        operation_timeout: Value written to ``CL2_OPERATION_TIMEOUT``.
        provider: Cloud provider name. Accepted for interface compatibility;
            no override written here depends on it.
        deployment_recreation_count: Value for
            ``CL2_DEPLOYMENT_RECREATION_COUNT``.
        cpu_per_node: CPU per node. Accepted for interface compatibility;
            no override written here depends on it.
        node_count: Value for ``CL2_NODES``.
        fortio_servers_per_node: Value for ``CL2_FORTIO_SERVERS_PER_NODE``.
        fortio_clients_per_node: Value for ``CL2_FORTIO_CLIENTS_PER_NODE``.
        fortio_client_queries_per_second: Value for
            ``CL2_FORTIO_CLIENT_QUERIES_PER_SECOND``.
        fortio_client_connections: Value for ``CL2_FORTIO_CLIENT_CONNECTIONS``.
        fortio_namespaces: Value for ``CL2_FORTIO_NAMESPACES``.
        fortio_deployments_per_namespace: Value for
            ``CL2_FORTIO_DEPLOYMENTS_PER_NAMESPACE``.
        apply_fqdn_cnp: Value for ``CL2_APPLY_FQDN_CNP`` (creates Hubble DNS
            metrics).
    """
    # NOTE(review): an earlier revision derived a per-pod CPU request from
    # CPU_CAPACITY[provider], cpu_per_node and the per-node pod counts, but the
    # result was never written (CL2_FORTIO_POD_CPU is fixed at 10 below). That
    # dead computation raised ZeroDivisionError when both per-node pod counts
    # were 0 and KeyError for providers missing from the table, so it was
    # removed. Confirm whether the fixed value of 10 is intended.
    del provider, cpu_per_node

    overrides = [
        # generic config
        "CL2_GROUP_NAME: cilium-acns-network-load",
        f"CL2_OPERATION_TIMEOUT: {operation_timeout}",
        "CL2_API_SERVER_CALLS_PER_SECOND: 100",
        # repetition config
        f"CL2_DEPLOYMENT_RECREATION_COUNT: {deployment_recreation_count}",
        # scale logistics
        "CL2_POD_STARTUP_LATENCY_THRESHOLD: 3m",
        # topology config
        f"CL2_NODES: {node_count}",
        f"CL2_FORTIO_SERVERS_PER_NODE: {fortio_servers_per_node}",
        f"CL2_FORTIO_CLIENTS_PER_NODE: {fortio_clients_per_node}",
        f"CL2_FORTIO_CLIENT_QUERIES_PER_SECOND: {fortio_client_queries_per_second}",
        f"CL2_FORTIO_CLIENT_CONNECTIONS: {fortio_client_connections}",
        f"CL2_FORTIO_NAMESPACES: {fortio_namespaces}",
        f"CL2_FORTIO_DEPLOYMENTS_PER_NAMESPACE: {fortio_deployments_per_namespace}",
        "CL2_FORTIO_POD_CPU: 10",
        "CL2_FORTIO_POD_MEMORY: 50",
        # other test toggles
        # creates Hubble DNS metrics
        f"CL2_APPLY_FQDN_CNP: {apply_fqdn_cnp}",
        # prometheus scrape config
        "CL2_CILIUM_METRICS_ENABLED: true",
        "CL2_PROMETHEUS_SCRAPE_CILIUM_OPERATOR: true",
        "CL2_PROMETHEUS_SCRAPE_CILIUM_AGENT: true",
        "CL2_PROMETHEUS_SCRAPE_CILIUM_AGENT_HUBBLE: true",
        # prometheus server config
        "CL2_PROMETHEUS_TOLERATE_MASTER: true",
        "CL2_PROMETHEUS_MEMORY_LIMIT_FACTOR: 30.0",
        "CL2_PROMETHEUS_MEMORY_SCALE_FACTOR: 30.0",
        'CL2_PROMETHEUS_NODE_SELECTOR: "prometheus: \\"true\\""',
    ]

    with open(override_file, 'w', encoding='utf-8') as file:
        file.writelines(f"{line}\n" for line in overrides)

    # Read the file back so the log shows exactly what landed on disk.
    with open(override_file, 'r', encoding='utf-8') as file:
        print(f"Content of file {override_file}:\n{file.read()}")
def execute_clusterloader2(cl2_image, cl2_config_dir, cl2_report_dir, cl2_config_file, kubeconfig, provider):
    """Launch ClusterLoader2 via ``run_cl2_command``.

    The run is always started with overrides and Prometheus enabled; all
    other values are forwarded unchanged from the caller.
    """
    run_cl2_command(
        kubeconfig,
        cl2_image,
        cl2_config_dir,
        cl2_report_dir,
        provider,
        cl2_config_file=cl2_config_file,
        overrides=True,
        enable_prometheus=True,
    )
def collect_clusterloader2(
        cl2_report_dir,
        cloud_info,
        run_id,
        run_url,
        result_file,
        deployment_recreation_count,
        cpu_per_node,
        node_count,
        fortio_servers_per_node,
        fortio_clients_per_node,
        fortio_client_queries_per_second,
        fortio_client_connections,
        fortio_namespaces,
        fortio_deployments_per_namespace,
        apply_fqdn_cnp,
        test_type="default_config"
):
    """Flatten a ClusterLoader2 report directory into a JSON-lines result file.

    The overall status is taken from ``junit.xml``. Every file in
    ``cl2_report_dir`` that ``get_measurement`` recognises is parsed as JSON
    and emitted as one record per entry of its ``dataItems`` list, or as a
    single record holding the whole document when it has no ``dataItems``
    key. Each record carries the run metadata and test parameters passed in.

    Args:
        cl2_report_dir: Directory containing ``junit.xml`` and the
            measurement files.
        cloud_info: Cloud information (includes the provider).
        run_id: Run identifier stored on every record.
        run_url: Run URL stored on every record.
        result_file: Output path; its parent directory is created if needed.
        deployment_recreation_count, cpu_per_node, node_count,
        fortio_servers_per_node, fortio_clients_per_node,
        fortio_client_queries_per_second, fortio_client_connections,
        fortio_namespaces, fortio_deployments_per_namespace, apply_fqdn_cnp:
            Test parameters copied verbatim onto every record.
        test_type: Free-form label stored on every record. NOTE(review): the
            default here is "default_config" while the CLI default is
            "default-config" -- confirm which spelling consumers expect.

    Raises:
        Exception: If the junit report contains no test suites.
    """
    details = parse_xml_to_json(os.path.join(cl2_report_dir, "junit.xml"), indent=2)
    testsuites = json.loads(details)["testsuites"]
    if not testsuites:
        raise Exception(f"No testsuites found in the report! Raw data: {details}")

    # FIXME this is not working. always failure
    # NOTE(review): root cause unconfirmed. The comparison also accepts the
    # string "0" in case the failure count arrives as text rather than an int.
    status = "success" if testsuites[0]["failures"] in (0, "0") else "failure"

    template = {
        "timestamp": datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
        # includes provider
        "cloud_info": cloud_info,
        "run_id": run_id,
        "run_url": run_url,
        "test_type": test_type,
        "status": status,
        "group": None,
        "measurement": None,
        "result": None,
        # parameters
        "deployment_recreation_count": deployment_recreation_count,
        "cpu_per_node": cpu_per_node,
        "node_count": node_count,
        "fortio_servers_per_node": fortio_servers_per_node,
        "fortio_clients_per_node": fortio_clients_per_node,
        "fortio_client_queries_per_second": fortio_client_queries_per_second,
        "fortio_client_connections": fortio_client_connections,
        "fortio_namespaces": fortio_namespaces,
        "fortio_deployments_per_namespace": fortio_deployments_per_namespace,
        "apply_fqdn_cnp": apply_fqdn_cnp,
    }

    records = []
    for entry in os.listdir(cl2_report_dir):
        file_path = os.path.join(cl2_report_dir, entry)
        # Sub-directories cannot be opened as text files; skip them.
        if not os.path.isfile(file_path):
            continue

        print(f"Processing {file_path}")
        measurement, group_name = get_measurement(file_path)
        if not measurement:
            continue
        print(measurement, group_name)

        # Only recognised measurement files are opened and parsed.
        with open(file_path, 'r', encoding='utf-8') as report:
            data = json.load(report)

        if "dataItems" in data:
            items = data["dataItems"]
            if not items:
                print(f"No data items found in {file_path}")
                print(f"Data:\n{data}")
                continue
        else:
            # No dataItems key: the whole document is the single result.
            items = [data]

        for item in items:
            # The overridden keys already exist in the template, so the key
            # order of every record matches the template's.
            record = {**template, "group": group_name, "measurement": measurement, "result": item}
            records.append(json.dumps(record))

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    result_dir = os.path.dirname(result_file)
    if result_dir:
        os.makedirs(result_dir, exist_ok=True)
    with open(result_file, 'w', encoding='utf-8') as out:
        out.writelines(f"{record}\n" for record in records)
def _add_workload_arguments(subparser):
    """Register the workload-shape options shared by ``configure`` and ``collect``."""
    subparser.add_argument("--deployment-recreation-count", type=int, required=True, help="Number of times to recreate deployments")
    subparser.add_argument("--cpu-per-node", type=int, required=True, help="CPU per node")
    subparser.add_argument("--node-count", type=int, required=True, help="Number of nodes")
    subparser.add_argument("--fortio-servers-per-node", type=int, required=True, help="Number of Fortio servers per node")
    subparser.add_argument("--fortio-clients-per-node", type=int, required=True, help="Number of Fortio clients per node")
    subparser.add_argument("--fortio-client-queries-per-second", type=int, required=True, help="Queries per second for each Fortio client pod. NOT queries per second per connection")
    subparser.add_argument("--fortio-client-connections", type=int, required=True, help="Number of simultaneous connections for each Fortio client")
    subparser.add_argument("--fortio-namespaces", type=int, required=True, help="Number of namespaces, each with their own service. Fortio clients query servers in the same namespace. Be weary of integer division causing less pods than expected regarding this parameter, pods, and pods per node.")
    subparser.add_argument("--fortio-deployments-per-namespace", type=int, required=True, help="Number of Fortio server deployments (and number of client deployments) per service/partition. Be weary of integer division causing less pods than expected regarding this parameter, namespaces, pods, and pods per node.")
    subparser.add_argument("--apply-fqdn-cnp", type=str2bool, choices=[True, False], default=False, help="Apply CNP that will generate DNS metrics")


def main():
    """Parse the command line and dispatch to configure, execute or collect.

    When no sub-command is given the help text is printed instead of exiting
    silently.
    """
    parser = argparse.ArgumentParser(description="network-load test")
    subparsers = parser.add_subparsers(dest="command")

    # Sub-command for configure_clusterloader2
    parser_configure = subparsers.add_parser("configure", help="Override CL2 config file")
    parser_configure.add_argument("--cl2-override-file", type=str, required=True, help="Path to the overrides of CL2 config file")
    parser_configure.add_argument("--operation-timeout", type=str, required=True, help="Timeout before failing the scale up test")
    parser_configure.add_argument("--provider", type=str, required=True, help="Cloud provider name")
    _add_workload_arguments(parser_configure)

    # Sub-command for execute_clusterloader2
    parser_execute = subparsers.add_parser("execute", help="Execute scale up operation")
    parser_execute.add_argument("--cl2-image", type=str, required=True, help="Name of the CL2 image")
    parser_execute.add_argument("--cl2-config-dir", type=str, required=True, help="Path to the CL2 config directory")
    parser_execute.add_argument("--cl2-report-dir", type=str, required=True, help="Path to the CL2 report directory")
    parser_execute.add_argument("--cl2-config-file", type=str, required=True, help="Path to the CL2 config file")
    parser_execute.add_argument("--kubeconfig", type=str, required=True, help="Path to the kubeconfig file")
    parser_execute.add_argument("--provider", type=str, required=True, help="Cloud provider name")

    # Sub-command for collect_clusterloader2
    parser_collect = subparsers.add_parser("collect", help="Collect scale up data")
    parser_collect.add_argument("--cl2-report-dir", type=str, required=True, help="Path to the CL2 report directory")
    parser_collect.add_argument("--cloud-info", type=str, required=True, help="Cloud information")
    parser_collect.add_argument("--run-id", type=str, required=True, help="Run ID")
    parser_collect.add_argument("--run-url", type=str, required=True, help="Run URL")
    parser_collect.add_argument("--result-file", type=str, required=True, help="Path to the result file")
    parser_collect.add_argument("--test-type", type=str, default="default-config", help="Description of test type")
    _add_workload_arguments(parser_collect)

    args = parser.parse_args()

    if args.command == "configure":
        configure_clusterloader2(
            args.cl2_override_file,
            args.operation_timeout,
            args.provider,
            args.deployment_recreation_count,
            args.cpu_per_node,
            args.node_count,
            args.fortio_servers_per_node,
            args.fortio_clients_per_node,
            args.fortio_client_queries_per_second,
            args.fortio_client_connections,
            args.fortio_namespaces,
            args.fortio_deployments_per_namespace,
            args.apply_fqdn_cnp
        )
    elif args.command == "execute":
        execute_clusterloader2(
            args.cl2_image,
            args.cl2_config_dir,
            args.cl2_report_dir,
            args.cl2_config_file,
            args.kubeconfig,
            args.provider,
        )
    elif args.command == "collect":
        collect_clusterloader2(
            args.cl2_report_dir,
            args.cloud_info,
            args.run_id,
            args.run_url,
            args.result_file,
            args.deployment_recreation_count,
            args.cpu_per_node,
            args.node_count,
            args.fortio_servers_per_node,
            args.fortio_clients_per_node,
            args.fortio_client_queries_per_second,
            args.fortio_client_connections,
            args.fortio_namespaces,
            args.fortio_deployments_per_namespace,
            args.apply_fqdn_cnp,
            test_type=args.test_type,
        )
    else:
        # No sub-command supplied: show usage rather than doing nothing.
        parser.print_help()


if __name__ == "__main__":
    main()