static int ne_setup_cpu_pool()

in drivers/virt/nitro_enclaves/ne_misc_dev.c [264:461]


static int ne_setup_cpu_pool(const char *ne_cpu_list)
{
	int core_id = 0;
	unsigned int cpu = 0;
	unsigned int sibling = 0;
	cpumask_var_t cpu_pool;
	cpumask_var_t cpu_added;
	unsigned int cpu_sibling = 0;
	unsigned int i = 0;
	int numa_node = -1;
	int rc = -EINVAL;

	if (!zalloc_cpumask_var(&cpu_pool, GFP_KERNEL))
		return -ENOMEM;

	if (!zalloc_cpumask_var(&cpu_added, GFP_KERNEL))
		return -ENOMEM;

	mutex_lock(&ne_cpu_pool.mutex);

	rc = cpulist_parse(ne_cpu_list, cpu_pool);
	if (rc < 0) {
		pr_err("%s: Error in cpulist parse [rc=%d]\n", ne_misc_dev.name, rc);

		goto free_pool_cpumask;
	}

	cpu = cpumask_any(cpu_pool);
	if (cpu >= nr_cpu_ids) {
		pr_err("%s: No CPUs available in CPU pool\n", ne_misc_dev.name);

		rc = -EINVAL;

		goto free_pool_cpumask;
	}

	/*
	 * Check if the CPUs are online, to further get info about them
	 * e.g. numa node, core id, siblings.
	 */
	for_each_cpu(cpu, cpu_pool)
		if (cpu_is_offline(cpu)) {
			pr_err("%s: CPU %d is offline, has to be online to get its metadata\n",
			       ne_misc_dev.name, cpu);

			rc = -EINVAL;

			goto free_pool_cpumask;
		}

	/*
	 * Check if the CPUs from the NE CPU pool are from the same NUMA node.
	 */
	for_each_cpu(cpu, cpu_pool)
		if (numa_node < 0) {
			numa_node = cpu_to_node(cpu);
			if (numa_node < 0) {
				pr_err("%s: Invalid NUMA node %d\n",
				       ne_misc_dev.name, numa_node);

				rc = -EINVAL;

				goto free_pool_cpumask;
			}
		} else {
			if (numa_node != cpu_to_node(cpu)) {
				pr_err("%s: CPUs with different NUMA nodes\n",
				       ne_misc_dev.name);

				rc = -EINVAL;

				goto free_pool_cpumask;
			}
		}

	/*
	 * Check if CPU 0 and its siblings are included in the provided CPU pool
	 * They should remain available for the primary / parent VM.
	 */
	if (cpumask_test_cpu(0, cpu_pool)) {
		pr_err("%s: CPU 0 has to remain available\n", ne_misc_dev.name);

		rc = -EINVAL;

		goto free_pool_cpumask;
	}

	for_each_cpu(cpu_sibling, topology_sibling_cpumask(0)) {
		if (cpumask_test_cpu(cpu_sibling, cpu_pool)) {
			pr_err("%s: CPU sibling %d for CPU 0 is in CPU pool\n",
			       ne_misc_dev.name, cpu_sibling);

			rc = -EINVAL;

			goto free_pool_cpumask;
		}
	}

	/*
	 * Check if CPU siblings are included in the provided CPU pool. The
	 * expectation is that full CPU cores are made available in the CPU pool
	 * for enclaves.
	 */
	for_each_cpu(cpu, cpu_pool) {
		for_each_cpu(cpu_sibling, topology_sibling_cpumask(cpu)) {
			if (!cpumask_test_cpu(cpu_sibling, cpu_pool)) {
				pr_err("%s: CPU %d is not in CPU pool\n",
				       ne_misc_dev.name, cpu_sibling);

				rc = -EINVAL;

				goto free_pool_cpumask;
			}
		}
	}

	/* Calculate the number of threads from a full CPU core. */
	cpu = cpumask_any(cpu_pool);
	for_each_cpu(cpu_sibling, topology_sibling_cpumask(cpu))
		ne_cpu_pool.nr_threads_per_core++;

	ne_cpu_pool.nr_parent_vm_cores = nr_cpu_ids / ne_cpu_pool.nr_threads_per_core;

	ne_cpu_pool.avail_threads_per_core = kcalloc(ne_cpu_pool.nr_parent_vm_cores,
						     sizeof(*ne_cpu_pool.avail_threads_per_core),
						     GFP_KERNEL);
	if (!ne_cpu_pool.avail_threads_per_core) {
		rc = -ENOMEM;

		goto free_pool_cpumask;
	}

	for (i = 0; i < ne_cpu_pool.nr_parent_vm_cores; i++)
		if (!zalloc_cpumask_var(&ne_cpu_pool.avail_threads_per_core[i], GFP_KERNEL)) {
			rc = -ENOMEM;

			goto free_cores_cpumask;
		}

	/*
	 * Split the NE CPU pool in threads per core to keep the CPU topology
	 * after offlining the CPUs.
	 */
	for_each_cpu(cpu, cpu_pool)
		if (!cpumask_test_cpu(cpu, cpu_added)) {
			for_each_cpu(sibling, topology_sibling_cpumask(cpu)) {
					cpumask_set_cpu(sibling, cpu_added);
					cpumask_set_cpu(sibling, ne_cpu_pool.avail_threads_per_core[core_id]);
			}
			core_id++;
		}

	/*
	 * CPUs that are given to enclave(s) should not be considered online
	 * by Linux anymore, as the hypervisor will degrade them to floating.
	 * The physical CPUs (full cores) are carved out of the primary / parent
	 * VM and given to the enclave VM. The same number of vCPUs would run
	 * on less pCPUs for the primary / parent VM.
	 *
	 * We offline them here, to not degrade performance and expose correct
	 * topology to Linux and user space.
	 */
	for_each_cpu(cpu, cpu_pool) {
		rc = remove_cpu(cpu);
		if (rc != 0) {
			pr_err("%s: CPU %d is not offlined [rc=%d]\n",
			       ne_misc_dev.name, cpu, rc);

			goto online_cpus;
		}
	}

	free_cpumask_var(cpu_pool);

	ne_cpu_pool.numa_node = numa_node;

	mutex_unlock(&ne_cpu_pool.mutex);

	return 0;

online_cpus:
	for_each_cpu(cpu, cpu_pool)
		add_cpu(cpu);
	for (i = 0; i < ne_cpu_pool.nr_parent_vm_cores; i++)
		cpumask_clear(ne_cpu_pool.avail_threads_per_core[i]);
free_cores_cpumask:
	for (i = 0; i < ne_cpu_pool.nr_parent_vm_cores; i++)
		free_cpumask_var(ne_cpu_pool.avail_threads_per_core[i]);
	kfree(ne_cpu_pool.avail_threads_per_core);
free_pool_cpumask:
	free_cpumask_var(cpu_pool);
	ne_cpu_pool.nr_parent_vm_cores = 0;
	ne_cpu_pool.nr_threads_per_core = 0;
	ne_cpu_pool.numa_node = -1;
	mutex_unlock(&ne_cpu_pool.mutex);

	return rc;
}