static void hypervisor_xcall_deliver()

in kernel/smp_64.c [645:782]


static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int this_cpu, tot_cpus, prev_sent, i, rem;
	int usec_wait, retries, tot_retries;
	u16 first_cpu = 0xffff;
	unsigned long xc_rcvd = 0;
	unsigned long status;
	int ecpuerror_id = 0;
	int enocpu_id = 0;
	u16 *cpu_list;
	u16 cpu;

	this_cpu = smp_processor_id();
	cpu_list = __va(tb->cpu_list_pa);
	usec_wait = cnt * MONDO_USEC_WAIT_MIN;
	if (usec_wait > MONDO_USEC_WAIT_MAX)
		usec_wait = MONDO_USEC_WAIT_MAX;
	retries = tot_retries = 0;
	tot_cpus = cnt;
	prev_sent = 0;

	do {
		int n_sent, mondo_delivered, target_cpu_busy;

		status = sun4v_cpu_mondo_send(cnt,
					      tb->cpu_list_pa,
					      tb->cpu_mondo_block_pa);

		/* HV_EOK means all cpus received the xcall, we're done.  */
		if (likely(status == HV_EOK))
			goto xcall_done;

		/* If not these non-fatal errors, panic */
		if (unlikely((status != HV_EWOULDBLOCK) &&
			(status != HV_ECPUERROR) &&
			(status != HV_ENOCPU)))
			goto fatal_errors;

		/* First, see if we made any forward progress.
		 *
		 * Go through the cpu_list, count the target cpus that have
		 * received our mondo (n_sent), and those that did not (rem).
		 * Re-pack cpu_list with the cpus remain to be retried in the
		 * front - this simplifies tracking the truly stalled cpus.
		 *
		 * The hypervisor indicates successful sends by setting
		 * cpu list entries to the value 0xffff.
		 *
		 * EWOULDBLOCK means some target cpus did not receive the
		 * mondo and retry usually helps.
		 *
		 * ECPUERROR means at least one target cpu is in error state,
		 * it's usually safe to skip the faulty cpu and retry.
		 *
		 * ENOCPU means one of the target cpu doesn't belong to the
		 * domain, perhaps offlined which is unexpected, but not
		 * fatal and it's okay to skip the offlined cpu.
		 */
		rem = 0;
		n_sent = 0;
		for (i = 0; i < cnt; i++) {
			cpu = cpu_list[i];
			if (likely(cpu == 0xffff)) {
				n_sent++;
			} else if ((status == HV_ECPUERROR) &&
				(sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
				ecpuerror_id = cpu + 1;
			} else if (status == HV_ENOCPU && !cpu_online(cpu)) {
				enocpu_id = cpu + 1;
			} else {
				cpu_list[rem++] = cpu;
			}
		}

		/* No cpu remained, we're done. */
		if (rem == 0)
			break;

		/* Otherwise, update the cpu count for retry. */
		cnt = rem;

		/* Record the overall number of mondos received by the
		 * first of the remaining cpus.
		 */
		if (first_cpu != cpu_list[0]) {
			first_cpu = cpu_list[0];
			xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
		}

		/* Was any mondo delivered successfully? */
		mondo_delivered = (n_sent > prev_sent);
		prev_sent = n_sent;

		/* or, was any target cpu busy processing other mondos? */
		target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
		xc_rcvd = CPU_MONDO_COUNTER(first_cpu);

		/* Retry count is for no progress. If we're making progress,
		 * reset the retry count.
		 */
		if (likely(mondo_delivered || target_cpu_busy)) {
			tot_retries += retries;
			retries = 0;
		} else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
			goto fatal_mondo_timeout;
		}

		/* Delay a little bit to let other cpus catch up on
		 * their cpu mondo queue work.
		 */
		if (!mondo_delivered)
			udelay(usec_wait);

		retries++;
	} while (1);

xcall_done:
	if (unlikely(ecpuerror_id > 0)) {
		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
		       this_cpu, ecpuerror_id - 1);
	} else if (unlikely(enocpu_id > 0)) {
		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
		       this_cpu, enocpu_id - 1);
	}
	return;

fatal_errors:
	/* fatal errors include bad alignment, etc */
	pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
	       this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
	panic("Unexpected SUN4V mondo error %lu\n", status);

fatal_mondo_timeout:
	/* some cpus being non-responsive to the cpu mondo */
	pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
	       this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
	panic("SUN4V mondo timeout panic\n");
}