in kvm/cpuid.c [709:1081]
/*
 * Populate @array with the entries for CPUID leaf @function, including any
 * sub-leaves (indices) that this function walks for that leaf, and then
 * sanitize the register values in each entry.
 *
 * Every entry is obtained through do_host_cpuid() (defined elsewhere;
 * presumably it executes CPUID on the host and appends the result to @array
 * -- confirm against its definition).  A NULL return from do_host_cpuid() is
 * treated as "no room left in @array".
 *
 * Leaves without a dedicated case below, together with leaves 3, 5 and
 * 0xC0000002-0xC0000004, have all four registers zeroed.
 *
 * Return: 0 on success, -E2BIG if any do_host_cpuid() call returned NULL.
 */
static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
{
	struct kvm_cpuid_entry2 *entry;
	int r, i, max_idx;

	/* all calls to cpuid_count() should be made on the same cpu */
	get_cpu();

	/* Assume failure; only reaching the end of the switch clears this. */
	r = -E2BIG;

	entry = do_host_cpuid(array, function, 0);
	if (!entry)
		goto out;

	switch (function) {
	case 0:
		/* Limited to the highest leaf implemented in KVM. */
		entry->eax = min(entry->eax, 0x1fU);
		break;
	case 1:
		cpuid_entry_override(entry, CPUID_1_EDX);
		cpuid_entry_override(entry, CPUID_1_ECX);
		break;
	case 2:
		/*
		 * On ancient CPUs, function 2 entries are STATEFUL.  That is,
		 * CPUID(function=2, index=0) may return different results each
		 * time, with the least-significant byte in EAX enumerating the
		 * number of times software should do CPUID(2, 0).
		 *
		 * Modern CPUs, i.e. every CPU KVM has *ever* run on are less
		 * idiotic.  Intel's SDM states that EAX & 0xff "will always
		 * return 01H. Software should ignore this value and not
		 * interpret it as an informational descriptor", while AMD's
		 * APM states that CPUID(2) is reserved.
		 *
		 * WARN if a frankenstein CPU that supports virtualization and
		 * a stateful CPUID.0x2 is encountered.
		 */
		WARN_ON_ONCE((entry->eax & 0xff) > 1);
		break;
	/* functions 4 and 0x8000001d have additional index. */
	case 4:
	case 0x8000001d:
		/*
		 * Read entries until the cache type in the previous entry is
		 * zero, i.e. indicates an invalid entry.
		 */
		for (i = 1; entry->eax & 0x1f; ++i) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;
		}
		break;
	case 6: /* Thermal management */
		entry->eax = 0x4; /* allow ARAT */
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	/* function 7 has additional index. */
	case 7:
		entry->eax = min(entry->eax, 1u);
		cpuid_entry_override(entry, CPUID_7_0_EBX);
		cpuid_entry_override(entry, CPUID_7_ECX);
		cpuid_entry_override(entry, CPUID_7_EDX);

		/* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */
		if (entry->eax == 1) {
			entry = do_host_cpuid(array, function, 1);
			if (!entry)
				goto out;

			cpuid_entry_override(entry, CPUID_7_1_EAX);
			entry->ebx = 0;
			entry->ecx = 0;
			entry->edx = 0;
		}
		break;
	case 9:
		break;
	case 0xa: { /* Architectural Performance Monitoring */
		struct x86_pmu_capability cap;
		/*
		 * Start from all-zero registers.  Not every bitfield below is
		 * written on every path (anythread_deprecated is only set when
		 * the host reports a PMU version), and the unions live on the
		 * stack, so any field left untouched would otherwise carry an
		 * indeterminate value into the entry.
		 */
		union cpuid10_eax eax = { .full = 0 };
		union cpuid10_edx edx = { .full = 0 };

		perf_get_x86_pmu_capability(&cap);

		/*
		 * Only support guest architectural pmu on a host
		 * with architectural pmu.
		 */
		if (!cap.version)
			memset(&cap, 0, sizeof(cap));

		eax.split.version_id = min(cap.version, 2);
		eax.split.num_counters = cap.num_counters_gp;
		eax.split.bit_width = cap.bit_width_gp;
		eax.split.mask_length = cap.events_mask_len;

		edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
		edx.split.bit_width_fixed = cap.bit_width_fixed;
		if (cap.version)
			edx.split.anythread_deprecated = 1;
		edx.split.reserved1 = 0;
		edx.split.reserved2 = 0;

		entry->eax = eax.full;
		entry->ebx = cap.events_mask;
		entry->ecx = 0;
		entry->edx = edx.full;
		break;
	}
	/*
	 * Per Intel's SDM, the 0x1f is a superset of 0xb,
	 * thus they can be handled by common code.
	 */
	case 0x1f:
	case 0xb:
		/*
		 * Populate entries until the level type (ECX[15:8]) of the
		 * previous entry is zero.  Note, CPUID EAX.{0x1f,0xb}.0 is
		 * the starting entry, filled by the primary do_host_cpuid().
		 */
		for (i = 1; entry->ecx & 0xff00; ++i) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;
		}
		break;
	case 0xd: {
		/*
		 * XCR0-managed features that are both supported by KVM and
		 * reported by xstate_get_guest_group_perm().  The same mask
		 * must drive the feature bits, the reported save-area sizes
		 * and the sub-leaf enumeration; otherwise the sizes and
		 * sub-leaves would describe components whose feature bits
		 * were masked out of EAX/EDX.
		 */
		u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm();

		entry->eax &= permitted_xcr0;
		entry->ebx = xstate_required_size(permitted_xcr0, false);
		entry->ecx = entry->ebx;
		entry->edx &= permitted_xcr0 >> 32;
		if (!permitted_xcr0)
			break;

		entry = do_host_cpuid(array, function, 1);
		if (!entry)
			goto out;

		cpuid_entry_override(entry, CPUID_D_1_EAX);
		if (entry->eax & (F(XSAVES)|F(XSAVEC)))
			entry->ebx = xstate_required_size(permitted_xcr0 | supported_xss,
							  true);
		else {
			WARN_ON_ONCE(supported_xss != 0);
			entry->ebx = 0;
		}
		entry->ecx &= supported_xss;
		entry->edx &= supported_xss >> 32;

		for (i = 2; i < 64; ++i) {
			bool s_state;

			if (permitted_xcr0 & BIT_ULL(i))
				s_state = false;
			else if (supported_xss & BIT_ULL(i))
				s_state = true;
			else
				continue;

			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;

			/*
			 * The supported check above should have filtered out
			 * invalid sub-leafs.  Only valid sub-leafs should
			 * reach this point, and they should have a non-zero
			 * save state size.  Furthermore, check whether the
			 * processor agrees with permitted_xcr0/supported_xss
			 * on whether this is an XCR0- or IA32_XSS-managed area.
			 */
			if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) {
				/* Drop the entry that was just added. */
				--array->nent;
				continue;
			}
			entry->edx = 0;
		}
		break;
	}
	case 0x12:
		/* Intel SGX */
		if (!kvm_cpu_cap_has(X86_FEATURE_SGX)) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
			break;
		}

		/*
		 * Index 0: Sub-features, MISCSELECT (a.k.a extended features)
		 * and max enclave sizes.   The SGX sub-features and MISCSELECT
		 * are restricted by kernel and KVM capabilities (like most
		 * feature flags), while enclave size is unrestricted.
		 */
		cpuid_entry_override(entry, CPUID_12_EAX);
		entry->ebx &= SGX_MISC_EXINFO;

		entry = do_host_cpuid(array, function, 1);
		if (!entry)
			goto out;

		/*
		 * Index 1: SECS.ATTRIBUTES.  ATTRIBUTES are restricted a la
		 * feature flags.  Advertise all supported flags, including
		 * privileged attributes that require explicit opt-in from
		 * userspace.  ATTRIBUTES.XFRM is not adjusted as userspace is
		 * expected to derive it from supported XCR0.
		 */
		entry->eax &= SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT |
			      SGX_ATTR_PROVISIONKEY | SGX_ATTR_EINITTOKENKEY |
			      SGX_ATTR_KSS;
		entry->ebx &= 0;
		break;
	/* Intel PT */
	case 0x14:
		if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
			break;
		}

		/* EAX of index 0 is used as the highest sub-leaf index. */
		for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) {
			if (!do_host_cpuid(array, function, i))
				goto out;
		}
		break;
	/* Intel AMX TILE */
	case 0x1d:
		if (!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE)) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
			break;
		}

		/* EAX of index 0 is used as the highest sub-leaf index. */
		for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) {
			if (!do_host_cpuid(array, function, i))
				goto out;
		}
		break;
	case 0x1e: /* TMUL information */
		if (!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE)) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
			break;
		}
		break;
	case KVM_CPUID_SIGNATURE: {
		/* The signature is returned as three 32-bit words in EBX/ECX/EDX. */
		const u32 *sigptr = (const u32 *)KVM_SIGNATURE;

		entry->eax = KVM_CPUID_FEATURES;
		entry->ebx = sigptr[0];
		entry->ecx = sigptr[1];
		entry->edx = sigptr[2];
		break;
	}
	case KVM_CPUID_FEATURES:
		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
			     (1 << KVM_FEATURE_ASYNC_PF) |
			     (1 << KVM_FEATURE_PV_EOI) |
			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
			     (1 << KVM_FEATURE_PV_UNHALT) |
			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
			     (1 << KVM_FEATURE_PV_SEND_IPI) |
			     (1 << KVM_FEATURE_POLL_CONTROL) |
			     (1 << KVM_FEATURE_PV_SCHED_YIELD) |
			     (1 << KVM_FEATURE_ASYNC_PF_INT);

		/* Steal time is only advertised when sched_info_on() is true. */
		if (sched_info_on())
			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	case 0x80000000:
		/* Cap the highest extended leaf at 0x8000001f. */
		entry->eax = min(entry->eax, 0x8000001f);
		break;
	case 0x80000001:
		cpuid_entry_override(entry, CPUID_8000_0001_EDX);
		cpuid_entry_override(entry, CPUID_8000_0001_ECX);
		break;
	case 0x80000006:
		/* L2 cache and TLB: pass through host info. */
		break;
	case 0x80000007: /* Advanced power management */
		/* invariant TSC is CPUID.80000007H:EDX[8] */
		entry->edx &= (1 << 8);
		/* mask against host */
		entry->edx &= boot_cpu_data.x86_power;
		entry->eax = entry->ebx = entry->ecx = 0;
		break;
	case 0x80000008: {
		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
		unsigned phys_as = entry->eax & 0xff;

		/*
		 * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
		 * the guest operates in the same PA space as the host, i.e.
		 * reductions in MAXPHYADDR for memory encryption affect shadow
		 * paging, too.
		 *
		 * If TDP is enabled but an explicit guest MAXPHYADDR is not
		 * provided, use the raw bare metal MAXPHYADDR as reductions to
		 * the HPAs do not affect GPAs.
		 */
		if (!tdp_enabled)
			g_phys_as = boot_cpu_data.x86_phys_bits;
		else if (!g_phys_as)
			g_phys_as = phys_as;

		entry->eax = g_phys_as | (virt_as << 8);
		entry->edx = 0;
		cpuid_entry_override(entry, CPUID_8000_0008_EBX);
		break;
	}
	case 0x8000000A:
		if (!kvm_cpu_cap_has(X86_FEATURE_SVM)) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
			break;
		}
		entry->eax = 1; /* SVM revision 1 */
		entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
				   ASID emulation to nested SVM */
		entry->ecx = 0; /* Reserved */
		cpuid_entry_override(entry, CPUID_8000_000A_EDX);
		break;
	case 0x80000019:
		entry->ecx = entry->edx = 0;
		break;
	case 0x8000001a:
	case 0x8000001e:
		break;
	case 0x8000001F:
		if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		} else {
			cpuid_entry_override(entry, CPUID_8000_001F_EAX);

			/*
			 * Enumerate '0' for "PA bits reduction", the adjusted
			 * MAXPHYADDR is enumerated directly (see 0x80000008).
			 */
			entry->ebx &= ~GENMASK(11, 6);
		}
		break;
	/* Add support for Centaur's CPUID instruction. */
	case 0xC0000000:
		/* Just support up to 0xC0000004 now. */
		entry->eax = min(entry->eax, 0xC0000004);
		break;
	case 0xC0000001:
		cpuid_entry_override(entry, CPUID_C000_0001_EDX);
		break;
	case 3: /* Processor serial number */
	case 5: /* MONITOR/MWAIT */
	case 0xC0000002:
	case 0xC0000003:
	case 0xC0000004:
	default:
		/* Not exposed: report all-zero registers. */
		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;
	}

	r = 0;

out:
	put_cpu();

	return r;
}