in controller/pci-hyperv.c [3321:3538]
static int hv_pci_probe(struct hv_device *hdev,
const struct hv_vmbus_device_id *dev_id)
{
struct pci_host_bridge *bridge;
struct hv_pcibus_device *hbus;
u16 dom_req, dom;
char *name;
bool enter_d0_retry = true;
int ret;
/*
* hv_pcibus_device contains the hypercall arguments for retargeting in
* hv_irq_unmask(). Those must not cross a page boundary.
*/
BUILD_BUG_ON(sizeof(*hbus) > HV_HYP_PAGE_SIZE);
bridge = devm_pci_alloc_host_bridge(&hdev->device, 0);
if (!bridge)
return -ENOMEM;
/*
* With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural
* alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate
* a 4KB buffer that is guaranteed to be 4KB-aligned. Here the size and
* alignment of hbus is important because hbus's field
* retarget_msi_interrupt_params must not cross a 4KB page boundary.
*
* Here we prefer kzalloc to get_zeroed_page(), because a buffer
* allocated by the latter is not tracked and scanned by kmemleak, and
* hence kmemleak reports the pointer contained in the hbus buffer
* (i.e. the hpdev struct, which is created in new_pcichild_device() and
* is tracked by hbus->children) as memory leak (false positive).
*
* If the kernel doesn't have 59bb47985c1d, get_zeroed_page() *must* be
* used to allocate the hbus buffer and we can avoid the kmemleak false
* positive by using kmemleak_alloc() and kmemleak_free() to ask
* kmemleak to track and scan the hbus buffer.
*/
hbus = kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
if (!hbus)
return -ENOMEM;
hbus->bridge = bridge;
hbus->state = hv_pcibus_init;
hbus->wslot_res_allocated = -1;
/*
* The PCI bus "domain" is what is called "segment" in ACPI and other
* specs. Pull it from the instance ID, to get something usually
* unique. In rare cases of collision, we will find out another number
* not in use.
*
* Note that, since this code only runs in a Hyper-V VM, Hyper-V
* together with this guest driver can guarantee that (1) The only
* domain used by Gen1 VMs for something that looks like a physical
* PCI bus (which is actually emulated by the hypervisor) is domain 0.
* (2) There will be no overlap between domains (after fixing possible
* collisions) in the same VM.
*/
dom_req = hdev->dev_instance.b[5] << 8 | hdev->dev_instance.b[4];
dom = hv_get_dom_num(dom_req);
if (dom == HVPCI_DOM_INVALID) {
dev_err(&hdev->device,
"Unable to use dom# 0x%x or other numbers", dom_req);
ret = -EINVAL;
goto free_bus;
}
if (dom != dom_req)
dev_info(&hdev->device,
"PCI dom# 0x%x has collision, using 0x%x",
dom_req, dom);
hbus->bridge->domain_nr = dom;
#ifdef CONFIG_X86
hbus->sysdata.domain = dom;
#endif
hbus->hdev = hdev;
INIT_LIST_HEAD(&hbus->children);
INIT_LIST_HEAD(&hbus->dr_list);
spin_lock_init(&hbus->config_lock);
spin_lock_init(&hbus->device_list_lock);
spin_lock_init(&hbus->retarget_msi_interrupt_lock);
hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
hbus->bridge->domain_nr);
if (!hbus->wq) {
ret = -ENOMEM;
goto free_dom;
}
ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
hv_pci_onchannelcallback, hbus);
if (ret)
goto destroy_wq;
hv_set_drvdata(hdev, hbus);
ret = hv_pci_protocol_negotiation(hdev, pci_protocol_versions,
ARRAY_SIZE(pci_protocol_versions));
if (ret)
goto close;
ret = hv_allocate_config_window(hbus);
if (ret)
goto close;
hbus->cfg_addr = ioremap(hbus->mem_config->start,
PCI_CONFIG_MMIO_LENGTH);
if (!hbus->cfg_addr) {
dev_err(&hdev->device,
"Unable to map a virtual address for config space\n");
ret = -ENOMEM;
goto free_config;
}
name = kasprintf(GFP_KERNEL, "%pUL", &hdev->dev_instance);
if (!name) {
ret = -ENOMEM;
goto unmap;
}
hbus->fwnode = irq_domain_alloc_named_fwnode(name);
kfree(name);
if (!hbus->fwnode) {
ret = -ENOMEM;
goto unmap;
}
ret = hv_pcie_init_irq_domain(hbus);
if (ret)
goto free_fwnode;
retry:
ret = hv_pci_query_relations(hdev);
if (ret)
goto free_irq_domain;
ret = hv_pci_enter_d0(hdev);
/*
* In certain case (Kdump) the pci device of interest was
* not cleanly shut down and resource is still held on host
* side, the host could return invalid device status.
* We need to explicitly request host to release the resource
* and try to enter D0 again.
* Since the hv_pci_bus_exit() call releases structures
* of all its child devices, we need to start the retry from
* hv_pci_query_relations() call, requesting host to send
* the synchronous child device relations message before this
* information is needed in hv_send_resources_allocated()
* call later.
*/
if (ret == -EPROTO && enter_d0_retry) {
enter_d0_retry = false;
dev_err(&hdev->device, "Retrying D0 Entry\n");
/*
* Hv_pci_bus_exit() calls hv_send_resources_released()
* to free up resources of its child devices.
* In the kdump kernel we need to set the
* wslot_res_allocated to 255 so it scans all child
* devices to release resources allocated in the
* normal kernel before panic happened.
*/
hbus->wslot_res_allocated = 255;
ret = hv_pci_bus_exit(hdev, true);
if (ret == 0)
goto retry;
dev_err(&hdev->device,
"Retrying D0 failed with ret %d\n", ret);
}
if (ret)
goto free_irq_domain;
ret = hv_pci_allocate_bridge_windows(hbus);
if (ret)
goto exit_d0;
ret = hv_send_resources_allocated(hdev);
if (ret)
goto free_windows;
prepopulate_bars(hbus);
hbus->state = hv_pcibus_probed;
ret = create_root_hv_pci_bus(hbus);
if (ret)
goto free_windows;
return 0;
free_windows:
hv_pci_free_bridge_windows(hbus);
exit_d0:
(void) hv_pci_bus_exit(hdev, true);
free_irq_domain:
irq_domain_remove(hbus->irq_domain);
free_fwnode:
irq_domain_free_fwnode(hbus->fwnode);
unmap:
iounmap(hbus->cfg_addr);
free_config:
hv_free_config_window(hbus);
close:
vmbus_close(hdev->channel);
destroy_wq:
destroy_workqueue(hbus->wq);
free_dom:
hv_put_dom_num(hbus->bridge->domain_nr);
free_bus:
kfree(hbus);
return ret;
}