in platforms/pseries/iommu.c [1219:1487]
static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
{
int len = 0, ret;
int max_ram_len = order_base_2(ddw_memory_hotplug_max());
struct ddw_query_response query;
struct ddw_create_response create;
int page_shift;
u64 win_addr;
const char *win_name;
struct device_node *dn;
u32 ddw_avail[DDW_APPLICABLE_SIZE];
struct dma_win *window;
struct property *win64;
struct failed_ddw_pdn *fpdn;
bool default_win_removed = false, direct_mapping = false;
bool pmem_present;
struct pci_dn *pci = PCI_DN(pdn);
struct iommu_table *tbl = pci->table_group->tables[0];
dn = of_find_node_by_type(NULL, "ibm,pmemory");
pmem_present = dn != NULL;
of_node_put(dn);
mutex_lock(&dma_win_init_mutex);
if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) {
direct_mapping = (len >= max_ram_len);
goto out_unlock;
}
/*
* If we already went through this for a previous function of
* the same device and failed, we don't want to muck with the
* DMA window again, as it will race with in-flight operations
* and can lead to EEHs. The above mutex protects access to the
* list.
*/
list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) {
if (fpdn->pdn == pdn)
goto out_unlock;
}
/*
* the ibm,ddw-applicable property holds the tokens for:
* ibm,query-pe-dma-window
* ibm,create-pe-dma-window
* ibm,remove-pe-dma-window
* for the given node in that order.
* the property is actually in the parent, not the PE
*/
ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
&ddw_avail[0], DDW_APPLICABLE_SIZE);
if (ret)
goto out_failed;
/*
* Query if there is a second window of size to map the
* whole partition. Query returns number of windows, largest
* block assigned to PE (partition endpoint), and two bitmasks
* of page sizes: supported and supported for migrate-dma.
*/
dn = pci_device_to_OF_node(dev);
ret = query_ddw(dev, ddw_avail, &query, pdn);
if (ret != 0)
goto out_failed;
/*
* If there is no window available, remove the default DMA window,
* if it's present. This will make all the resources available to the
* new DDW window.
* If anything fails after this, we need to restore it, so also check
* for extensions presence.
*/
if (query.windows_available == 0) {
struct property *default_win;
int reset_win_ext;
/* DDW + IOMMU on single window may fail if there is any allocation */
if (iommu_table_in_use(tbl)) {
dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
goto out_failed;
}
default_win = of_find_property(pdn, "ibm,dma-window", NULL);
if (!default_win)
goto out_failed;
reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL);
if (reset_win_ext)
goto out_failed;
remove_dma_window(pdn, ddw_avail, default_win);
default_win_removed = true;
/* Query again, to check if the window is available */
ret = query_ddw(dev, ddw_avail, &query, pdn);
if (ret != 0)
goto out_failed;
if (query.windows_available == 0) {
/* no windows are available for this device. */
dev_dbg(&dev->dev, "no free dynamic windows");
goto out_failed;
}
}
page_shift = iommu_get_page_shift(query.page_size);
if (!page_shift) {
dev_dbg(&dev->dev, "no supported page size in mask %x",
query.page_size);
goto out_failed;
}
/*
* The "ibm,pmemory" can appear anywhere in the address space.
* Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
* for the upper limit and fallback to max RAM otherwise but this
* disables device::dma_ops_bypass.
*/
len = max_ram_len;
if (pmem_present) {
if (query.largest_available_block >=
(1ULL << (MAX_PHYSMEM_BITS - page_shift)))
len = MAX_PHYSMEM_BITS;
else
dev_info(&dev->dev, "Skipping ibm,pmemory");
}
/* check if the available block * number of ptes will map everything */
if (query.largest_available_block < (1ULL << (len - page_shift))) {
dev_dbg(&dev->dev,
"can't map partition max 0x%llx with %llu %llu-sized pages\n",
1ULL << len,
query.largest_available_block,
1ULL << page_shift);
len = order_base_2(query.largest_available_block << page_shift);
win_name = DMA64_PROPNAME;
} else {
direct_mapping = !default_win_removed ||
(len == MAX_PHYSMEM_BITS) ||
(!pmem_present && (len == max_ram_len));
win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME;
}
ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
if (ret != 0)
goto out_failed;
dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
create.liobn, dn);
win_addr = ((u64)create.addr_hi << 32) | create.addr_lo;
win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len);
if (!win64) {
dev_info(&dev->dev,
"couldn't allocate property, property name, or value\n");
goto out_remove_win;
}
ret = of_add_property(pdn, win64);
if (ret) {
dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d",
pdn, ret);
goto out_free_prop;
}
window = ddw_list_new_entry(pdn, win64->value);
if (!window)
goto out_del_prop;
if (direct_mapping) {
/* DDW maps the whole partition, so enable direct DMA mapping */
ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
win64->value, tce_setrange_multi_pSeriesLP_walk);
if (ret) {
dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
dn, ret);
/* Make sure to clean DDW if any TCE was set*/
clean_dma_window(pdn, win64->value);
goto out_del_list;
}
} else {
struct iommu_table *newtbl;
int i;
unsigned long start = 0, end = 0;
for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) {
const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM;
/* Look for MMIO32 */
if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) {
start = pci->phb->mem_resources[i].start;
end = pci->phb->mem_resources[i].end;
break;
}
}
/* New table for using DDW instead of the default DMA window */
newtbl = iommu_pseries_alloc_table(pci->phb->node);
if (!newtbl) {
dev_dbg(&dev->dev, "couldn't create new IOMMU table\n");
goto out_del_list;
}
iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr,
1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops);
iommu_init_table(newtbl, pci->phb->node, start, end);
pci->table_group->tables[1] = newtbl;
/* Keep default DMA window stuct if removed */
if (default_win_removed) {
tbl->it_size = 0;
vfree(tbl->it_map);
tbl->it_map = NULL;
}
set_iommu_table_base(&dev->dev, newtbl);
}
spin_lock(&dma_win_list_lock);
list_add(&window->list, &dma_win_list);
spin_unlock(&dma_win_list_lock);
dev->dev.archdata.dma_offset = win_addr;
goto out_unlock;
out_del_list:
kfree(window);
out_del_prop:
of_remove_property(pdn, win64);
out_free_prop:
kfree(win64->name);
kfree(win64->value);
kfree(win64);
out_remove_win:
/* DDW is clean, so it's ok to call this directly. */
__remove_dma_window(pdn, ddw_avail, create.liobn);
out_failed:
if (default_win_removed)
reset_dma_window(dev, pdn);
fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
if (!fpdn)
goto out_unlock;
fpdn->pdn = pdn;
list_add(&fpdn->list, &failed_ddw_pdn_list);
out_unlock:
mutex_unlock(&dma_win_init_mutex);
/*
* If we have persistent memory and the window size is only as big
* as RAM, then we failed to create a window to cover persistent
* memory and need to set the DMA limit.
*/
if (pmem_present && direct_mapping && len == max_ram_len)
dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len);
return direct_mapping;
}