in vfio_iommu_type1.c [1290:1454]
static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
struct vfio_iommu_type1_dma_unmap *unmap,
struct vfio_bitmap *bitmap)
{
struct vfio_dma *dma, *dma_last = NULL;
size_t unmapped = 0, pgsize;
int ret = -EINVAL, retries = 0;
unsigned long pgshift;
dma_addr_t iova = unmap->iova;
u64 size = unmap->size;
bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL;
bool invalidate_vaddr = unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR;
struct rb_node *n, *first_n;
mutex_lock(&iommu->lock);
pgshift = __ffs(iommu->pgsize_bitmap);
pgsize = (size_t)1 << pgshift;
if (iova & (pgsize - 1))
goto unlock;
if (unmap_all) {
if (iova || size)
goto unlock;
size = U64_MAX;
} else if (!size || size & (pgsize - 1) ||
iova + size - 1 < iova || size > SIZE_MAX) {
goto unlock;
}
/* When dirty tracking is enabled, allow only min supported pgsize */
if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
(!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) {
goto unlock;
}
WARN_ON((pgsize - 1) & PAGE_MASK);
again:
/*
* vfio-iommu-type1 (v1) - User mappings were coalesced together to
* avoid tracking individual mappings. This means that the granularity
* of the original mapping was lost and the user was allowed to attempt
* to unmap any range. Depending on the contiguousness of physical
* memory and page sizes supported by the IOMMU, arbitrary unmaps may
* or may not have worked. We only guaranteed unmap granularity
* matching the original mapping; even though it was untracked here,
* the original mappings are reflected in IOMMU mappings. This
* resulted in a couple unusual behaviors. First, if a range is not
* able to be unmapped, ex. a set of 4k pages that was mapped as a
* 2M hugepage into the IOMMU, the unmap ioctl returns success but with
* a zero sized unmap. Also, if an unmap request overlaps the first
* address of a hugepage, the IOMMU will unmap the entire hugepage.
* This also returns success and the returned unmap size reflects the
* actual size unmapped.
*
* We attempt to maintain compatibility with this "v1" interface, but
* we take control out of the hands of the IOMMU. Therefore, an unmap
* request offset from the beginning of the original mapping will
* return success with zero sized unmap. And an unmap request covering
* the first iova of mapping will unmap the entire range.
*
* The v2 version of this interface intends to be more deterministic.
* Unmap requests must fully cover previous mappings. Multiple
* mappings may still be unmaped by specifying large ranges, but there
* must not be any previous mappings bisected by the range. An error
* will be returned if these conditions are not met. The v2 interface
* will only return success and a size of zero if there were no
* mappings within the range.
*/
if (iommu->v2 && !unmap_all) {
dma = vfio_find_dma(iommu, iova, 1);
if (dma && dma->iova != iova)
goto unlock;
dma = vfio_find_dma(iommu, iova + size - 1, 0);
if (dma && dma->iova + dma->size != iova + size)
goto unlock;
}
ret = 0;
n = first_n = vfio_find_dma_first_node(iommu, iova, size);
while (n) {
dma = rb_entry(n, struct vfio_dma, node);
if (dma->iova >= iova + size)
break;
if (!iommu->v2 && iova > dma->iova)
break;
/*
* Task with same address space who mapped this iova range is
* allowed to unmap the iova range.
*/
if (dma->task->mm != current->mm)
break;
if (invalidate_vaddr) {
if (dma->vaddr_invalid) {
struct rb_node *last_n = n;
for (n = first_n; n != last_n; n = rb_next(n)) {
dma = rb_entry(n,
struct vfio_dma, node);
dma->vaddr_invalid = false;
iommu->vaddr_invalid_count--;
}
ret = -EINVAL;
unmapped = 0;
break;
}
dma->vaddr_invalid = true;
iommu->vaddr_invalid_count++;
unmapped += dma->size;
n = rb_next(n);
continue;
}
if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
struct vfio_iommu_type1_dma_unmap nb_unmap;
if (dma_last == dma) {
BUG_ON(++retries > 10);
} else {
dma_last = dma;
retries = 0;
}
nb_unmap.iova = dma->iova;
nb_unmap.size = dma->size;
/*
* Notify anyone (mdev vendor drivers) to invalidate and
* unmap iovas within the range we're about to unmap.
* Vendor drivers MUST unpin pages in response to an
* invalidation.
*/
mutex_unlock(&iommu->lock);
blocking_notifier_call_chain(&iommu->notifier,
VFIO_IOMMU_NOTIFY_DMA_UNMAP,
&nb_unmap);
mutex_lock(&iommu->lock);
goto again;
}
if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
ret = update_user_bitmap(bitmap->data, iommu, dma,
iova, pgsize);
if (ret)
break;
}
unmapped += dma->size;
n = rb_next(n);
vfio_remove_dma(iommu, dma);
}
unlock:
mutex_unlock(&iommu->lock);
/* Report how much was unmapped */
unmap->size = unmapped;
return ret;
}