static int vfio_dma_do_unmap()

in vfio_iommu_type1.c [1290:1454]


static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
			     struct vfio_iommu_type1_dma_unmap *unmap,
			     struct vfio_bitmap *bitmap)
{
	struct vfio_dma *dma, *dma_last = NULL;
	size_t unmapped = 0, pgsize;
	int ret = -EINVAL, retries = 0;
	unsigned long pgshift;
	dma_addr_t iova = unmap->iova;
	u64 size = unmap->size;
	bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL;
	bool invalidate_vaddr = unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR;
	struct rb_node *n, *first_n;

	mutex_lock(&iommu->lock);

	pgshift = __ffs(iommu->pgsize_bitmap);
	pgsize = (size_t)1 << pgshift;

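	/*
	 * Validate the request: the iova must be aligned to the minimum
	 * supported IOMMU page size; VFIO_DMA_UNMAP_FLAG_ALL requires both
	 * iova and size to be zero; otherwise the size must be non-zero,
	 * page-aligned, and must neither wrap the address space nor exceed
	 * SIZE_MAX.
	 */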
	if (iova & (pgsize - 1))
		goto unlock;

	if (unmap_all) {
		if (iova || size)
			goto unlock;
		size = U64_MAX;
	} else if (!size || size & (pgsize - 1) ||
		   iova + size - 1 < iova || size > SIZE_MAX) {
		goto unlock;
	}

	/* When dirty tracking is enabled, allow only min supported pgsize */
	if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
	    (!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) {
		goto unlock;
	}

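	/* The minimum IOMMU page size should never be larger than PAGE_SIZE */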
	WARN_ON((pgsize - 1) & PAGE_MASK);
again:
	/*
	 * vfio-iommu-type1 (v1) - User mappings were coalesced together to
	 * avoid tracking individual mappings.  This means that the granularity
	 * of the original mapping was lost and the user was allowed to attempt
	 * to unmap any range.  Depending on the contiguousness of physical
	 * memory and page sizes supported by the IOMMU, arbitrary unmaps may
	 * or may not have worked.  We only guaranteed unmap granularity
	 * matching the original mapping; even though it was untracked here,
	 * the original mappings are reflected in IOMMU mappings.  This
	 * resulted in a couple of unusual behaviors.  First, if a range is not
	 * able to be unmapped, e.g. a set of 4k pages that was mapped as a
	 * 2M hugepage into the IOMMU, the unmap ioctl returns success but with
	 * a zero-sized unmap.  Also, if an unmap request overlaps the first
	 * address of a hugepage, the IOMMU will unmap the entire hugepage.
	 * This also returns success and the returned unmap size reflects the
	 * actual size unmapped.
	 *
	 * We attempt to maintain compatibility with this "v1" interface, but
	 * we take control out of the hands of the IOMMU.  Therefore, an unmap
	 * request offset from the beginning of the original mapping will
	 * return success with a zero-sized unmap.  And an unmap request
	 * covering the first iova of a mapping will unmap the entire range.
	 *
	 * The v2 version of this interface intends to be more deterministic.
	 * Unmap requests must fully cover previous mappings.  Multiple
	 * mappings may still be unmapped by specifying large ranges, but there
	 * must not be any previous mappings bisected by the range.  An error
	 * will be returned if these conditions are not met.  The v2 interface
	 * will only return success and a size of zero if there were no
	 * mappings within the range.
	 */
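	/*
	 * v2: reject requests that would bisect a mapping.  Neither the
	 * first nor the last iova of the range may fall in the middle of
	 * an existing vfio_dma.
	 */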
	if (iommu->v2 && !unmap_all) {
		dma = vfio_find_dma(iommu, iova, 1);
		if (dma && dma->iova != iova)
			goto unlock;

		dma = vfio_find_dma(iommu, iova + size - 1, 0);
		if (dma && dma->iova + dma->size != iova + size)
			goto unlock;
	}

	ret = 0;
	n = first_n = vfio_find_dma_first_node(iommu, iova, size);

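	/* Walk every vfio_dma overlapping [iova, iova + size) in iova order */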
	while (n) {
		dma = rb_entry(n, struct vfio_dma, node);
		if (dma->iova >= iova + size)
			break;

		if (!iommu->v2 && iova > dma->iova)
			break;
		/*
		 * Only a task sharing the address space of the task that
		 * mapped this iova range is allowed to unmap it.
		 */
		if (dma->task->mm != current->mm)
			break;

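		/*
		 * VFIO_DMA_UNMAP_FLAG_VADDR does not remove the mapping; it
		 * only marks its host vaddr invalid so that userspace can
		 * later supply a new one with VFIO_DMA_MAP_FLAG_VADDR.  If
		 * any vfio_dma in the range is already invalid, undo the
		 * ones marked so far and fail.
		 */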
		if (invalidate_vaddr) {
			if (dma->vaddr_invalid) {
				struct rb_node *last_n = n;

				for (n = first_n; n != last_n; n = rb_next(n)) {
					dma = rb_entry(n,
						       struct vfio_dma, node);
					dma->vaddr_invalid = false;
					iommu->vaddr_invalid_count--;
				}
				ret = -EINVAL;
				unmapped = 0;
				break;
			}
			dma->vaddr_invalid = true;
			iommu->vaddr_invalid_count++;
			unmapped += dma->size;
			n = rb_next(n);
			continue;
		}

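		/*
		 * Pages in this range are still pinned by an mdev vendor
		 * driver, so the mapping cannot be torn down yet.  Drop the
		 * lock, ask the driver to unpin via the notifier below, and
		 * restart the walk.  Failing to make progress on the same
		 * vfio_dma after repeated retries is a fatal driver bug.
		 */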
		if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
			struct vfio_iommu_type1_dma_unmap nb_unmap;

			if (dma_last == dma) {
				BUG_ON(++retries > 10);
			} else {
				dma_last = dma;
				retries = 0;
			}

			nb_unmap.iova = dma->iova;
			nb_unmap.size = dma->size;

			/*
			 * Notify anyone (mdev vendor drivers) to invalidate and
			 * unmap iovas within the range we're about to unmap.
			 * Vendor drivers MUST unpin pages in response to an
			 * invalidation.
			 */
			mutex_unlock(&iommu->lock);
			blocking_notifier_call_chain(&iommu->notifier,
						    VFIO_IOMMU_NOTIFY_DMA_UNMAP,
						    &nb_unmap);
			mutex_lock(&iommu->lock);
			goto again;
		}

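		/*
		 * Transfer this vfio_dma's dirty bits into the user-supplied
		 * bitmap (offset relative to the requested iova) before the
		 * mapping and its tracking state are removed below.
		 */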
		if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
			ret = update_user_bitmap(bitmap->data, iommu, dma,
						 iova, pgsize);
			if (ret)
				break;
		}

		unmapped += dma->size;
		n = rb_next(n);
		vfio_remove_dma(iommu, dma);
	}

unlock:
	mutex_unlock(&iommu->lock);

	/* Report how much was unmapped */
	unmap->size = unmapped;

	return ret;
}
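
For context, here is a minimal userspace sketch of how this handler is typically reached through the VFIO_IOMMU_UNMAP_DMA ioctl. It is not taken from the file above: container_fd is assumed to be an open VFIO container already configured with the type1 IOMMU, unmap_iova_range() is a hypothetical helper name, and error handling is trimmed. On return, unmap.size carries back the number of bytes actually unmapped, which is what the function above writes just before returning.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Unmap [iova, iova + size) from a type1 VFIO container (sketch). */
static int unmap_iova_range(int container_fd, __u64 iova, __u64 size)
{
	struct vfio_iommu_type1_dma_unmap unmap;

	memset(&unmap, 0, sizeof(unmap));
	unmap.argsz = sizeof(unmap);
	unmap.flags = 0;	/* or VFIO_DMA_UNMAP_FLAG_ALL with iova = size = 0 */
	unmap.iova = iova;	/* must be aligned to the minimum IOMMU page size */
	unmap.size = size;	/* v2: must exactly cover existing mappings */

	if (ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap))
		return -1;

	/* unmap.size now reports how many bytes were actually unmapped */
	return 0;
}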