/* Copyright (c) 2019 Google Inc
* Barret Rhoden <brho@cs.berkeley.edu>
* See LICENSE for details.
*
* An arena for DMA-able memory and a 'pool' (slab/KC) for smaller objects.
*
* A DMA arena is a memory allocator returning two addresses for the same
 * memory: CPU/driver and device addresses, the latter of which is returned by
* reference (dma_handle, below). Driver code uses the CPU address, and the
* driver passes the device address to the hardware.
*
* dma_phys_pages is the default dma_arena. This returns kernel virtual
* addresses for the CPU and host physical addresses for the device. Other DMA
* arenas can be in other address spaces, such as with device addresses being
* behind an IOMMU.
*
* Each dma_arena provides a source arena which allocs the actual physical
* memory, mapped in the device's address space (dma_addr_t), and a function
* pointer to convert the dma_addr_t to a CPU address. For example,
* dma_phys_pages's *arena* sources from kpages (kernel addresses for physical
* pages), and it uses alloc / free arena-funcs to convert those to its
* dma_addr_t: physical addresses. That is its allocator for physical memory in
* the device's address space. It also uses a function pointer, paddr_to_kaddr,
* to convert those to CPU/driver addresses. The fact that it converts from
* kpages to paddrs and back to kaddrs is an internal implementation detail.
* (One could imagine us changing base and kpages to allocate physical
* addresses. Either way, dma_arenas return device addresses. Not a big deal.)
*
* Sizes and alignments: for now, all arenas return PGSIZE quantums, and all
* allocations are naturally aligned, e.g. an alloc of two 4096 pages is on an
* 8192-aligned boundary. This is a convenience for Linux drivers, which expect
* this from their DMA API. Some drivers don't mention that they need these
* sorts of guarantees, notably bnx2x.
*
 * We often translate between physical and virtual addresses, and in doing so
 * many arena quantum / alignment guarantees go away. We can maintain PGSIZE
 * and lower powers-of-two alignments, but an odd alignment or an alignment
* > PGSIZE may go away. Odd alignments will fail because the upper bits of the
* address change (i.e. the page address). > PGSIZE alignments *may* fail,
* depending on the mapping. KERNBASE->PADDR will be OK (it's at the max
* alignment for memory), but arbitrary virtual-to-physical mappings can change
* the upper aligned bits. If we want to maintain any of these alignments, the
* onus is on the dma_arena, not the regular arena allocator.
*/
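/* A minimal usage sketch of the two-address model (hypothetical driver code:
 * 'dev', 'struct hw_ring', and the register write are made up; the dma_* and
 * dev_to_dma_arena() calls are the ones defined below):
 *
 *	struct dma_arena *da = dev_to_dma_arena(dev);
 *	struct hw_ring *ring;
 *	dma_addr_t ring_dma;
 *
 *	ring = dma_arena_zalloc(da, sizeof(struct hw_ring), &ring_dma,
 *	                        MEM_WAIT);
 *	ring->head = 0;				(CPU/driver address)
 *	hw_set_ring_base(dev, ring_dma);	(device address, given to the HW)
 *	...
 *	dma_arena_free(da, ring, ring_dma, sizeof(struct hw_ring));
 */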
#include <arena.h>
#include <dma.h>
#include <pmap.h>
#include <kmalloc.h>
#include <process.h>
#include <mm.h>
#include <umem.h>
/* This arena is largely a wrapper around kpages. The arena does impose some
* overhead: btags and function pointers for every allocation. In return, we
* get the tracking code from arenas, integration with other arena allocators,
* xalloc, and maybe more flexibility. */
struct dma_arena dma_phys_pages;
struct dma_arena *dev_to_dma_arena(struct device *d)
{
struct pci_device *pdev;
if (!d)
return &dma_phys_pages;
pdev = container_of(d, struct pci_device, linux_dev);
if (!pdev->proc_owner)
return &dma_phys_pages;
if (!pdev->proc_owner->user_pages) {
warn("Proc %d owns a device, but has no user_pages!",
pdev->proc_owner->pid);
return &dma_phys_pages;
}
return pdev->proc_owner->user_pages;
}
static void *dma_phys_a(struct arena *a, size_t amt, int flags)
{
	void *kaddr = arena_alloc(a, amt, flags);
	/* Don't PADDR() a failed (e.g. MEM_ATOMIC) alloc; PADDR panics on
	 * non-kernel addresses. */
	if (!kaddr)
		return NULL;
	return (void*)PADDR(kaddr);
}
static void dma_phys_f(struct arena *a, void *obj, size_t amt)
{
arena_free(a, KADDR((physaddr_t)obj), amt);
}
static void *dma_phys_pages_to_kaddr(struct dma_arena *da, physaddr_t paddr)
{
return KADDR(paddr);
}
void dma_arena_init(void)
{
__arena_create(&dma_phys_pages.arena, "dma_phys_pages", PGSIZE,
dma_phys_a, dma_phys_f, kpages_arena, 0);
dma_phys_pages.to_cpu_addr = dma_phys_pages_to_kaddr;
}
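/* A worked trace of dma_phys_pages as configured above (addresses are made-up
 * examples): when dma_phys_pages.arena needs memory, it imports a span via
 * dma_phys_a(), which allocs from kpages (a KVA, say KERNBASE + 0x5000) and
 * returns its PADDR() (0x5000). The arena therefore hands out physical
 * addresses: dma_arena_alloc() below returns one via *dma_handle and runs
 * to_cpu_addr(), i.e. KADDR(), to give the driver back a KVA. dma_phys_f()
 * reverses the import, KADDR()ing the span before returning it to kpages. */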
void *dma_arena_alloc(struct dma_arena *da, size_t size, dma_addr_t *dma_handle,
int mem_flags)
{
void *paddr;
	/* Linux's DMA API guarantees natural alignment: any multi-page
	 * allocation is aligned to its size rounded up to the next power of
	 * two, e.g. 9 pages would be 16-page aligned. The arena allocator
	 * only gives quantum alignment: PGSIZE for da->arena. */
if (size > da->arena.quantum)
paddr = arena_xalloc(&da->arena, size, ROUNDUPPWR2(size), 0, 0,
NULL, NULL, mem_flags);
else
paddr = arena_alloc(&da->arena, size, mem_flags);
if (!paddr)
return NULL;
*dma_handle = (dma_addr_t)paddr;
return da->to_cpu_addr(da, (dma_addr_t)paddr);
}
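/* A worked example of the alignment math above (made-up numbers): a 9-page
 * request (36864 bytes with 4096-byte pages) takes the xalloc path with
 * align = ROUNDUPPWR2(36864) = 65536, so the allocation lands on a 16-page
 * boundary, matching Linux's natural-alignment guarantee. */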
void *dma_arena_zalloc(struct dma_arena *da, size_t size,
dma_addr_t *dma_handle, int mem_flags)
{
void *vaddr = dma_arena_alloc(da, size, dma_handle, mem_flags);
if (vaddr)
memset(vaddr, 0, size);
return vaddr;
}
void dma_arena_free(struct dma_arena *da, void *cpu_addr, dma_addr_t dma_handle,
size_t size)
{
if (size > da->arena.quantum)
arena_xfree(&da->arena, (void*)dma_handle, size);
else
arena_free(&da->arena, (void*)dma_handle, size);
}
/* DMA Pool allocator (Linux's interface), built on slabs/arenas.
*
* A dma_pool is an allocator for fixed-size objects of device memory,
* ultimately sourced from a dma_arena, which provides device-addresses for
* physical memory and cpu-addresses for driver code.
*
* It's just a slab/kmem cache allocator sourcing from the dma_arena's arena,
* and applying the dma_arena's device-addr to cpu-addr translation. Alignment
* is trivially satisfied by the slab allocator.
*
* How do we ensure we do not cross a boundary? I tried some crazy things, like
* creating an intermediate arena per dma_pool, and having that arena source
* with xalloc(nocross = boundary). The issue with that was nocross <
* source->quantum, among other things.
*
* The simplest thing is to just waste a little memory to guarantee the nocross
* boundary is never crossed. Here's the guts of it:
*
* Any naturally aligned power-of-two allocation will not cross a
* boundary of greater or equal order.
*
* To make each allocation naturally aligned, we have to round up a bit. This
* could waste memory, but no more than 2x, similar to our arena free lists.
* Considering most users end up with a power-of-two sized object, we're not
* wasting anything.
*/
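/* A concrete (hypothetical) example of that guarantee: a caller asks for
 * size = 33, align = 16, boundary = 4096. dma_pool_create() below rounds size
 * up to 64 and sets align = 64, so every object occupies exactly one
 * 64-aligned, 64-byte slot. Since 4096 is a multiple of 64, every 4096-byte
 * boundary coincides with the start of some slot and can never fall strictly
 * inside an object. The cost is 64 - 33 = 31 wasted bytes per object, i.e.
 * less than 2x. */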
struct dma_pool {
struct kmem_cache kc;
struct dma_arena *source;
};
struct dma_pool *dma_pool_create(const char *name, struct device *dev,
size_t size, size_t align, size_t boundary)
{
struct dma_pool *dp;
	if (boundary) {
		if (!IS_PWR2(boundary) || !IS_PWR2(align))
			return NULL;
		if (boundary < align)
			return NULL;
		/* Natural alignment only avoids crossings of boundaries of
		 * greater or equal order, so the object must fit within one
		 * boundary (Linux's dma_pool_create rejects this case too). */
		if (boundary < size)
			return NULL;
		size = ALIGN(size, align);
		size = ROUNDUPPWR2(size);
		/* Subtle: consider s = 33, a = 16.  s -> 64.  a must be 64,
		 * not 16, to ensure natural alignment. */
		align = size;
	}
dp = kzmalloc(sizeof(struct dma_pool), MEM_WAIT);
dp->source = dev_to_dma_arena(dev);
/* We're sourcing directly from the dma_arena's arena. */
__kmem_cache_create(&dp->kc, name, size, align, KMC_NOTOUCH,
&dp->source->arena, NULL, NULL, NULL);
return dp;
}
void dma_pool_destroy(struct dma_pool *dp)
{
__kmem_cache_destroy(&dp->kc);
kfree(dp);
}
void *dma_pool_alloc(struct dma_pool *dp, int mem_flags, dma_addr_t *handle)
{
void *paddr;
paddr = kmem_cache_alloc(&dp->kc, mem_flags);
if (!paddr)
return NULL;
*handle = (dma_addr_t)paddr;
return dp->source->to_cpu_addr(dp->source, (physaddr_t)paddr);
}
void *dma_pool_zalloc(struct dma_pool *dp, int mem_flags, dma_addr_t *handle)
{
void *ret = dma_pool_alloc(dp, mem_flags, handle);
if (ret)
memset(ret, 0, dp->kc.obj_size);
return ret;
}
void dma_pool_free(struct dma_pool *dp, void *cpu_addr, dma_addr_t addr)
{
kmem_cache_free(&dp->kc, (void*)addr);
}
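/* A short usage sketch of the dma_pool API above, mirroring Linux's dma_pool
 * interface (the 'dev' pointer, 'struct tx_desc', and the chosen align /
 * boundary are hypothetical):
 *
 *	struct dma_pool *dp;
 *	struct tx_desc *desc;
 *	dma_addr_t desc_dma;
 *
 *	dp = dma_pool_create("tx_desc", dev, sizeof(struct tx_desc), 16, 4096);
 *	desc = dma_pool_zalloc(dp, MEM_WAIT, &desc_dma);
 *	(fill in desc via the CPU address, hand desc_dma to the device)
 *	dma_pool_free(dp, desc, desc_dma);
 *	dma_pool_destroy(dp);
 */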
static void *user_pages_a(struct arena *a, size_t amt, int flags)
{
struct dma_arena *da = container_of(a, struct dma_arena, arena);
struct proc *p = da->data;
void *uaddr;
	/* XXX: do we need a 'can't be munmapped by userspace' flag? If
	 * userspace trashes the mapping, it can make the kernel driver PF. */
uaddr = mmap(p, 0, amt, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_POPULATE | MAP_PRIVATE, -1, 0);
/* TODO: think about OOM for user dma arenas, and MEM_ flags. */
if (uaddr == MAP_FAILED) {
		warn("couldn't mmap %lu bytes, will probably panic", amt);
return NULL;
}
return uaddr;
}
static void user_pages_f(struct arena *a, void *obj, size_t amt)
{
struct dma_arena *da = container_of(a, struct dma_arena, arena);
struct proc *p = da->data;
munmap(p, (uintptr_t)obj, amt);
}
static void *user_addr_to_kaddr(struct dma_arena *da, physaddr_t uaddr)
{
/* Our caller needs to be running in the user's address space. We
* either need to pin the pages or handle page faults. We could use
* uva2kva(), but that only works for single pages. Handling contiguous
* pages would require mmapping a KVA-contig chunk or other acrobatics.
*/
return (void*)uaddr;
}
/* Ensures a DMA arena exists for the proc. No-op if it already exists. */
void setup_dma_arena(struct proc *p)
{
struct dma_arena *da;
char name[32];
bool exists = false;
/* lockless peek */
if (READ_ONCE(p->user_pages))
return;
da = kzmalloc(sizeof(struct dma_arena), MEM_WAIT);
snprintf(name, ARRAY_SIZE(name), "proc-%d", p->pid);
__arena_create(&da->arena, name, PGSIZE,
user_pages_a, user_pages_f, ARENA_SELF_SOURCE, 0);
da->to_cpu_addr = user_addr_to_kaddr;
da->data = p;
spin_lock_irqsave(&p->proc_lock);
if (p->user_pages)
exists = true;
else
WRITE_ONCE(p->user_pages, da);
spin_unlock_irqsave(&p->proc_lock);
if (exists) {
__arena_destroy(&da->arena);
kfree(da);
}
}
/* Must be called only when all users (slabs, allocs) are done and freed.
* Basically during __proc_free(). */
void teardown_dma_arena(struct proc *p)
{
struct dma_arena *da;
da = p->user_pages;
if (!da)
return;
p->user_pages = NULL;
__arena_destroy(&da->arena);
kfree(da);
}