/* Copyright (c) 2019 Google Inc
 * Barret Rhoden <brho@cs.berkeley.edu>
 * See LICENSE for details.
 *
 * An arena for DMA-able memory and a 'pool' (slab/KC) for smaller objects.
 *
 * A DMA arena is a memory allocator returning two addresses for the same
 * memory: CPU/driver and device addresses, the latter of which is returned by
 * reference (dma_handle, below).  Driver code uses the CPU address and passes
 * the device address to the hardware.
 *
 * dma_phys_pages is the default dma_arena.  This returns kernel virtual
 * addresses for the CPU and host physical addresses for the device.  Other DMA
 * arenas can be in other address spaces, such as with device addresses being
 * behind an IOMMU.
 *
 * Each dma_arena provides a source arena which allocs the actual physical
 * memory, mapped in the device's address space (dma_addr_t), and a function
 * pointer to convert the dma_addr_t to a CPU address.  For example,
 * dma_phys_pages's *arena* sources from kpages (kernel addresses for physical
 * pages), and it uses alloc / free arena-funcs to convert those to its
 * dma_addr_t: physical addresses.  That is its allocator for physical memory in
 * the device's address space.  It also uses a function pointer (to_cpu_addr,
 * here dma_phys_pages_to_kaddr) to convert those to CPU/driver addresses.  The
 * fact that it converts from kpages to paddrs and back to kaddrs is an
 * internal implementation detail.
 * (One could imagine us changing base and kpages to allocate physical
 * addresses.  Either way, dma_arenas return device addresses.  Not a big deal.)
 *
 * Sizes and alignments: for now, all arenas return PGSIZE quantums, and all
 * allocations are naturally aligned, e.g. an alloc of two 4096-byte pages is
 * on an 8192-aligned boundary.  This is a convenience for Linux drivers, which
 * expect this from their DMA API.  Some drivers don't mention that they need
 * these sorts of guarantees, notably bnx2x.
 *
 * We often translate between physical and virtual addresses, and many arena
 * quantum / alignment guarantees do not survive that translation.  We can
 * maintain PGSIZE and lower powers-of-two alignments, but an odd alignment or
 * an alignment > PGSIZE may be lost.  Odd alignments will fail because the
 * upper bits of the address (i.e. the page address) change.  Alignments >
 * PGSIZE *may* fail, depending on the mapping: KERNBASE->PADDR will be OK
 * (it's at the max alignment for memory), but arbitrary virtual-to-physical
 * mappings can change the upper aligned bits.  If we want to maintain any of
 * these alignments, the onus is on the dma_arena, not the regular arena
 * allocator.
 */
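
/* A sketch of typical driver usage of the default dma_arena.  The buffer
 * size, variable names, and error handling below are illustrative, not part
 * of this file's API:
 *
 *	dma_addr_t ring_dma;
 *	void *ring;
 *
 *	ring = dma_arena_zalloc(&dma_phys_pages, 4 * PGSIZE, &ring_dma,
 *	                        MEM_WAIT);
 *	if (!ring)
 *		return -ENOMEM;
 *	// Program the device with ring_dma; the CPU reads/writes via ring.
 *	dma_arena_free(&dma_phys_pages, ring, ring_dma, 4 * PGSIZE);
 */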

#include <arena.h>
#include <dma.h>
#include <pmap.h>
#include <kmalloc.h>

/* This arena is largely a wrapper around kpages.  The arena does impose some
 * overhead: btags and function pointers for every allocation.  In return, we
 * get the tracking code from arenas, integration with other arena allocators,
 * xalloc, and maybe more flexibility. */
struct dma_arena dma_phys_pages;

static void *dma_phys_a(struct arena *a, size_t amt, int flags)
{
	void *kaddr = arena_alloc(a, amt, flags);

	/* The source alloc can fail (e.g. MEM_ATOMIC); don't PADDR(NULL). */
	return kaddr ? (void*)PADDR(kaddr) : NULL;
}

static void dma_phys_f(struct arena *a, void *obj, size_t amt)
{
	arena_free(a, KADDR((physaddr_t)obj), amt);
}

static void *dma_phys_pages_to_kaddr(struct dma_arena *da, physaddr_t paddr)
{
	return KADDR(paddr);
}

void dma_arena_init(void)
{
	__arena_create(&dma_phys_pages.arena, "dma_phys_pages", PGSIZE,
		       dma_phys_a, dma_phys_f, kpages_arena, 0);
	dma_phys_pages.to_cpu_addr = dma_phys_pages_to_kaddr;
}

void *dma_arena_alloc(struct dma_arena *da, size_t size, dma_addr_t *dma_handle,
		      int mem_flags)
{
	void *paddr;

	/* Linux's DMA API guarantees natural alignment: an allocation is
	 * aligned to the power-of-two order that covers its size, e.g. 9
	 * pages would be 16-page aligned.  The arena allocator only does
	 * quantum alignment: PGSIZE for da->arena. */
	if (size > da->arena.quantum)
		paddr = arena_xalloc(&da->arena, size, ROUNDUPPWR2(size), 0, 0,
				     NULL, NULL, mem_flags);
	else
		paddr = arena_alloc(&da->arena, size, mem_flags);
	if (!paddr)
		return NULL;
	*dma_handle = (dma_addr_t)paddr;
	return da->to_cpu_addr(da, (dma_addr_t)paddr);
}

void *dma_arena_zalloc(struct dma_arena *da, size_t size,
		       dma_addr_t *dma_handle, int mem_flags)
{
	void *vaddr = dma_arena_alloc(da, size, dma_handle, mem_flags);

	if (vaddr)
		memset(vaddr, 0, size);
	return vaddr;
}

void dma_arena_free(struct dma_arena *da, void *cpu_addr, dma_addr_t dma_handle,
		    size_t size)
{
	if (size > da->arena.quantum)
		arena_xfree(&da->arena, (void*)dma_handle, size);
	else
		arena_free(&da->arena, (void*)dma_handle, size);
}

/* DMA Pool allocator (Linux's interface), built on slabs/arenas.
 *
 * A dma_pool is an allocator for fixed-size objects of device memory,
 * ultimately sourced from a dma_arena, which provides device-addresses for
 * physical memory and cpu-addresses for driver code.
 *
 * It's just a slab/kmem cache allocator sourcing from the dma_arena's arena,
 * and applying the dma_arena's device-addr to cpu-addr translation.  Alignment
 * is trivially satisfied by the slab allocator.
 *
 * How do we ensure we do not cross a boundary?  I tried some crazy things, like
 * creating an intermediate arena per dma_pool, and having that arena source
 * with xalloc(nocross = boundary).  The issue with that was nocross <
 * source->quantum, among other things.
 *
 * The simplest thing is to just waste a little memory to guarantee the nocross
 * boundary is never crossed.  Here's the guts of it:
 *
 *	Any naturally aligned power-of-two allocation will not cross a
 *	boundary of greater or equal order.
 *
 * To make each allocation naturally aligned, we have to round up a bit.  This
 * could waste memory, but no more than 2x, similar to our arena free lists.
 * Considering most users end up with a power-of-two sized object, we're not
 * wasting anything.
 */

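/* A worked instance of the guarantee above (the numbers are illustrative):
 * say a driver creates a pool with size = 320, align = 32, boundary = 4096.
 * dma_pool_create() rounds the size up to 512 and bumps align to 512.  Every
 * object is then a 512-aligned, 512-byte chunk, so it never straddles a
 * 512-aligned address, and every 4096 boundary is also 512-aligned.  The cost
 * is 192 bytes of padding per object. */
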
struct dma_pool {
	struct kmem_cache	kc;
	struct dma_arena	*source;
};

struct dma_pool *dma_pool_create(const char *name, void *dev,
				 size_t size, size_t align, size_t boundary)
{
	struct dma_pool *dp;

	if (boundary) {
		if (!IS_PWR2(boundary) || !IS_PWR2(align))
			return NULL;
		if (boundary < align)
			return NULL;
		size = ALIGN(size, align);
		size = ROUNDUPPWR2(size);
		/* Subtle: consider size = 33, align = 16.  size rounds up to
		 * 64, so align must be 64, not 16, to keep the allocation
		 * naturally aligned. */
		align = size;
		/* The natural-alignment trick only avoids the boundary if the
		 * rounded-up object fits within it. */
		if (size > boundary)
			return NULL;
	}
	dp = kzmalloc(sizeof(struct dma_pool), MEM_WAIT);
	/* TODO: this will be device specific.  Assuming the default. */
	dp->source = &dma_phys_pages;
	/* We're sourcing directly from the dma_arena's arena. */
	__kmem_cache_create(&dp->kc, name, size, align, KMC_NOTOUCH,
			    &dp->source->arena, NULL, NULL, NULL);
	return dp;
}
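
/* A sketch of how a driver might use a dma_pool.  The pool name, sizes,
 * boundary, and NULL device pointer are illustrative (dev is currently
 * unused, per the TODO above):
 *
 *	struct dma_pool *dp;
 *	dma_addr_t desc_dma;
 *	void *desc;
 *
 *	dp = dma_pool_create("my_descs", NULL, 96, 16, 4096);
 *	if (!dp)
 *		return -ENOMEM;
 *	desc = dma_pool_zalloc(dp, MEM_WAIT, &desc_dma);
 *	// Hand desc_dma to the device; fill the descriptor through desc.
 *	dma_pool_free(dp, desc, desc_dma);
 *	dma_pool_destroy(dp);
 */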

void dma_pool_destroy(struct dma_pool *dp)
{
	__kmem_cache_destroy(&dp->kc);
	kfree(dp);
}

void *dma_pool_alloc(struct dma_pool *dp, int mem_flags, dma_addr_t *handle)
{
	void *paddr;

	paddr = kmem_cache_alloc(&dp->kc, mem_flags);
	if (!paddr)
		return NULL;
	*handle = (dma_addr_t)paddr;
	return dp->source->to_cpu_addr(dp->source, (physaddr_t)paddr);
}

void *dma_pool_zalloc(struct dma_pool *dp, int mem_flags, dma_addr_t *handle)
{
	void *ret = dma_pool_alloc(dp, mem_flags, handle);

	if (ret)
		memset(ret, 0, dp->kc.obj_size);
	return ret;
}

void dma_pool_free(struct dma_pool *dp, void *cpu_addr, dma_addr_t addr)
{
	kmem_cache_free(&dp->kc, (void*)addr);
}