/*
 * Coherent per-device memory handling.
 */
#define pr_fmt(fmt) "%s:%d: " fmt, __func__, __LINE__

#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/dma-attrs.h>
#include <linux/dma-contiguous.h>
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <asm/cacheflush.h>
struct heap_info {
        char *name;
        /* number of devices pointed by devs */
        unsigned int num_devs;
        /* devs to manage cma/coherent memory allocs, if resize allowed */
        struct device *devs;
        /* device to allocate memory from cma */
        struct device *cma_dev;
        /* lock to synchronise heap resizing */
        struct mutex resize_lock;
        /* CMA chunk size if resize supported */
        size_t cma_chunk_size;
        /* current heap base */
        phys_addr_t base;
        /* current heap length */
        size_t len;
        /* CMA region base */
        phys_addr_t cma_base;
        /* CMA region length */
        size_t cma_len;
        size_t rem_chunk_size;
        struct dentry *dma_debug_root;
        int (*update_resize_cfg)(phys_addr_t, size_t);
};
#define DMA_RESERVED_COUNT 8
static struct dma_coherent_reserved {
        const struct device *dev;
} dma_coherent_reserved[DMA_RESERVED_COUNT];

static unsigned dma_coherent_reserved_count;
#ifdef CONFIG_ARM_DMA_IOMMU_ALIGNMENT
#define DMA_BUF_ALIGNMENT CONFIG_ARM_DMA_IOMMU_ALIGNMENT
#else
#define DMA_BUF_ALIGNMENT 8
#endif
struct dma_coherent_mem {
        void            *virt_base;
        dma_addr_t      device_base;
        unsigned long   pfn_base;
        int             size;
        int             flags;
        unsigned long   *bitmap;
};
static bool dma_is_coherent_dev(struct device *dev)
{
        int i;
        struct dma_coherent_reserved *r = dma_coherent_reserved;

        for (i = 0; i < dma_coherent_reserved_count; i++, r++) {
                if (dev == r->dev)
                        return true;
        }
        return false;
}
static void dma_debugfs_init(struct device *dev, struct heap_info *heap)
{
        if (!heap->dma_debug_root) {
                heap->dma_debug_root = debugfs_create_dir(dev_name(dev), NULL);
                if (IS_ERR_OR_NULL(heap->dma_debug_root)) {
                        dev_err(dev, "couldn't create debug files\n");
                        return;
                }
        }

        debugfs_create_x32("base", S_IRUGO,
                heap->dma_debug_root, (u32 *)&heap->base);
        debugfs_create_x32("size", S_IRUGO,
                heap->dma_debug_root, (u32 *)&heap->len);
        debugfs_create_x32("cma_base", S_IRUGO,
                heap->dma_debug_root, (u32 *)&heap->cma_base);
        debugfs_create_x32("cma_size", S_IRUGO,
                heap->dma_debug_root, (u32 *)&heap->cma_len);
        debugfs_create_x32("cma_chunk_size", S_IRUGO,
                heap->dma_debug_root, (u32 *)&heap->cma_chunk_size);
        debugfs_create_x32("num_cma_chunks", S_IRUGO,
                heap->dma_debug_root, (u32 *)&heap->num_devs);
}
static struct device *dma_create_dma_devs(const char *name, int num_devs)
{
        int idx;
        struct device *devs;

        devs = kzalloc(num_devs * sizeof(*devs), GFP_KERNEL);
        if (!devs)
                return NULL;

        for (idx = 0; idx < num_devs; idx++)
                dev_set_name(&devs[idx], "%s-heap-%d", name, idx);

        return devs;
}
int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
                                dma_addr_t device_addr, size_t size, int flags)
{
        void __iomem *mem_base = NULL;
        int pages = size >> PAGE_SHIFT;
        int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);

        if ((flags &
                (DMA_MEMORY_MAP | DMA_MEMORY_IO | DMA_MEMORY_NOMAP)) == 0)
                goto out;
        if (!size)
                goto out;
        if (dev->dma_mem)
                goto out;

        /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
        dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
        if (!dev->dma_mem)
                goto out;
        dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
        if (!dev->dma_mem->bitmap)
                goto free1_out;

        if (flags & DMA_MEMORY_NOMAP)
                goto skip_mapping;
        mem_base = ioremap(bus_addr, size);
        if (!mem_base)
                goto out;
        dev->dma_mem->virt_base = mem_base;

skip_mapping:
        dev->dma_mem->device_base = device_addr;
        dev->dma_mem->pfn_base = PFN_DOWN(bus_addr);
        dev->dma_mem->size = pages;
        dev->dma_mem->flags = flags;

        if (flags & DMA_MEMORY_MAP)
                return DMA_MEMORY_MAP;
        if (flags & DMA_MEMORY_NOMAP)
                return DMA_MEMORY_NOMAP;
        return DMA_MEMORY_IO;

free1_out:
        kfree(dev->dma_mem);
        dev->dma_mem = NULL;
out:
        if (mem_base)
                iounmap(mem_base);
        return 0;
}
EXPORT_SYMBOL(dma_declare_coherent_memory);
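/*
 * Illustrative use only (not part of this file): a driver with a dedicated
 * SRAM window might declare it as a per-device pool. The device name and
 * addresses below are hypothetical.
 *
 *      if ((dma_declare_coherent_memory(&pdev->dev, 0x90000000, 0x90000000,
 *                      SZ_1M, DMA_MEMORY_MAP | DMA_MEMORY_EXCLUSIVE)
 *                      & DMA_MEMORY_MAP) == 0)
 *              dev_warn(&pdev->dev, "no per-device DMA pool\n");
 *
 * Subsequent dma_alloc_coherent() calls on that device are then satisfied
 * from this window before any generic allocator is consulted.
 */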
static int declare_coherent_heap(struct device *dev, phys_addr_t base,
                                 size_t size)
{
        int err;

        BUG_ON(dev->dma_mem);
        dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
        err = dma_declare_coherent_memory(dev, 0,
                        base, size, DMA_MEMORY_NOMAP);
        if (err & DMA_MEMORY_NOMAP) {
                dev_dbg(dev, "dma coherent mem base (0x%pa) size (0x%zx)\n",
                        &base, size);
                return 0;
        }
        dev_err(dev, "declare dma coherent_mem fail 0x%pa 0x%zx\n",
                &base, size);
        return -ENOMEM;
}
int dma_declare_coherent_resizable_cma_memory(struct device *dev,
                                        struct dma_declare_info *dma_info)
{
        int err = 0;
        struct heap_info *heap_info = NULL;
        struct dma_contiguous_stats stats;
        struct dma_coherent_reserved *r =
                &dma_coherent_reserved[dma_coherent_reserved_count];

        if (dma_coherent_reserved_count == ARRAY_SIZE(dma_coherent_reserved)) {
                pr_err("Not enough slots for DMA Coherent reserved regions!\n");
                return -ENOSPC;
        }

        if (!dev || !dma_info || !dma_info->name || !dma_info->cma_dev)
                return -EINVAL;

        heap_info = kzalloc(sizeof(*heap_info), GFP_KERNEL);
        if (!heap_info)
                return -ENOMEM;

        heap_info->name = kmalloc(strlen(dma_info->name) + 1, GFP_KERNEL);
        if (!heap_info->name) {
                kfree(heap_info);
                return -ENOMEM;
        }

        dma_get_contiguous_stats(dma_info->cma_dev, &stats);
        pr_info("resizable heap=%s, base=0x%pa, size=0x%zx\n",
                dma_info->name, &stats.base, stats.size);
        strcpy(heap_info->name, dma_info->name);
        dev_set_name(dev, "dma-%s", heap_info->name);
        heap_info->cma_dev = dma_info->cma_dev;
        heap_info->cma_chunk_size = dma_info->size;
        heap_info->cma_base = stats.base;
        heap_info->cma_len = stats.size;
        dev_set_name(heap_info->cma_dev, "cma-%s-heap", heap_info->name);
        mutex_init(&heap_info->resize_lock);

        if (heap_info->cma_len < heap_info->cma_chunk_size) {
                dev_err(dev, "error cma_len(0x%zx) < cma_chunk_size(0x%zx)\n",
                        heap_info->cma_len, heap_info->cma_chunk_size);
                err = -EINVAL;
                goto fail;
        }

        heap_info->num_devs = div_u64_rem(heap_info->cma_len,
                (u32)heap_info->cma_chunk_size,
                (u32 *)&heap_info->rem_chunk_size);
        if (heap_info->rem_chunk_size) {
                heap_info->num_devs++;
                dev_info(dev, "heap size is not a multiple of cma_chunk_size: num_devs (%d) rem_chunk_size (0x%zx)\n",
                         heap_info->num_devs, heap_info->rem_chunk_size);
        } else {
                heap_info->rem_chunk_size = heap_info->cma_chunk_size;
        }

        heap_info->devs = dma_create_dma_devs(heap_info->name,
                                              heap_info->num_devs);
        if (!heap_info->devs) {
                dev_err(dev, "failed to alloc devices\n");
                err = -ENOMEM;
                goto fail;
        }

        if (dma_info->notifier.ops)
                heap_info->update_resize_cfg =
                        dma_info->notifier.ops->resize;

        r->dev = dev;
        dma_coherent_reserved_count++;

        dev_set_drvdata(dev, heap_info);
        dma_debugfs_init(dev, heap_info);
        pr_info("resizable cma heap=%s created successfully\n",
                heap_info->name);
        return 0;
fail:
        kfree(heap_info->name);
        kfree(heap_info);
        return err;
}
EXPORT_SYMBOL(dma_declare_coherent_resizable_cma_memory);
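/*
 * Illustrative use only: a platform driver that owns a CMA region can expose
 * it as a resizable coherent heap. The struct fields shown are the ones this
 * function reads; the "vpr" names and the resize notifier are hypothetical.
 *
 *      struct dma_declare_info info = {
 *              .name = "vpr",
 *              .size = SZ_8M,          (becomes cma_chunk_size)
 *              .cma_dev = vpr_cma_dev,
 *      };
 *      info.notifier.ops = &vpr_resize_ops;    (optional resize hook)
 *      err = dma_declare_coherent_resizable_cma_memory(dev, &info);
 *
 * Worked example of the chunking math: with cma_len = 0x1400000 and
 * cma_chunk_size = 0x800000, div_u64_rem() gives num_devs = 2 with
 * rem_chunk_size = 0x400000, so a third, shorter chunk device is added
 * (num_devs becomes 3).
 */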
static phys_addr_t alloc_from_contiguous_heap(struct heap_info *h,
                                              phys_addr_t base, size_t len)
{
        size_t count;
        struct page *page;
        unsigned long order;

        dev_dbg(h->cma_dev, "req at base (0x%pa) size (0x%zx)\n",
                &base, len);
        order = get_order(len);
        count = PAGE_ALIGN(len) >> PAGE_SHIFT;
        page = dma_alloc_at_from_contiguous(h->cma_dev, count, order, base);
        if (!page) {
                dev_err(h->cma_dev, "dma_alloc_at_from_contiguous failed\n");
                goto dma_alloc_err;
        }

        base = page_to_phys(page);
        dev_dbg(h->cma_dev, "allocated at base (0x%pa) size (0x%zx)\n",
                &base, len);
        BUG_ON(base < h->cma_base ||
               base - h->cma_base + len > h->cma_len);
        return base;

dma_alloc_err:
        return DMA_ERROR_CODE;
}
static void release_from_contiguous_heap(struct heap_info *h,
                                         phys_addr_t base, size_t len)
{
        struct page *page = phys_to_page(base);
        size_t count = PAGE_ALIGN(len) >> PAGE_SHIFT;

        dma_release_from_contiguous(h->cma_dev, page, count);
        dev_dbg(h->cma_dev, "released at base (0x%pa) size (0x%zx)\n",
                &base, len);
}
static void get_first_and_last_idx(struct heap_info *h,
                                   int *first_alloc_idx, int *last_alloc_idx)
{
        int idx;

        *first_alloc_idx = -1;
        *last_alloc_idx = h->num_devs;

        for (idx = 0; idx < h->num_devs; idx++) {
                if (!h->devs[idx].dma_mem)
                        continue;
                if (*first_alloc_idx == -1)
                        *first_alloc_idx = idx;
                *last_alloc_idx = idx;
        }
}
static void update_heap_base_len(struct heap_info *h)
{
        int idx;
        phys_addr_t base = 0;
        size_t len = 0;

        for (idx = 0; idx < h->num_devs; idx++) {
                if (!h->devs[idx].dma_mem)
                        continue;
                if (!base)
                        base = idx * h->cma_chunk_size + h->cma_base;
                len += (idx == h->num_devs - 1) ?
                        h->rem_chunk_size : h->cma_chunk_size;
        }

        h->base = base;
        h->len = len;
}
static int heap_resize_locked(struct heap_info *h)
{
        int i;
        int err = 0;
        phys_addr_t base = -1;
        size_t len = h->cma_chunk_size;
        phys_addr_t prev_base = h->base;
        size_t prev_len = h->len;
        int alloc_at_idx = 0;
        int first_alloc_idx;
        int last_alloc_idx;
        phys_addr_t start_addr = 0;

        get_first_and_last_idx(h, &first_alloc_idx, &last_alloc_idx);
        pr_debug("req resize, fi=%d,li=%d\n", first_alloc_idx, last_alloc_idx);

        /* All chunks are in use. Can't grow it. */
        if (first_alloc_idx == 0 && last_alloc_idx == h->num_devs - 1)
                return -ENOMEM;

        /* All chunks are free. Can allocate anywhere in CMA with
         * cma_chunk_size alignment.
         */
        if (first_alloc_idx == -1) {
                base = alloc_from_contiguous_heap(h, start_addr, len);
                if (!dma_mapping_error(h->cma_dev, base))
                        goto alloc_success;
        }

        /* Free chunk before previously allocated chunk. Attempt
         * to allocate only immediate previous chunk.
         */
        if (first_alloc_idx > 0) {
                alloc_at_idx = first_alloc_idx - 1;
                start_addr = alloc_at_idx * h->cma_chunk_size + h->cma_base;
                base = alloc_from_contiguous_heap(h, start_addr, len);
                if (base == start_addr)
                        goto alloc_success;
                BUG_ON(!dma_mapping_error(h->cma_dev, base));
        }

        /* Free chunk after previously allocated chunk. */
        if (last_alloc_idx < h->num_devs - 1) {
                alloc_at_idx = last_alloc_idx + 1;
                len = (alloc_at_idx == h->num_devs - 1) ?
                        h->rem_chunk_size : h->cma_chunk_size;
                start_addr = alloc_at_idx * h->cma_chunk_size + h->cma_base;
                base = alloc_from_contiguous_heap(h, start_addr, len);
                if (base == start_addr)
                        goto alloc_success;
                BUG_ON(!dma_mapping_error(h->cma_dev, base));
        }

        if (dma_mapping_error(h->cma_dev, base))
                dev_err(&h->devs[alloc_at_idx],
                        "Failed to allocate contiguous memory on heap grow req\n");
        return -ENOMEM;

alloc_success:
        if (declare_coherent_heap(&h->devs[alloc_at_idx], base, len)) {
                dev_err(&h->devs[alloc_at_idx],
                        "Failed to declare coherent memory\n");
                goto fail_declare;
        }

        for (i = 0; i < len >> PAGE_SHIFT; i++) {
                struct page *page = phys_to_page(base + (i << PAGE_SHIFT));

                if (PageHighMem(page)) {
                        void *ptr = kmap_atomic(page);

                        dmac_flush_range(ptr, ptr + PAGE_SIZE);
                        kunmap_atomic(ptr);
                } else {
                        void *ptr = page_address(page);

                        dmac_flush_range(ptr, ptr + PAGE_SIZE);
                }
        }

        update_heap_base_len(h);

        /* Handle VPR configuration updates */
        if (h->update_resize_cfg) {
                err = h->update_resize_cfg(h->base, h->len);
                if (err) {
                        dev_err(&h->devs[alloc_at_idx],
                                "Failed to update heap resize\n");
                        goto fail_update;
                }
        }

        dev_dbg(&h->devs[alloc_at_idx],
                "grow heap base from=0x%pa to=0x%pa, len from=0x%zx to=0x%zx\n",
                &prev_base, &h->base, prev_len, h->len);
        return 0;

fail_update:
        dma_release_declared_memory(&h->devs[alloc_at_idx]);
        update_heap_base_len(h);
fail_declare:
        release_from_contiguous_heap(h, base, len);
        return -ENOMEM;
}
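/*
 * Worked example of the grow policy above (illustrative numbers): with
 * cma_base = 0x80000000, cma_chunk_size = 0x800000 and num_devs = 4, a heap
 * currently spanning chunks 1..2 (fi=1, li=2) grows by first trying chunk 0
 * at exactly 0x80000000; if CMA cannot provide that address, it tries chunk 3
 * at 0x81800000, which, being the last chunk, uses rem_chunk_size. Growth is
 * therefore always contiguous with the existing heap.
 */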
/* retval: !0 on success, 0 on failure */
static int dma_alloc_from_coherent_dev(struct device *dev, ssize_t size,
                                       dma_addr_t *dma_handle, void **ret,
                                       struct dma_attrs *attrs)
{
        struct dma_coherent_mem *mem;
        int order = get_order(size);
        int pageno;
        unsigned int count;
        unsigned long align;

        if (!dev)
                return 0;
        mem = dev->dma_mem;
        if (!mem)
                return 0;

        *dma_handle = DMA_ERROR_CODE;
        *ret = NULL;

        if (unlikely(size > (mem->size << PAGE_SHIFT)))
                goto err;

        if (order > DMA_BUF_ALIGNMENT)
                align = (1 << DMA_BUF_ALIGNMENT) - 1;
        else
                align = (1 << order) - 1;

        if (dma_get_attr(DMA_ATTR_ALLOC_EXACT_SIZE, attrs))
                count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        else
                count = 1 << order;

        pageno = bitmap_find_next_zero_area(mem->bitmap, mem->size, 0,
                                            count, align);
        if (pageno >= mem->size)
                goto err;

        bitmap_set(mem->bitmap, pageno, count);

        /*
         * Memory was found in the per-device area.
         */
        *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
        if (!(mem->flags & DMA_MEMORY_NOMAP)) {
                *ret = mem->virt_base + (pageno << PAGE_SHIFT);
                memset(*ret, 0, size);
        }
        return 1;

err:
        /*
         * In the case where the allocation can not be satisfied from the
         * per-device area, try to fall back to generic memory if the
         * constraints allow it.
         */
        return mem->flags & DMA_MEMORY_EXCLUSIVE;
}
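/*
 * Alignment math, worked through (illustrative): a 20 KiB request has
 * order = get_order(20K) = 3, so align = (1 << 3) - 1 and the bitmap search
 * starts on an 8-page boundary. With DMA_ATTR_ALLOC_EXACT_SIZE set, count is
 * PAGE_ALIGN(20K) >> PAGE_SHIFT = 5 pages rather than 1 << 3 = 8, so only
 * the exact size is reserved in the bitmap.
 */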
/* retval: !0 on success, 0 on failure */
static int dma_alloc_from_coherent_heap_dev(struct device *dev, size_t len,
                                        dma_addr_t *dma_handle, void **ret,
                                        struct dma_attrs *attrs)
{
        int idx;
        struct device *d;
        struct heap_info *h = NULL;

        *dma_handle = DMA_ERROR_CODE;
        if (!dma_is_coherent_dev(dev))
                return 0;

        h = dev_get_drvdata(dev);
        BUG_ON(!h);
        if (!h)
                return DMA_MEMORY_EXCLUSIVE;
        dma_set_attr(DMA_ATTR_ALLOC_EXACT_SIZE, attrs);

        mutex_lock(&h->resize_lock);
retry_alloc:
        /* Try allocation from already existing CMA chunks */
        for (idx = 0; idx < h->num_devs; idx++) {
                d = &h->devs[idx];
                if (!d->dma_mem)
                        continue;
                if (dma_alloc_from_coherent_dev(
                        d, len, dma_handle, ret, attrs)) {
                        dev_dbg(d, "allocated addr 0x%pa len 0x%zx\n",
                                dma_handle, len);
                        goto out;
                }
        }

        if (!heap_resize_locked(h))
                goto retry_alloc;
out:
        mutex_unlock(&h->resize_lock);
        return DMA_MEMORY_EXCLUSIVE;
}
/* retval: !0 on success, 0 on failure */
static int dma_release_from_coherent_dev(struct device *dev, size_t size,
                                         void *vaddr, struct dma_attrs *attrs)
{
        struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
        void *mem_addr;
        unsigned int pageno, count;

        if (!mem)
                return 0;

        if (mem->flags & DMA_MEMORY_NOMAP)
                mem_addr = (void *)(uintptr_t)mem->device_base;
        else
                mem_addr = mem->virt_base;

        if (vaddr >= mem_addr &&
            vaddr - mem_addr < mem->size << PAGE_SHIFT) {
                pageno = (vaddr - mem_addr) >> PAGE_SHIFT;

                if (dma_get_attr(DMA_ATTR_ALLOC_EXACT_SIZE, attrs))
                        count = PAGE_ALIGN(size) >> PAGE_SHIFT;
                else
                        count = 1 << get_order(size);

                bitmap_clear(mem->bitmap, pageno, count);
                return 1;
        }
        return 0;
}
static int dma_release_from_coherent_heap_dev(struct device *dev, size_t len,
                                        void *base, struct dma_attrs *attrs)
{
        int idx = 0;
        int err = 0;
        int resize_err = 0;
        void *ret = NULL;
        dma_addr_t dev_base;
        struct heap_info *h = NULL;
        size_t chunk_size;
        int first_alloc_idx;
        int last_alloc_idx;

        if (!dma_is_coherent_dev(dev))
                return 0;

        h = dev_get_drvdata(dev);
        BUG_ON(!h);
        if (!h)
                return 1;
        if ((uintptr_t)base < h->cma_base ||
            len > h->cma_chunk_size ||
            (uintptr_t)base - h->cma_base > h->cma_len - len) {
                BUG();
                return 1;
        }

        dma_set_attr(DMA_ATTR_ALLOC_EXACT_SIZE, attrs);

        mutex_lock(&h->resize_lock);

        idx = div_u64((uintptr_t)base - h->cma_base, h->cma_chunk_size);
        dev_dbg(&h->devs[idx], "req free addr (%p) size (0x%zx) idx (%d)\n",
                base, len, idx);
        err = dma_release_from_coherent_dev(&h->devs[idx], len, base, attrs);
        if (!err)
                goto out_unlock;

check_next_chunk:
        get_first_and_last_idx(h, &first_alloc_idx, &last_alloc_idx);

        /* Check if the heap can be shrunk */
        if (idx == first_alloc_idx || idx == last_alloc_idx) {
                /* check if the entire chunk is free */
                if (idx == h->num_devs - 1)
                        chunk_size = h->rem_chunk_size;
                else
                        chunk_size = h->cma_chunk_size;

                resize_err = dma_alloc_from_coherent_dev(&h->devs[idx],
                                        chunk_size, &dev_base, &ret, attrs);
                if (!resize_err) {
                        goto out_unlock;
                } else {
                        dev_dbg(&h->devs[idx],
                                "prep to remove chunk b=0x%pa, s=0x%zx\n",
                                &dev_base, chunk_size);
                        resize_err = dma_release_from_coherent_dev(
                                        &h->devs[idx], chunk_size,
                                        (void *)(uintptr_t)dev_base, attrs);
                        if (!resize_err) {
                                dev_err(&h->devs[idx], "failed to rel mem\n");
                                goto out_unlock;
                        }

                        dma_release_declared_memory(&h->devs[idx]);
                        BUG_ON(h->devs[idx].dma_mem != NULL);
                        update_heap_base_len(h);

                        /* Handle VPR configuration updates */
                        if (h->update_resize_cfg) {
                                resize_err =
                                        h->update_resize_cfg(h->base, h->len);
                                if (resize_err) {
                                        dev_err(&h->devs[idx],
                                                "update resize failed\n");
                                        /* On update failure re-declare heap */
                                        resize_err = declare_coherent_heap(
                                                &h->devs[idx], dev_base,
                                                chunk_size);
                                        if (resize_err) {
                                                /* on declare coherent failure
                                                 * release the heap chunk
                                                 */
                                                release_from_contiguous_heap(h,
                                                        dev_base, chunk_size);
                                                dev_err(&h->devs[idx],
                                                        "declare failed\n");
                                        } else {
                                                update_heap_base_len(h);
                                        }
                                        goto out_unlock;
                                }
                        }

                        idx == first_alloc_idx ? ++idx : --idx;
                        release_from_contiguous_heap(h, dev_base, chunk_size);
                        dev_dbg(&h->devs[idx],
                                "removed chunk b=0x%pa, s=0x%zx new heap b=0x%pa, s=0x%zx\n",
                                &dev_base, chunk_size, &h->base, h->len);
                }
                if (idx < h->num_devs)
                        goto check_next_chunk;
        }

out_unlock:
        mutex_unlock(&h->resize_lock);
        return err;
}
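/*
 * Illustrative shrink sequence: freeing the last buffer in the chunk at the
 * heap's edge makes that chunk entirely free. The whole-chunk
 * dma_alloc_from_coherent_dev() probe above then succeeds, the chunk is
 * undeclared, the resize hook (if registered) is told the new base/len, and
 * the pages are handed back to CMA. The check_next_chunk loop repeats in
 * case the next chunk at the heap edge has become free as well.
 */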
void dma_release_declared_memory(struct device *dev)
{
        struct dma_coherent_mem *mem = dev->dma_mem;

        if (!mem)
                return;
        dev->dma_mem = NULL;

        if (!(mem->flags & DMA_MEMORY_NOMAP))
                iounmap(mem->virt_base);

        kfree(mem->bitmap);
        kfree(mem);
}
EXPORT_SYMBOL(dma_release_declared_memory);
void *dma_mark_declared_memory_occupied(struct device *dev,
                                        dma_addr_t device_addr, size_t size)
{
        struct dma_coherent_mem *mem = dev->dma_mem;
        int pos, err;

        size += device_addr & ~PAGE_MASK;

        if (!mem)
                return ERR_PTR(-EINVAL);

        pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
        err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
        if (err != 0)
                return ERR_PTR(err);
        return mem->virt_base + (pos << PAGE_SHIFT);
}
EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
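/*
 * Illustrative use only: a driver can pin a fixed sub-range of its declared
 * pool, e.g. a firmware mailbox at a known device address. The values below
 * are hypothetical.
 *
 *      void *mbox = dma_mark_declared_memory_occupied(&pdev->dev,
 *                                                     0x90001000, SZ_4K);
 *      if (IS_ERR(mbox))
 *              dev_err(&pdev->dev, "mailbox region busy\n");
 */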
/**
 * dma_alloc_from_coherent_attr() - try to allocate memory from the per-device
 * coherent area
 *
 * @dev: device from which we allocate memory
 * @size: size of requested memory area
 * @dma_handle: This will be filled with the correct dma handle
 * @ret: This pointer will be filled with the virtual address
 *       to the allocated area.
 * @attrs: DMA Attributes
 *
 * This function should only be called from per-arch dma_alloc_coherent()
 * to support allocation from per-device coherent memory pools.
 *
 * Returns 0 if dma_alloc_coherent() should continue with allocating from
 * generic memory areas, or !0 if dma_alloc_coherent() should return @ret.
 */
int dma_alloc_from_coherent_attr(struct device *dev, ssize_t size,
                                 dma_addr_t *dma_handle, void **ret,
                                 struct dma_attrs *attrs)
{
        if (!dev)
                return 0;

        if (dev->dma_mem)
                return dma_alloc_from_coherent_dev(dev, size, dma_handle, ret,
                                                   attrs);
        else
                return dma_alloc_from_coherent_heap_dev(dev, size, dma_handle,
                                                        ret, attrs);
}
EXPORT_SYMBOL(dma_alloc_from_coherent_attr);
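/*
 * Sketch of the per-arch hook-up (illustrative; the function name and the
 * fallback path vary per architecture):
 *
 *      void *arch_dma_alloc(struct device *dev, size_t size,
 *                           dma_addr_t *handle, struct dma_attrs *attrs)
 *      {
 *              void *ret;
 *
 *              if (dma_alloc_from_coherent_attr(dev, size, handle, &ret,
 *                                               attrs))
 *                      return ret;
 *              ... fall back to the generic allocator ...
 *      }
 */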
/**
 * dma_release_from_coherent_attr() - try to free the memory allocated from
 * per-device coherent memory pool
 * @dev: device from which the memory was allocated
 * @size: size of the memory area to free
 * @vaddr: virtual address of allocated pages
 * @attrs: DMA Attributes
 *
 * This checks whether the memory was allocated from the per-device
 * coherent memory pool and if so, releases that memory.
 *
 * Returns 1 if we correctly released the memory, or 0 if
 * dma_release_coherent_attr() should proceed with releasing memory from
 * generic pools.
 */
int dma_release_from_coherent_attr(struct device *dev, size_t size,
                                   void *vaddr, struct dma_attrs *attrs)
{
        if (!dev)
                return 0;

        if (dev->dma_mem)
                return dma_release_from_coherent_dev(dev, size, vaddr, attrs);
        else
                return dma_release_from_coherent_heap_dev(dev, size, vaddr,
                                                          attrs);
}
EXPORT_SYMBOL(dma_release_from_coherent_attr);
/**
 * dma_mmap_from_coherent() - try to mmap the memory allocated from
 * per-device coherent memory pool to userspace
 * @dev: device from which the memory was allocated
 * @vma: vm_area for the userspace memory
 * @vaddr: cpu address returned by dma_alloc_from_coherent
 * @size: size of the memory buffer allocated by dma_alloc_from_coherent
 * @ret: result from remap_pfn_range()
 *
 * This checks whether the memory was allocated from the per-device
 * coherent memory pool and if so, maps that memory to the provided vma.
 *
 * Returns 1 if we correctly mapped the memory, or 0 if the caller should
 * proceed with mapping memory from generic pools.
 */
int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
                           void *vaddr, size_t size, int *ret)
{
        struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
        void *mem_addr;

        if (!mem)
                return 0;

        if (mem->flags & DMA_MEMORY_NOMAP)
                mem_addr = (void *)(uintptr_t)mem->device_base;
        else
                mem_addr = mem->virt_base;

        if (vaddr >= mem_addr && vaddr + size <=
            (mem_addr + (mem->size << PAGE_SHIFT))) {
                unsigned long off = vma->vm_pgoff;
                int start = (vaddr - mem_addr) >> PAGE_SHIFT;
                int user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
                int count = size >> PAGE_SHIFT;

                *ret = -ENXIO;
                if (off < count && user_count <= count - off) {
                        unsigned pfn = mem->pfn_base + start + off;
                        *ret = remap_pfn_range(vma, vma->vm_start, pfn,
                                               user_count << PAGE_SHIFT,
                                               vma->vm_page_prot);
                }
                return 1;
        }
        return 0;
}
EXPORT_SYMBOL(dma_mmap_from_coherent);
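/*
 * Sketch of the mmap hook-up (illustrative): an arch dma_mmap implementation
 * offers the vma to the per-device pool first and falls back to the generic
 * path only when 0 is returned.
 *
 *      int ret;
 *
 *      if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
 *              return ret;
 *      ... map from the generic pool ...
 */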