2 * drivers/video/tegra/host/gk20a/mm_gk20a.c
4 * GK20A memory management
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
22 #include <linux/delay.h>
23 #include <linux/highmem.h>
24 #include <linux/log2.h>
25 #include <linux/nvhost.h>
26 #include <linux/pm_runtime.h>
27 #include <linux/scatterlist.h>
28 #include <linux/nvmap.h>
29 #include <linux/tegra-soc.h>
30 #include <linux/vmalloc.h>
31 #include <linux/dma-buf.h>
32 #include <asm/cacheflush.h>
36 #include "hw_gmmu_gk20a.h"
37 #include "hw_fb_gk20a.h"
38 #include "hw_bus_gk20a.h"
39 #include "hw_ram_gk20a.h"
40 #include "hw_mc_gk20a.h"
41 #include "hw_flush_gk20a.h"
42 #include "hw_ltc_gk20a.h"
44 #include "kind_gk20a.h"
47 #define outer_flush_range(a, b)
48 #define __cpuc_flush_dcache_area __flush_dcache_area
52 * GPU mapping life cycle
53 * ======================
58 * Kernel mappings are created through vm.map(..., false):
60 * - Mappings to the same allocations are reused and refcounted.
61 * - This path does not support deferred unmapping (i.e. kernel must wait for
62 * all hw operations on the buffer to complete before unmapping).
63 * - References to dmabuf are owned and managed by the (kernel) clients of
70 * User space mappings are created through as.map_buffer -> vm.map(..., true):
72 * - Mappings to the same allocations are reused and refcounted.
73 * - This path supports deferred unmapping (i.e. we delay the actual unmapping
74 * until all hw operations have completed).
75 * - References to dmabuf are owned and managed by the vm_gk20a
76 * layer itself. vm.map acquires these refs, and sets
77 * mapped_buffer->own_mem_ref to record that we must release the refs when we
82 static inline int vm_aspace_id(struct vm_gk20a *vm)
84 /* -1 is bar1 or pmu, etc. */
85 return vm->as_share ? vm->as_share->id : -1;
87 static inline u32 hi32(u64 f)
89 return (u32)(f >> 32);
91 static inline u32 lo32(u64 f)
93 return (u32)(f & 0xffffffff);
96 #define FLUSH_CPU_DCACHE(va, pa, size) \
98 __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
99 outer_flush_range(pa, pa + (size_t)(size)); \
102 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
103 static struct mapped_buffer_node *find_mapped_buffer_locked(
104 struct rb_root *root, u64 addr);
105 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
106 struct rb_root *root, struct dma_buf *dmabuf,
108 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
109 enum gmmu_pgsz_gk20a pgsz_idx,
110 struct sg_table *sgt, u64 buffer_offset,
111 u64 first_vaddr, u64 last_vaddr,
112 u8 kind_v, u32 ctag_offset, bool cacheable,
114 static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
115 static void gk20a_vm_remove_support(struct vm_gk20a *vm);
118 /* note: keep the page sizes sorted lowest to highest here */
119 static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
120 static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
121 static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
123 static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
125 struct gk20a_comptags {
130 struct gk20a_dmabuf_priv {
133 struct gk20a_allocator *comptag_allocator;
134 struct gk20a_comptags comptags;
136 struct dma_buf_attachment *attach;
137 struct sg_table *sgt;
142 static void gk20a_mm_delete_priv(void *_priv)
144 struct gk20a_dmabuf_priv *priv = _priv;
148 if (priv->comptags.lines) {
149 BUG_ON(!priv->comptag_allocator);
150 priv->comptag_allocator->free(priv->comptag_allocator,
151 priv->comptags.offset,
152 priv->comptags.lines);
158 struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf)
160 struct gk20a_dmabuf_priv *priv;
162 priv = dma_buf_get_drvdata(dmabuf, dev);
164 return ERR_PTR(-EINVAL);
166 mutex_lock(&priv->lock);
168 if (priv->pin_count == 0) {
169 priv->attach = dma_buf_attach(dmabuf, dev);
170 if (IS_ERR(priv->attach)) {
171 mutex_unlock(&priv->lock);
172 return (struct sg_table *)priv->attach;
175 priv->sgt = dma_buf_map_attachment(priv->attach,
177 if (IS_ERR(priv->sgt)) {
178 dma_buf_detach(dmabuf, priv->attach);
179 mutex_unlock(&priv->lock);
185 mutex_unlock(&priv->lock);
189 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
190 struct sg_table *sgt)
192 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
195 if (IS_ERR(priv) || !priv)
198 mutex_lock(&priv->lock);
199 WARN_ON(priv->sgt != sgt);
201 WARN_ON(priv->pin_count < 0);
202 dma_addr = sg_dma_address(priv->sgt->sgl);
203 if (priv->pin_count == 0) {
204 dma_buf_unmap_attachment(priv->attach, priv->sgt,
206 dma_buf_detach(dmabuf, priv->attach);
208 mutex_unlock(&priv->lock);
212 static void gk20a_get_comptags(struct device *dev,
213 struct dma_buf *dmabuf,
214 struct gk20a_comptags *comptags)
216 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
223 comptags->offset = 0;
227 *comptags = priv->comptags;
230 static int gk20a_alloc_comptags(struct device *dev,
231 struct dma_buf *dmabuf,
232 struct gk20a_allocator *allocator,
235 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
245 /* store the allocator so we can use it when we free the ctags */
246 priv->comptag_allocator = allocator;
247 err = allocator->alloc(allocator, &offset, lines);
249 priv->comptags.lines = lines;
250 priv->comptags.offset = offset;
258 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
264 if (g->ops.fb.init_fs_state)
265 g->ops.fb.init_fs_state(g);
270 void gk20a_remove_mm_support(struct mm_gk20a *mm)
272 struct gk20a *g = mm->g;
273 struct device *d = dev_from_gk20a(g);
274 struct vm_gk20a *vm = &mm->bar1.vm;
275 struct inst_desc *inst_block = &mm->bar1.inst_block;
279 if (inst_block->cpuva)
280 dma_free_coherent(d, inst_block->size,
281 inst_block->cpuva, inst_block->iova);
282 inst_block->cpuva = NULL;
283 inst_block->iova = 0;
285 gk20a_vm_remove_support(vm);
288 int gk20a_init_mm_setup_sw(struct gk20a *g)
290 struct mm_gk20a *mm = &g->mm;
296 gk20a_dbg_fn("skip init");
301 mutex_init(&mm->l2_op_lock);
302 mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
303 mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
304 mm->pde_stride = mm->big_page_size << 10;
305 mm->pde_stride_shift = ilog2(mm->pde_stride);
306 BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
308 for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
310 u32 num_ptes, pte_space, num_pages;
312 /* assuming "full" page tables */
313 num_ptes = mm->pde_stride / gmmu_page_sizes[i];
315 pte_space = num_ptes * gmmu_pte__size_v();
316 /* allocate whole pages */
317 pte_space = roundup(pte_space, PAGE_SIZE);
319 num_pages = pte_space / PAGE_SIZE;
320 /* make sure "order" is viable */
321 BUG_ON(!is_power_of_2(num_pages));
323 mm->page_table_sizing[i].num_ptes = num_ptes;
324 mm->page_table_sizing[i].order = ilog2(num_pages);
327 /*TBD: make channel vm size configurable */
328 mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
330 gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
332 gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
333 gmmu_page_sizes[gmmu_page_size_small] >> 10,
334 (mm->page_table_sizing[gmmu_page_size_small].num_ptes *
335 gmmu_pte__size_v()) >> 10);
337 gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
338 gmmu_page_sizes[gmmu_page_size_big] >> 10,
339 (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
340 gmmu_pte__size_v()) >> 10);
343 gk20a_init_bar1_vm(mm);
345 mm->remove_support = gk20a_remove_mm_support;
348 gk20a_dbg_fn("done");
352 /* make sure gk20a_init_mm_support is called before */
353 static int gk20a_init_mm_setup_hw(struct gk20a *g)
355 struct mm_gk20a *mm = &g->mm;
356 struct inst_desc *inst_block = &mm->bar1.inst_block;
357 phys_addr_t inst_pa = inst_block->cpu_pa;
361 /* set large page size in fb
362 * note this is very early on, can we defer it ? */
364 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
366 if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
367 fb_mmu_ctrl = (fb_mmu_ctrl &
368 ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
369 fb_mmu_ctrl_vm_pg_size_128kb_f();
371 BUG_ON(1); /* no support/testing for larger ones yet */
373 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
376 inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
377 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa);
379 gk20a_writel(g, bus_bar1_block_r(),
380 bus_bar1_block_target_vid_mem_f() |
381 bus_bar1_block_mode_virtual_f() |
382 bus_bar1_block_ptr_f(inst_pa));
383 if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g))
386 gk20a_dbg_fn("done");
390 int gk20a_init_mm_support(struct gk20a *g)
394 err = gk20a_init_mm_reset_enable_hw(g);
398 err = gk20a_init_mm_setup_sw(g);
402 err = gk20a_init_mm_setup_hw(g);
409 #ifdef CONFIG_GK20A_PHYS_PAGE_TABLES
410 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
412 struct sg_table **sgt,
415 u32 num_pages = 1 << order;
416 u32 len = num_pages * PAGE_SIZE;
422 pages = alloc_pages(GFP_KERNEL, order);
424 gk20a_dbg(gpu_dbg_pte, "alloc_pages failed\n");
427 *sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
429 gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table");
432 err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
434 gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed\n");
437 sg_set_page((*sgt)->sgl, pages, len, 0);
438 *handle = page_address(pages);
439 memset(*handle, 0, len);
441 FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len);
448 __free_pages(pages, order);
453 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
454 struct sg_table *sgt, u32 order,
459 free_pages((unsigned long)handle, order);
464 static int map_gmmu_pages(void *handle, struct sg_table *sgt,
465 void **va, size_t size)
467 FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
472 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
474 FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
478 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
480 struct sg_table **sgt,
483 struct device *d = dev_from_vm(vm);
484 u32 num_pages = 1 << order;
485 u32 len = num_pages * PAGE_SIZE;
487 DEFINE_DMA_ATTRS(attrs);
496 if (IS_ENABLED(CONFIG_ARM64)) {
497 cpuva = dma_zalloc_coherent(d, len, &iova, GFP_KERNEL);
499 gk20a_err(d, "memory allocation failed\n");
503 err = gk20a_get_sgtable(d, sgt, cpuva, iova, len);
505 gk20a_err(d, "sgt allocation failed\n");
511 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
512 pages = dma_alloc_attrs(d, len, &iova, GFP_KERNEL, &attrs);
514 gk20a_err(d, "memory allocation failed\n");
518 err = gk20a_get_sgtable_from_pages(d, sgt, pages,
521 gk20a_err(d, "sgt allocation failed\n");
525 *handle = (void *)pages;
531 if (IS_ENABLED(CONFIG_ARM64)) {
532 dma_free_coherent(d, len, handle, iova);
535 dma_free_attrs(d, len, pages, iova, &attrs);
543 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
544 struct sg_table *sgt, u32 order,
547 struct device *d = dev_from_vm(vm);
549 DEFINE_DMA_ATTRS(attrs);
555 iova = sg_dma_address(sgt->sgl);
557 gk20a_free_sgtable(&sgt);
559 if (IS_ENABLED(CONFIG_ARM64)) {
560 dma_free_coherent(d, size, handle, iova);
562 pages = (struct page **)handle;
563 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
564 dma_free_attrs(d, size, pages, iova, &attrs);
572 static int map_gmmu_pages(void *handle, struct sg_table *sgt,
573 void **kva, size_t size)
575 int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
579 if (IS_ENABLED(CONFIG_ARM64)) {
582 pages = (struct page **)handle;
583 *kva = vmap(pages, count, 0, pgprot_dmacoherent(PAGE_KERNEL));
591 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
595 if (!IS_ENABLED(CONFIG_ARM64))
601 /* allocate a phys contig region big enough for a full
602 * sized gmmu page table for the given gmmu_page_size.
603 * the whole range is zeroed so it's "invalid"/will fault
606 static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
607 enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
608 struct page_table_gk20a *pte)
613 struct sg_table *sgt;
618 /* allocate enough pages for the table */
619 pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order;
621 err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size);
625 gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
626 pte, gk20a_mm_iova_addr(sgt->sgl), pte_order);
635 /* given address range (inclusive) determine the pdes crossed */
636 static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
637 u64 addr_lo, u64 addr_hi,
638 u32 *pde_lo, u32 *pde_hi)
640 *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift);
641 *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift);
642 gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
643 addr_lo, addr_hi, vm->mm->pde_stride_shift);
644 gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d",
648 static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
650 return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
653 static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm,
654 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
657 /* mask off pde part */
658 addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1));
659 /* shift over to get pte index. note assumption that pte index
660 * doesn't leak over into the high 32b */
661 ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]);
663 gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
667 static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
670 /* ptes are 8B regardless of pagesize */
671 /* pte space pages are 4KB. so 512 ptes per 4KB page*/
674 /* this offset is a pte offset, not a byte offset */
675 *pte_offset = i & ((1<<9)-1);
677 gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x",
678 i, *pte_page, *pte_offset);
683 * given a pde index/page table number make sure it has
684 * backing store and if not go ahead allocate it and
685 * record it in the appropriate pde
687 static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
688 u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
691 struct page_table_gk20a *pte =
692 vm->pdes.ptes[gmmu_pgsz_idx] + i;
696 /* if it's already in place it's valid */
700 gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d",
701 gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
703 err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
708 update_gmmu_pde_locked(vm, i);
713 static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
716 struct vm_reserved_va_node *va_node;
717 list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list)
718 if (addr >= va_node->vaddr_start &&
719 addr < (u64)va_node->vaddr_start + (u64)va_node->size)
725 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
726 struct mapped_buffer_node ***mapped_buffers,
729 struct mapped_buffer_node *mapped_buffer;
730 struct mapped_buffer_node **buffer_list;
731 struct rb_node *node;
734 mutex_lock(&vm->update_gmmu_lock);
736 buffer_list = kzalloc(sizeof(*buffer_list) *
737 vm->num_user_mapped_buffers, GFP_KERNEL);
739 mutex_unlock(&vm->update_gmmu_lock);
743 node = rb_first(&vm->mapped_buffers);
746 container_of(node, struct mapped_buffer_node, node);
747 if (mapped_buffer->user_mapped) {
748 buffer_list[i] = mapped_buffer;
749 kref_get(&mapped_buffer->ref);
752 node = rb_next(&mapped_buffer->node);
755 BUG_ON(i != vm->num_user_mapped_buffers);
757 *num_buffers = vm->num_user_mapped_buffers;
758 *mapped_buffers = buffer_list;
760 mutex_unlock(&vm->update_gmmu_lock);
765 static void gk20a_vm_unmap_locked_kref(struct kref *ref)
767 struct mapped_buffer_node *mapped_buffer =
768 container_of(ref, struct mapped_buffer_node, ref);
769 gk20a_vm_unmap_locked(mapped_buffer);
772 void gk20a_vm_put_buffers(struct vm_gk20a *vm,
773 struct mapped_buffer_node **mapped_buffers,
778 mutex_lock(&vm->update_gmmu_lock);
780 for (i = 0; i < num_buffers; ++i)
781 kref_put(&mapped_buffers[i]->ref,
782 gk20a_vm_unmap_locked_kref);
784 mutex_unlock(&vm->update_gmmu_lock);
786 kfree(mapped_buffers);
789 static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
791 struct device *d = dev_from_vm(vm);
793 struct mapped_buffer_node *mapped_buffer;
795 mutex_lock(&vm->update_gmmu_lock);
797 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
798 if (!mapped_buffer) {
799 mutex_unlock(&vm->update_gmmu_lock);
800 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
804 if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
805 mutex_unlock(&vm->update_gmmu_lock);
809 if (atomic_read(&mapped_buffer->ref.refcount) == 1)
815 gk20a_err(d, "sync-unmap failed on 0x%llx",
817 mutex_lock(&vm->update_gmmu_lock);
820 mapped_buffer->user_mapped--;
821 if (mapped_buffer->user_mapped == 0)
822 vm->num_user_mapped_buffers--;
823 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
825 mutex_unlock(&vm->update_gmmu_lock);
828 static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
830 enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
833 struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
836 u32 start_page_nr = 0, num_pages;
837 u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx];
839 if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) {
840 dev_warn(dev_from_vm(vm),
841 "invalid page size requested in gk20a vm alloc");
845 if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
846 dev_warn(dev_from_vm(vm),
847 "unsupportd page size requested");
852 /* be certain we round up to gmmu_page_size if needed */
853 /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
854 size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
856 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
857 gmmu_page_sizes[gmmu_pgsz_idx]>>10);
859 /* The vma allocator represents page accounting. */
860 num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx];
862 err = vma->alloc(vma, &start_page_nr, num_pages);
865 gk20a_err(dev_from_vm(vm),
866 "%s oom: sz=0x%llx", vma->name, size);
870 offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx];
871 gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
876 static int gk20a_vm_free_va(struct vm_gk20a *vm,
877 u64 offset, u64 size,
878 enum gmmu_pgsz_gk20a pgsz_idx)
880 struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
881 u32 page_size = gmmu_page_sizes[pgsz_idx];
882 u32 page_shift = gmmu_page_shifts[pgsz_idx];
883 u32 start_page_nr, num_pages;
886 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
887 vma->name, offset, size);
889 start_page_nr = (u32)(offset >> page_shift);
890 num_pages = (u32)((size + page_size - 1) >> page_shift);
892 err = vma->free(vma, start_page_nr, num_pages);
894 gk20a_err(dev_from_vm(vm),
895 "not found: offset=0x%llx, sz=0x%llx",
902 static int insert_mapped_buffer(struct rb_root *root,
903 struct mapped_buffer_node *mapped_buffer)
905 struct rb_node **new_node = &(root->rb_node), *parent = NULL;
907 /* Figure out where to put new node */
909 struct mapped_buffer_node *cmp_with =
910 container_of(*new_node, struct mapped_buffer_node,
915 if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
916 new_node = &((*new_node)->rb_left);
917 else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
918 new_node = &((*new_node)->rb_right);
920 return -EINVAL; /* no fair dup'ing */
923 /* Add new node and rebalance tree. */
924 rb_link_node(&mapped_buffer->node, parent, new_node);
925 rb_insert_color(&mapped_buffer->node, root);
930 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
931 struct rb_root *root, struct dma_buf *dmabuf,
934 struct rb_node *node = rb_first(root);
936 struct mapped_buffer_node *mapped_buffer =
937 container_of(node, struct mapped_buffer_node, node);
938 if (mapped_buffer->dmabuf == dmabuf &&
939 kind == mapped_buffer->kind)
940 return mapped_buffer;
941 node = rb_next(&mapped_buffer->node);
946 static struct mapped_buffer_node *find_mapped_buffer_locked(
947 struct rb_root *root, u64 addr)
950 struct rb_node *node = root->rb_node;
952 struct mapped_buffer_node *mapped_buffer =
953 container_of(node, struct mapped_buffer_node, node);
954 if (mapped_buffer->addr > addr) /* u64 cmp */
955 node = node->rb_left;
956 else if (mapped_buffer->addr != addr) /* u64 cmp */
957 node = node->rb_right;
959 return mapped_buffer;
964 static struct mapped_buffer_node *find_mapped_buffer_range_locked(
965 struct rb_root *root, u64 addr)
967 struct rb_node *node = root->rb_node;
969 struct mapped_buffer_node *m =
970 container_of(node, struct mapped_buffer_node, node);
971 if (m->addr <= addr && m->addr + m->size > addr)
973 else if (m->addr > addr) /* u64 cmp */
974 node = node->rb_left;
976 node = node->rb_right;
981 #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
983 struct buffer_attrs {
984 struct sg_table *sgt;
994 static void gmmu_select_page_size(struct buffer_attrs *bfr)
997 /* choose the biggest first (top->bottom) */
998 for (i = (gmmu_nr_page_sizes-1); i >= 0; i--)
999 if (!(gmmu_page_offset_masks[i] & bfr->align)) {
1000 /* would like to add this too but nvmap returns the
1001 * original requested size not the allocated size.
1002 * (!(gmmu_page_offset_masks[i] & bfr->size)) */
1008 static int setup_buffer_kind_and_compression(struct device *d,
1010 struct buffer_attrs *bfr,
1011 enum gmmu_pgsz_gk20a pgsz_idx)
1013 bool kind_compressible;
1015 if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
1016 bfr->kind_v = gmmu_pte_kind_pitch_v();
1018 if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
1019 gk20a_err(d, "kind 0x%x not supported", bfr->kind_v);
1023 bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
1024 /* find a suitable uncompressed kind if it becomes necessary later */
1025 kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
1026 if (kind_compressible) {
1027 bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
1028 if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
1029 /* shouldn't happen, but it is worth cross-checking */
1030 gk20a_err(d, "comptag kind 0x%x can't be"
1031 " downgraded to uncompressed kind",
1036 /* comptags only supported for suitable kinds, 128KB pagesize */
1037 if (unlikely(kind_compressible &&
1038 (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
1040 gk20a_warn(d, "comptags specified"
1041 " but pagesize being used doesn't support it");*/
1042 /* it is safe to fall back to uncompressed as
1043 functionality is not harmed */
1044 bfr->kind_v = bfr->uc_kind_v;
1045 kind_compressible = false;
1047 if (kind_compressible)
1048 bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
1049 COMP_TAG_LINE_SIZE_SHIFT;
1051 bfr->ctag_lines = 0;
1056 static int validate_fixed_buffer(struct vm_gk20a *vm,
1057 struct buffer_attrs *bfr,
1058 u64 map_offset, u64 map_size)
1060 struct device *dev = dev_from_vm(vm);
1061 struct vm_reserved_va_node *va_node;
1062 struct mapped_buffer_node *buffer;
1064 if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) {
1065 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
1070 /* find the space reservation */
1071 va_node = addr_to_reservation(vm, map_offset);
1073 gk20a_warn(dev, "fixed offset mapping without space allocation");
1077 /* check that this mappings does not collide with existing
1078 * mappings by checking the overlapping area between the current
1079 * buffer and all other mapped buffers */
1081 list_for_each_entry(buffer,
1082 &va_node->va_buffers_list, va_buffers_list) {
1083 s64 begin = max(buffer->addr, map_offset);
1084 s64 end = min(buffer->addr +
1085 buffer->size, map_offset + map_size);
1086 if (end - begin > 0) {
1087 gk20a_warn(dev, "overlapping buffer map requested");
1095 static u64 __locked_gmmu_map(struct vm_gk20a *vm,
1097 struct sg_table *sgt,
1107 bool allocated = false;
1109 struct device *d = dev_from_vm(vm);
1111 /* Allocate (or validate when map_offset != 0) the virtual address. */
1113 map_offset = gk20a_vm_alloc_va(vm, size,
1116 gk20a_err(d, "failed to allocate va space");
1123 pde_range_from_vaddr_range(vm,
1125 map_offset + size - 1,
1128 /* mark the addr range valid (but with 0 phys addr, which will fault) */
1129 for (i = pde_lo; i <= pde_hi; i++) {
1130 err = validate_gmmu_page_table_gk20a_locked(vm, i,
1133 gk20a_err(d, "failed to validate page table %d: %d",
1139 err = update_gmmu_ptes_locked(vm, pgsz_idx,
1142 map_offset, map_offset + size - 1,
1146 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1149 gk20a_err(d, "failed to update ptes on map");
1156 gk20a_vm_free_va(vm, map_offset, size, pgsz_idx);
1158 gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
1162 static void __locked_gmmu_unmap(struct vm_gk20a *vm,
1170 struct gk20a *g = gk20a_from_vm(vm);
1173 err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
1175 dev_err(dev_from_vm(vm),
1176 "failed to free va");
1181 /* unmap here needs to know the page size we assigned at mapping */
1182 err = update_gmmu_ptes_locked(vm,
1184 0, /* n/a for unmap */
1188 0, 0, false /* n/a for unmap */,
1191 dev_err(dev_from_vm(vm),
1192 "failed to update gmmu ptes on unmap");
1194 /* detect which if any pdes/ptes can now be released */
1196 /* flush l2 so any dirty lines are written out *now*.
1197 * also as we could potentially be switching this buffer
1198 * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
1199 * some point in the future we need to invalidate l2. e.g. switching
1200 * from a render buffer unmap (here) to later using the same memory
1201 * for gmmu ptes. note the positioning of this relative to any smmu
1202 * unmapping (below). */
1204 gk20a_mm_l2_flush(g, true);
1207 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
1208 struct dma_buf *dmabuf,
1212 struct sg_table **sgt,
1216 struct mapped_buffer_node *mapped_buffer = 0;
1219 find_mapped_buffer_reverse_locked(&vm->mapped_buffers,
1224 if (mapped_buffer->flags != flags)
1227 if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET &&
1228 mapped_buffer->addr != offset_align)
1231 BUG_ON(mapped_buffer->vm != vm);
1233 /* mark the buffer as used */
1235 if (mapped_buffer->user_mapped == 0)
1236 vm->num_user_mapped_buffers++;
1237 mapped_buffer->user_mapped++;
1239 /* If the mapping comes from user space, we own
1240 * the handle ref. Since we reuse an
1241 * existing mapping here, we need to give back those
1242 * refs once in order not to leak.
1244 if (mapped_buffer->own_mem_ref)
1245 dma_buf_put(mapped_buffer->dmabuf);
1247 mapped_buffer->own_mem_ref = true;
1249 kref_get(&mapped_buffer->ref);
1251 gk20a_dbg(gpu_dbg_map,
1252 "reusing as=%d pgsz=%d flags=0x%x ctags=%d "
1253 "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x "
1254 "own_mem_ref=%d user_mapped=%d",
1255 vm_aspace_id(vm), mapped_buffer->pgsz_idx,
1256 mapped_buffer->flags,
1257 mapped_buffer->ctag_lines,
1258 mapped_buffer->ctag_offset,
1259 hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1260 hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1261 lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1262 hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1263 lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1264 mapped_buffer->own_mem_ref, user_mapped);
1267 *sgt = mapped_buffer->sgt;
1268 return mapped_buffer->addr;
1271 u64 gk20a_vm_map(struct vm_gk20a *vm,
1272 struct dma_buf *dmabuf,
1274 u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
1276 struct sg_table **sgt,
1282 struct gk20a *g = gk20a_from_vm(vm);
1283 struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
1284 struct device *d = dev_from_vm(vm);
1285 struct mapped_buffer_node *mapped_buffer = 0;
1286 bool inserted = false, va_allocated = false;
1287 u32 gmmu_page_size = 0;
1290 struct buffer_attrs bfr = {0};
1291 struct gk20a_comptags comptags;
1294 mutex_lock(&vm->update_gmmu_lock);
1296 /* check if this buffer is already mapped */
1297 map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
1299 user_mapped, rw_flag);
1301 mutex_unlock(&vm->update_gmmu_lock);
1305 /* pin buffer to get phys/iovmm addr */
1306 bfr.sgt = gk20a_mm_pin(d, dmabuf);
1307 if (IS_ERR(bfr.sgt)) {
1308 /* Falling back to physical is actually possible
1309 * here in many cases if we use 4K phys pages in the
1310 * gmmu. However we have some regions which require
1311 * contig regions to work properly (either phys-contig
1312 * or contig through smmu io_vaspace). Until we can
1313 * track the difference between those two cases we have
1314 * to fail the mapping when we run out of SMMU space.
1316 gk20a_warn(d, "oom allocating tracking buffer");
1324 bfr.size = dmabuf->size;
1325 buf_addr = (u64)sg_dma_address(bfr.sgt->sgl);
1326 if (unlikely(!buf_addr))
1327 buf_addr = (u64)sg_phys(bfr.sgt->sgl);
1328 bfr.align = 1 << __ffs(buf_addr);
1330 mapping_size = mapping_size ? mapping_size : bfr.size;
1332 /* If FIX_OFFSET is set, pgsz is determined. Otherwise, select
1333 * page size according to memory alignment */
1334 if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1335 bfr.pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ?
1336 gmmu_page_size_big : gmmu_page_size_small;
1339 gmmu_select_page_size(&bfr);
1341 bfr.pgsz_idx = gmmu_page_size_small;
1344 /* validate/adjust bfr attributes */
1345 if (unlikely(bfr.pgsz_idx == -1)) {
1346 gk20a_err(d, "unsupported page size detected");
1350 if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
1351 bfr.pgsz_idx > gmmu_page_size_big)) {
1356 gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
1358 /* Check if we should use a fixed offset for mapping this buffer */
1360 if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1361 err = validate_fixed_buffer(vm, &bfr,
1362 offset_align, mapping_size);
1366 map_offset = offset_align;
1367 va_allocated = false;
1369 va_allocated = true;
1374 err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx);
1375 if (unlikely(err)) {
1376 gk20a_err(d, "failure setting up kind and compression");
1380 /* bar1 and pmu vm don't need ctag */
1381 if (!vm->enable_ctag)
1384 gk20a_get_comptags(d, dmabuf, &comptags);
1386 if (bfr.ctag_lines && !comptags.lines) {
1387 /* allocate compression resources if needed */
1388 err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator,
1391 /* ok to fall back here if we ran out */
1392 /* TBD: we can partially alloc ctags as well... */
1393 bfr.ctag_lines = bfr.ctag_offset = 0;
1394 bfr.kind_v = bfr.uc_kind_v;
1396 gk20a_get_comptags(d, dmabuf, &comptags);
1398 /* init/clear the ctag buffer */
1399 g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
1401 comptags.offset + comptags.lines - 1);
1405 /* store the comptag info */
1406 bfr.ctag_offset = comptags.offset;
1408 /* update gmmu ptes */
1409 map_offset = __locked_gmmu_map(vm, map_offset,
1411 buffer_offset, /* sg offset */
1421 gk20a_dbg(gpu_dbg_map,
1423 "kind=0x%x kind_uc=0x%x flags=0x%x "
1424 "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
1425 vm_aspace_id(vm), gmmu_page_size,
1426 bfr.kind_v, bfr.uc_kind_v, flags,
1427 bfr.ctag_lines, bfr.ctag_offset,
1428 hi32(map_offset), lo32(map_offset),
1429 hi32((u64)sg_dma_address(bfr.sgt->sgl)),
1430 lo32((u64)sg_dma_address(bfr.sgt->sgl)),
1431 hi32((u64)sg_phys(bfr.sgt->sgl)),
1432 lo32((u64)sg_phys(bfr.sgt->sgl)));
1434 #if defined(NVHOST_DEBUG)
1437 struct scatterlist *sg = NULL;
1438 gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
1439 for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) {
1440 u64 da = sg_dma_address(sg);
1441 u64 pa = sg_phys(sg);
1442 u64 len = sg->length;
1443 gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
1444 i, hi32(pa), lo32(pa), hi32(da), lo32(da),
1445 hi32(len), lo32(len));
1450 /* keep track of the buffer for unmapping */
1451 /* TBD: check for multiple mapping of same buffer */
1452 mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL);
1453 if (!mapped_buffer) {
1454 gk20a_warn(d, "oom allocating tracking buffer");
1457 mapped_buffer->dmabuf = dmabuf;
1458 mapped_buffer->sgt = bfr.sgt;
1459 mapped_buffer->addr = map_offset;
1460 mapped_buffer->size = mapping_size;
1461 mapped_buffer->pgsz_idx = bfr.pgsz_idx;
1462 mapped_buffer->ctag_offset = bfr.ctag_offset;
1463 mapped_buffer->ctag_lines = bfr.ctag_lines;
1464 mapped_buffer->vm = vm;
1465 mapped_buffer->flags = flags;
1466 mapped_buffer->kind = kind;
1467 mapped_buffer->va_allocated = va_allocated;
1468 mapped_buffer->user_mapped = user_mapped ? 1 : 0;
1469 mapped_buffer->own_mem_ref = user_mapped;
1470 INIT_LIST_HEAD(&mapped_buffer->unmap_list);
1471 INIT_LIST_HEAD(&mapped_buffer->va_buffers_list);
1472 kref_init(&mapped_buffer->ref);
1474 err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
1476 gk20a_err(d, "failed to insert into mapped buffer tree");
1481 vm->num_user_mapped_buffers++;
1483 gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
1485 if (!va_allocated) {
1486 struct vm_reserved_va_node *va_node;
1488 /* find the space reservation */
1489 va_node = addr_to_reservation(vm, map_offset);
1490 list_add_tail(&mapped_buffer->va_buffers_list,
1491 &va_node->va_buffers_list);
1492 mapped_buffer->va_node = va_node;
1495 mutex_unlock(&vm->update_gmmu_lock);
1497 /* Invalidate kernel mappings immediately */
1498 if (vm_aspace_id(vm) == -1)
1499 gk20a_mm_tlb_invalidate(vm);
1505 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1507 vm->num_user_mapped_buffers--;
1509 kfree(mapped_buffer);
1511 gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
1512 if (!IS_ERR(bfr.sgt))
1513 gk20a_mm_unpin(d, dmabuf, bfr.sgt);
1515 mutex_unlock(&vm->update_gmmu_lock);
1516 gk20a_dbg_info("err=%d\n", err);
1520 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
1521 struct sg_table **sgt,
1528 mutex_lock(&vm->update_gmmu_lock);
1529 vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
1530 *sgt, /* sg table */
1533 0, /* page size index = 0 i.e. SZ_4K */
1535 0, /* ctag_offset */
1537 mutex_unlock(&vm->update_gmmu_lock);
1539 gk20a_err(dev_from_vm(vm), "failed to allocate va space");
1543 /* Invalidate kernel mappings immediately */
1544 gk20a_mm_tlb_invalidate(vm);
1549 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
1554 mutex_lock(&vm->update_gmmu_lock);
1555 __locked_gmmu_unmap(vm,
1558 0, /* page size 4K */
1559 true, /*va_allocated */
1561 mutex_unlock(&vm->update_gmmu_lock);
1564 phys_addr_t gk20a_get_phys_from_iova(struct device *d,
1570 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
1574 iova = dma_addr & PAGE_MASK;
1575 phys = iommu_iova_to_phys(mapping->domain, iova);
1579 /* get sg_table from already allocated buffer */
1580 int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
1581 void *cpuva, u64 iova,
1585 *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1587 dev_err(d, "failed to allocate memory\n");
1591 err = dma_get_sgtable(d, *sgt,
1595 dev_err(d, "failed to create sg table\n");
1598 sg_dma_address((*sgt)->sgl) = iova;
1609 int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
1610 struct page **pages, u64 iova,
1614 *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1616 dev_err(d, "failed to allocate memory\n");
1620 err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
1622 dev_err(d, "failed to allocate sg_table\n");
1625 sg_set_page((*sgt)->sgl, *pages, size, 0);
1626 sg_dma_address((*sgt)->sgl) = iova;
1637 void gk20a_free_sgtable(struct sg_table **sgt)
1639 sg_free_table(*sgt);
1644 u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
1646 u64 result = sg_phys(sgl);
1647 #ifdef CONFIG_TEGRA_IOMMU_SMMU
1648 if (sg_dma_address(sgl) == DMA_ERROR_CODE)
1650 else if (sg_dma_address(sgl)) {
1651 result = sg_dma_address(sgl) |
1652 1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
1658 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
1659 enum gmmu_pgsz_gk20a pgsz_idx,
1660 struct sg_table *sgt,
1662 u64 first_vaddr, u64 last_vaddr,
1663 u8 kind_v, u32 ctag_offset,
1668 u32 pde_lo, pde_hi, pde_i;
1669 struct scatterlist *cur_chunk;
1670 unsigned int cur_offset;
1671 u32 pte_w[2] = {0, 0}; /* invalid pte */
1672 u32 ctag = ctag_offset;
1674 u32 page_size = gmmu_page_sizes[pgsz_idx];
1676 u64 space_to_skip = buffer_offset;
1678 pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
1681 gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
1682 pgsz_idx, pde_lo, pde_hi);
1684 /* If ctag_offset !=0 add 1 else add 0. The idea is to avoid a branch
1685 * below (per-pte). Note: this doesn't work unless page size (when
1686 * comptags are active) is 128KB. We have checks elsewhere for that. */
1687 ctag_incr = !!ctag_offset;
1691 cur_chunk = sgt->sgl;
1692 /* space_to_skip must be page aligned */
1693 BUG_ON(space_to_skip & (page_size - 1));
1695 while (space_to_skip > 0 && cur_chunk) {
1696 u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
1701 cur_offset += page_size;
1704 cur_offset >= cur_chunk->length) {
1705 cur_offset -= cur_chunk->length;
1706 cur_chunk = sg_next(cur_chunk);
1708 space_to_skip -= page_size;
1714 for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
1719 struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
1721 if (pde_i == pde_lo)
1722 pte_lo = pte_index_from_vaddr(vm, first_vaddr,
1727 if ((pde_i != pde_hi) && (pde_hi != pde_lo))
1728 pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
1730 pte_hi = pte_index_from_vaddr(vm, last_vaddr,
1733 /* get cpu access to the ptes */
1734 err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur,
1737 gk20a_err(dev_from_vm(vm),
1738 "couldn't map ptes for update as=%d pte_ref_cnt=%d",
1739 vm_aspace_id(vm), pte->ref_cnt);
1743 gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
1744 for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
1746 u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
1751 pte_w[0] = gmmu_pte_valid_true_f() |
1752 gmmu_pte_address_sys_f(addr
1753 >> gmmu_pte_address_shift_v());
1754 pte_w[1] = gmmu_pte_aperture_video_memory_f() |
1755 gmmu_pte_kind_f(kind_v) |
1756 gmmu_pte_comptagline_f(ctag);
1758 if (rw_flag == gk20a_mem_flag_read_only) {
1759 pte_w[0] |= gmmu_pte_read_only_true_f();
1761 gmmu_pte_write_disable_true_f();
1762 } else if (rw_flag ==
1763 gk20a_mem_flag_write_only) {
1765 gmmu_pte_read_disable_true_f();
1768 pte_w[1] |= gmmu_pte_vol_true_f();
1771 gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d"
1772 " ctag=%d vol=%d refs=%d"
1774 pte_cur, hi32(addr), lo32(addr),
1775 kind_v, ctag, !cacheable,
1776 pte->ref_cnt, pte_w[1], pte_w[0]);
1778 cur_offset += page_size;
1781 cur_offset >= cur_chunk->length) {
1782 cur_offset -= cur_chunk->length;
1783 cur_chunk = sg_next(cur_chunk);
1788 gk20a_dbg(gpu_dbg_pte,
1789 "pte_cur=%d ref=%d [0x0,0x0]",
1790 pte_cur, pte->ref_cnt);
1793 gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
1794 gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
1797 unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
1799 if (pte->ref_cnt == 0) {
1800 void *pte_ref_ptr = pte->ref;
1802 /* It can make sense to keep around one page table for
1803 * each flavor (empty)... in case a new map is coming
1804 * right back to alloc (and fill it in) again.
1805 * But: deferring unmapping should help with pathologic
1806 * unmap/map/unmap/map cases where we'd trigger pte
1807 * free/alloc/free/alloc.
1812 update_gmmu_pde_locked(vm, pde_i);
1814 free_gmmu_pages(vm, pte_ref_ptr, pte->sgt,
1815 vm->mm->page_table_sizing[pgsz_idx].order,
1823 vm->tlb_dirty = true;
1824 gk20a_dbg_fn("set tlb dirty");
1829 /*TBD: potentially rewrite above to pre-map everything it needs to
1830 * as that's the only way it can fail */
1836 /* for gk20a the "video memory" apertures here are misnomers. */
1837 static inline u32 big_valid_pde0_bits(u64 pte_addr)
1840 gmmu_pde_aperture_big_video_memory_f() |
1841 gmmu_pde_address_big_sys_f(
1842 (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1845 static inline u32 small_valid_pde1_bits(u64 pte_addr)
1848 gmmu_pde_aperture_small_video_memory_f() |
1849 gmmu_pde_vol_small_true_f() | /* tbd: why? */
1850 gmmu_pde_address_small_sys_f(
1851 (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1855 /* Given the current state of the ptes associated with a pde,
1856 determine value and write it out. There's no checking
1857 here to determine whether or not a change was actually
1858 made. So, superfluous updates will cause unnecessary
1861 static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
1863 bool small_valid, big_valid;
1864 u64 pte_addr[2] = {0, 0};
1865 struct page_table_gk20a *small_pte =
1866 vm->pdes.ptes[gmmu_page_size_small] + i;
1867 struct page_table_gk20a *big_pte =
1868 vm->pdes.ptes[gmmu_page_size_big] + i;
1869 u32 pde_v[2] = {0, 0};
1872 small_valid = small_pte && small_pte->ref;
1873 big_valid = big_pte && big_pte->ref;
1876 pte_addr[gmmu_page_size_small] =
1877 gk20a_mm_iova_addr(small_pte->sgt->sgl);
1879 pte_addr[gmmu_page_size_big] =
1880 gk20a_mm_iova_addr(big_pte->sgt->sgl);
1882 pde_v[0] = gmmu_pde_size_full_f();
1883 pde_v[0] |= big_valid ?
1884 big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
1886 (gmmu_pde_aperture_big_invalid_f());
1888 pde_v[1] |= (small_valid ?
1889 small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
1891 (gmmu_pde_aperture_small_invalid_f() |
1892 gmmu_pde_vol_small_false_f())
1895 (big_valid ? (gmmu_pde_vol_big_true_f()) :
1896 gmmu_pde_vol_big_false_f());
1898 pde = pde_from_index(vm, i);
1900 gk20a_mem_wr32(pde, 0, pde_v[0]);
1901 gk20a_mem_wr32(pde, 1, pde_v[1]);
1905 FLUSH_CPU_DCACHE(pde,
1906 sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
1909 gk20a_mm_l2_invalidate(vm->mm->g);
1911 gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
1913 vm->tlb_dirty = true;
1917 static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
1918 u32 num_pages, u32 pgsz_idx)
1920 struct mm_gk20a *mm = vm->mm;
1921 struct gk20a *g = mm->g;
1922 u32 pgsz = gmmu_page_sizes[pgsz_idx];
1926 /* allocate the zero page if the va does not already have one */
1927 if (!vm->zero_page_cpuva) {
1929 vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
1933 if (!vm->zero_page_cpuva) {
1934 dev_err(&g->dev->dev, "failed to allocate zero page\n");
1938 vm->zero_page_iova = iova;
1939 err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
1940 vm->zero_page_cpuva, vm->zero_page_iova,
1943 dma_free_coherent(&g->dev->dev, mm->big_page_size,
1944 vm->zero_page_cpuva,
1945 vm->zero_page_iova);
1946 vm->zero_page_iova = 0;
1947 vm->zero_page_cpuva = NULL;
1949 dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
1954 for (i = 0; i < num_pages; i++) {
1955 u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
1956 vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
1957 NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
1958 gk20a_mem_flag_none);
1961 gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
1972 /* something went wrong. unmap pages */
1975 __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
1976 gk20a_mem_flag_none);
1982 /* NOTE! mapped_buffers lock must be held */
1983 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
1985 struct vm_gk20a *vm = mapped_buffer->vm;
1987 if (mapped_buffer->va_node &&
1988 mapped_buffer->va_node->sparse) {
1989 u64 vaddr = mapped_buffer->addr;
1990 u32 pgsz_idx = mapped_buffer->pgsz_idx;
1991 u32 num_pages = mapped_buffer->size >>
1992 gmmu_page_shifts[pgsz_idx];
1994 /* there is little we can do if this fails... */
1995 gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
1998 __locked_gmmu_unmap(vm,
1999 mapped_buffer->addr,
2000 mapped_buffer->size,
2001 mapped_buffer->pgsz_idx,
2002 mapped_buffer->va_allocated,
2003 gk20a_mem_flag_none);
2005 gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
2006 vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx],
2007 hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
2008 mapped_buffer->own_mem_ref);
2010 gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
2011 mapped_buffer->sgt);
2013 /* remove from mapped buffer tree and remove list, free */
2014 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
2015 if (!list_empty(&mapped_buffer->va_buffers_list))
2016 list_del(&mapped_buffer->va_buffers_list);
2018 /* keep track of mapped buffers */
2019 if (mapped_buffer->user_mapped)
2020 vm->num_user_mapped_buffers--;
2022 if (mapped_buffer->own_mem_ref)
2023 dma_buf_put(mapped_buffer->dmabuf);
2025 kfree(mapped_buffer);
2030 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
2032 struct device *d = dev_from_vm(vm);
2033 struct mapped_buffer_node *mapped_buffer;
2035 mutex_lock(&vm->update_gmmu_lock);
2036 mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
2037 if (!mapped_buffer) {
2038 mutex_unlock(&vm->update_gmmu_lock);
2039 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
2043 kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
2044 mutex_unlock(&vm->update_gmmu_lock);
2047 static void gk20a_vm_remove_support(struct vm_gk20a *vm)
2049 struct gk20a *g = vm->mm->g;
2050 struct mapped_buffer_node *mapped_buffer;
2051 struct vm_reserved_va_node *va_node, *va_node_tmp;
2052 struct rb_node *node;
2056 mutex_lock(&vm->update_gmmu_lock);
2058 /* TBD: add a flag here for the unmap code to recognize teardown
2059 * and short-circuit any otherwise expensive operations. */
2061 node = rb_first(&vm->mapped_buffers);
2064 container_of(node, struct mapped_buffer_node, node);
2065 gk20a_vm_unmap_locked(mapped_buffer);
2066 node = rb_first(&vm->mapped_buffers);
2069 /* destroy remaining reserved memory areas */
2070 list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
2072 list_del(&va_node->reserved_va_list);
2076 /* unmapping all buffers above may not actually free
2077 * all vm ptes. jettison them here for certain... */
2078 for (i = 0; i < vm->pdes.num_pdes; i++) {
2079 struct page_table_gk20a *pte =
2080 &vm->pdes.ptes[gmmu_page_size_small][i];
2082 free_gmmu_pages(vm, pte->ref, pte->sgt,
2083 vm->mm->page_table_sizing[gmmu_page_size_small].order,
2087 pte = &vm->pdes.ptes[gmmu_page_size_big][i];
2089 free_gmmu_pages(vm, pte->ref, pte->sgt,
2090 vm->mm->page_table_sizing[gmmu_page_size_big].order,
2096 unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
2097 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, vm->pdes.size);
2099 kfree(vm->pdes.ptes[gmmu_page_size_small]);
2100 kfree(vm->pdes.ptes[gmmu_page_size_big]);
2101 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
2102 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
2104 mutex_unlock(&vm->update_gmmu_lock);
2106 /* release zero page if used */
2107 if (vm->zero_page_cpuva)
2108 dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
2109 vm->zero_page_cpuva, vm->zero_page_iova);
2111 /* vm is not used anymore. release it. */
2115 static void gk20a_vm_remove_support_kref(struct kref *ref)
2117 struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
2118 gk20a_vm_remove_support(vm);
2121 void gk20a_vm_get(struct vm_gk20a *vm)
2126 void gk20a_vm_put(struct vm_gk20a *vm)
2128 kref_put(&vm->ref, gk20a_vm_remove_support_kref);
2131 /* address space interfaces for the gk20a module */
2132 int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
2134 struct gk20a_as *as = as_share->as;
2135 struct gk20a *g = gk20a_from_as(as);
2136 struct mm_gk20a *mm = &g->mm;
2137 struct vm_gk20a *vm;
2139 u32 num_pages, low_hole_pages;
2145 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
2152 vm->as_share = as_share;
2154 vm->big_pages = true;
2156 vm->va_start = mm->pde_stride; /* create a one pde hole */
2157 vm->va_limit = mm->channel.size; /* note this means channel.size is
2158 really just the max */
2161 pde_range_from_vaddr_range(vm,
2164 vm->pdes.num_pdes = pde_hi + 1;
2167 vm->pdes.ptes[gmmu_page_size_small] =
2168 kzalloc(sizeof(struct page_table_gk20a) *
2169 vm->pdes.num_pdes, GFP_KERNEL);
2171 vm->pdes.ptes[gmmu_page_size_big] =
2172 kzalloc(sizeof(struct page_table_gk20a) *
2173 vm->pdes.num_pdes, GFP_KERNEL);
2175 if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2176 vm->pdes.ptes[gmmu_page_size_big]))
2179 gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d",
2180 vm->va_limit, vm->pdes.num_pdes);
2182 /* allocate the page table directory */
2183 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2184 &vm->pdes.sgt, &vm->pdes.size);
2188 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2191 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2195 gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
2197 gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2198 /* we could release vm->pdes.kv but it's only one page... */
2201 /* low-half: alloc small pages */
2202 /* high-half: alloc big pages */
2203 vma_size = mm->channel.size >> 1;
2205 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
2206 gmmu_page_sizes[gmmu_page_size_small]>>10);
2207 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
2209 /* num_pages above is without regard to the low-side hole. */
2210 low_hole_pages = (vm->va_start >>
2211 gmmu_page_shifts[gmmu_page_size_small]);
2213 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
2214 low_hole_pages, /* start */
2215 num_pages - low_hole_pages, /* length */
2218 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
2219 gmmu_page_sizes[gmmu_page_size_big]>>10);
2221 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
2222 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
2223 num_pages, /* start */
2224 num_pages, /* length */
2227 vm->mapped_buffers = RB_ROOT;
2229 mutex_init(&vm->update_gmmu_lock);
2230 kref_init(&vm->ref);
2231 INIT_LIST_HEAD(&vm->reserved_va_list);
2233 vm->enable_ctag = true;
2239 int gk20a_vm_release_share(struct gk20a_as_share *as_share)
2241 struct vm_gk20a *vm = as_share->vm;
2245 vm->as_share = NULL;
2247 /* put as reference to vm */
2250 as_share->vm = NULL;
2256 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2257 struct nvhost_as_alloc_space_args *args)
2259 { int err = -ENOMEM;
2262 struct gk20a_allocator *vma;
2263 struct vm_gk20a *vm = as_share->vm;
2264 struct vm_reserved_va_node *va_node;
2265 u64 vaddr_start = 0;
2267 gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
2268 args->flags, args->page_size, args->pages,
2271 /* determine pagesz idx */
2272 for (pgsz_idx = gmmu_page_size_small;
2273 pgsz_idx < gmmu_nr_page_sizes;
2275 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2279 if (pgsz_idx >= gmmu_nr_page_sizes) {
2284 va_node = kzalloc(sizeof(*va_node), GFP_KERNEL);
2290 if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE &&
2291 pgsz_idx != gmmu_page_size_big) {
2298 if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
2299 start_page_nr = (u32)(args->o_a.offset >>
2300 gmmu_page_shifts[pgsz_idx]);
2302 vma = &vm->vma[pgsz_idx];
2303 err = vma->alloc(vma, &start_page_nr, args->pages);
2309 vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx];
2311 va_node->vaddr_start = vaddr_start;
2312 va_node->size = (u64)args->page_size * (u64)args->pages;
2313 va_node->pgsz_idx = args->page_size;
2314 INIT_LIST_HEAD(&va_node->va_buffers_list);
2315 INIT_LIST_HEAD(&va_node->reserved_va_list);
2317 mutex_lock(&vm->update_gmmu_lock);
2319 /* mark that we need to use sparse mappings here */
2320 if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
2321 err = gk20a_vm_put_empty(vm, vaddr_start, args->pages,
2324 mutex_unlock(&vm->update_gmmu_lock);
2325 vma->free(vma, start_page_nr, args->pages);
2330 va_node->sparse = true;
2332 list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
2334 mutex_unlock(&vm->update_gmmu_lock);
2336 args->o_a.offset = vaddr_start;
2342 int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2343 struct nvhost_as_free_space_args *args)
2348 struct gk20a_allocator *vma;
2349 struct vm_gk20a *vm = as_share->vm;
2350 struct vm_reserved_va_node *va_node;
2352 gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
2353 args->pages, args->offset);
2355 /* determine pagesz idx */
2356 for (pgsz_idx = gmmu_page_size_small;
2357 pgsz_idx < gmmu_nr_page_sizes;
2359 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2363 if (pgsz_idx >= gmmu_nr_page_sizes) {
2368 start_page_nr = (u32)(args->offset >>
2369 gmmu_page_shifts[pgsz_idx]);
2371 vma = &vm->vma[pgsz_idx];
2372 err = vma->free(vma, start_page_nr, args->pages);
2377 mutex_lock(&vm->update_gmmu_lock);
2378 va_node = addr_to_reservation(vm, args->offset);
2380 struct mapped_buffer_node *buffer;
2382 /* there is no need to unallocate the buffers in va. Just
2383 * convert them into normal buffers */
2385 list_for_each_entry(buffer,
2386 &va_node->va_buffers_list, va_buffers_list)
2387 list_del_init(&buffer->va_buffers_list);
2389 list_del(&va_node->reserved_va_list);
2391 /* if this was a sparse mapping, free the va */
2392 if (va_node->sparse)
2393 __locked_gmmu_unmap(vm,
2394 va_node->vaddr_start,
2398 gk20a_mem_flag_none);
2401 mutex_unlock(&vm->update_gmmu_lock);
2407 int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
2408 struct channel_gk20a *ch)
2411 struct vm_gk20a *vm = as_share->vm;
2416 err = channel_gk20a_commit_va(ch);
2423 int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
2425 struct gk20a_dmabuf_priv *priv;
2426 static DEFINE_MUTEX(priv_lock);
2428 priv = dma_buf_get_drvdata(dmabuf, dev);
2432 mutex_lock(&priv_lock);
2433 priv = dma_buf_get_drvdata(dmabuf, dev);
2435 goto priv_exist_or_err;
2436 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
2438 priv = ERR_PTR(-ENOMEM);
2439 goto priv_exist_or_err;
2441 mutex_init(&priv->lock);
2442 dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
2444 mutex_unlock(&priv_lock);
2452 static int gk20a_dmabuf_get_kind(struct dma_buf *dmabuf)
2455 #ifdef CONFIG_TEGRA_NVMAP
2459 err = nvmap_get_dmabuf_param(dmabuf, NVMAP_HANDLE_PARAM_KIND,
2461 kind = err ? kind : nvmap_param;
2466 int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
2469 u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
2475 struct vm_gk20a *vm = as_share->vm;
2476 struct dma_buf *dmabuf;
2481 /* get ref to the mem handle (released on unmap_locked) */
2482 dmabuf = dma_buf_get(dmabuf_fd);
2486 err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
2488 dma_buf_put(dmabuf);
2493 kind = gk20a_dmabuf_get_kind(dmabuf);
2495 ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
2496 flags, kind, NULL, true,
2497 gk20a_mem_flag_none,
2501 *offset_align = ret_va;
2503 dma_buf_put(dmabuf);
2510 int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
2512 struct vm_gk20a *vm = as_share->vm;
2516 gk20a_vm_unmap_user(vm, offset);
int gk20a_init_bar1_vm(struct mm_gk20a *mm)
{
	int err;
	phys_addr_t inst_pa;
	void *inst_ptr;
	struct vm_gk20a *vm = &mm->bar1.vm;
	struct gk20a *g = gk20a_from_mm(mm);
	struct device *d = dev_from_gk20a(g);
	struct inst_desc *inst_block = &mm->bar1.inst_block;
	u64 pde_addr;
	u32 pde_addr_lo;
	u32 pde_addr_hi;
	dma_addr_t iova;
	u32 pde_lo, pde_hi;

	vm->mm = mm;

	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;

	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);

	vm->va_start = mm->pde_stride * 1;
	vm->va_limit = mm->bar1.aperture_size;

	pde_range_from_vaddr_range(vm,
				   0, vm->va_limit-1,
				   &pde_lo, &pde_hi);
	vm->pdes.num_pdes = pde_hi + 1;

	/* bar1 is likely only to ever use/need small page sizes. */
	/* But just in case, for now... arrange for both. */
	vm->pdes.ptes[gmmu_page_size_small] =
		kzalloc(sizeof(struct page_table_gk20a) *
			vm->pdes.num_pdes, GFP_KERNEL);

	vm->pdes.ptes[gmmu_page_size_big] =
		kzalloc(sizeof(struct page_table_gk20a) *
			vm->pdes.num_pdes, GFP_KERNEL);

	if (!(vm->pdes.ptes[gmmu_page_size_small] &&
	      vm->pdes.ptes[gmmu_page_size_big]))
		return -ENOMEM;

	gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
		   vm->va_limit, vm->pdes.num_pdes);

	/* allocate the page table directory */
	err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
			       &vm->pdes.sgt, &vm->pdes.size);
	if (err)
		goto clean_up;

	err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
			     vm->pdes.size);
	if (err) {
		free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
					vm->pdes.size);
		goto clean_up;
	}
	gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
			vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
	/* we could release vm->pdes.kv but it's only one page... */

	pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
	pde_addr_lo = u64_lo32(pde_addr >> 12);
	pde_addr_hi = u64_hi32(pde_addr);

	gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
		(u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl),
		pde_addr_lo, pde_addr_hi);

	/* allocate instance mem for bar1 */
	inst_block->size = ram_in_alloc_size_v();
	inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
				&iova, GFP_KERNEL);
	if (!inst_block->cpuva) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	inst_block->iova = iova;
	inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
	if (!inst_block->cpu_pa) {
		gk20a_err(d, "%s: failed to get phys address\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	inst_pa = inst_block->cpu_pa;
	inst_ptr = inst_block->cpuva;

	gk20a_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p",
		(u64)inst_pa, inst_ptr);

	memset(inst_ptr, 0, ram_fc_size_val_v());

	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
		ram_in_page_dir_base_target_vid_mem_f() |
		ram_in_page_dir_base_vol_true_f() |
		ram_in_page_dir_base_lo_f(pde_addr_lo));

	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
		ram_in_page_dir_base_hi_f(pde_addr_hi));

	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
		u64_lo32(vm->va_limit) | 0xFFF);

	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
		ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));

	gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa);
	gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
			     1, /* start */
			     (vm->va_limit >> 12) - 1, /* length */
			     1); /* block size is 1 page */
	/* initialize just in case we try to use it anyway */
	gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
			     0x0badc0de, /* start */
			     1, /* length */
			     1); /* block size is 1 page */

	vm->mapped_buffers = RB_ROOT;

	mutex_init(&vm->update_gmmu_lock);
	kref_init(&vm->ref);
	INIT_LIST_HEAD(&vm->reserved_va_list);

	return 0;

clean_up:
	/* free, etc */
	if (inst_block->cpuva)
		dma_free_coherent(d, inst_block->size,
			inst_block->cpuva, inst_block->iova);
	inst_block->cpuva = NULL;
	inst_block->iova = 0;

	return err;
}
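/*
 * Worked example of the page-directory base split programmed above
 * (address invented for illustration): the PDB is 4K aligned, so the
 * hardware takes bits [31:12] via the _lo word and bits [63:32] via _hi:
 *
 *   pde_addr    = 0x1_2345_6000
 *   pde_addr_lo = u64_lo32(pde_addr >> 12) = 0x00123456
 *   pde_addr_hi = u64_hi32(pde_addr)       = 0x00000001
 */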
/* pmu vm, share channel_vm interfaces */
int gk20a_init_pmu_vm(struct mm_gk20a *mm)
{
	int err;
	phys_addr_t inst_pa;
	void *inst_ptr;
	struct vm_gk20a *vm = &mm->pmu.vm;
	struct gk20a *g = gk20a_from_mm(mm);
	struct device *d = dev_from_gk20a(g);
	struct inst_desc *inst_block = &mm->pmu.inst_block;
	u64 pde_addr;
	u32 pde_addr_lo;
	u32 pde_addr_hi;
	dma_addr_t iova;
	u32 pde_lo, pde_hi;

	vm->mm = mm;

	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;

	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);

	vm->va_start = GK20A_PMU_VA_START;
	vm->va_limit = vm->va_start + mm->pmu.aperture_size;

	pde_range_from_vaddr_range(vm,
				   0, vm->va_limit-1,
				   &pde_lo, &pde_hi);
	vm->pdes.num_pdes = pde_hi + 1;

	/* The pmu is likely only to ever use/need small page sizes. */
	/* But just in case, for now... arrange for both. */
	vm->pdes.ptes[gmmu_page_size_small] =
		kzalloc(sizeof(struct page_table_gk20a) *
			vm->pdes.num_pdes, GFP_KERNEL);

	vm->pdes.ptes[gmmu_page_size_big] =
		kzalloc(sizeof(struct page_table_gk20a) *
			vm->pdes.num_pdes, GFP_KERNEL);

	if (!(vm->pdes.ptes[gmmu_page_size_small] &&
	      vm->pdes.ptes[gmmu_page_size_big]))
		return -ENOMEM;

	gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
		   vm->va_limit, vm->pdes.num_pdes);

	/* allocate the page table directory */
	err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
			       &vm->pdes.sgt, &vm->pdes.size);
	if (err)
		goto clean_up;

	err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
			     vm->pdes.size);
	if (err) {
		free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
					vm->pdes.size);
		goto clean_up;
	}

	gk20a_dbg_info("pmu pdes phys @ 0x%llx",
			(u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
	/* we could release vm->pdes.kv but it's only one page... */

	pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
	pde_addr_lo = u64_lo32(pde_addr >> 12);
	pde_addr_hi = u64_hi32(pde_addr);

	gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
			(u64)pde_addr, pde_addr_lo, pde_addr_hi);

	/* allocate instance mem for pmu */
	inst_block->size = GK20A_PMU_INST_SIZE;
	inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
				&iova, GFP_KERNEL);
	if (!inst_block->cpuva) {
		gk20a_err(d, "%s: memory allocation failed\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	inst_block->iova = iova;
	inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
	if (!inst_block->cpu_pa) {
		gk20a_err(d, "%s: failed to get phys address\n", __func__);
		err = -ENOMEM;
		goto clean_up;
	}

	inst_pa = inst_block->cpu_pa;
	inst_ptr = inst_block->cpuva;

	gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);

	memset(inst_ptr, 0, GK20A_PMU_INST_SIZE);

	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
		ram_in_page_dir_base_target_vid_mem_f() |
		ram_in_page_dir_base_vol_true_f() |
		ram_in_page_dir_base_lo_f(pde_addr_lo));

	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
		ram_in_page_dir_base_hi_f(pde_addr_hi));

	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
		u64_lo32(vm->va_limit) | 0xFFF);

	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
		ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));

	gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
			     (vm->va_start >> 12), /* start */
			     (vm->va_limit - vm->va_start) >> 12, /* length */
			     1); /* block size is 1 page */
	/* initialize just in case we try to use it anyway */
	gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
			     0x0badc0de, /* start */
			     1, /* length */
			     1); /* block size is 1 page */

	vm->mapped_buffers = RB_ROOT;

	mutex_init(&vm->update_gmmu_lock);
	kref_init(&vm->ref);
	INIT_LIST_HEAD(&vm->reserved_va_list);

	return 0;

clean_up:
	/* free, etc */
	if (inst_block->cpuva)
		dma_free_coherent(d, inst_block->size,
			inst_block->cpuva, inst_block->iova);
	inst_block->cpuva = NULL;
	inst_block->iova = 0;

	return err;
}
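/*
 * Worked example for the PDE-range math shared by the bar1 and pmu init
 * paths (numbers hypothetical, chosen only for illustration): if
 * pde_stride were 128MB and the aperture spanned [0, 512MB), then
 * pde_range_from_vaddr_range() would return pde_lo = 0 and pde_hi = 3,
 * so vm->pdes.num_pdes = pde_hi + 1 = 4 page-table slots are allocated
 * for each page size.
 */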
int gk20a_mm_fb_flush(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	u32 data;
	s32 retry = 100;

	gk20a_dbg_fn("");

	mutex_lock(&mm->l2_op_lock);

	/* Make sure all previous writes are committed to the L2. There's no
	   guarantee that writes are to DRAM. This will be a sysmembar internal
	   to the L2. */
	gk20a_writel(g, flush_fb_flush_r(),
		flush_fb_flush_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_fb_flush_r());

		if (flush_fb_flush_outstanding_v(data) ==
			flush_fb_flush_outstanding_true_v() ||
		    flush_fb_flush_pending_v(data) ==
			flush_fb_flush_pending_busy_v()) {
				gk20a_dbg_info("fb_flush 0x%x", data);
				retry--;
				usleep_range(20, 40);
		} else
			break;
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			   "fb_flush too many retries");

	mutex_unlock(&mm->l2_op_lock);

	return 0;
}
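/*
 * All of the flush/invalidate paths in this section share one bounded
 * polling idiom, condensed below (STATUS_REG and STILL_BUSY are
 * placeholders, not real registers): poll forever on pre-silicon
 * platforms, but give up and warn after a fixed number of 20-40us
 * sleeps on real hardware.
 */
#if 0
	s32 retry = 100;
	do {
		u32 data = gk20a_readl(g, STATUS_REG);
		if (!STILL_BUSY(data))
			break;
		retry--;
		usleep_range(20, 40);
	} while (retry >= 0 || !tegra_platform_is_silicon());
	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g), "timed out polling STATUS_REG");
#endif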
static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
{
	u32 data;
	s32 retry = 200;

	/* Invalidate any clean lines from the L2 so subsequent reads go to
	   DRAM. Dirty lines are not affected by this operation. */
	gk20a_writel(g, flush_l2_system_invalidate_r(),
		flush_l2_system_invalidate_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_l2_system_invalidate_r());

		if (flush_l2_system_invalidate_outstanding_v(data) ==
			flush_l2_system_invalidate_outstanding_true_v() ||
		    flush_l2_system_invalidate_pending_v(data) ==
			flush_l2_system_invalidate_pending_busy_v()) {
				gk20a_dbg_info("l2_system_invalidate 0x%x",
						data);
				retry--;
				usleep_range(20, 40);
		} else
			break;
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			   "l2_system_invalidate too many retries");
}
void gk20a_mm_l2_invalidate(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;

	mutex_lock(&mm->l2_op_lock);
	gk20a_mm_l2_invalidate_locked(g);
	mutex_unlock(&mm->l2_op_lock);
}
void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
{
	struct mm_gk20a *mm = &g->mm;
	u32 data;
	s32 retry = 200;

	gk20a_dbg_fn("");

	mutex_lock(&mm->l2_op_lock);

	/* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
	   as clean, so subsequent reads might hit in the L2. */
	gk20a_writel(g, flush_l2_flush_dirty_r(),
		flush_l2_flush_dirty_pending_busy_f());

	do {
		data = gk20a_readl(g, flush_l2_flush_dirty_r());

		if (flush_l2_flush_dirty_outstanding_v(data) ==
			flush_l2_flush_dirty_outstanding_true_v() ||
		    flush_l2_flush_dirty_pending_v(data) ==
			flush_l2_flush_dirty_pending_busy_v()) {
				gk20a_dbg_info("l2_flush_dirty 0x%x", data);
				retry--;
				usleep_range(20, 40);
		} else
			break;
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			   "l2_flush_dirty too many retries");

	if (invalidate)
		gk20a_mm_l2_invalidate_locked(g);

	mutex_unlock(&mm->l2_op_lock);
}
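/*
 * Usage note (calls illustrative): flush-only is enough when the CPU just
 * reads back GPU output; pass invalidate = true when the buffer will be
 * rewritten by a non-coherent agent, so the GPU re-fetches from DRAM
 * instead of hitting stale clean lines.
 */
#if 0
	gk20a_mm_l2_flush(g, false);	/* CPU read-back of GPU results */
	gk20a_mm_l2_flush(g, true);	/* buffer about to be recycled */
#endif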
int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
			 struct dma_buf **dmabuf,
			 u64 *offset)
{
	struct mapped_buffer_node *mapped_buffer;

	gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);

	mutex_lock(&vm->update_gmmu_lock);

	mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
							gpu_va);
	if (!mapped_buffer) {
		mutex_unlock(&vm->update_gmmu_lock);
		return -EINVAL;
	}

	*dmabuf = mapped_buffer->dmabuf;
	*offset = gpu_va - mapped_buffer->addr;

	mutex_unlock(&vm->update_gmmu_lock);

	return 0;
}
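/*
 * Sketch (fault_va stands in for a faulting address reported by the MMU):
 * translating a gpu va back to the dmabuf backing it, e.g. for fault
 * reporting.
 */
#if 0
	struct dma_buf *buf;
	u64 offset;

	if (!gk20a_vm_find_buffer(vm, fault_va, &buf, &offset))
		gk20a_err(dev_from_gk20a(g), "fault in dmabuf %p + 0x%llx",
			  buf, offset);
#endif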
void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12);
	u32 data;
	s32 retry = 200;
	static DEFINE_MUTEX(tlb_lock);

	gk20a_dbg_fn("");

	/* pagetables are considered sw states which are preserved after
	   prepare_poweroff. When gk20a deinit releases those pagetables,
	   common code in vm unmap path calls tlb invalidate that touches
	   hw. Use the power_on flag to skip tlb invalidation when gpu
	   power is turned off */
	if (!g->power_on)
		return;

	/* No need to invalidate if tlb is clean */
	mutex_lock(&vm->update_gmmu_lock);
	if (!vm->tlb_dirty) {
		mutex_unlock(&vm->update_gmmu_lock);
		return;
	}

	mutex_lock(&tlb_lock);
	do {
		data = gk20a_readl(g, fb_mmu_ctrl_r());
		if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
			break;
		usleep_range(20, 40);
		retry--;
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0) {
		gk20a_warn(dev_from_gk20a(g),
			"wait mmu fifo space too many retries");
		goto out;
	}

	gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
		fb_mmu_invalidate_pdb_addr_f(addr_lo) |
		fb_mmu_invalidate_pdb_aperture_vid_mem_f());

	gk20a_writel(g, fb_mmu_invalidate_r(),
		fb_mmu_invalidate_all_va_true_f() |
		fb_mmu_invalidate_trigger_true_f());

	do {
		data = gk20a_readl(g, fb_mmu_ctrl_r());
		if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
			fb_mmu_ctrl_pri_fifo_empty_false_f())
			break;
		retry--;
		usleep_range(20, 40);
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			"mmu invalidate too many retries");

out:
	mutex_unlock(&tlb_lock);
	vm->tlb_dirty = false;
	mutex_unlock(&vm->update_gmmu_lock);
}
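/*
 * Protocol sketch: page-table writers set vm->tlb_dirty while holding
 * vm->update_gmmu_lock (see update_gmmu_ptes_locked), and the function
 * above clears it after queuing the invalidate, so back-to-back unmaps
 * cost only one hardware invalidate. Illustrative ordering:
 */
#if 0
	mutex_lock(&vm->update_gmmu_lock);
	/* ... rewrite PTEs ... */
	vm->tlb_dirty = true;
	mutex_unlock(&vm->update_gmmu_lock);

	gk20a_mm_tlb_invalidate(vm);	/* no-op if nothing was dirtied */
#endif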
int gk20a_mm_suspend(struct gk20a *g)
{
	gk20a_dbg_fn("");

	/* flush the L2 before the GPU loses state across suspend */
	g->ops.ltc.elpg_flush(g);

	gk20a_dbg_fn("done");
	return 0;
}
void gk20a_mm_ltc_isr(struct gk20a *g)
{
	u32 intr;

	intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r());
	gk20a_err(dev_from_gk20a(g), "ltc: %08x\n", intr);
	/* writing the pending bits back acks and clears the interrupt */
	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
}
bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
{
	u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
	return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
		fb_mmu_debug_ctrl_debug_enabled_v();
}