gpu: nvgpu: Remove gk20a sparse texture & PTE freeing
author     Terje Bergstrom <tbergstrom@nvidia.com>
           Fri, 12 Dec 2014 11:52:28 +0000 (13:52 +0200)
committer  Terje Bergstrom <tbergstrom@nvidia.com>
           Wed, 17 Dec 2014 06:45:34 +0000 (22:52 -0800)
Remove support for gk20a sparse textures. We're using the implementation
from user space, so the gk20a code is never invoked.

Also remove the ref_cnt for PTEs, so PTEs are never freed when unmapping
pages, but only at VM delete time.

Change-Id: I04d7d43d9bff23ee46fd0570ad189faece35dd14
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/663294
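
The gk20a.c hunk below replaces the unconditional
NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS bit with a check on the per-chip ops
table: only chips that still install a set_sparse callback (gm20b) advertise
sparse allocations. A minimal sketch of that gating pattern, using
hypothetical stand-in types and placeholder flag values rather than the
driver's real headers:

/*
 * Minimal sketch of the capability gating added in
 * gk20a_init_gpu_characteristics().  All types and flag values here are
 * simplified stand-ins, not the driver's real definitions.
 */
#include <stdio.h>

#define NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS  (1 << 0)  /* placeholder value */
#define NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS     (1 << 1)  /* placeholder value */
#define NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS    (1 << 2)  /* placeholder value */

struct vm_gk20a;  /* opaque in this sketch */

struct mm_ops {
	int (*set_sparse)(struct vm_gk20a *vm, unsigned long long vaddr,
			  unsigned int num_pages, unsigned int pgsz_idx,
			  int refplus);
};

struct gpu_char {
	unsigned long long flags;
};

/* Advertise sparse allocs only when the chip provides set_sparse. */
static void init_characteristics(struct gpu_char *gpu, const struct mm_ops *mm)
{
	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
		   | NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;

	if (mm->set_sparse)
		gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS;
}

int main(void)
{
	struct mm_ops gk20a_mm = { 0 };  /* gk20a no longer installs set_sparse */
	struct gpu_char c;

	init_characteristics(&c, &gk20a_mm);
	printf("sparse allocs advertised: %s\n",
	       (c.flags & NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS) ? "yes" : "no");
	return 0;
}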

drivers/gpu/nvgpu/gk20a/gk20a.c
drivers/gpu/nvgpu/gk20a/gk20a.h
drivers/gpu/nvgpu/gk20a/mm_gk20a.c
drivers/gpu/nvgpu/gk20a/mm_gk20a.h
drivers/gpu/nvgpu/gm20b/mm_gm20b.c
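
The mm_gk20a.c and mm_gm20b.c hunks drop the per-page-table ref_cnt:
unmapping now only zeroes PTE entries, and the backing page-table pages are
released once, when the VM itself is removed, instead of being freed
whenever their reference count hit zero. A simplified illustration of that
lifetime, built on hypothetical stand-in structures rather than the driver's
real page_table_gk20a:

/*
 * Simplified sketch of the new PTE page lifetime.  The structures are
 * invented stand-ins for illustration only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct page_table {
	unsigned int *entries;  /* stand-in for the mapped PTE words */
	size_t num;             /* note: no ref_cnt field any more   */
};

/*
 * Old behaviour (removed by this patch): decrement ref_cnt on unmap and
 * free the table when it reached zero.  New behaviour: just clear the
 * entries and keep the table allocated.
 */
static void unmap_range(struct page_table *pt, size_t first, size_t count)
{
	memset(pt->entries + first, 0, count * sizeof(*pt->entries));
}

/* PTE pages are now freed only here, on VM teardown. */
static void vm_remove(struct page_table *pt)
{
	free(pt->entries);
	pt->entries = NULL;
	pt->num = 0;
}

int main(void)
{
	struct page_table pt = { calloc(256, sizeof(unsigned int)), 256 };

	if (!pt.entries)
		return 1;

	pt.entries[3] = 0xdeadbeef;    /* pretend something was mapped       */
	unmap_range(&pt, 0, pt.num);   /* unmap: entries cleared, pages kept */
	vm_remove(&pt);                /* teardown: backing memory released  */
	printf("done\n");
	return 0;
}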

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 23c8e44bf8f89460007c8da8eb90efa876a878d1..970f23a9a1fa289c9b9a91c1e88f055615203845 100644
@@ -1786,9 +1786,11 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
                gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
 
        gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
-               | NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS
                | NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
 
+       if (g->ops.mm.set_sparse)
+               gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS;
+
        if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&
            gk20a_platform_has_syncpoints(g->dev))
                gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 4333cd20dc72ee84be4c162ce3dec88874bbbab7..4ddea43178280a4d6355b04d721562569d3202a7 100644
@@ -292,10 +292,6 @@ struct gpu_ops {
        struct {
                int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
                               u32 num_pages, u32 pgsz_idx, bool refplus);
-               int (*put_empty)(struct vm_gk20a *vm, u64 vaddr,
-                              u32 num_pages, u32 pgsz_idx);
-               void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
-                              u64 size, u32 pgsz_idx);
                bool (*is_debug_mode_enabled)(struct gk20a *g);
                u64 (*gmmu_map)(struct vm_gk20a *vm,
                                u64 map_offset,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5e925d65aad55e738d6487c2019850f215571664..e51ce7c5e52f50c873af9996f1ba7979b119b152 100644
@@ -1806,8 +1806,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
                                     pte->size);
                if (err) {
                        gk20a_err(dev_from_vm(vm),
-                                  "couldn't map ptes for update as=%d pte_ref_cnt=%d",
-                                  vm_aspace_id(vm), pte->ref_cnt);
+                                  "couldn't map ptes for update as=%d",
+                                  vm_aspace_id(vm));
                        goto clean_up;
                }
 
@@ -1839,13 +1839,12 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
                                if (!cacheable)
                                        pte_w[1] |= gmmu_pte_vol_true_f();
 
-                               pte->ref_cnt++;
                                gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d"
-                                          " ctag=%d vol=%d refs=%d"
+                                          " ctag=%d vol=%d"
                                           " [0x%08x,0x%08x]",
                                           pte_cur, hi32(addr), lo32(addr),
                                           kind_v, ctag, !cacheable,
-                                          pte->ref_cnt, pte_w[1], pte_w[0]);
+                                          pte_w[1], pte_w[0]);
                                ctag += ctag_incr;
                                cur_offset += page_size;
                                addr += page_size;
@@ -1856,10 +1855,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
                                }
 
                        } else {
-                               pte->ref_cnt--;
                                gk20a_dbg(gpu_dbg_pte,
-                                          "pte_cur=%d ref=%d [0x0,0x0]",
-                                          pte_cur, pte->ref_cnt);
+                                          "pte_cur=%d [0x0,0x0]",
+                                          pte_cur);
                        }
 
                        gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
@@ -1867,24 +1865,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
                }
 
                unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
-
-               if (pte->ref_cnt == 0) {
-                       /* It can make sense to keep around one page table for
-                        * each flavor (empty)... in case a new map is coming
-                        * right back to alloc (and fill it in) again.
-                        * But: deferring unmapping should help with pathologic
-                        * unmap/map/unmap/map cases where we'd trigger pte
-                        * free/alloc/free/alloc.
-                        */
-                       free_gmmu_pages(vm, pte->ref, pte->sgt,
-                               vm->page_table_sizing[pgsz_idx].order,
-                               pte->size);
-                       pte->ref = NULL;
-
-                       /* rewrite pde */
-                       update_gmmu_pde_locked(vm, pde_i);
-               }
-
        }
 
        smp_mb();
@@ -1982,85 +1962,6 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
        vm->tlb_dirty  = true;
 }
 
-static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
-                              u32 num_pages, u32 pgsz_idx)
-{
-       struct mm_gk20a *mm = vm->mm;
-       struct gk20a *g = mm->g;
-       u32 pgsz = vm->gmmu_page_sizes[pgsz_idx];
-       u32 i;
-       dma_addr_t iova;
-
-       /* allocate the zero page if the va does not already have one */
-       if (!vm->zero_page_cpuva) {
-               int err = 0;
-               vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
-                                                        vm->big_page_size,
-                                                        &iova,
-                                                        GFP_KERNEL);
-               if (!vm->zero_page_cpuva) {
-                       dev_err(&g->dev->dev, "failed to allocate zero page\n");
-                       return -ENOMEM;
-               }
-
-               vm->zero_page_iova = iova;
-               err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
-                                       vm->zero_page_cpuva, vm->zero_page_iova,
-                                       vm->big_page_size);
-               if (err) {
-                       dma_free_coherent(&g->dev->dev, vm->big_page_size,
-                                         vm->zero_page_cpuva,
-                                         vm->zero_page_iova);
-                       vm->zero_page_iova = 0;
-                       vm->zero_page_cpuva = NULL;
-
-                       dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
-                       return -ENOMEM;
-               }
-       }
-
-       for (i = 0; i < num_pages; i++) {
-               u64 page_vaddr = g->ops.mm.gmmu_map(vm, vaddr,
-                       vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
-                       NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
-                       gk20a_mem_flag_none, false);
-
-               if (!page_vaddr) {
-                       gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
-                       goto err_unmap;
-               }
-               vaddr += pgsz;
-       }
-
-       return 0;
-
-err_unmap:
-
-       WARN_ON(1);
-       /* something went wrong. unmap pages */
-       while (i--) {
-               vaddr -= pgsz;
-               g->ops.mm.gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
-                                   gk20a_mem_flag_none);
-       }
-
-       return -EINVAL;
-}
-
-static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
-                              u32 num_pages, u32 pgsz_idx, bool refplus)
-{
-       return gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
-}
-
-static void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
-                              u64 size, u32 pgsz_idx) {
-       struct gk20a *g = vm->mm->g;
-
-       g->ops.mm.gmmu_unmap(vm, vaddr, size, pgsz_idx,
-                       false, gk20a_mem_flag_none);
-}
-
 /* NOTE! mapped_buffers lock must be held */
 void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
@@ -2075,18 +1976,14 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
                        ilog2(vm->gmmu_page_sizes[pgsz_idx]);
 
                /* there is little we can do if this fails... */
-               if (g->ops.mm.put_empty) {
-                       g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
-               } else {
-                       g->ops.mm.gmmu_unmap(vm,
-                               mapped_buffer->addr,
-                               mapped_buffer->size,
-                               mapped_buffer->pgsz_idx,
-                               mapped_buffer->va_allocated,
-                               gk20a_mem_flag_none);
-                       g->ops.mm.set_sparse(vm, vaddr,
-                                       num_pages, pgsz_idx, false);
-               }
+               g->ops.mm.gmmu_unmap(vm,
+                       mapped_buffer->addr,
+                       mapped_buffer->size,
+                       mapped_buffer->pgsz_idx,
+                       mapped_buffer->va_allocated,
+                       gk20a_mem_flag_none);
+               g->ops.mm.set_sparse(vm, vaddr,
+                               num_pages, pgsz_idx, false);
        } else
                g->ops.mm.gmmu_unmap(vm,
                                mapped_buffer->addr,
@@ -2140,7 +2037,6 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
 
 static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 {
-       struct gk20a *g = vm->mm->g;
        struct mapped_buffer_node *mapped_buffer;
        struct vm_reserved_va_node *va_node, *va_node_tmp;
        struct rb_node *node;
@@ -2197,11 +2093,6 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
                gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 
        mutex_unlock(&vm->update_gmmu_lock);
-
-       /* release zero page if used */
-       if (vm->zero_page_cpuva)
-               dma_free_coherent(&g->dev->dev, vm->big_page_size,
-                                 vm->zero_page_cpuva, vm->zero_page_iova);
 }
 
 void gk20a_vm_remove_support(struct vm_gk20a *vm)
@@ -2582,10 +2473,12 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 
                /* if this was a sparse mapping, free the va */
                if (va_node->sparse)
-                       g->ops.mm.clear_sparse(vm,
+                       g->ops.mm.gmmu_unmap(vm,
                                        va_node->vaddr_start,
                                        va_node->size,
-                                       va_node->pgsz_idx);
+                                       va_node->pgsz_idx,
+                                       true,
+                                       gk20a_mem_flag_none);
                kfree(va_node);
        }
        mutex_unlock(&vm->update_gmmu_lock);
@@ -3180,12 +3073,6 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g)
 
 void gk20a_init_mm(struct gpu_ops *gops)
 {
-       /* remember to remove NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS in
-        * characteristics flags if sparse support is removed */
-       gops->mm.set_sparse = gk20a_vm_put_sparse;
-       gops->mm.put_empty = gk20a_vm_put_empty;
-       gops->mm.clear_sparse = gk20a_vm_clear_sparse;
-
        gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
        gops->mm.gmmu_map = gk20a_locked_gmmu_map;
        gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 4dbde580de2f2166ffb73c5e90d95b269cc7361a..b35644096946b3c6e10b80c3298406e08948c589 100644
@@ -193,7 +193,6 @@ struct page_table_gk20a {
        /* Either a *page or a *mem_handle */
        void *ref;
        /* track mapping cnt on this page table */
-       u32 ref_cnt;
        struct sg_table *sgt;
        size_t size;
 };
@@ -308,10 +307,6 @@ struct vm_gk20a {
 
        struct list_head reserved_va_list;
 
-       dma_addr_t zero_page_iova;
-       void *zero_page_cpuva;
-       struct sg_table *zero_page_sgt;
-
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
        u64 handle;
 #endif
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index cd40132fe9599477481a97740373b3ec5584d11c..5b1a9a04393362577548bace62529b17fac6118f 100644
@@ -47,8 +47,6 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
        BUG_ON(pde_lo != pde_hi);
 
        pte = vm->pdes.ptes[pgsz_idx] + pde_lo;
-       if (refplus)
-               pte->ref_cnt++;
 
        pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx);
        pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx);
@@ -64,10 +62,10 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
                pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f();
 
                gk20a_dbg(gpu_dbg_pte,
-                          "pte_cur=%d addr=%llx refs=%d"
+                          "pte_cur=%d addr=%llx"
                           " [0x%08x,0x%08x]",
                           pte_cur, addr,
-                          pte->ref_cnt, pte_w[1], pte_w[0]);
+                          pte_w[1], pte_w[0]);
 
                gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
                gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
@@ -220,39 +218,6 @@ fail:
        return ret;
 }
 
-static void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
-                              u64 size, u32 pgsz_idx) {
-       u64 vaddr_hi;
-       u32 pde_lo, pde_hi, pde_i;
-
-       gk20a_dbg_fn("");
-       vaddr_hi = vaddr + size - 1;
-       pde_range_from_vaddr_range(vm,
-                                  vaddr,
-                                  vaddr_hi,
-                                  &pde_lo, &pde_hi);
-
-       gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
-                       "pde_hi: 0x%x, pgsz_idx: %d, pde_stride_shift: %d",
-                       vaddr, vaddr_hi, pde_lo, pde_hi, pgsz_idx,
-                       vm->pde_stride_shift);
-
-       for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
-               struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
-               pte->ref_cnt--;
-
-               if (pte->ref_cnt == 0) {
-                       free_gmmu_pages(vm, pte->ref, pte->sgt,
-                               vm->page_table_sizing[pgsz_idx].order,
-                               pte->size);
-                       pte->ref = NULL;
-                       update_gmmu_pde_locked(vm, pde_i);
-               }
-       }
-
-       return;
-}
-
 static bool gm20b_mm_mmu_debug_mode_enabled(struct gk20a *g)
 {
        u32 debug_ctrl = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
@@ -288,7 +253,6 @@ static u32 gm20b_mm_get_big_page_sizes(void)
 void gm20b_init_mm(struct gpu_ops *gops)
 {
        gops->mm.set_sparse = gm20b_vm_put_sparse;
-       gops->mm.clear_sparse = gm20b_vm_clear_sparse;
        gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled;
        gops->mm.gmmu_map = gk20a_locked_gmmu_map;
        gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;