pte->size);
if (err) {
gk20a_err(dev_from_vm(vm),
- "couldn't map ptes for update as=%d pte_ref_cnt=%d",
- vm_aspace_id(vm), pte->ref_cnt);
+ "couldn't map ptes for update as=%d",
+ vm_aspace_id(vm));
goto clean_up;
}
if (!cacheable)
pte_w[1] |= gmmu_pte_vol_true_f();
- pte->ref_cnt++;
gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d"
- " ctag=%d vol=%d refs=%d"
+ " ctag=%d vol=%d"
" [0x%08x,0x%08x]",
pte_cur, hi32(addr), lo32(addr),
kind_v, ctag, !cacheable,
- pte->ref_cnt, pte_w[1], pte_w[0]);
+ pte_w[1], pte_w[0]);
ctag += ctag_incr;
cur_offset += page_size;
addr += page_size;
}
} else {
- pte->ref_cnt--;
gk20a_dbg(gpu_dbg_pte,
- "pte_cur=%d ref=%d [0x0,0x0]",
- pte_cur, pte->ref_cnt);
+ "pte_cur=%d [0x0,0x0]",
+ pte_cur);
}
gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
}
unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
-
- if (pte->ref_cnt == 0) {
- /* It can make sense to keep around one page table for
- * each flavor (empty)... in case a new map is coming
- * right back to alloc (and fill it in) again.
- * But: deferring unmapping should help with pathological
- * unmap/map/unmap/map cases where we'd trigger pte
- * free/alloc/free/alloc.
- */
- free_gmmu_pages(vm, pte->ref, pte->sgt,
- vm->page_table_sizing[pgsz_idx].order,
- pte->size);
- pte->ref = NULL;
-
- /* rewrite pde */
- update_gmmu_pde_locked(vm, pde_i);
- }
-
}
smp_mb();
vm->tlb_dirty = true;
}
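
The hunk above takes the per-page-table reference counting out of the PTE update path: map no longer bumps pte->ref_cnt, unmap no longer drops it, and a count of zero no longer triggers a free plus a PDE rewrite. A condensed sketch of the lifecycle being deleted (names as they appear in this diff, compressed for illustration):

	/* Removed scheme: the last unmap in a page table freed the
	 * table immediately and invalidated its PDE, so a pathological
	 * unmap/map/unmap/map sequence on one range degenerated into
	 * free/alloc/free/alloc of the table's backing pages.
	 */
	if (--pte->ref_cnt == 0) {
		free_gmmu_pages(vm, pte->ref, pte->sgt,
				vm->page_table_sizing[pgsz_idx].order,
				pte->size);
		pte->ref = NULL;
		update_gmmu_pde_locked(vm, pde_i);
	}

After the change a page table, once allocated, stays resident; presumably it is reclaimed only when the whole address space is torn down.
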
-static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
- u32 num_pages, u32 pgsz_idx)
-{
- struct mm_gk20a *mm = vm->mm;
- struct gk20a *g = mm->g;
- u32 pgsz = vm->gmmu_page_sizes[pgsz_idx];
- u32 i;
- dma_addr_t iova;
-
- /* allocate the zero page if the va does not already have one */
- if (!vm->zero_page_cpuva) {
- int err = 0;
- vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
- vm->big_page_size,
- &iova,
- GFP_KERNEL);
- if (!vm->zero_page_cpuva) {
- dev_err(&g->dev->dev, "failed to allocate zero page\n");
- return -ENOMEM;
- }
-
- vm->zero_page_iova = iova;
- err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
- vm->zero_page_cpuva, vm->zero_page_iova,
- vm->big_page_size);
- if (err) {
- dma_free_coherent(&g->dev->dev, vm->big_page_size,
- vm->zero_page_cpuva,
- vm->zero_page_iova);
- vm->zero_page_iova = 0;
- vm->zero_page_cpuva = NULL;
-
- dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
- return -ENOMEM;
- }
- }
-
- for (i = 0; i < num_pages; i++) {
- u64 page_vaddr = g->ops.mm.gmmu_map(vm, vaddr,
- vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
- NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
- gk20a_mem_flag_none, false);
-
- if (!page_vaddr) {
- gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
- goto err_unmap;
- }
- vaddr += pgsz;
- }
-
- return 0;
-
-err_unmap:
-
- WARN_ON(1);
- /* something went wrong. unmap pages */
- while (i--) {
- vaddr -= pgsz;
- g->ops.mm.gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
- gk20a_mem_flag_none);
- }
-
- return -EINVAL;
-}
-
-static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
- u32 num_pages, u32 pgsz_idx, bool refplus)
-{
- return gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
-}
-
-static void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
- u64 size, u32 pgsz_idx) {
- struct gk20a *g = vm->mm->g;
-
- g->ops.mm.gmmu_unmap(vm, vaddr, size, pgsz_idx,
- false, gk20a_mem_flag_none);
-}
-
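
The three helpers deleted above implemented sparse on gk20a by remapping every sparse page to one shared zero page allocated with dma_alloc_coherent(). The gm20b path kept by this patch needs no backing memory at all: a sparse page is a PTE that is invalid but has the volatile bit set, so reads come back as zeroes rather than faulting. A sketch of that encoding, following the gm20b hunk further down:

	/* Sparse PTE, gm20b style: word 0 carries no valid bit and no
	 * address; word 1 sets only the volatile flag. 'clear' writes
	 * both words back to zero.
	 */
	pte_w[0] = 0;
	pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f();
	gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
	gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);

That is what lets the zero-page allocation, its scatter-gather table, and the FIXED_OFFSET remap loop all go away.
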
/* NOTE! mapped_buffers lock must be held */
void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
{
ilog2(vm->gmmu_page_sizes[pgsz_idx]);
/* there is little we can do if this fails... */
- if (g->ops.mm.put_empty) {
- g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
- } else {
- g->ops.mm.gmmu_unmap(vm,
- mapped_buffer->addr,
- mapped_buffer->size,
- mapped_buffer->pgsz_idx,
- mapped_buffer->va_allocated,
- gk20a_mem_flag_none);
- g->ops.mm.set_sparse(vm, vaddr,
- num_pages, pgsz_idx, false);
- }
+ g->ops.mm.gmmu_unmap(vm,
+ mapped_buffer->addr,
+ mapped_buffer->size,
+ mapped_buffer->pgsz_idx,
+ mapped_buffer->va_allocated,
+ gk20a_mem_flag_none);
+ g->ops.mm.set_sparse(vm, vaddr,
+ num_pages, pgsz_idx, false);
} else
g->ops.mm.gmmu_unmap(vm,
mapped_buffer->addr,
static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
{
- struct gk20a *g = vm->mm->g;
struct mapped_buffer_node *mapped_buffer;
struct vm_reserved_va_node *va_node, *va_node_tmp;
struct rb_node *node;
gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
mutex_unlock(&vm->update_gmmu_lock);
-
- /* release zero page if used */
- if (vm->zero_page_cpuva)
- dma_free_coherent(&g->dev->dev, vm->big_page_size,
- vm->zero_page_cpuva, vm->zero_page_iova);
}
void gk20a_vm_remove_support(struct vm_gk20a *vm)
/* if this was a sparse mapping, free the va */
if (va_node->sparse)
- g->ops.mm.clear_sparse(vm,
+ g->ops.mm.gmmu_unmap(vm,
va_node->vaddr_start,
va_node->size,
- va_node->pgsz_idx);
+ va_node->pgsz_idx,
+ true,
+ gk20a_mem_flag_none);
kfree(va_node);
}
mutex_unlock(&vm->update_gmmu_lock);
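
With clear_sparse gone from the ops table, a sparse reservation is torn down through the generic unmap path, as in the hunk above. An annotated sketch of the replacement call (the va_allocated semantics here are assumed from gk20a_locked_gmmu_unmap):

	/* One call now clears the PTEs of the reserved range and,
	 * because va_allocated is true, returns the VA range to the
	 * allocator as well. Page tables are left in place.
	 */
	g->ops.mm.gmmu_unmap(vm,
			va_node->vaddr_start,
			va_node->size,
			va_node->pgsz_idx,
			true,	/* va_allocated */
			gk20a_mem_flag_none);
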
void gk20a_init_mm(struct gpu_ops *gops)
{
- /* remember to remove NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS in
- * characteristics flags if sparse support is removed */
- gops->mm.set_sparse = gk20a_vm_put_sparse;
- gops->mm.put_empty = gk20a_vm_put_empty;
- gops->mm.clear_sparse = gk20a_vm_clear_sparse;
-
gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
gops->mm.gmmu_map = gk20a_locked_gmmu_map;
gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
BUG_ON(pde_lo != pde_hi);
pte = vm->pdes.ptes[pgsz_idx] + pde_lo;
- if (refplus)
- pte->ref_cnt++;
pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx);
pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx);
pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f();
gk20a_dbg(gpu_dbg_pte,
- "pte_cur=%d addr=%llx refs=%d"
+ "pte_cur=%d addr=%llx"
" [0x%08x,0x%08x]",
pte_cur, addr,
- pte->ref_cnt, pte_w[1], pte_w[0]);
+ pte_w[1], pte_w[0]);
gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
return ret;
}
-static void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
- u64 size, u32 pgsz_idx) {
- u64 vaddr_hi;
- u32 pde_lo, pde_hi, pde_i;
-
- gk20a_dbg_fn("");
- vaddr_hi = vaddr + size - 1;
- pde_range_from_vaddr_range(vm,
- vaddr,
- vaddr_hi,
- &pde_lo, &pde_hi);
-
- gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
- "pde_hi: 0x%x, pgsz_idx: %d, pde_stride_shift: %d",
- vaddr, vaddr_hi, pde_lo, pde_hi, pgsz_idx,
- vm->pde_stride_shift);
-
- for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
- struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
- pte->ref_cnt--;
-
- if (pte->ref_cnt == 0) {
- free_gmmu_pages(vm, pte->ref, pte->sgt,
- vm->page_table_sizing[pgsz_idx].order,
- pte->size);
- pte->ref = NULL;
- update_gmmu_pde_locked(vm, pde_i);
- }
- }
-
- return;
-}
-
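
gm20b_vm_clear_sparse, deleted above, was the teardown half of the gm20b ref_cnt accounting: it dropped one reference per PDE in the range and freed any page table that reached zero. With it gone, the matching refplus bump removed a few hunks above becomes dead as well, and sparse ranges are released with a plain g->ops.mm.gmmu_unmap() call, leaving their page tables allocated.
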
static bool gm20b_mm_mmu_debug_mode_enabled(struct gk20a *g)
{
u32 debug_ctrl = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
void gm20b_init_mm(struct gpu_ops *gops)
{
gops->mm.set_sparse = gm20b_vm_put_sparse;
- gops->mm.clear_sparse = gm20b_vm_clear_sparse;
gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled;
gops->mm.gmmu_map = gk20a_locked_gmmu_map;
gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;