rtime.felk.cvut.cz Git - sojka/nv-tegra/linux-3.10.git/commitdiff
gpu: nvgpu: support skipping buffer refcounting in submit
author Deepak Nibade <dnibade@nvidia.com>
Thu, 29 Oct 2015 09:50:50 +0000 (15:20 +0530)
committer mobile promotions <svcmobile_promotions@nvidia.com>
Wed, 4 Nov 2015 05:22:39 +0000 (21:22 -0800)
In the job submission path, we always take a refcount on all
the mapped buffers to safeguard against the case where user
space releases a buffer early.

But if user space itself is doing proper buffer
management, the kernel need not take refcounts on all the
buffers - which is also an overhead in the submit path.

Hence, provide a new submit flag
NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING to
optionally skip taking refcounts on all the buffers

Also, if we do not take refcounts, then there is no need to
drop any refcounts in gk20a_channel_update() either.

Bug 1698667
Bug 200141116

Change-Id: I81bb7a03240300b691c70bcec04ea1badd5934f4
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/824718
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
drivers/gpu/nvgpu/gk20a/channel_gk20a.c
include/uapi/linux/nvgpu.h

index a60a903c8e4068e842635d5e264b77f2770d000f..21c23988dd6619bb86ec9440e6bb608083dec600 100644 (file)
@@ -1545,20 +1545,24 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 
 static int gk20a_channel_add_job(struct channel_gk20a *c,
                                 struct gk20a_fence *pre_fence,
-                                struct gk20a_fence *post_fence)
+                                struct gk20a_fence *post_fence,
+                                bool skip_buffer_refcounting)
 {
        struct vm_gk20a *vm = c->vm;
        struct channel_gk20a_job *job = NULL;
        struct mapped_buffer_node **mapped_buffers = NULL;
-       int err = 0, num_mapped_buffers;
+       int err = 0, num_mapped_buffers = 0;
 
        /* job needs reference to this vm (released in channel_update) */
        gk20a_vm_get(vm);
 
-       err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
-       if (err) {
-               gk20a_vm_put(vm);
-               return err;
+       if (!skip_buffer_refcounting) {
+               err = gk20a_vm_get_buffers(vm, &mapped_buffers,
+                                       &num_mapped_buffers);
+               if (err) {
+                       gk20a_vm_put(vm);
+                       return err;
+               }
        }
 
        job = kzalloc(sizeof(*job), GFP_KERNEL);
@@ -1609,7 +1613,8 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
                if (c->sync)
                        c->sync->signal_timeline(c->sync);
 
-               gk20a_vm_put_buffers(vm, job->mapped_buffers,
+               if (job->num_mapped_buffers)
+                       gk20a_vm_put_buffers(vm, job->mapped_buffers,
                                job->num_mapped_buffers);
 
                /* Close the fences (this will unref the semaphores and release
@@ -1669,6 +1674,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
         * and one for post fence. */
        const int extra_entries = 2;
        bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
+       bool skip_buffer_refcounting = (flags &
+                       NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
 
        if (c->has_timedout)
                return -ETIMEDOUT;
@@ -1912,7 +1919,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                *fence_out = gk20a_fence_get(post_fence);
 
        /* TODO! Check for errors... */
-       gk20a_channel_add_job(c, pre_fence, post_fence);
+       gk20a_channel_add_job(c, pre_fence, post_fence,
+                               skip_buffer_refcounting);
 
        c->cmds_pending = true;
        gk20a_bar1_writel(g,
index 660662674f39e39ec7df68c5eadcbc12abd9dea2..5eaff06ae6171367c385fc89ca4f5647a59d1104 100644 (file)
@@ -608,6 +608,8 @@ struct nvgpu_fence {
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE   (1 << 3)
 /* suppress WFI before fence trigger */
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI (1 << 4)
+/* skip buffer refcounting during submit */
+#define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING      (1 << 5)
 
 struct nvgpu_submit_gpfifo_args {
        __u64 gpfifo;