]> rtime.felk.cvut.cz Git - sojka/nv-tegra/linux-3.10.git/commitdiff
gpu: nvgpu: remove temporary gpfifo allocation in submit path
authorDeepak Nibade <dnibade@nvidia.com>
Mon, 26 Oct 2015 13:17:55 +0000 (18:47 +0530)
committermobile promotions <svcmobile_promotions@nvidia.com>
Mon, 23 Nov 2015 21:13:17 +0000 (13:13 -0800)
In GPU job submit path gk20a_ioctl_channel_submit_gpfifo(),
we currently allocate a temporary gpfifo, copy user space
gpfifo content into this temporary buffer, and then copy
temp buffer content into channel's gpfifo.

Allocation/copy/free of temporary buffer adds additional
overhead

Rewrite this sequence such that gk20a_submit_channel_gpfifo()
can receive either a pre-filled gpfifo or pointer to
user provided args.
And then we can direclty copy the user provided gpfifo
into the channel's gpfifo

Also, if command buffer tracing is enabled, we still need
to copy user provided gpfifo into temporaty buffer for reading
But that should not cause overhead in real world use case

Bug 200141116

Change-Id: I7166c9271da2694059da9853ab8839e98457b941
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/823386
(cherry picked from commit 3e0702db006c262dd8737a567b8e06f7ff005e2c)
Reviewed-on: http://git-master/r/835816
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
drivers/gpu/nvgpu/gk20a/cde_gk20a.c
drivers/gpu/nvgpu/gk20a/channel_gk20a.c
drivers/gpu/nvgpu/gk20a/channel_gk20a.h

index f148c65ab87754f589d222f25ab1b6572f94a297..b3f3d662b52f4ca7e5be415445cc541c6dc4ec41 100644 (file)
@@ -717,7 +717,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
                return -ENOSYS;
        }
 
-       return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo,
+       return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
                                           num_entries, flags, fence, fence_out);
 }
 
index 3f610de96e66f78b557909435509601ea8547ad7..84727b29e96c13764ba3ef95dc05cfebc3afb82c 100644 (file)
@@ -1517,14 +1517,42 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 
 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
                                         struct nvgpu_gpfifo *g,
+                                        struct nvgpu_submit_gpfifo_args *args,
+                                        int offset,
                                         int count)
 {
-       if (gk20a_debug_trace_cmdbuf) {
-               int i;
-               struct nvgpu_gpfifo *gp = g;
-               for (i = 0; i < count; i++, gp++)
-                       trace_write_pushbuffer(c, gp);
+       u32 size;
+       int i;
+       struct nvgpu_gpfifo *gp;
+       bool gpfifo_allocated = false;
+
+       if (!gk20a_debug_trace_cmdbuf)
+               return;
+
+       if (!g && !args)
+               return;
+
+       if (!g) {
+               size = args->num_entries * sizeof(struct nvgpu_gpfifo);
+               if (size) {
+                       g = nvgpu_alloc(size, false);
+                       if (!g)
+                               return;
+
+                       if (copy_from_user(g,
+                               (void __user *)(uintptr_t)args->gpfifo, size)) {
+                               return;
+                       }
+               }
+               gpfifo_allocated = true;
        }
+
+       gp = g + offset;
+       for (i = 0; i < count; i++, gp++)
+               trace_write_pushbuffer(c, gp);
+
+       if (gpfifo_allocated)
+               nvgpu_free(g);
 }
 
 static int gk20a_channel_add_job(struct channel_gk20a *c,
@@ -1633,6 +1661,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 
 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                struct nvgpu_gpfifo *gpfifo,
+                               struct nvgpu_submit_gpfifo_args *args,
                                u32 num_entries,
                                u32 flags,
                                struct nvgpu_fence *fence,
@@ -1664,6 +1693,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                return -ENOMEM;
        }
 
+       if (!gpfifo && !args)
+               return -EINVAL;
+
        if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
                      NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
            !fence)
@@ -1808,24 +1840,72 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        start = c->gpfifo.put;
        end = start + num_entries;
 
-       if (end > c->gpfifo.entry_num) {
-               int length0 = c->gpfifo.entry_num - start;
-               int length1 = num_entries - length0;
+       if (gpfifo) {
+               if (end > c->gpfifo.entry_num) {
+                       int length0 = c->gpfifo.entry_num - start;
+                       int length1 = num_entries - length0;
 
-               memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start, gpfifo,
-                      length0 * sizeof(*gpfifo));
+                       memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
+                               gpfifo,
+                               length0 * sizeof(*gpfifo));
 
-               memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va, gpfifo + length0,
-                      length1 * sizeof(*gpfifo));
+                       memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va,
+                               gpfifo + length0,
+                               length1 * sizeof(*gpfifo));
+
+                       trace_write_pushbuffer_range(c, gpfifo, NULL,
+                                       0, length0);
+                       trace_write_pushbuffer_range(c, gpfifo, NULL,
+                                       length0, length1);
+               } else {
+                       memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
+                               gpfifo,
+                               num_entries * sizeof(*gpfifo));
 
-               trace_write_pushbuffer_range(c, gpfifo, length0);
-               trace_write_pushbuffer_range(c, gpfifo + length0, length1);
+                       trace_write_pushbuffer_range(c, gpfifo, NULL,
+                                       0, num_entries);
+               }
        } else {
-               memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start, gpfifo,
-                      num_entries * sizeof(*gpfifo));
+               struct nvgpu_gpfifo __user *user_gpfifo =
+                       (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
+               if (end > c->gpfifo.entry_num) {
+                       int length0 = c->gpfifo.entry_num - start;
+                       int length1 = num_entries - length0;
+
+                       err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
+                               user_gpfifo,
+                               length0 * sizeof(*user_gpfifo));
+                       if (err) {
+                               mutex_unlock(&c->submit_lock);
+                               goto clean_up;
+                       }
+
+                       err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va,
+                               user_gpfifo + length0,
+                               length1 * sizeof(*user_gpfifo));
+                       if (err) {
+                               mutex_unlock(&c->submit_lock);
+                               goto clean_up;
+                       }
 
-               trace_write_pushbuffer_range(c, gpfifo, num_entries);
+                       trace_write_pushbuffer_range(c, NULL, args,
+                                       0, length0);
+                       trace_write_pushbuffer_range(c, NULL, args,
+                                       length0, length1);
+               } else {
+                       err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
+                               user_gpfifo,
+                               num_entries * sizeof(*user_gpfifo));
+                       if (err) {
+                               mutex_unlock(&c->submit_lock);
+                               goto clean_up;
+                       }
+
+                       trace_write_pushbuffer_range(c, NULL, args,
+                                       0, num_entries);
+               }
        }
+
        c->gpfifo.put = (c->gpfifo.put + num_entries) &
                (c->gpfifo.entry_num - 1);
 
@@ -2318,8 +2398,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
        struct nvgpu_submit_gpfifo_args *args)
 {
        struct gk20a_fence *fence_out;
-       void *gpfifo = NULL;
-       u32 size;
        int ret = 0;
 
        gk20a_dbg_fn("");
@@ -2327,23 +2405,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
        if (ch->has_timedout)
                return -ETIMEDOUT;
 
-       /* zero-sized submits are allowed, since they can be used for
-        * synchronization; we might still wait and do an increment */
-       size = args->num_entries * sizeof(struct nvgpu_gpfifo);
-       if (size) {
-               gpfifo = nvgpu_alloc(size, false);
-               if (!gpfifo)
-                       return -ENOMEM;
-
-               if (copy_from_user(gpfifo,
-                                       (void __user *)(uintptr_t)args->gpfifo,
-                                       size)) {
-                       ret = -EINVAL;
-                       goto clean_up;
-               }
-       }
-
-       ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
+       ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
                                          args->flags, &args->fence,
                                          &fence_out);
 
@@ -2366,7 +2428,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
        gk20a_fence_put(fence_out);
 
 clean_up:
-       nvgpu_free(gpfifo);
        return ret;
 }
 
index d0e79dcb67116df98029651d27bbf636da8b40dc..4932ba732f97be2d71bc226b546eefb1676508fc 100644 (file)
@@ -237,6 +237,7 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
 
 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                struct nvgpu_gpfifo *gpfifo,
+                               struct nvgpu_submit_gpfifo_args *args,
                                u32 num_entries,
                                u32 flags,
                                struct nvgpu_fence *fence,