gpu: nvgpu: remove temporary gpfifo allocation in submit path

author Deepak Nibade <dnibade@nvidia.com>
Mon, 26 Oct 2015 13:17:55 +0000 (18:47 +0530)
committer mobile promotions <svcmobile_promotions@nvidia.com>
Mon, 23 Nov 2015 21:13:17 +0000 (13:13 -0800)
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c

index f148c65ab87754f589d222f25ab1b6572f94a297..b3f3d662b52f4ca7e5be415445cc541c6dc4ec41 100644 (file)
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -717,7 +717,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
                 return -ENOSYS;
         }
  
-       return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo,
+       return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
                                            num_entries, flags, fence, fence_out);
  }
  
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c

index 3f610de96e66f78b557909435509601ea8547ad7..84727b29e96c13764ba3ef95dc05cfebc3afb82c 100644 (file)
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1517,14 +1517,42 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
  
  static void trace_write_pushbuffer_range(struct channel_gk20a *c,
                                          struct nvgpu_gpfifo *g,
+                                        struct nvgpu_submit_gpfifo_args *args,
+                                        int offset,
                                          int count)
  {
-       if (gk20a_debug_trace_cmdbuf) {
-               int i;
-               struct nvgpu_gpfifo *gp = g;
-               for (i = 0; i < count; i++, gp++)
-                       trace_write_pushbuffer(c, gp);
+       u32 size;
+       int i;
+       struct nvgpu_gpfifo *gp;
+       bool gpfifo_allocated = false;
+
+       if (!gk20a_debug_trace_cmdbuf)
+               return;
+
+       if (!g && !args)
+               return;
+
+       if (!g) {
+               size = args->num_entries * sizeof(struct nvgpu_gpfifo);
+               if (size) {
+                       g = nvgpu_alloc(size, false);
+                       if (!g)
+                               return;
+
+                       if (copy_from_user(g,
+                               (void __user *)(uintptr_t)args->gpfifo, size)) {
+                               return;
+                       }
+               }
+               gpfifo_allocated = true;
         }
+
+       gp = g + offset;
+       for (i = 0; i < count; i++, gp++)
+               trace_write_pushbuffer(c, gp);
+
+       if (gpfifo_allocated)
+               nvgpu_free(g);
  }
  
  static int gk20a_channel_add_job(struct channel_gk20a *c,
@@ -1633,6 +1661,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
  
  int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                 struct nvgpu_gpfifo *gpfifo,
+                               struct nvgpu_submit_gpfifo_args *args,
                                 u32 num_entries,
                                 u32 flags,
                                 struct nvgpu_fence *fence,
@@ -1664,6 +1693,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                 return -ENOMEM;
         }
  
+       if (!gpfifo && !args)
+               return -EINVAL;
+
         if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
                       NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
             !fence)
@@ -1808,24 +1840,72 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
         start = c->gpfifo.put;
         end = start + num_entries;
  
-       if (end > c->gpfifo.entry_num) {
-               int length0 = c->gpfifo.entry_num - start;
-               int length1 = num_entries - length0;
+       if (gpfifo) {
+               if (end > c->gpfifo.entry_num) {
+                       int length0 = c->gpfifo.entry_num - start;
+                       int length1 = num_entries - length0;
  
-               memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start, gpfifo,
-                      length0 * sizeof(*gpfifo));
+                       memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
+                               gpfifo,
+                               length0 * sizeof(*gpfifo));
  
-               memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va, gpfifo + length0,
-                      length1 * sizeof(*gpfifo));
+                       memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va,
+                               gpfifo + length0,
+                               length1 * sizeof(*gpfifo));
+
+                       trace_write_pushbuffer_range(c, gpfifo, NULL,
+                                       0, length0);
+                       trace_write_pushbuffer_range(c, gpfifo, NULL,
+                                       length0, length1);
+               } else {
+                       memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
+                               gpfifo,
+                               num_entries * sizeof(*gpfifo));
  
-               trace_write_pushbuffer_range(c, gpfifo, length0);
-               trace_write_pushbuffer_range(c, gpfifo + length0, length1);
+                       trace_write_pushbuffer_range(c, gpfifo, NULL,
+                                       0, num_entries);
+               }
         } else {
-               memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start, gpfifo,
-                      num_entries * sizeof(*gpfifo));
+               struct nvgpu_gpfifo __user *user_gpfifo =
+                       (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
+               if (end > c->gpfifo.entry_num) {
+                       int length0 = c->gpfifo.entry_num - start;
+                       int length1 = num_entries - length0;
+
+                       err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
+                               user_gpfifo,
+                               length0 * sizeof(*user_gpfifo));
+                       if (err) {
+                               mutex_unlock(&c->submit_lock);
+                               goto clean_up;
+                       }
+
+                       err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va,
+                               user_gpfifo + length0,
+                               length1 * sizeof(*user_gpfifo));
+                       if (err) {
+                               mutex_unlock(&c->submit_lock);
+                               goto clean_up;
+                       }
  
-               trace_write_pushbuffer_range(c, gpfifo, num_entries);
+                       trace_write_pushbuffer_range(c, NULL, args,
+                                       0, length0);
+                       trace_write_pushbuffer_range(c, NULL, args,
+                                       length0, length1);
+               } else {
+                       err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
+                               user_gpfifo,
+                               num_entries * sizeof(*user_gpfifo));
+                       if (err) {
+                               mutex_unlock(&c->submit_lock);
+                               goto clean_up;
+                       }
+
+                       trace_write_pushbuffer_range(c, NULL, args,
+                                       0, num_entries);
+               }
         }
+
         c->gpfifo.put = (c->gpfifo.put + num_entries) &
                 (c->gpfifo.entry_num - 1);
  
@@ -2318,8 +2398,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
         struct nvgpu_submit_gpfifo_args *args)
  {
         struct gk20a_fence *fence_out;
-       void *gpfifo = NULL;
-       u32 size;
         int ret = 0;
  
         gk20a_dbg_fn("");
@@ -2327,23 +2405,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
         if (ch->has_timedout)
                 return -ETIMEDOUT;
  
-       /* zero-sized submits are allowed, since they can be used for
-        * synchronization; we might still wait and do an increment */
-       size = args->num_entries * sizeof(struct nvgpu_gpfifo);
-       if (size) {
-               gpfifo = nvgpu_alloc(size, false);
-               if (!gpfifo)
-                       return -ENOMEM;
-
-               if (copy_from_user(gpfifo,
-                                       (void __user *)(uintptr_t)args->gpfifo,
-                                       size)) {
-                       ret = -EINVAL;
-                       goto clean_up;
-               }
-       }
-
-       ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
+       ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
                                           args->flags, &args->fence,
                                           &fence_out);
  
@@ -2366,7 +2428,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
         gk20a_fence_put(fence_out);
  
  clean_up:
-       nvgpu_free(gpfifo);
         return ret;
  }
  
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h

index d0e79dcb67116df98029651d27bbf636da8b40dc..4932ba732f97be2d71bc226b546eefb1676508fc 100644 (file)
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -237,6 +237,7 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
  
  int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                 struct nvgpu_gpfifo *gpfifo,
+                               struct nvgpu_submit_gpfifo_args *args,
                                 u32 num_entries,
                                 u32 flags,
                                 struct nvgpu_fence *fence,
author	Deepak Nibade <dnibade@nvidia.com>
	Mon, 26 Oct 2015 13:17:55 +0000 (18:47 +0530)
committer	mobile promotions <svcmobile_promotions@nvidia.com>
	Mon, 23 Nov 2015 21:13:17 +0000 (13:13 -0800)
drivers/gpu/nvgpu/gk20a/cde_gk20a.c		patch \| blob \| history
drivers/gpu/nvgpu/gk20a/channel_gk20a.c		patch \| blob \| history
drivers/gpu/nvgpu/gk20a/channel_gk20a.h		patch \| blob \| history