/*
 * Color decompression engine support
 *
 * Copyright (c) 2014-2015, NVIDIA Corporation.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include <linux/dma-mapping.h>
#include <linux/firmware.h>
#include <linux/debugfs.h>
#include <linux/dma-buf.h>

#include <trace/events/gk20a.h>

#include "channel_gk20a.h"
#include "cde_gk20a.h"
#include "fence_gk20a.h"
#include "debug_gk20a.h"
#include "semaphore_gk20a.h"

#include "hw_ccsr_gk20a.h"
#include "hw_pbdma_gk20a.h"
static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g);

#define CTX_DELETE_TIME 1000

#define MAX_CTX_USE_COUNT 42
#define MAX_CTX_RETRY_TIME 2000
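
/*
 * A temporary context is deleted CTX_DELETE_TIME ms after its last use;
 * context acquisition backs off with -EAGAIN once ctx_usecount reaches
 * MAX_CTX_USE_COUNT and retries for at most MAX_CTX_RETRY_TIME ms
 * (see gk20a_cde_get_context()).
 */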
static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
{
	int i;

	for (i = 0; i < cde_ctx->num_bufs; i++) {
		struct mem_desc *mem = cde_ctx->mem + i;
		gk20a_gmmu_unmap_free(cde_ctx->vm, mem);
	}

	kfree(cde_ctx->init_convert_cmd);

	cde_ctx->convert_cmd = NULL;
	cde_ctx->init_convert_cmd = NULL;
	cde_ctx->num_bufs = 0;
	cde_ctx->num_params = 0;
	cde_ctx->init_cmd_num_entries = 0;
	cde_ctx->convert_cmd_num_entries = 0;
	cde_ctx->init_cmd_executed = false;
}
static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
__must_hold(&cde_app->mutex)
{
	struct gk20a *g = cde_ctx->g;
	struct channel_gk20a *ch = cde_ctx->ch;
	struct vm_gk20a *vm = ch->vm;

	trace_gk20a_cde_remove_ctx(cde_ctx);

	/* release mapped memory */
	gk20a_deinit_cde_img(cde_ctx);
	gk20a_gmmu_unmap(vm, cde_ctx->backing_store_vaddr,
			 g->gr.compbit_store.size, 1);

	/* free the channel */
	gk20a_channel_close(ch);

	/* housekeeping on app */
	list_del(&cde_ctx->list);
	cde_ctx->g->cde_app.ctx_count--;
}
static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
		bool wait_finish)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;

	/* permanent contexts do not have a deleter work */
	if (!cde_ctx->is_temporary)
		return;
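
	/*
	 * the deleter work itself takes cde_app->mutex, so a synchronous
	 * cancel must drop the mutex while it waits for the work to finish,
	 * otherwise the two would deadlock against each other
	 */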
	if (wait_finish) {
		mutex_unlock(&cde_app->mutex);
		cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
		mutex_lock(&cde_app->mutex);
	} else {
		cancel_delayed_work(&cde_ctx->ctx_deleter_work);
	}
}
static void gk20a_cde_remove_contexts(struct gk20a *g)
__must_hold(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &g->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	/* it is safe to drop the mutex in cancel_deleter since the app is
	 * deinitialised and no new jobs can be started; any deleter work is
	 * either waiting on the mutex or about to bail out */

	list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}

	list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}
}
static void gk20a_cde_stop(struct gk20a *g)
__must_hold(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &g->cde_app;

	/* prevent further conversions and delayed works from starting */
	cde_app->initialised = false;

	/* free all data, empty the list */
	gk20a_cde_remove_contexts(g);
}
void gk20a_cde_destroy(struct gk20a *g)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &g->cde_app;

	if (!cde_app->initialised)
		return;

	mutex_lock(&cde_app->mutex);
	gk20a_cde_stop(g);
	mutex_unlock(&cde_app->mutex);
}
void gk20a_cde_suspend(struct gk20a *g)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &g->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	if (!cde_app->initialised)
		return;

	mutex_lock(&cde_app->mutex);

	list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, list) {
		gk20a_cde_cancel_deleter(cde_ctx, false);
	}

	list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, list) {
		gk20a_cde_cancel_deleter(cde_ctx, false);
	}

	mutex_unlock(&cde_app->mutex);
}
static int gk20a_cde_create_context(struct gk20a *g)
__must_hold(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &g->cde_app;
	struct gk20a_cde_ctx *cde_ctx;

	cde_ctx = gk20a_cde_allocate_context(g);
	if (IS_ERR(cde_ctx))
		return PTR_ERR(cde_ctx);

	list_add(&cde_ctx->list, &cde_app->free_contexts);
	cde_app->ctx_count++;
	if (cde_app->ctx_count > cde_app->ctx_count_top)
		cde_app->ctx_count_top = cde_app->ctx_count;

	return 0;
}
static int gk20a_cde_create_contexts(struct gk20a *g)
__must_hold(&g->cde_app->mutex)
{
	int err;
	int i;

	for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
		err = gk20a_cde_create_context(g);
		if (err)
			goto out;
	}

	return 0;
out:
	gk20a_cde_remove_contexts(g);
	return err;
}
static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
			      const struct firmware *img,
			      struct gk20a_cde_hdr_buf *buf)
{
	struct mem_desc *mem;
	int err;

	/* check that the file can hold the buf */
	if (buf->data_byte_offset != 0 &&
	    buf->data_byte_offset + buf->num_bytes > img->size) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid data section. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	/* check that we have enough buf elems available */
	if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: too many buffers. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -ENOMEM;
	}

	/* allocate the buffer */
	mem = cde_ctx->mem + cde_ctx->num_bufs;
	err = gk20a_gmmu_alloc_map(cde_ctx->vm, buf->num_bytes, mem);
	if (err) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate device memory. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -ENOMEM;
	}

	/* copy the content */
	if (buf->data_byte_offset != 0)
		memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
		       buf->num_bytes);

	cde_ctx->num_bufs++;
	return 0;
}
static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
			      int type, s32 shift, u64 mask, u64 value)
{
	u32 *target_mem_ptr = target;
	u64 *target_mem_ptr_u64 = target;
	u64 current_value, new_value;
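
	/* a negative shift value selects a right shift */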
	value = (shift >= 0) ? value << shift : value >> -shift;
	value &= mask;

	/* read current data from the location */
	current_value = 0;
	if (type == TYPE_PARAM_TYPE_U32) {
		if (mask != 0xfffffffful)
			current_value = *target_mem_ptr;
	} else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
		current_value = *target_mem_ptr_u64;
	} else if (type == TYPE_PARAM_TYPE_U64_BIG) {
		current_value = *target_mem_ptr_u64;
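		/* stored in big-endian word order: swap the 32-bit halves */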
		current_value = (u64)(current_value >> 32) |
			(u64)(current_value << 32);
	} else {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown type. type=%d",
			   type);
		return -EINVAL;
	}

	current_value &= ~mask;
	new_value = current_value | value;

	/* store the element data back */
	if (type == TYPE_PARAM_TYPE_U32)
		*target_mem_ptr = (u32)new_value;
	else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
		*target_mem_ptr_u64 = new_value;
	else {
		new_value = (u64)(new_value >> 32) |
			(u64)(new_value << 32);
		*target_mem_ptr_u64 = new_value;
	}

	return 0;
}
static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
				  const struct firmware *img,
				  struct gk20a_cde_hdr_replace *replace)
{
	struct mem_desc *source_mem;
	struct mem_desc *target_mem;
	u32 *target_mem_ptr;
	u64 vaddr;
	int err;

	if (replace->target_buf >= cde_ctx->num_bufs ||
	    replace->source_buf >= cde_ctx->num_bufs) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
			   replace->target_buf, replace->source_buf,
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	source_mem = cde_ctx->mem + replace->source_buf;
	target_mem = cde_ctx->mem + replace->target_buf;
	target_mem_ptr = target_mem->cpu_va;

	if (source_mem->size < (replace->source_byte_offset + 3) ||
	    target_mem->size < (replace->target_byte_offset + 3)) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
			   replace->target_byte_offset,
			   replace->source_byte_offset,
			   source_mem->size, target_mem->size);
		return -EINVAL;
	}

	/* calculate the target pointer */
	target_mem_ptr += (replace->target_byte_offset / sizeof(u32));

	/* determine patch value */
	vaddr = source_mem->gpu_va + replace->source_byte_offset;
	err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
				 replace->shift, replace->mask,
				 vaddr);
	if (err) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
			   err, replace->target_buf,
			   replace->target_byte_offset,
			   replace->source_buf,
			   replace->source_byte_offset);
	}

	return err;
}
static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
{
	struct gk20a *g = cde_ctx->g;
	struct mem_desc *target_mem;
	u32 *target_mem_ptr;
	u64 new_data;
	int user_id = 0, i, err;

	for (i = 0; i < cde_ctx->num_params; i++) {
		struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
		target_mem = cde_ctx->mem + param->target_buf;
		target_mem_ptr = target_mem->cpu_va;
		target_mem_ptr += (param->target_byte_offset / sizeof(u32));

		switch (param->id) {
		case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
			new_data = g->gr.comptags_per_cacheline;
			break;
		case TYPE_PARAM_GPU_CONFIGURATION:
			new_data = g->ltc_count * g->gr.slices_per_ltc *
				g->gr.cacheline_size;
			break;
		case TYPE_PARAM_FIRSTPAGEOFFSET:
			new_data = cde_ctx->surf_param_offset;
			break;
		case TYPE_PARAM_NUMPAGES:
			new_data = cde_ctx->surf_param_lines;
			break;
		case TYPE_PARAM_BACKINGSTORE:
			new_data = cde_ctx->backing_store_vaddr;
			break;
		case TYPE_PARAM_DESTINATION:
			new_data = cde_ctx->compbit_vaddr;
			break;
		case TYPE_PARAM_DESTINATION_SIZE:
			new_data = cde_ctx->compbit_size;
			break;
		case TYPE_PARAM_BACKINGSTORE_SIZE:
			new_data = g->gr.compbit_store.size;
			break;
		case TYPE_PARAM_SOURCE_SMMU_ADDR:
			new_data = gk20a_mm_gpuva_to_iova_base(cde_ctx->vm,
					cde_ctx->surf_vaddr);
			if (new_data == 0)
				return -EINVAL;
			break;
		case TYPE_PARAM_BACKINGSTORE_BASE_HW:
			new_data = g->gr.compbit_store.base_hw;
			break;
		case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
			new_data = g->gr.gobs_per_comptagline_per_slice;
			break;
		default:
			user_id = param->id - NUM_RESERVED_PARAMS;
			if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
				continue;
			new_data = cde_ctx->user_param_values[user_id];
		}

		gk20a_dbg(gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
			  i, param->id, param->target_buf,
			  param->target_byte_offset, new_data,
			  param->data_offset, param->type, param->shift,
			  param->mask);

		new_data += param->data_offset;

		err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
					 param->shift, param->mask, new_data);
		if (err) {
			gk20a_warn(&cde_ctx->pdev->dev, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
				   err, i, param->id, param->target_buf,
				   param->target_byte_offset, new_data);
			return err;
		}
	}

	return 0;
}
static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
				const struct firmware *img,
				struct gk20a_cde_hdr_param *param)
{
	struct mem_desc *target_mem;

	if (param->target_buf >= cde_ctx->num_bufs) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
			   cde_ctx->num_params, param->target_buf,
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	target_mem = cde_ctx->mem + param->target_buf;
	if (target_mem->size < (param->target_byte_offset + 3)) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
			   cde_ctx->num_params, param->target_byte_offset,
			   target_mem->size);
		return -EINVAL;
	}

	/* does this parameter fit into our parameter structure */
	if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: no room for new parameters param idx = %d",
			   cde_ctx->num_params);
		return -ENOMEM;
	}

	/* is the given id valid? */
	if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
			   cde_ctx->num_params, param->id,
			   NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
		return -EINVAL;
	}

	cde_ctx->params[cde_ctx->num_params] = *param;
	cde_ctx->num_params++;

	return 0;
}
static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
					 const struct firmware *img,
					 u32 required_class)
{
	struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx;
	int err;

	alloc_obj_ctx.class_num = required_class;
	alloc_obj_ctx.flags = 0;

	cde_ctx->ch->cde = 1;

	err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx);
	if (err) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to allocate ctx. err=%d",
			   err);
		return err;
	}

	return 0;
}
static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
				  const struct firmware *img,
				  u32 op,
				  struct gk20a_cde_cmd_elem *cmd_elem,
				  u32 num_elems)
{
	struct nvgpu_gpfifo **gpfifo, *gpfifo_elem;
	u32 *num_entries;
	unsigned int i;

	/* check command type */
	if (op == TYPE_BUF_COMMAND_INIT) {
		gpfifo = &cde_ctx->init_convert_cmd;
		num_entries = &cde_ctx->init_cmd_num_entries;
	} else if (op == TYPE_BUF_COMMAND_CONVERT) {
		gpfifo = &cde_ctx->convert_cmd;
		num_entries = &cde_ctx->convert_cmd_num_entries;
	} else {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown command. op=%u",
			   op);
		return -EINVAL;
	}

	/* allocate gpfifo entries to be pushed */
	*gpfifo = kzalloc(sizeof(struct nvgpu_gpfifo) * num_elems,
			  GFP_KERNEL);
	if (!*gpfifo) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate memory for gpfifo entries");
		return -ENOMEM;
	}

	gpfifo_elem = *gpfifo;
	for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
		struct mem_desc *target_mem;

		/* validate the current entry */
		if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
			gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer is not available (target=%u, num_bufs=%u)",
				   cmd_elem->target_buf, cde_ctx->num_bufs);
			return -EINVAL;
		}

		target_mem = cde_ctx->mem + cmd_elem->target_buf;
		if (target_mem->size <
		    cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
			gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
				   target_mem->size,
				   cmd_elem->target_byte_offset,
				   cmd_elem->num_bytes);
			return -EINVAL;
		}

		/* store the element into gpfifo */
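		/* entry0 carries the low 32 bits of the GPU VA; entry1
		 * packs the high VA bits together with the push buffer
		 * length in 32-bit words */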
		gpfifo_elem->entry0 =
			u64_lo32(target_mem->gpu_va +
				 cmd_elem->target_byte_offset);
		gpfifo_elem->entry1 =
			u64_hi32(target_mem->gpu_va +
				 cmd_elem->target_byte_offset) |
			pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
						 sizeof(u32));
	}

	*num_entries = num_elems;
	return 0;
}
static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
{
	unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
		sizeof(struct nvgpu_gpfifo);
	unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
		sizeof(struct nvgpu_gpfifo);
	unsigned long total_bytes = init_bytes + conv_bytes;
	struct nvgpu_gpfifo *combined_cmd;

	/* allocate buffer that has space for both */
	combined_cmd = kzalloc(total_bytes, GFP_KERNEL);
	if (!combined_cmd) {
		gk20a_warn(&cde_ctx->pdev->dev,
			   "cde: could not allocate memory for gpfifo entries");
		return -ENOMEM;
	}

	/* move the original init here and append convert */
	memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
	memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
	       cde_ctx->convert_cmd, conv_bytes);
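
	/* both command lists now live in the combined buffer, so the
	 * separately allocated originals can be freed; convert_cmd is left
	 * pointing into the combined allocation */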
	kfree(cde_ctx->init_convert_cmd);
	kfree(cde_ctx->convert_cmd);

	cde_ctx->init_convert_cmd = combined_cmd;
	cde_ctx->convert_cmd = combined_cmd
		+ cde_ctx->init_cmd_num_entries;

	return 0;
}
static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
			      const struct firmware *img)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
	u32 *data = (u32 *)img->data;
	u32 num_of_elems;
	struct gk20a_cde_hdr_elem *elem;
	u64 min_size = 0;
	int err = 0;
	unsigned int i;
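
	/* the image begins with two 32-bit words, the firmware version and
	 * the number of header elements that follow */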
	min_size += 2 * sizeof(u32);
	if (img->size < min_size) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid image header");
		return -EINVAL;
	}

	cde_app->firmware_version = data[0];
	num_of_elems = data[1];

	min_size += num_of_elems * sizeof(*elem);
	if (img->size < min_size) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: bad image");
		return -EINVAL;
	}

	elem = (struct gk20a_cde_hdr_elem *)&data[2];
	for (i = 0; i < num_of_elems; i++) {
		switch (elem->type) {
		case TYPE_BUF:
			err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
			break;
		case TYPE_REPLACE:
			err = gk20a_init_cde_replace(cde_ctx, img,
						     &elem->replace);
			break;
		case TYPE_PARAM:
			err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
			break;
		case TYPE_REQUIRED_CLASS:
			err = gk20a_init_cde_required_class(cde_ctx, img,
							    elem->required_class);
			break;
		case TYPE_COMMAND:
		{
			struct gk20a_cde_cmd_elem *cmd = (void *)
				&img->data[elem->command.data_byte_offset];
			err = gk20a_init_cde_command(cde_ctx, img,
						     elem->command.op, cmd,
						     elem->command.num_entries);
			break;
		}
		case TYPE_ARRAY:
			memcpy(&cde_app->arrays[elem->array.id][0],
			       elem->array.data,
			       MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
			break;
		default:
			gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown header element");
			err = -EINVAL;
		}

		if (err)
			goto deinit_image;

		elem++;
	}

	if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: init command not defined");
		err = -EINVAL;
		goto deinit_image;
	}

	if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: convert command not defined");
		err = -EINVAL;
		goto deinit_image;
	}

	err = gk20a_cde_pack_cmdbufs(cde_ctx);
	if (err)
		goto deinit_image;

	return 0;

deinit_image:
	gk20a_deinit_cde_img(cde_ctx);
	return err;
}
static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
				    u32 op, struct nvgpu_fence *fence,
				    u32 flags, struct gk20a_fence **fence_out)
{
	struct nvgpu_gpfifo *gpfifo = NULL;
	int num_entries = 0;

	/* check command type */
	if (op == TYPE_BUF_COMMAND_INIT) {
		/* both init and convert combined */
		gpfifo = cde_ctx->init_convert_cmd;
		num_entries = cde_ctx->init_cmd_num_entries
			+ cde_ctx->convert_cmd_num_entries;
	} else if (op == TYPE_BUF_COMMAND_CONVERT) {
		gpfifo = cde_ctx->convert_cmd;
		num_entries = cde_ctx->convert_cmd_num_entries;
	} else {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown buffer");
		return -EINVAL;
	}

	if (gpfifo == NULL || num_entries == 0) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: buffer not available");
		return -ENOSYS;
	}

	return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo,
					   num_entries, flags, fence, fence_out);
}
static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;

	gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
	trace_gk20a_cde_release(cde_ctx);

	mutex_lock(&cde_app->mutex);

	if (cde_ctx->in_use) {
		cde_ctx->in_use = false;
		list_move(&cde_ctx->list, &cde_app->free_contexts);
		cde_app->ctx_usecount--;
	} else {
		gk20a_dbg_info("double release cde context %p", cde_ctx);
	}

	mutex_unlock(&cde_app->mutex);
}
static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct delayed_work *delay_work = to_delayed_work(work);
	struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
			struct gk20a_cde_ctx, ctx_deleter_work);
	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
	struct platform_device *pdev = cde_ctx->pdev;
	int err;

	/* someone has just taken it? engine deletion started? */
	if (cde_ctx->in_use || !cde_app->initialised)
		return;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
		  "cde: attempting to delete temporary %p", cde_ctx);

	err = gk20a_busy(pdev);
	if (err) {
		/* this context will find new use later anyway, so not
		 * freeing it here does not leak anything */
		gk20a_warn(&pdev->dev, "cde: cannot set gk20a on, postponing"
			   " temp ctx deletion");
		return;
	}

	mutex_lock(&cde_app->mutex);
	if (cde_ctx->in_use || !cde_app->initialised) {
		gk20a_dbg(gpu_dbg_cde_ctx,
			  "cde: context use raced, not deleting %p",
			  cde_ctx);
		goto out;
	}

	WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
	     "double pending %p", cde_ctx);

	gk20a_cde_remove_ctx(cde_ctx);
	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
		  "cde: destroyed %p count=%d use=%d max=%d",
		  cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
		  cde_app->ctx_count_top);

out:
	mutex_unlock(&cde_app->mutex);
	gk20a_idle(pdev);
}
static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct gk20a *g)
__must_hold(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &g->cde_app;
	struct gk20a_cde_ctx *cde_ctx;

	if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
		return ERR_PTR(-EAGAIN);

	/* idle context available? */

	if (!list_empty(&cde_app->free_contexts)) {
		cde_ctx = list_first_entry(&cde_app->free_contexts,
				struct gk20a_cde_ctx, list);
		gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
			  "cde: got free %p count=%d use=%d max=%d",
			  cde_ctx, cde_app->ctx_count,
			  cde_app->ctx_usecount,
			  cde_app->ctx_count_top);
		trace_gk20a_cde_get_context(cde_ctx);

		/* deleter work may be scheduled, but in_use prevents it */
		cde_ctx->in_use = true;
		list_move(&cde_ctx->list, &cde_app->used_contexts);
		cde_app->ctx_usecount++;

		/* cancel any deletions now that ctx is in use */
		gk20a_cde_cancel_deleter(cde_ctx, true);
		return cde_ctx;
	}

	/* no free contexts, get a temporary one */

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
		  "cde: no free contexts, count=%d",
		  cde_app->ctx_count);

	cde_ctx = gk20a_cde_allocate_context(g);
	if (IS_ERR(cde_ctx)) {
		gk20a_warn(&g->dev->dev, "cde: cannot allocate context: %ld",
			   PTR_ERR(cde_ctx));
		return cde_ctx;
	}

	trace_gk20a_cde_get_context(cde_ctx);
	cde_ctx->in_use = true;
	cde_ctx->is_temporary = true;
	cde_app->ctx_usecount++;
	cde_app->ctx_count++;
	if (cde_app->ctx_count > cde_app->ctx_count_top)
		cde_app->ctx_count_top = cde_app->ctx_count;
	list_add(&cde_ctx->list, &cde_app->used_contexts);

	return cde_ctx;
}
static struct gk20a_cde_ctx *gk20a_cde_get_context(struct gk20a *g)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &g->cde_app;
	struct gk20a_cde_ctx *cde_ctx = NULL;
	unsigned long end = jiffies + msecs_to_jiffies(MAX_CTX_RETRY_TIME);

	do {
		cde_ctx = gk20a_cde_do_get_context(g);
		if (PTR_ERR(cde_ctx) != -EAGAIN)
			break;

		/* exhausted, retry */
		mutex_unlock(&cde_app->mutex);
		cond_resched();
		mutex_lock(&cde_app->mutex);
	} while (time_before(jiffies, end));

	return cde_ctx;
}
static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
{
	struct gk20a_cde_ctx *cde_ctx;
	int ret;

	cde_ctx = kzalloc(sizeof(*cde_ctx), GFP_KERNEL);
	if (!cde_ctx)
		return ERR_PTR(-ENOMEM);

	cde_ctx->g = g;
	cde_ctx->pdev = g->dev;

	ret = gk20a_cde_load(cde_ctx);
	if (ret) {
		kfree(cde_ctx);
		return ERR_PTR(ret);
	}

	INIT_LIST_HEAD(&cde_ctx->list);
	cde_ctx->is_temporary = false;
	cde_ctx->in_use = false;
	INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
			  gk20a_cde_ctx_deleter_fn);

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
	trace_gk20a_cde_allocate_context(cde_ctx);
	return cde_ctx;
}
int gk20a_cde_convert(struct gk20a *g,
		      struct dma_buf *compbits_buf,
		      s32 compbits_kind, u64 compbits_byte_offset,
		      u32 compbits_size, struct nvgpu_fence *fence,
		      u32 __flags, struct gk20a_cde_param *params,
		      int num_params, struct gk20a_fence **fence_out)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_ctx *cde_ctx = NULL;
	struct gk20a_comptags comptags;
	u64 compbits_offset = 0;
	u64 map_offset = 0, map_size = 0;
	u64 map_vaddr = 0;
	u64 big_page_mask = 0;
	u32 flags;
	int err, i;

	mutex_lock(&g->cde_app.mutex);

	cde_ctx = gk20a_cde_get_context(g);
	if (IS_ERR(cde_ctx)) {
		err = PTR_ERR(cde_ctx);
		goto exit_unlock;
	}

	/* First, map the buffer to local va */

	/* ensure that the compbits buffer has drvdata */
	err = gk20a_dmabuf_alloc_drvdata(compbits_buf, &g->dev->dev);
	if (err)
		goto exit_unlock;

	/* the compbits region need not start at a page-aligned offset, so
	   align the region to be mapped */
	big_page_mask = cde_ctx->vm->big_page_size - 1;
	map_offset = compbits_byte_offset & ~big_page_mask;

	/* compute compbit start offset from the beginning of the mapped
	   area */
	compbits_offset = compbits_byte_offset & big_page_mask;
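
	/* e.g. with a 128 KiB big page size, compbits_byte_offset 0x21000
	 * yields map_offset 0x20000 and compbits_offset 0x1000 */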
	if (!compbits_size) {
		compbits_size = compbits_buf->size - compbits_byte_offset;
		map_size = compbits_buf->size - map_offset;
	}

	/* map the destination buffer */
	get_dma_buf(compbits_buf); /* a ref for gk20a_vm_map */
	map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_buf, 0,
				 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
				 compbits_kind, NULL, true,
				 gk20a_mem_flag_none,
				 map_offset, map_size);
	if (!map_vaddr) {
		dma_buf_put(compbits_buf);
		err = -EINVAL;
		goto exit_unlock;
	}

	/* store source buffer compression tags */
	gk20a_get_comptags(&g->dev->dev, compbits_buf, &comptags);
	cde_ctx->surf_param_offset = comptags.offset;
	cde_ctx->surf_param_lines = comptags.lines;

	/* store surface vaddr. This is actually compbit vaddr, but since
	   compbits live in the same surface, and we can get the alloc base
	   address by using gk20a_mm_gpuva_to_iova_base, this will do */
	cde_ctx->surf_vaddr = map_vaddr;

	/* store information about destination */
	cde_ctx->compbit_vaddr = map_vaddr + compbits_offset;
	cde_ctx->compbit_size = compbits_size;

	/* remove existing argument data */
	memset(cde_ctx->user_param_values, 0,
	       sizeof(cde_ctx->user_param_values));

	/* read user space arguments for the conversion */
	for (i = 0; i < num_params; i++) {
		struct gk20a_cde_param *param = params + i;
		int id = param->id - NUM_RESERVED_PARAMS;

		if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
			gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown user parameter");
			err = -EINVAL;
			goto exit_unlock;
		}

		cde_ctx->user_param_values[id] = param->value;
	}

	err = gk20a_cde_patch_params(cde_ctx);
	if (err) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to patch parameters");
		goto exit_unlock;
	}

	gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
		  g->gr.compbit_store.size, cde_ctx->backing_store_vaddr);
	gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
		  cde_ctx->compbit_size, cde_ctx->compbit_vaddr);

	/* always take the postfence as it is needed for protecting the
	   cde context */
	flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;

	/* execute the conversion buffer, combined with init first if it's
	   the first time */
	err = gk20a_cde_execute_buffer(cde_ctx,
				       cde_ctx->init_cmd_executed
					       ? TYPE_BUF_COMMAND_CONVERT
					       : TYPE_BUF_COMMAND_INIT,
				       fence, flags, fence_out);

	if (!err)
		cde_ctx->init_cmd_executed = true;

exit_unlock:

	/* unmap the buffers - channel holds references to them now */
	if (map_vaddr)
		gk20a_vm_unmap(cde_ctx->vm, map_vaddr);

	mutex_unlock(&g->cde_app.mutex);
	return err;
}
static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_ctx *cde_ctx = data;
	struct gk20a *g = cde_ctx->g;
	struct gk20a_cde_app *cde_app = &g->cde_app;
	bool channel_idle;

	mutex_lock(&ch->jobs_lock);
	channel_idle = list_empty(&ch->jobs);
	mutex_unlock(&ch->jobs_lock);

	if (!channel_idle)
		return;

	trace_gk20a_cde_finished_ctx_cb(cde_ctx);
	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
	if (!cde_ctx->in_use)
		gk20a_dbg_info("double finish cde context %p on channel %p",
			       cde_ctx, ch);

	if (ch->has_timedout) {
		if (cde_ctx->is_temporary) {
			gk20a_warn(&cde_ctx->pdev->dev,
				   "cde: channel had timed out"
				   " (temporary channel)");
			/* going to be deleted anyway */
		} else {
			gk20a_warn(&cde_ctx->pdev->dev,
				   "cde: channel had timed out"
				   ", replacing with a new one");
			/* mark it to be deleted, replace with a new one */
			mutex_lock(&cde_app->mutex);
			cde_ctx->is_temporary = true;
			if (gk20a_cde_create_context(g)) {
				gk20a_err(&cde_ctx->pdev->dev,
					  "cde: can't replace context");
			}
			mutex_unlock(&cde_app->mutex);
		}
	}

	/* delete temporary contexts later (watch for doubles) */
	if (cde_ctx->is_temporary && cde_ctx->in_use) {
		WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
		schedule_delayed_work(&cde_ctx->ctx_deleter_work,
				      msecs_to_jiffies(CTX_DELETE_TIME));
	}

	if (!ch->has_timedout)
		gk20a_cde_ctx_release(cde_ctx);
}
static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
{
	struct gk20a *g = cde_ctx->g;
	const struct firmware *img;
	struct channel_gk20a *ch;
	struct gr_gk20a *gr = &g->gr;
	int err = 0;
	u64 vaddr;

	img = gk20a_request_firmware(g, "gpu2cde.bin");
	if (!img) {
		dev_err(&cde_ctx->pdev->dev, "cde: could not fetch the firmware");
		return -ENOSYS;
	}

	ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
					    cde_ctx);
	if (!ch) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: gk20a channel not available");
		err = -ENOMEM;
		goto err_get_gk20a_channel;
	}

	/* bind the channel to the vm */
	gk20a_vm_get(&g->mm.pmu.vm);
	ch->vm = &g->mm.pmu.vm;
	err = channel_gk20a_commit_va(ch);
	if (err) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not bind vm");
		goto err_commit_va;
	}

	/* allocate gpfifo (1024 should be more than enough) */
	err = gk20a_alloc_channel_gpfifo(ch,
			&(struct nvgpu_alloc_gpfifo_args){1024, 0});
	if (err) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: unable to allocate gpfifo");
		goto err_alloc_gpfifo;
	}

	/* map backing store to gpu virtual space */
	vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.sgt,
			       g->gr.compbit_store.size,
			       NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
			       gk20a_mem_flag_read_only);
	if (!vaddr) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: cannot map compression bit backing store");
		err = -ENOMEM;
		goto err_map_backingstore;
	}

	/* store initialisation data */
	cde_ctx->ch = ch;
	cde_ctx->vm = ch->vm;
	cde_ctx->backing_store_vaddr = vaddr;

	/* initialise the firmware */
	err = gk20a_init_cde_img(cde_ctx, img);
	if (err) {
		gk20a_warn(&cde_ctx->pdev->dev, "cde: image initialisation failed");
		goto err_init_cde_img;
	}

	/* initialisation done */
	release_firmware(img);
	return 0;

err_init_cde_img:
	gk20a_gmmu_unmap(ch->vm, vaddr, g->gr.compbit_store.size, 1);
err_map_backingstore:
err_alloc_gpfifo:
	gk20a_vm_put(ch->vm);
err_commit_va:
err_get_gk20a_channel:
	release_firmware(img);
	dev_err(&cde_ctx->pdev->dev, "cde: couldn't initialise buffer converter: %d",
		err);
	return err;
}
int gk20a_cde_reload(struct gk20a *g)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &g->cde_app;
	int err;

	if (!cde_app->initialised)
		return -ENOSYS;

	err = gk20a_busy(g->dev);
	if (err)
		return err;

	mutex_lock(&cde_app->mutex);

	gk20a_cde_stop(g);

	err = gk20a_cde_create_contexts(g);
	if (!err)
		cde_app->initialised = true;

	mutex_unlock(&cde_app->mutex);

	gk20a_idle(g->dev);
	return err;
}
int gk20a_init_cde_support(struct gk20a *g)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &g->cde_app;
	int err;

	if (cde_app->initialised)
		return 0;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");

	mutex_init(&cde_app->mutex);
	mutex_lock(&cde_app->mutex);

	INIT_LIST_HEAD(&cde_app->free_contexts);
	INIT_LIST_HEAD(&cde_app->used_contexts);
	cde_app->ctx_count = 0;
	cde_app->ctx_count_top = 0;
	cde_app->ctx_usecount = 0;

	err = gk20a_cde_create_contexts(g);
	if (!err)
		cde_app->initialised = true;

	mutex_unlock(&cde_app->mutex);
	gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err);
	return err;
}
enum cde_launch_patch_id {
	PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
	PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
	PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
	PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
	PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
	PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
	PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
	PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
	PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
	PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
	PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
	PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
	PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
	PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
	PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
	PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
	PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
	PATCH_USER_CONST_XBLOCKS_ID = 1041,
	PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
	PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
	PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
	PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
	PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
	PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
	PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
	PATCH_H_LAUNCH_WORD1_ID = 1049,
	PATCH_H_LAUNCH_WORD2_ID = 1050,
	PATCH_V_LAUNCH_WORD1_ID = 1051,
	PATCH_V_LAUNCH_WORD2_ID = 1052,
	PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
	PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
	PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
	PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
};
enum programs {
	PROG_HPASS = 0,
	PROG_VPASS_LARGE = 1,
	PROG_VPASS_SMALL = 2,
	PROG_HPASS_DEBUG = 3,
	PROG_VPASS_LARGE_DEBUG = 4,
	PROG_VPASS_SMALL_DEBUG = 5,
	PROG_PASSTHROUGH = 6,
};
/* maximum number of WRITE_PATCHes in the below function */
#define MAX_CDE_LAUNCH_PATCHES 32
static int gk20a_buffer_convert_gpu_to_cde_v1(
		struct gk20a *g,
		struct dma_buf *dmabuf, u32 consumer,
		u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_fence *fence_in,
		struct gk20a_buffer_state *state)
{
	struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
	int param = 0;
	int err = 0;
	struct gk20a_fence *new_fence = NULL;
	/* assumed workgroup dimensions (values elided in the source) */
	const int wgx = 8;
	const int wgy = 8;
	const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
	const int xalign = compbits_per_byte * wgx;
	const int yalign = wgy;

	/* Compute per launch parameters */
	const int xtiles = (width + 7) >> 3;
	const int ytiles = (height + 7) >> 3;
	const int gridw_h = roundup(xtiles, xalign) / xalign;
	const int gridh_h = roundup(ytiles, yalign) / yalign;
	const int gridw_v = roundup(ytiles, xalign) / xalign;
	const int gridh_v = roundup(xtiles, yalign) / yalign;
	const int xblocks = (xtiles + 1) >> 1;
	const int voffset = compbits_voffset - compbits_hoffset;
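
	/* a tile covers an 8x8 pixel block (hence the (dim + 7) >> 3
	 * above); each pass rounds its grid up to whole workgroups */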
	int hprog = PROG_HPASS;
	int vprog = (block_height_log2 >= 2) ?
		PROG_VPASS_LARGE : PROG_VPASS_SMALL;

	if (g->cde_app.shader_parameter == 1) {
		hprog = PROG_PASSTHROUGH;
		vprog = PROG_PASSTHROUGH;
	} else if (g->cde_app.shader_parameter == 2) {
		hprog = PROG_HPASS_DEBUG;
		vprog = (block_height_log2 >= 2) ?
			PROG_VPASS_LARGE_DEBUG :
			PROG_VPASS_SMALL_DEBUG;
	}

	if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
		gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
			   xtiles, ytiles);

	gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx",
		  width, height, block_height_log2,
		  compbits_hoffset, compbits_voffset);
	gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
		  width, height, xtiles, ytiles);
	gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
		  wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
	gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
		  hprog,
		  g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
		  g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
		  vprog,
		  g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
		  g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
	/* Write parameters */
#define WRITE_PATCH(NAME, VALUE) \
	params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
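
	/* each WRITE_PATCH appends one gk20a_cde_param to params[]; the
	 * total must stay within MAX_CDE_LAUNCH_PATCHES */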
	WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
	WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
		    block_height_log2);
	WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
	WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);

	WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
	WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
	WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);

	WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
	WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
	WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);

	WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
		    g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
	WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
		    g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
	WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
		    g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
	WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
		    g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
	if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
			    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
			    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
	} else {
		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
			    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
			    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
	}

	if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
			    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
			    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
	} else {
		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
			    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
			    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
	}
#undef WRITE_PATCH
	err = gk20a_cde_convert(g, dmabuf,
				compbits_kind,
				compbits_hoffset,
				0, /* dst_size, 0 = auto */
				fence_in, submit_flags,
				params, param, &new_fence);
	if (err)
		goto out;

	/* compbits generated, update state & fence */
	gk20a_fence_put(state->fence);
	state->fence = new_fence;
	state->valid_compbits |= consumer &
		(NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
out:
	return err;
}
static int gk20a_buffer_convert_gpu_to_cde(
		struct gk20a *g, struct dma_buf *dmabuf, u32 consumer,
		u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_fence *fence_in,
		struct gk20a_buffer_state *state)
{
	int err = 0;

	if (!g->cde_app.initialised)
		return -ENOSYS;

	err = gk20a_busy(g->dev);
	if (err)
		return err;

	gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
		  g->cde_app.firmware_version);

	if (g->cde_app.firmware_version == 1) {
		err = gk20a_buffer_convert_gpu_to_cde_v1(
			g, dmabuf, consumer, offset, compbits_hoffset,
			compbits_voffset, width, height, block_height_log2,
			submit_flags, fence_in, state);
	} else {
		dev_err(dev_from_gk20a(g), "unsupported CDE firmware version %d",
			g->cde_app.firmware_version);
		err = -EINVAL;
	}

	gk20a_idle(g->dev);
	return err;
}
int gk20a_prepare_compressible_read(
		struct gk20a *g, u32 buffer_fd, u32 request, u64 offset,
		u64 compbits_hoffset, u64 compbits_voffset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_fence *fence,
		u32 *valid_compbits, u32 *zbc_color,
		struct gk20a_fence **fence_out)
{
	int err = 0;
	struct gk20a_buffer_state *state;
	struct dma_buf *dmabuf;
	u32 missing_bits;

	dmabuf = dma_buf_get(buffer_fd);
	if (IS_ERR(dmabuf))
		return -EINVAL;

	err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g),
				     offset, &state);
	if (err) {
		dma_buf_put(dmabuf);
		return err;
	}

	missing_bits = (state->valid_compbits ^ request) & request;
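
	/* (valid ^ request) & request leaves exactly the requested bits
	 * that are not yet valid */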
	mutex_lock(&state->lock);

	if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {

		gk20a_fence_put(state->fence);
		state->fence = NULL;
		/* state->fence = decompress();
		state->valid_compbits = 0; */
		err = -EINVAL;
		goto out;
	} else if (missing_bits) {
		u32 missing_cde_bits = missing_bits &
			(NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
		if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
		    missing_cde_bits) {
			err = gk20a_buffer_convert_gpu_to_cde(
					g, dmabuf,
					missing_cde_bits,
					offset, compbits_hoffset,
					compbits_voffset,
					width, height, block_height_log2,
					submit_flags, fence,
					state);
			if (err)
				goto out;
		}
	}

	if (state->fence && fence_out)
		*fence_out = gk20a_fence_get(state->fence);

	if (valid_compbits)
		*valid_compbits = state->valid_compbits;

	if (zbc_color)
		*zbc_color = state->zbc_color;

out:
	mutex_unlock(&state->lock);
	dma_buf_put(dmabuf);
	return err;
}
int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
				  u32 valid_compbits, u64 offset, u32 zbc_color)
{
	int err;
	struct gk20a_buffer_state *state;
	struct dma_buf *dmabuf;

	dmabuf = dma_buf_get(buffer_fd);
	if (IS_ERR(dmabuf)) {
		dev_err(dev_from_gk20a(g), "invalid dmabuf");
		return -EINVAL;
	}

	err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g), offset, &state);
	if (err) {
		dev_err(dev_from_gk20a(g), "could not get state from dmabuf");
		dma_buf_put(dmabuf);
		return err;
	}

	mutex_lock(&state->lock);

	/* Update the compbits state. */
	state->valid_compbits = valid_compbits;
	state->zbc_color = zbc_color;

	/* Discard previous compbit job fence. */
	gk20a_fence_put(state->fence);
	state->fence = NULL;

	mutex_unlock(&state->lock);
	dma_buf_put(dmabuf);
	return 0;
}
static ssize_t gk20a_cde_reload_write(struct file *file,
	const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct gk20a *g = file->private_data;
	gk20a_cde_reload(g);
	return count;
}

static const struct file_operations gk20a_cde_reload_fops = {
	.open = simple_open,
	.write = gk20a_cde_reload_write,
};
void gk20a_cde_debugfs_init(struct platform_device *dev)
{
	struct gk20a_platform *platform = platform_get_drvdata(dev);
	struct gk20a *g = get_gk20a(dev);

	debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
			   platform->debugfs, &g->cde_app.shader_parameter);
	debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
			   platform->debugfs, &g->cde_app.ctx_count);
	debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
			   platform->debugfs, &g->cde_app.ctx_usecount);
	debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
			   platform->debugfs, &g->cde_app.ctx_count_top);
	debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs,
			    g, &gk20a_cde_reload_fops);
}