priv->timeout_debug_dump = true;
if (!tegra_platform_is_silicon())
priv->timeout = 0;
-
return 0;
fail:
nvhost_channelrelease(inode, filp);
break;
}
case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT:
- priv->timeout =
+ {
+ u32 timeout =
(u32)((struct nvhost_set_timeout_args *)buf)->timeout;
+
+ priv->timeout = timeout;
dev_dbg(&priv->ch->dev->dev,
"%s: setting buffer timeout (%d ms) for userctx 0x%p\n",
__func__, priv->timeout, priv);
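+	/* propagate the new limit to the hw context checked by the watchdog */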
+ if (priv->hwctx)
+ priv->hwctx->timeout_ms_max = timeout;
break;
+ }
case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT:
((struct nvhost_get_param_args *)buf)->value =
priv->hwctx->has_timedout;
err = nvhost_ioctl_channel_submit(priv, (void *)buf);
break;
case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX:
- priv->timeout = (u32)
- ((struct nvhost_set_timeout_ex_args *)buf)->timeout;
- priv->timeout_debug_dump = !((u32)
+ {
+ u32 timeout =
+		(u32)((struct nvhost_set_timeout_ex_args *)buf)->timeout;
+ bool timeout_debug_dump = !((u32)
((struct nvhost_set_timeout_ex_args *)buf)->flags &
(1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP));
+ priv->timeout = timeout;
+ priv->timeout_debug_dump = timeout_debug_dump;
dev_dbg(&priv->ch->dev->dev,
"%s: setting buffer timeout (%d ms) for userctx 0x%p\n",
__func__, priv->timeout, priv);
+ if (priv->hwctx) {
+ priv->hwctx->timeout_ms_max = timeout;
+ priv->hwctx->timeout_debug_dump = timeout_debug_dump;
+ }
break;
+ }
case NVHOST_IOCTL_CHANNEL_SET_CTXSWITCH:
err = nvhost_ioctl_channel_set_ctxswitch(priv, (void *)buf);
break;
nvhost_dbg_info("freeing bound channel context, timeout=%ld",
timeout);
- gk20a_disable_channel(ch, finish, timeout);
+ gk20a_disable_channel(ch, finish && !ch->hwctx->has_timedout, timeout);
gk20a_free_error_notifiers(ctx);
channel_gk20a_bind(ch_gk20a);
ch_gk20a->pid = current->pid;
+	/* reset the watchdog accumulator and gpfifo tracking point */
+ ch_gk20a->timeout_accumulated_ms = 0;
+ ch_gk20a->timeout_gpfifo_get = 0;
+ /* set gr host default timeout */
+ ch_gk20a->hwctx->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
+
/* The channel is *not* runnable at this point. It still needs to have
* an address space bound and allocate a gpfifo and grctx. */
-
init_waitqueue_head(&ch_gk20a->notifier_wq);
init_waitqueue_head(&ch_gk20a->semaphore_wq);
init_waitqueue_head(&ch_gk20a->submit_wq);
c->gpfifo.entry_num;
}
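+/* Accumulate the channel watchdog only while the gpfifo GET pointer has
+ * not advanced since the last check; any progress restarts the count.
+ * Returns true when the accumulated stall exceeds the per-context limit
+ * and timeouts are enabled globally. */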
+bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
+ u32 timeout_delta_ms)
+{
+ u32 gpfifo_get = update_gp_get(ch->g, ch);
+	/* count consecutive timeout ISRs */
+ if (gpfifo_get == ch->timeout_gpfifo_get) {
+ /* we didn't advance since previous channel timeout check */
+ ch->timeout_accumulated_ms += timeout_delta_ms;
+ } else {
+		/* gpfifo advanced since the last check; restart the count */
+ ch->timeout_accumulated_ms = timeout_delta_ms;
+ }
+
+ ch->timeout_gpfifo_get = gpfifo_get;
+
+ return ch->g->timeouts_enabled &&
+ ch->timeout_accumulated_ms > ch->hwctx->timeout_ms_max;
+}
+
/* Issue a syncpoint increment *preceded* by a wait-for-idle
* command. All commands on the channel will have been
* consumed at the time the fence syncpoint increment occurs.
u32 free_count;
int err;
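+	/* fail fast if the channel has already been marked as timed out */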
+ if (c->hwctx->has_timedout)
+ return -ETIMEDOUT;
+
cmd_size = 4 + wfi_cmd_size();
update_gp_get(g, c);
* wait and one for syncpoint increment */
const int extra_entries = 2;
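+	/* refuse new gpfifo submissions on a timed-out channel */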
+ if (c->hwctx->has_timedout)
+ return -ETIMEDOUT;
+
if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
!fence)
* wait for signals from completed submits */
if (gp_free_count(c) < num_entries + extra_entries) {
err = wait_event_interruptible(c->submit_wq,
- get_gp_free_count(c) >= num_entries + extra_entries);
+ get_gp_free_count(c) >= num_entries + extra_entries ||
+ c->hwctx->has_timedout);
+ }
+
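+	/* the wait above is also woken when the channel times out */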
+ if (c->hwctx->has_timedout) {
+ err = -ETIMEDOUT;
+ goto clean_up;
}
if (err) {
if (!ch->cmds_pending)
return 0;
+ /* Do not wait for a timedout channel */
+ if (ch->hwctx && ch->hwctx->has_timedout)
+ return -ETIMEDOUT;
+
if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
nvhost_dbg_fn("issuing wfi, incr to finish the channel");
fence.syncpt_id = ch->hw_chid + pdata->syncpt_base;
ch->last_submit_fence.syncpt_id,
ch->last_submit_fence.syncpt_value);
- /* Do not wait for a timedout channel. Just check if it's done */
- if (ch->hwctx && ch->hwctx->has_timedout)
- timeout = 0;
-
err = nvhost_syncpt_wait_timeout(sp,
ch->last_submit_fence.syncpt_id,
ch->last_submit_fence.syncpt_value,
int ret = 0;
long remain;
+ /* do not wait if channel has timed out */
+ if (ch->hwctx->has_timedout)
+ return -ETIMEDOUT;
+
handle_ref = nvhost_memmgr_get(memmgr, id, pdev);
if (IS_ERR(handle_ref)) {
nvhost_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
remain = wait_event_interruptible_timeout(
ch->semaphore_wq,
- *semaphore == payload,
+ *semaphore == payload || ch->hwctx->has_timedout,
timeout);
if (remain == 0 && *semaphore != payload)
nvhost_dbg_fn("");
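+	/* do not start a wait on a channel that has already timed out */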
+ if (ch->hwctx->has_timedout)
+ return -ETIMEDOUT;
+
if (args->timeout == NVHOST_NO_TIMEOUT)
timeout = MAX_SCHEDULE_TIMEOUT;
else
* calling this ioctl */
remain = wait_event_interruptible_timeout(
ch->notifier_wq,
- notif->status == 0,
+ notif->status == 0 || ch->hwctx->has_timedout,
timeout);
if (remain == 0 && notif->status != 0) {
return -ENOMEM;
}
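+/* Period of the engine (ctxsw) timeout check, in microseconds; each
+ * timeout interrupt therefore accounts for this much stall time. */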
+#define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000
+
int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
{
u32 intr_stall;
timeout &= ~fifo_pb_timeout_detection_enabled_f();
gk20a_writel(g, fifo_pb_timeout_r(), timeout);
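+	/* arm engine timeout detection with a 100 ms check period */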
+ timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US |
+ fifo_eng_timeout_detection_enabled_f();
+ gk20a_writel(g, fifo_eng_timeout_r(), timeout);
+
nvhost_dbg_fn("done");
return 0;
runlist = &f->runlist_info[runlist_id];
wake_up(&runlist->runlist_wq);
}
}
static int gk20a_init_fifo_setup_hw(struct gk20a *g)
schedule_work(&g->fifo.fault_restore_thread);
}
-static void gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
- struct nvhost_hwctx *hwctx) {
- if (hwctx) {
- nvhost_err(dev_from_gk20a(g),
- "channel with hwctx generated a mmu fault");
- if (hwctx->error_notifier) {
- if (hwctx->error_notifier->info32) {
- /* If error code is already set, this mmu fault
- * was triggered as part of recovery from other
- * error condition.
- * Don't overwrite error flag. */
- } else {
- gk20a_set_error_notifier(hwctx,
- NVHOST_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
- }
+static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
+ struct channel_gk20a *ch) {
+ bool verbose = true;
+ if (!ch || !ch->hwctx)
+		return verbose;
+
+ nvhost_err(dev_from_gk20a(g),
+		"channel %d with hwctx generated an mmu fault",
+ ch->hw_chid);
+ if (ch->hwctx->error_notifier) {
+ u32 err = ch->hwctx->error_notifier->info32;
+ if (err) {
+ /* If error code is already set, this mmu fault
+ * was triggered as part of recovery from other
+ * error condition.
+ * Don't overwrite error flag. */
+
+			/* FIFO timeout debug dump is controlled by the user */
+ if (err == NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT)
+ verbose = ch->hwctx->timeout_debug_dump;
+ } else {
+ gk20a_set_error_notifier(ch->hwctx,
+ NVHOST_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
}
- /* mark channel as faulted */
- hwctx->has_timedout = true;
}
+ /* mark channel as faulted */
+ ch->hwctx->has_timedout = true;
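+	/* ensure the flag is visible before the wake-ups below */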
+ wmb();
+ /* unblock pending waits */
+ wake_up(&ch->semaphore_wq);
+ wake_up(&ch->notifier_wq);
+ wake_up(&ch->submit_wq);
+ return verbose;
}
-static void gk20a_fifo_handle_mmu_fault(struct gk20a *g)
+static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
{
bool fake_fault;
unsigned long fault_id;
u32 engine_mmu_id;
int i;
-
+ bool verbose = true;
nvhost_dbg_fn("");
g->fifo.deferred_reset_pending = false;
}
if (ch) {
- gk20a_fifo_set_ctx_mmu_error(g, ch->hwctx);
+ verbose = gk20a_fifo_set_ctx_mmu_error(g, ch);
if (ch->in_use) {
/* disable the channel from hw and increment
* syncpoints */
fifo_engine_status_id_v(status);
}
-void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids)
+void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
+ bool verbose)
{
unsigned long end_jiffies = jiffies +
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
unsigned long engine_ids = 0;
int ret;
- nvhost_debug_dump(g->host);
+ if (verbose)
+ nvhost_debug_dump(g->host);
/* store faulted engines in advance */
g->fifo.mmu_fault_engines = 0;
gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0);
}
+
static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
{
u32 sched_error;
}
}
- nvhost_err(dev_from_gk20a(g), "fifo sched error : 0x%08x, engine=%u, %s=%d",
- sched_error, engine_id, non_chid ? "non-ch" : "ch", id);
-
/* could not find the engine - should never happen */
if (unlikely(engine_id >= g->fifo.max_engines))
- return true;
+ goto err;
if (fifo_intr_sched_error_code_f(sched_error) ==
- fifo_intr_sched_error_code_ctxsw_timeout_v()) {
- if (!non_chid) {
- struct fifo_gk20a *f = &g->fifo;
- struct nvhost_hwctx *hwctx = f->channel[id].hwctx;
+ fifo_intr_sched_error_code_ctxsw_timeout_v()) {
+ struct fifo_gk20a *f = &g->fifo;
+ struct channel_gk20a *ch = &f->channel[id];
+ struct nvhost_hwctx *hwctx = ch->hwctx;
+
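+		/* the stalled id does not name a channel; recover the engine */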
+ if (non_chid) {
+ gk20a_fifo_recover(g, BIT(engine_id), true);
+ goto err;
+ }
+
+ if (gk20a_channel_update_and_check_timeout(ch,
+ GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) {
gk20a_set_error_notifier(hwctx,
NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
- hwctx->has_timedout = true;
+ nvhost_err(dev_from_gk20a(g),
+				"fifo sched ctxsw timeout error: "
+ "engine = %u, ch = %d", engine_id, id);
+ gk20a_fifo_recover(g, BIT(engine_id),
+ hwctx ? hwctx->timeout_debug_dump : true);
+ } else {
+ nvhost_warn(dev_from_gk20a(g),
+				"fifo is waiting for ctx switch for %d ms, "
+ "ch = %d\n",
+ ch->timeout_accumulated_ms,
+ id);
}
- gk20a_fifo_recover(g, BIT(engine_id));
- return false;
+		return hwctx ? hwctx->timeout_debug_dump : true;
}
+err:
+ nvhost_err(dev_from_gk20a(g), "fifo sched error : 0x%08x, engine=%u, %s=%d",
+ sched_error, engine_id, non_chid ? "non-ch" : "ch", id);
return true;
}
static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
{
- bool reset_channel = false, reset_engine = false;
+ bool print_channel_reset_log = false, reset_engine = false;
struct device *dev = dev_from_gk20a(g);
u32 handled = 0;
if (fifo_intr & fifo_intr_0_bind_error_pending_f()) {
u32 bind_error = gk20a_readl(g, fifo_intr_bind_error_r());
nvhost_err(dev, "fifo bind error: 0x%08x", bind_error);
- reset_channel = true;
+ print_channel_reset_log = true;
handled |= fifo_intr_0_bind_error_pending_f();
}
if (fifo_intr & fifo_intr_0_sched_error_pending_f()) {
- reset_channel = gk20a_fifo_handle_sched_error(g);
+ print_channel_reset_log = gk20a_fifo_handle_sched_error(g);
handled |= fifo_intr_0_sched_error_pending_f();
}
}
if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
- gk20a_fifo_handle_mmu_fault(g);
- reset_channel = true;
+ print_channel_reset_log = gk20a_fifo_handle_mmu_fault(g);
reset_engine = true;
handled |= fifo_intr_0_mmu_fault_pending_f();
}
handled |= fifo_intr_0_dropped_mmu_fault_pending_f();
}
- reset_channel = !g->fifo.deferred_reset_pending && (reset_channel || fifo_intr);
+ print_channel_reset_log = !g->fifo.deferred_reset_pending
+ && print_channel_reset_log;
- if (reset_channel) {
+ if (print_channel_reset_log) {
int engine_id;
nvhost_err(dev_from_gk20a(g),
"channel reset initated from %s", __func__);
engine_id < g->fifo.max_engines;
engine_id++) {
nvhost_dbg_fn("enum:%d -> engine_id:%d", engine_id,
- g->fifo.engine_info[engine_id].engine_id);
+ g->fifo.engine_info[engine_id].engine_id);
fifo_pbdma_exception_status(g,
&g->fifo.engine_info[engine_id]);
fifo_engine_exception_status(g,
}
gk20a_set_error_notifier(ch->hwctx,
NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
- gk20a_fifo_recover(g, engines);
+ gk20a_fifo_recover(g, engines, true);
}
/* re-enable elpg or release pmu mutex */
(f->engine_info[i].runlist_id == runlist_id))
engines |= BIT(i);
}
- gk20a_fifo_recover(g, engines);
+ gk20a_fifo_recover(g, engines, true);
}
static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
bool pending;
runlist = &g->fifo.runlist_info[runlist_id];
-
remain = wait_event_timeout(runlist->runlist_wq,
((pending = gk20a_readl(g, fifo_eng_runlist_r(runlist_id)) &
fifo_eng_runlist_pending_true_f()) == 0),
u32 old_buf, new_buf;
u32 chid;
u32 count = 0;
-
runlist = &f->runlist_info[runlist_id];
/* valid channel, add/remove it from active list.