Merge branch '4.0.8-rt6'

[zynq/linux.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index f4da2cbbfd7f080399fce21699859ebd184f82d0..c7f32d72627c87dd2bb900ac4c21e7eea979f856 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -282,7 +282,11 @@ late_initcall(sched_init_debug);
   * Number of tasks to iterate in a single balance run.
   * Limited because this is done with IRQs disabled.
   */
+#ifndef CONFIG_PREEMPT_RT_FULL
  const_debug unsigned int sysctl_sched_nr_migrate = 32;
+#else
+const_debug unsigned int sysctl_sched_nr_migrate = 8;
+#endif
  
  /*
   * period over which we average the RT time consumption, measured
@@ -458,6 +462,7 @@ static void init_rq_hrtick(struct rq *rq)
  
         hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         rq->hrtick_timer.function = hrtick;
+       rq->hrtick_timer.irqsafe = 1;
  }
  #else  /* CONFIG_SCHED_HRTICK */
  static inline void hrtick_clear(struct rq *rq)
@@ -538,6 +543,52 @@ static bool set_nr_if_polling(struct task_struct *p)
  #endif
  #endif
  
+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+{
+       struct wake_q_node *node = &task->wake_q;
+
+       /*
+        * Atomically grab the task. If ->wake_q.next is already non-NULL, the
+        * task is already queued (either by us or someone else) and will get
+        * the wakeup due to that.
+        *
+        * This cmpxchg() implies a full barrier, which pairs with the write
+        * barrier implied by the wakeup in wake_up_q().
+        */
+       if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
+               return;
+
+       get_task_struct(task);
+
+       /*
+        * The head is context local, there can be no concurrency.
+        */
+       *head->lastp = node;
+       head->lastp = &node->next;
+}
+
+void wake_up_q(struct wake_q_head *head)
+{
+       struct wake_q_node *node = head->first;
+
+       while (node != WAKE_Q_TAIL) {
+               struct task_struct *task;
+
+               task = container_of(node, struct task_struct, wake_q);
+               BUG_ON(!task);
+               /* task can safely be re-inserted now */
+               node = node->next;
+               task->wake_q.next = NULL;
+
+               /*
+                * wake_up_process() implies a wmb() to pair with the queueing
+                * in wake_q_add() so as not to miss wakeups.
+                */
+               wake_up_process(task);
+               put_task_struct(task);
+       }
+}
+
  /*
   * resched_curr - mark rq's current task 'to be rescheduled now'.
   *
@@ -569,6 +620,38 @@ void resched_curr(struct rq *rq)
                 trace_sched_wake_idle_without_ipi(cpu);
  }
  
+#ifdef CONFIG_PREEMPT_LAZY
+void resched_curr_lazy(struct rq *rq)
+{
+       struct task_struct *curr = rq->curr;
+       int cpu;
+
+       if (!sched_feat(PREEMPT_LAZY)) {
+               resched_curr(rq);
+               return;
+       }
+
+       lockdep_assert_held(&rq->lock);
+
+       if (test_tsk_need_resched(curr))
+               return;
+
+       if (test_tsk_need_resched_lazy(curr))
+               return;
+
+       set_tsk_need_resched_lazy(curr);
+
+       cpu = cpu_of(rq);
+       if (cpu == smp_processor_id())
+               return;
+
+       /* NEED_RESCHED_LAZY must be visible before we test polling */
+       smp_mb();
+       if (!tsk_is_polling(curr))
+               smp_send_reschedule(cpu);
+}
+#endif
+
  void resched_cpu(int cpu)
  {
         struct rq *rq = cpu_rq(cpu);
@@ -592,12 +675,14 @@ void resched_cpu(int cpu)
   */
  int get_nohz_timer_target(int pinned)
  {
-       int cpu = smp_processor_id();
+       int cpu;
         int i;
         struct sched_domain *sd;
  
+       preempt_disable_rt();
+       cpu = smp_processor_id();
         if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu))
-               return cpu;
+               goto preempt_en_rt;
  
         rcu_read_lock();
         for_each_domain(cpu, sd) {
@@ -610,6 +695,8 @@ int get_nohz_timer_target(int pinned)
         }
  unlock:
         rcu_read_unlock();
+preempt_en_rt:
+       preempt_enable_rt();
         return cpu;
  }
  /*
@@ -687,14 +774,29 @@ static inline bool got_nohz_idle_kick(void)
  #endif /* CONFIG_NO_HZ_COMMON */
  
  #ifdef CONFIG_NO_HZ_FULL
+
+static int ksoftirqd_running(void)
+{
+       struct task_struct *softirqd;
+
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
+               return 0;
+       softirqd = this_cpu_ksoftirqd();
+       if (softirqd && softirqd->on_rq)
+               return 1;
+       return 0;
+}
+
  bool sched_can_stop_tick(void)
  {
         /*
          * More than one running task need preemption.
          * nr_running update is assumed to be visible
          * after IPI is sent from wakers.
+        *
+        * NOTE, RT: if ksoftirqd is awake, subtract it.
          */
-       if (this_rq()->nr_running > 1)
+       if (this_rq()->nr_running - ksoftirqd_running() > 1)
                 return false;
  
         return true;
@@ -1159,6 +1261,18 @@ struct migration_arg {
  
  static int migration_cpu_stop(void *data);
  
+static bool check_task_state(struct task_struct *p, long match_state)
+{
+       bool match = false;
+
+       raw_spin_lock_irq(&p->pi_lock);
+       if (p->state == match_state || p->saved_state == match_state)
+               match = true;
+       raw_spin_unlock_irq(&p->pi_lock);
+
+       return match;
+}
+
  /*
   * wait_task_inactive - wait for a thread to unschedule.
   *
@@ -1203,7 +1317,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                  * is actually now running somewhere else!
                  */
                 while (task_running(rq, p)) {
-                       if (match_state && unlikely(p->state != match_state))
+                       if (match_state && !check_task_state(p, match_state))
                                 return 0;
                         cpu_relax();
                 }
@@ -1218,7 +1332,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                 running = task_running(rq, p);
                 queued = task_on_rq_queued(p);
                 ncsw = 0;
-               if (!match_state || p->state == match_state)
+               if (!match_state || p->state == match_state ||
+                   p->saved_state == match_state)
                         ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
                 task_rq_unlock(rq, p, &flags);
  
@@ -1444,10 +1559,6 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
  {
         activate_task(rq, p, en_flags);
         p->on_rq = TASK_ON_RQ_QUEUED;
-
-       /* if a worker is waking up, notify workqueue */
-       if (p->flags & PF_WQ_WORKER)
-               wq_worker_waking_up(p, cpu_of(rq));
  }
  
  /*
@@ -1661,8 +1772,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
          */
         smp_mb__before_spinlock();
         raw_spin_lock_irqsave(&p->pi_lock, flags);
-       if (!(p->state & state))
+       if (!(p->state & state)) {
+               /*
+                * The task might be running due to a spinlock sleeper
+                * wakeup. Check the saved state and set it to running
+                * if the wakeup condition is true.
+                */
+               if (!(wake_flags & WF_LOCK_SLEEPER)) {
+                       if (p->saved_state & state) {
+                               p->saved_state = TASK_RUNNING;
+                               success = 1;
+                       }
+               }
                 goto out;
+       }
+
+       /*
+        * If this is a regular wakeup, then we can unconditionally
+        * clear the saved state of a "lock sleeper".
+        */
+       if (!(wake_flags & WF_LOCK_SLEEPER))
+               p->saved_state = TASK_RUNNING;
  
         success = 1; /* we're going to change ->state */
         cpu = task_cpu(p);
@@ -1704,42 +1834,6 @@ out:
         return success;
  }
  
-/**
- * try_to_wake_up_local - try to wake up a local task with rq lock held
- * @p: the thread to be awakened
- *
- * Put @p on the run-queue if it's not already there. The caller must
- * ensure that this_rq() is locked, @p is bound to this_rq() and not
- * the current task.
- */
-static void try_to_wake_up_local(struct task_struct *p)
-{
-       struct rq *rq = task_rq(p);
-
-       if (WARN_ON_ONCE(rq != this_rq()) ||
-           WARN_ON_ONCE(p == current))
-               return;
-
-       lockdep_assert_held(&rq->lock);
-
-       if (!raw_spin_trylock(&p->pi_lock)) {
-               raw_spin_unlock(&rq->lock);
-               raw_spin_lock(&p->pi_lock);
-               raw_spin_lock(&rq->lock);
-       }
-
-       if (!(p->state & TASK_NORMAL))
-               goto out;
-
-       if (!task_on_rq_queued(p))
-               ttwu_activate(rq, p, ENQUEUE_WAKEUP);
-
-       ttwu_do_wakeup(rq, p, 0);
-       ttwu_stat(p, smp_processor_id(), 0);
-out:
-       raw_spin_unlock(&p->pi_lock);
-}
-
  /**
   * wake_up_process - Wake up a specific process
   * @p: The process to be woken up.
@@ -1754,11 +1848,23 @@ out:
   */
  int wake_up_process(struct task_struct *p)
  {
-       WARN_ON(task_is_stopped_or_traced(p));
+       WARN_ON(__task_is_stopped_or_traced(p));
         return try_to_wake_up(p, TASK_NORMAL, 0);
  }
  EXPORT_SYMBOL(wake_up_process);
  
+/**
+ * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
+ * @p: The process to be woken up.
+ *
+ * Like wake_up_process() above, but uses the TASK_ALL state mask and passes
+ * wake_flags=WF_LOCK_SLEEPER to indicate the nature of the wakeup.
+ */
+int wake_up_lock_sleeper(struct task_struct *p)
+{
+       return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
+}
+
  int wake_up_state(struct task_struct *p, unsigned int state)
  {
         return try_to_wake_up(p, state, 0);
@@ -1954,6 +2060,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
         p->on_cpu = 0;
  #endif
         init_task_preempt_count(p);
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+       task_thread_info(p)->preempt_lazy_count = 0;
+#endif
  #ifdef CONFIG_SMP
         plist_node_init(&p->pushable_tasks, MAX_PRIO);
         RB_CLEAR_NODE(&p->pushable_dl_tasks);
@@ -2226,8 +2335,12 @@ static struct rq *finish_task_switch(struct task_struct *prev)
         finish_arch_post_lock_switch();
  
         fire_sched_in_preempt_notifiers(current);
+       /*
+        * We use mmdrop_delayed() here so we don't have to do the
+        * full __mmdrop() when we are the last user.
+        */
         if (mm)
-               mmdrop(mm);
+               mmdrop_delayed(mm);
         if (unlikely(prev_state == TASK_DEAD)) {
                 if (prev->sched_class->task_dead)
                         prev->sched_class->task_dead(prev);
@@ -2645,6 +2758,133 @@ static inline void schedule_debug(struct task_struct *prev)
         schedstat_inc(this_rq(), sched_count);
  }
  
+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
+#define MIGRATE_DISABLE_SET_AFFIN      (1<<30) /* Can't make a negative */
+#define migrate_disabled_updated(p)    ((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
+#define migrate_disable_count(p)       ((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
+
+static inline void update_migrate_disable(struct task_struct *p)
+{
+       const struct cpumask *mask;
+
+       if (likely(!p->migrate_disable))
+               return;
+
+       /* Did we already update affinity? */
+       if (unlikely(migrate_disabled_updated(p)))
+               return;
+
+       /*
+        * Since this is always current we can get away with only locking
+        * rq->lock, the ->cpus_allowed value can normally only be changed
+        * while holding both p->pi_lock and rq->lock, but seeing that this
+        * is current, we cannot actually be waking up, so all code that
+        * relies on serialization against p->pi_lock is out of scope.
+        *
+        * Having rq->lock serializes us against things like
+        * set_cpus_allowed_ptr() that can still happen concurrently.
+        */
+       mask = tsk_cpus_allowed(p);
+
+       if (p->sched_class->set_cpus_allowed)
+               p->sched_class->set_cpus_allowed(p, mask);
+       /* mask==cpumask_of(task_cpu(p)) which has a cpumask_weight==1 */
+       p->nr_cpus_allowed = 1;
+
+       /* Let migrate_enable know to fix things back up */
+       p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
+}
+
+void migrate_disable(void)
+{
+       struct task_struct *p = current;
+
+       if (in_atomic()) {
+#ifdef CONFIG_SCHED_DEBUG
+               p->migrate_disable_atomic++;
+#endif
+               return;
+       }
+
+#ifdef CONFIG_SCHED_DEBUG
+       if (unlikely(p->migrate_disable_atomic)) {
+               tracing_off();
+               WARN_ON_ONCE(1);
+       }
+#endif
+
+       if (p->migrate_disable) {
+               p->migrate_disable++;
+               return;
+       }
+
+       preempt_disable();
+       preempt_lazy_disable();
+       pin_current_cpu();
+       p->migrate_disable = 1;
+       preempt_enable();
+}
+EXPORT_SYMBOL(migrate_disable);
+
+void migrate_enable(void)
+{
+       struct task_struct *p = current;
+       const struct cpumask *mask;
+       unsigned long flags;
+       struct rq *rq;
+
+       if (in_atomic()) {
+#ifdef CONFIG_SCHED_DEBUG
+               p->migrate_disable_atomic--;
+#endif
+               return;
+       }
+
+#ifdef CONFIG_SCHED_DEBUG
+       if (unlikely(p->migrate_disable_atomic)) {
+               tracing_off();
+               WARN_ON_ONCE(1);
+       }
+#endif
+       WARN_ON_ONCE(p->migrate_disable <= 0);
+
+       if (migrate_disable_count(p) > 1) {
+               p->migrate_disable--;
+               return;
+       }
+
+       preempt_disable();
+       if (unlikely(migrate_disabled_updated(p))) {
+               /*
+                * Undo whatever update_migrate_disable() did, also see there
+                * about locking.
+                */
+               rq = this_rq();
+               raw_spin_lock_irqsave(&rq->lock, flags);
+
+               /*
+                * Clearing migrate_disable causes tsk_cpus_allowed() to
+                * show the task's original CPU affinity.
+                */
+               p->migrate_disable = 0;
+               mask = tsk_cpus_allowed(p);
+               if (p->sched_class->set_cpus_allowed)
+                       p->sched_class->set_cpus_allowed(p, mask);
+               p->nr_cpus_allowed = cpumask_weight(mask);
+               raw_spin_unlock_irqrestore(&rq->lock, flags);
+       } else
+               p->migrate_disable = 0;
+
+       unpin_current_cpu();
+       preempt_enable();
+       preempt_lazy_enable();
+}
+EXPORT_SYMBOL(migrate_enable);
+#else
+static inline void update_migrate_disable(struct task_struct *p) { }
+#define migrate_disabled_updated(p)            0
+#endif
+
  /*
   * Pick up the highest-prio task:
   */
@@ -2751,6 +2991,8 @@ static void __sched __schedule(void)
         smp_mb__before_spinlock();
         raw_spin_lock_irq(&rq->lock);
  
+       update_migrate_disable(prev);
+
         rq->clock_skip_update <<= 1; /* promote REQ to ACT */
  
         switch_count = &prev->nivcsw;
@@ -2760,19 +3002,6 @@ static void __sched __schedule(void)
                 } else {
                         deactivate_task(rq, prev, DEQUEUE_SLEEP);
                         prev->on_rq = 0;
-
-                       /*
-                        * If a worker went to sleep, notify and ask workqueue
-                        * whether it wants to wake up a task to maintain
-                        * concurrency.
-                        */
-                       if (prev->flags & PF_WQ_WORKER) {
-                               struct task_struct *to_wakeup;
-
-                               to_wakeup = wq_worker_sleeping(prev, cpu);
-                               if (to_wakeup)
-                                       try_to_wake_up_local(to_wakeup);
-                       }
                 }
                 switch_count = &prev->nvcsw;
         }
@@ -2782,6 +3011,7 @@ static void __sched __schedule(void)
  
         next = pick_next_task(rq, prev);
         clear_tsk_need_resched(prev);
+       clear_tsk_need_resched_lazy(prev);
         clear_preempt_need_resched();
         rq->clock_skip_update = 0;
  
@@ -2802,8 +3032,19 @@ static void __sched __schedule(void)
  
  static inline void sched_submit_work(struct task_struct *tsk)
  {
-       if (!tsk->state || tsk_is_pi_blocked(tsk))
+       if (!tsk->state)
                 return;
+       /*
+        * If a worker went to sleep, notify and ask workqueue whether
+        * it wants to wake up a task to maintain concurrency.
+        */
+       if (tsk->flags & PF_WQ_WORKER)
+               wq_worker_sleeping(tsk);
+
+
+       if (tsk_is_pi_blocked(tsk))
+               return;
+
         /*
          * If we are going to sleep and we have plugged IO queued,
          * make sure to submit it to avoid deadlocks.
@@ -2812,6 +3053,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
                 blk_schedule_flush_plug(tsk);
  }
  
+static void sched_update_worker(struct task_struct *tsk)
+{
+       if (tsk->flags & PF_WQ_WORKER)
+               wq_worker_running(tsk);
+}
+
  asmlinkage __visible void __sched schedule(void)
  {
         struct task_struct *tsk = current;
@@ -2820,6 +3067,7 @@ asmlinkage __visible void __sched schedule(void)
         do {
                 __schedule();
         } while (need_resched());
+       sched_update_worker(tsk);
  }
  EXPORT_SYMBOL(schedule);
  
@@ -2911,6 +3159,14 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void)
         if (likely(!preemptible()))
                 return;
  
+#ifdef CONFIG_PREEMPT_LAZY
+       /*
+        * Check for lazy preemption
+        */
+       if (current_thread_info()->preempt_lazy_count &&
+                       !test_thread_flag(TIF_NEED_RESCHED))
+               return;
+#endif
         do {
                 __preempt_count_add(PREEMPT_ACTIVE);
                 /*
@@ -2919,7 +3175,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_context(void)
                  * an infinite recursion.
                  */
                 prev_ctx = exception_enter();
+               /*
+                * The add/subtract must not be traced by the function
+                * tracer. But we still want to account for the
+                * preempt off latency tracer. Since the _notrace versions
+                * of add/subtract skip the accounting for latency tracer
+                * we must force it manually.
+                */
+               start_critical_timings();
                 __schedule();
+               stop_critical_timings();
                 exception_exit(prev_ctx);
  
                 __preempt_count_sub(PREEMPT_ACTIVE);
@@ -4256,6 +4521,7 @@ int __cond_resched_lock(spinlock_t *lock)
  }
  EXPORT_SYMBOL(__cond_resched_lock);
  
+#ifndef CONFIG_PREEMPT_RT_FULL
  int __sched __cond_resched_softirq(void)
  {
         BUG_ON(!in_softirq());
@@ -4269,6 +4535,7 @@ int __sched __cond_resched_softirq(void)
         return 0;
  }
  EXPORT_SYMBOL(__cond_resched_softirq);
+#endif
  
  /**
   * yield - yield the current processor to other threads.
@@ -4623,7 +4890,9 @@ void init_idle(struct task_struct *idle, int cpu)
  
         /* Set the preempt count _outside_ the spinlocks! */
         init_idle_preempt_count(idle, cpu);
-
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+       task_thread_info(idle)->preempt_lazy_count = 0;
+#endif
         /*
          * The idle tasks have their own, simple scheduling class:
          */
@@ -4743,11 +5012,91 @@ static struct rq *move_queued_task(struct task_struct *p, int new_cpu)
  
  void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  {
-       if (p->sched_class->set_cpus_allowed)
-               p->sched_class->set_cpus_allowed(p, new_mask);
+       if (!migrate_disabled_updated(p)) {
+               if (p->sched_class->set_cpus_allowed)
+                       p->sched_class->set_cpus_allowed(p, new_mask);
+               p->nr_cpus_allowed = cpumask_weight(new_mask);
+       }
  
         cpumask_copy(&p->cpus_allowed, new_mask);
-       p->nr_cpus_allowed = cpumask_weight(new_mask);
+}
+
+static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
+static DEFINE_MUTEX(sched_down_mutex);
+static cpumask_t sched_down_cpumask;
+
+void tell_sched_cpu_down_begin(int cpu)
+{
+       mutex_lock(&sched_down_mutex);
+       cpumask_set_cpu(cpu, &sched_down_cpumask);
+       mutex_unlock(&sched_down_mutex);
+}
+
+void tell_sched_cpu_down_done(int cpu)
+{
+       mutex_lock(&sched_down_mutex);
+       cpumask_clear_cpu(cpu, &sched_down_cpumask);
+       mutex_unlock(&sched_down_mutex);
+}
+
+/**
+ * migrate_me - try to move the current task off this cpu
+ *
+ * Used by the pin_current_cpu() code to try to get tasks
+ * to move off the current CPU as it is going down.
+ * It will only move the task if the task isn't pinned to
+ * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
+ * and the task has to be in a RUNNING state. Otherwise the
+ * movement of the task will wake it up (change its state
+ * to running) when the task did not expect it.
+ *
+ * Returns 1 if it succeeded in moving the current task
+ *         0 otherwise.
+ */
+int migrate_me(void)
+{
+       struct task_struct *p = current;
+       struct migration_arg arg;
+       struct cpumask *cpumask;
+       struct cpumask *mask;
+       unsigned long flags;
+       unsigned int dest_cpu;
+       struct rq *rq;
+
+       /*
+        * We cannot migrate tasks bound to a CPU or tasks that are
+        * not running. The movement of the task will wake it up.
+        */
+       if (p->flags & PF_NO_SETAFFINITY || p->state)
+               return 0;
+
+       mutex_lock(&sched_down_mutex);
+       rq = task_rq_lock(p, &flags);
+
+       cpumask = this_cpu_ptr(&sched_cpumasks);
+       mask = &p->cpus_allowed;
+
+       cpumask_andnot(cpumask, mask, &sched_down_cpumask);
+
+       if (!cpumask_weight(cpumask)) {
+               /* It's only on this CPU? */
+               task_rq_unlock(rq, p, &flags);
+               mutex_unlock(&sched_down_mutex);
+               return 0;
+       }
+
+       dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
+
+       arg.task = p;
+       arg.dest_cpu = dest_cpu;
+
+       task_rq_unlock(rq, p, &flags);
+
+       stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+       tlb_migrate_finish(p->mm);
+       mutex_unlock(&sched_down_mutex);
+
+       return 1;
  }
  
  /*
@@ -4793,7 +5142,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
         do_set_cpus_allowed(p, new_mask);
  
         /* Can the task run on the task's current CPU? If so, we're done */
-       if (cpumask_test_cpu(task_cpu(p), new_mask))
+       if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
                 goto out;
  
         dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
@@ -4933,6 +5282,8 @@ static int migration_cpu_stop(void *data)
  
  #ifdef CONFIG_HOTPLUG_CPU
  
+static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
+
  /*
   * Ensures that the idle task is using init_mm right before its cpu goes
   * offline.
@@ -4947,7 +5298,11 @@ void idle_task_exit(void)
                 switch_mm(mm, &init_mm, current);
                 finish_arch_post_lock_switch();
         }
-       mmdrop(mm);
+       /*
+        * Defer the cleanup to a CPU that is still alive. On RT we can
+        * neither call mmdrop() nor mmdrop_delayed() from here.
+        */
+       per_cpu(idle_last_mm, smp_processor_id()) = mm;
  }
  
  /*
@@ -5290,6 +5645,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
  
         case CPU_DEAD:
                 calc_load_migrate(rq);
+               if (per_cpu(idle_last_mm, cpu)) {
+                       mmdrop(per_cpu(idle_last_mm, cpu));
+                       per_cpu(idle_last_mm, cpu) = NULL;
+               }
                 break;
  #endif
         }
@@ -7281,7 +7640,8 @@ void __init sched_init(void)
  #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  static inline int preempt_count_equals(int preempt_offset)
  {
-       int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
+       int nested = (preempt_count() & ~PREEMPT_ACTIVE) +
+               sched_rcu_preempt_depth();
  
         return (nested == preempt_offset);
  }