If a task continuously sched_yields to itself, force the task to
sleep in sched_yield. This lowers the CPU load of the task,
thereby lowering the CPU frequency and improving power consumption.
Added a stat variable to track how many times we sleep due to these
consecutive sched_yields. Also added sysctl knobs to control the number
of consecutive sched_yields after which the sleep kicks in and the
duration of the sleep in us.
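
For illustration, the pattern this change targets is a hypothetical
busy-yield loop like the one below (not part of this patch). With no
other runnable task on its CPU, the thread keeps yielding to itself;
once it has done so sched_yield_sleep_threshold times in a row, its
next sched_yield call sleeps for sched_yield_sleep_duration us instead
of spinning:

	/* Hypothetical reproducer, for illustration only. */
	#include <sched.h>

	int main(void)
	{
		for (;;)
			sched_yield();	/* yields to itself when alone on the CPU */
		return 0;
	}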
Bug 1424617
Change-Id: Ie92412b8b900365816e17237fcbd0aac6e9c94ce
Signed-off-by: Sai Gurrappadi <sgurrappadi@nvidia.com>
Reviewed-on: http://git-master/r/358455
Reviewed-by: Wen Yi <wyi@nvidia.com>
Reviewed-by: Peter Zu <pzu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Diwakar Tundlam <dtundlam@nvidia.com>
atomic_t usage;
unsigned int flags; /* per process flags, defined below */
unsigned int ptrace;
+ unsigned int yield_count;
#ifdef CONFIG_SMP
struct llist_node wake_entry;
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;
extern unsigned int sysctl_sched_shares_window;
-
+extern unsigned int sysctl_sched_yield_sleep_duration;
+extern int sysctl_sched_yield_sleep_threshold;
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
loff_t *ppos);
*/
int sysctl_sched_rt_runtime = 950000;
+/*
+ * Number of consecutive sched_yield calls in which a thread yields
+ * to itself before a sleep is injected into its next sched_yield call.
+ * Setting this to -1 disables the injected sleep.
+ */
+const_debug int sysctl_sched_yield_sleep_threshold = 4;
+/*
+ * Sleep duration in us used when sched_yield_sleep_threshold
+ * is reached.
+ */
+const_debug unsigned int sysctl_sched_yield_sleep_duration = 50;
/*
* __task_rq_lock - lock the rq @p resides on.
if (likely(prev != next)) {
rq->nr_switches++;
rq->curr = next;
+ prev->yield_count = 0; /* switched to a different task: reset streak */
++*switch_count;
context_switch(rq, prev, next); /* unlocks the rq */
*/
cpu = smp_processor_id();
rq = cpu_rq(cpu);
- } else
+ } else {
+ prev->yield_count++; /* prev == next: task yielded to itself */
raw_spin_unlock_irq(&rq->lock);
+ }
post_schedule(rq);
struct rq *rq = this_rq_lock();
schedstat_inc(rq, yld_count);
+ if (rq->curr->yield_count == sysctl_sched_yield_sleep_threshold)
+ schedstat_inc(rq, yield_sleep_count);
current->sched_class->yield_task(rq);
/*
do_raw_spin_unlock(&rq->lock);
sched_preempt_enable_no_resched();
- schedule();
+ /* Threshold reached: inject a short sleep instead of scheduling */
+ if (rq->curr->yield_count == sysctl_sched_yield_sleep_threshold)
+ usleep_range(sysctl_sched_yield_sleep_duration,
+ sysctl_sched_yield_sleep_duration + 5);
+ else
+ schedule();
return 0;
}
#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
P(yld_count);
+ P(yield_sleep_count);
P(sched_count);
P(sched_goidle);
/* sys_sched_yield() stats */
unsigned int yld_count;
+ unsigned int yield_sleep_count;
/* schedule() stats */
unsigned int sched_count;
extern const_debug unsigned int sysctl_sched_time_avg;
extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost;
+extern const_debug unsigned int sysctl_sched_yield_sleep_duration;
+extern const_debug int sysctl_sched_yield_sleep_threshold;
static inline u64 sched_avg_period(void)
{
/* runqueue-specific stats */
seq_printf(seq,
- "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
+ "cpu%d %u 0 %u %u %u %u %llu %llu %lu %u",
cpu, rq->yld_count,
rq->sched_count, rq->sched_goidle,
rq->ttwu_count, rq->ttwu_local,
rq->rq_cpu_time,
- rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
+ rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount,
+ rq->yield_sleep_count);
seq_printf(seq, "\n");
.extra1 = &min_wakeup_granularity_ns,
.extra2 = &max_wakeup_granularity_ns,
},
+ {
+ .procname = "sched_yield_sleep_threshold",
+ .data = &sysctl_sched_yield_sleep_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_yield_sleep_duration",
+ .data = &sysctl_sched_yield_sleep_duration,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_SMP
{
.procname = "sched_tunable_scaling",