If a task continuously sched_yields to itself, force the task to
sleep in sched_yield. This lowers the CPU load of the task,
thereby lowering the CPU frequency and improving power consumption.
Added a stat variable to track how many times we sleep due to these
consecutive sched_yields. Also added sysctl knobs to control the number
of consecutive sched_yields after which the sleep kicks in and the
duration of the sleep in us.
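
For illustration, the pattern this change targets is a hypothetical
busy-yield loop like the one below (not part of this patch). With no
other runnable task on its CPU, the thread keeps yielding to itself;
once it has done so sched_yield_sleep_threshold times in a row, its
next sched_yield call sleeps for sched_yield_sleep_duration us instead
of spinning:

	/* Hypothetical reproducer, for illustration only. */
	#include <sched.h>

	int main(void)
	{
		for (;;)
			sched_yield();	/* yields to itself when alone on the CPU */
		return 0;
	}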
Bug 1424617
Change-Id: Ie92412b8b900365816e17237fcbd0aac6e9c94ce
Signed-off-by: Sai Gurrappadi <sgurrappadi@nvidia.com>
Reviewed-on: http://git-master/r/358455
Reviewed-by: Wen Yi <wyi@nvidia.com>
Reviewed-by: Peter Zu <pzu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Diwakar Tundlam <dtundlam@nvidia.com>
atomic_t usage;
unsigned int flags; /* per process flags, defined below */
unsigned int ptrace;
+ unsigned int yield_count;
#ifdef CONFIG_SMP
struct llist_node wake_entry;
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;
extern unsigned int sysctl_sched_shares_window;
-
+extern unsigned int sysctl_sched_yield_sleep_duration;
+extern int sysctl_sched_yield_sleep_threshold;
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
loff_t *ppos);
*/
int sysctl_sched_rt_runtime = 950000;
+/*
+ * Number of consecutive sched_yield calls in which a thread yields
+ * to itself before a sleep is injected into its next sched_yield call.
+ * Setting this to -1 disables the injected sleep.
+ */
+const_debug int sysctl_sched_yield_sleep_threshold = 4;
+/*
+ * Sleep duration in us used when sched_yield_sleep_threshold
+ * is reached.
+ */
+const_debug unsigned int sysctl_sched_yield_sleep_duration = 50;
/*
* __task_rq_lock - lock the rq @p resides on.
if (likely(prev != next)) {
rq->nr_switches++;
rq->curr = next;
+ prev->yield_count = 0; /* switched to a different task: reset streak */
++*switch_count;
context_switch(rq, prev, next); /* unlocks the rq */
*/
cpu = smp_processor_id();
rq = cpu_rq(cpu);
- } else
+ } else {
+ prev->yield_count++; /* prev == next: task yielded to itself */
raw_spin_unlock_irq(&rq->lock);
+ }
post_schedule(rq);
struct rq *rq = this_rq_lock();
schedstat_inc(rq, yld_count);
+ if (rq->curr->yield_count == sysctl_sched_yield_sleep_threshold)
+ schedstat_inc(rq, yield_sleep_count);
current->sched_class->yield_task(rq);
/*
do_raw_spin_unlock(&rq->lock);
sched_preempt_enable_no_resched();
- schedule();
+ /* Threshold reached: inject a short sleep instead of scheduling */
+ if (rq->curr->yield_count == sysctl_sched_yield_sleep_threshold)
+ usleep_range(sysctl_sched_yield_sleep_duration,
+ sysctl_sched_yield_sleep_duration + 5);
+ else
+ schedule();
return 0;
}
#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
P(yld_count);
+ P(yield_sleep_count);
P(sched_count);
P(sched_goidle);
/* sys_sched_yield() stats */
unsigned int yld_count;
+ unsigned int yield_sleep_count;
/* schedule() stats */
unsigned int sched_count;
extern const_debug unsigned int sysctl_sched_time_avg;
extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost;
+extern const_debug unsigned int sysctl_sched_yield_sleep_duration;
+extern const_debug int sysctl_sched_yield_sleep_threshold;
static inline u64 sched_avg_period(void)
{
/* runqueue-specific stats */
seq_printf(seq,
- "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
+ "cpu%d %u 0 %u %u %u %u %llu %llu %lu %u",
cpu, rq->yld_count,
rq->sched_count, rq->sched_goidle,
rq->ttwu_count, rq->ttwu_local,
rq->rq_cpu_time,
- rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
+ rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount,
+ rq->yield_sleep_count);
seq_printf(seq, "\n");
.extra1 = &min_wakeup_granularity_ns,
.extra2 = &max_wakeup_granularity_ns,
},
+ {
+ .procname = "sched_yield_sleep_threshold",
+ .data = &sysctl_sched_yield_sleep_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_yield_sleep_duration",
+ .data = &sysctl_sched_yield_sleep_duration,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_SMP
{
.procname = "sched_tunable_scaling",