mirror of
https://github.com/adulau/aha.git
synced 2024-12-28 03:36:19 +00:00
sched: de-SCHED_OTHER-ize the RT path
The current wake-up code path tries to determine if it can optimize the wake-up to "this_cpu" by computing load calculations. The problem is that these calculations are only relevant to SCHED_OTHER tasks where load is king. For RT tasks, priority is king. So the load calculation is completely wasted bandwidth. Therefore, we create a new sched_class interface to help with pre-wakeup routing decisions and move the load calculation as a function of CFS task's class. Signed-off-by: Gregory Haskins <ghaskins@novell.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
697f0a487f
commit
e7693a362e
5 changed files with 195 additions and 140 deletions
|
@ -827,6 +827,7 @@ struct sched_class {
|
|||
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
|
||||
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
|
||||
void (*yield_task) (struct rq *rq);
|
||||
int (*select_task_rq)(struct task_struct *p, int sync);
|
||||
|
||||
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
|
||||
|
||||
|
|
167
kernel/sched.c
167
kernel/sched.c
|
@ -960,6 +960,13 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
|
|||
update_load_sub(&rq->load, load);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static unsigned long source_load(int cpu, int type);
|
||||
static unsigned long target_load(int cpu, int type);
|
||||
static unsigned long cpu_avg_load_per_task(int cpu);
|
||||
static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#include "sched_stats.h"
|
||||
#include "sched_idletask.c"
|
||||
#include "sched_fair.c"
|
||||
|
@ -1118,7 +1125,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
|
|||
/*
|
||||
* Is this task likely cache-hot:
|
||||
*/
|
||||
static inline int
|
||||
static int
|
||||
task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
|
||||
{
|
||||
s64 delta;
|
||||
|
@ -1343,7 +1350,7 @@ static unsigned long target_load(int cpu, int type)
|
|||
/*
|
||||
* Return the average load per task on the cpu's run queue
|
||||
*/
|
||||
static inline unsigned long cpu_avg_load_per_task(int cpu)
|
||||
static unsigned long cpu_avg_load_per_task(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long total = weighted_cpuload(cpu);
|
||||
|
@ -1500,58 +1507,6 @@ static int sched_balance_self(int cpu, int flag)
|
|||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* wake_idle() will wake a task on an idle cpu if task->cpu is
|
||||
* not idle and an idle cpu is available. The span of cpus to
|
||||
* search starts with cpus closest then further out as needed,
|
||||
* so we always favor a closer, idle cpu.
|
||||
*
|
||||
* Returns the CPU we should wake onto.
|
||||
*/
|
||||
#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
|
||||
static int wake_idle(int cpu, struct task_struct *p)
|
||||
{
|
||||
cpumask_t tmp;
|
||||
struct sched_domain *sd;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* If it is idle, then it is the best cpu to run this task.
|
||||
*
|
||||
* This cpu is also the best, if it has more than one task already.
|
||||
* Siblings must be also busy(in most cases) as they didn't already
|
||||
* pickup the extra load from this cpu and hence we need not check
|
||||
* sibling runqueue info. This will avoid the checks and cache miss
|
||||
* penalities associated with that.
|
||||
*/
|
||||
if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
|
||||
return cpu;
|
||||
|
||||
for_each_domain(cpu, sd) {
|
||||
if (sd->flags & SD_WAKE_IDLE) {
|
||||
cpus_and(tmp, sd->span, p->cpus_allowed);
|
||||
for_each_cpu_mask(i, tmp) {
|
||||
if (idle_cpu(i)) {
|
||||
if (i != task_cpu(p)) {
|
||||
schedstat_inc(p,
|
||||
se.nr_wakeups_idle);
|
||||
}
|
||||
return i;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return cpu;
|
||||
}
|
||||
#else
|
||||
static inline int wake_idle(int cpu, struct task_struct *p)
|
||||
{
|
||||
return cpu;
|
||||
}
|
||||
#endif
|
||||
|
||||
/***
|
||||
* try_to_wake_up - wake up a thread
|
||||
* @p: the to-be-woken-up thread
|
||||
|
@ -1573,8 +1528,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
|
|||
long old_state;
|
||||
struct rq *rq;
|
||||
#ifdef CONFIG_SMP
|
||||
struct sched_domain *sd, *this_sd = NULL;
|
||||
unsigned long load, this_load;
|
||||
int new_cpu;
|
||||
#endif
|
||||
|
||||
|
@ -1594,90 +1547,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
|
|||
if (unlikely(task_running(rq, p)))
|
||||
goto out_activate;
|
||||
|
||||
new_cpu = cpu;
|
||||
|
||||
schedstat_inc(rq, ttwu_count);
|
||||
if (cpu == this_cpu) {
|
||||
schedstat_inc(rq, ttwu_local);
|
||||
goto out_set_cpu;
|
||||
}
|
||||
|
||||
for_each_domain(this_cpu, sd) {
|
||||
if (cpu_isset(cpu, sd->span)) {
|
||||
schedstat_inc(sd, ttwu_wake_remote);
|
||||
this_sd = sd;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
|
||||
goto out_set_cpu;
|
||||
|
||||
/*
|
||||
* Check for affine wakeup and passive balancing possibilities.
|
||||
*/
|
||||
if (this_sd) {
|
||||
int idx = this_sd->wake_idx;
|
||||
unsigned int imbalance;
|
||||
|
||||
imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
|
||||
|
||||
load = source_load(cpu, idx);
|
||||
this_load = target_load(this_cpu, idx);
|
||||
|
||||
new_cpu = this_cpu; /* Wake to this CPU if we can */
|
||||
|
||||
if (this_sd->flags & SD_WAKE_AFFINE) {
|
||||
unsigned long tl = this_load;
|
||||
unsigned long tl_per_task;
|
||||
|
||||
/*
|
||||
* Attract cache-cold tasks on sync wakeups:
|
||||
*/
|
||||
if (sync && !task_hot(p, rq->clock, this_sd))
|
||||
goto out_set_cpu;
|
||||
|
||||
schedstat_inc(p, se.nr_wakeups_affine_attempts);
|
||||
tl_per_task = cpu_avg_load_per_task(this_cpu);
|
||||
|
||||
/*
|
||||
* If sync wakeup then subtract the (maximum possible)
|
||||
* effect of the currently running task from the load
|
||||
* of the current CPU:
|
||||
*/
|
||||
if (sync)
|
||||
tl -= current->se.load.weight;
|
||||
|
||||
if ((tl <= load &&
|
||||
tl + target_load(cpu, idx) <= tl_per_task) ||
|
||||
100*(tl + p->se.load.weight) <= imbalance*load) {
|
||||
/*
|
||||
* This domain has SD_WAKE_AFFINE and
|
||||
* p is cache cold in this domain, and
|
||||
* there is no bad imbalance.
|
||||
*/
|
||||
schedstat_inc(this_sd, ttwu_move_affine);
|
||||
schedstat_inc(p, se.nr_wakeups_affine);
|
||||
goto out_set_cpu;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Start passive balancing when half the imbalance_pct
|
||||
* limit is reached.
|
||||
*/
|
||||
if (this_sd->flags & SD_WAKE_BALANCE) {
|
||||
if (imbalance*this_load <= 100*load) {
|
||||
schedstat_inc(this_sd, ttwu_move_balance);
|
||||
schedstat_inc(p, se.nr_wakeups_passive);
|
||||
goto out_set_cpu;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
|
||||
out_set_cpu:
|
||||
new_cpu = wake_idle(new_cpu, p);
|
||||
new_cpu = p->sched_class->select_task_rq(p, sync);
|
||||
if (new_cpu != cpu) {
|
||||
set_task_cpu(p, new_cpu);
|
||||
task_rq_unlock(rq, &flags);
|
||||
|
@ -1693,6 +1563,23 @@ out_set_cpu:
|
|||
cpu = task_cpu(p);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
schedstat_inc(rq, ttwu_count);
|
||||
if (cpu == this_cpu)
|
||||
schedstat_inc(rq, ttwu_local);
|
||||
else {
|
||||
struct sched_domain *sd;
|
||||
for_each_domain(this_cpu, sd) {
|
||||
if (cpu_isset(cpu, sd->span)) {
|
||||
schedstat_inc(sd, ttwu_wake_remote);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
out_activate:
|
||||
#endif /* CONFIG_SMP */
|
||||
schedstat_inc(p, se.nr_wakeups);
|
||||
|
|
|
@ -860,6 +860,151 @@ static void yield_task_fair(struct rq *rq)
|
|||
se->vruntime = rightmost->vruntime + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* wake_idle() will wake a task on an idle cpu if task->cpu is
|
||||
* not idle and an idle cpu is available. The span of cpus to
|
||||
* search starts with cpus closest then further out as needed,
|
||||
* so we always favor a closer, idle cpu.
|
||||
*
|
||||
* Returns the CPU we should wake onto.
|
||||
*/
|
||||
#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
|
||||
static int wake_idle(int cpu, struct task_struct *p)
|
||||
{
|
||||
cpumask_t tmp;
|
||||
struct sched_domain *sd;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* If it is idle, then it is the best cpu to run this task.
|
||||
*
|
||||
* This cpu is also the best, if it has more than one task already.
|
||||
* Siblings must be also busy(in most cases) as they didn't already
|
||||
* pickup the extra load from this cpu and hence we need not check
|
||||
* sibling runqueue info. This will avoid the checks and cache miss
|
||||
* penalities associated with that.
|
||||
*/
|
||||
if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
|
||||
return cpu;
|
||||
|
||||
for_each_domain(cpu, sd) {
|
||||
if (sd->flags & SD_WAKE_IDLE) {
|
||||
cpus_and(tmp, sd->span, p->cpus_allowed);
|
||||
for_each_cpu_mask(i, tmp) {
|
||||
if (idle_cpu(i)) {
|
||||
if (i != task_cpu(p)) {
|
||||
schedstat_inc(p,
|
||||
se.nr_wakeups_idle);
|
||||
}
|
||||
return i;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return cpu;
|
||||
}
|
||||
#else
|
||||
static inline int wake_idle(int cpu, struct task_struct *p)
|
||||
{
|
||||
return cpu;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int select_task_rq_fair(struct task_struct *p, int sync)
|
||||
{
|
||||
int cpu, this_cpu;
|
||||
struct rq *rq;
|
||||
struct sched_domain *sd, *this_sd = NULL;
|
||||
int new_cpu;
|
||||
|
||||
cpu = task_cpu(p);
|
||||
rq = task_rq(p);
|
||||
this_cpu = smp_processor_id();
|
||||
new_cpu = cpu;
|
||||
|
||||
for_each_domain(this_cpu, sd) {
|
||||
if (cpu_isset(cpu, sd->span)) {
|
||||
this_sd = sd;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
|
||||
goto out_set_cpu;
|
||||
|
||||
/*
|
||||
* Check for affine wakeup and passive balancing possibilities.
|
||||
*/
|
||||
if (this_sd) {
|
||||
int idx = this_sd->wake_idx;
|
||||
unsigned int imbalance;
|
||||
unsigned long load, this_load;
|
||||
|
||||
imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
|
||||
|
||||
load = source_load(cpu, idx);
|
||||
this_load = target_load(this_cpu, idx);
|
||||
|
||||
new_cpu = this_cpu; /* Wake to this CPU if we can */
|
||||
|
||||
if (this_sd->flags & SD_WAKE_AFFINE) {
|
||||
unsigned long tl = this_load;
|
||||
unsigned long tl_per_task;
|
||||
|
||||
/*
|
||||
* Attract cache-cold tasks on sync wakeups:
|
||||
*/
|
||||
if (sync && !task_hot(p, rq->clock, this_sd))
|
||||
goto out_set_cpu;
|
||||
|
||||
schedstat_inc(p, se.nr_wakeups_affine_attempts);
|
||||
tl_per_task = cpu_avg_load_per_task(this_cpu);
|
||||
|
||||
/*
|
||||
* If sync wakeup then subtract the (maximum possible)
|
||||
* effect of the currently running task from the load
|
||||
* of the current CPU:
|
||||
*/
|
||||
if (sync)
|
||||
tl -= current->se.load.weight;
|
||||
|
||||
if ((tl <= load &&
|
||||
tl + target_load(cpu, idx) <= tl_per_task) ||
|
||||
100*(tl + p->se.load.weight) <= imbalance*load) {
|
||||
/*
|
||||
* This domain has SD_WAKE_AFFINE and
|
||||
* p is cache cold in this domain, and
|
||||
* there is no bad imbalance.
|
||||
*/
|
||||
schedstat_inc(this_sd, ttwu_move_affine);
|
||||
schedstat_inc(p, se.nr_wakeups_affine);
|
||||
goto out_set_cpu;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Start passive balancing when half the imbalance_pct
|
||||
* limit is reached.
|
||||
*/
|
||||
if (this_sd->flags & SD_WAKE_BALANCE) {
|
||||
if (imbalance*this_load <= 100*load) {
|
||||
schedstat_inc(this_sd, ttwu_move_balance);
|
||||
schedstat_inc(p, se.nr_wakeups_passive);
|
||||
goto out_set_cpu;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
|
||||
out_set_cpu:
|
||||
return wake_idle(new_cpu, p);
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
|
@ -1153,6 +1298,9 @@ static const struct sched_class fair_sched_class = {
|
|||
.enqueue_task = enqueue_task_fair,
|
||||
.dequeue_task = dequeue_task_fair,
|
||||
.yield_task = yield_task_fair,
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_fair,
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
.check_preempt_curr = check_preempt_wakeup,
|
||||
|
||||
|
|
|
@ -5,6 +5,12 @@
|
|||
* handled in sched_fair.c)
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int select_task_rq_idle(struct task_struct *p, int sync)
|
||||
{
|
||||
return task_cpu(p); /* IDLE tasks as never migrated */
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
/*
|
||||
* Idle tasks are unconditionally rescheduled:
|
||||
*/
|
||||
|
@ -72,6 +78,9 @@ const struct sched_class idle_sched_class = {
|
|||
|
||||
/* dequeue is not valid, we print a debug message there: */
|
||||
.dequeue_task = dequeue_task_idle,
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_idle,
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
.check_preempt_curr = check_preempt_curr_idle,
|
||||
|
||||
|
|
|
@ -150,6 +150,13 @@ yield_task_rt(struct rq *rq)
|
|||
requeue_task_rt(rq, rq->curr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int select_task_rq_rt(struct task_struct *p, int sync)
|
||||
{
|
||||
return task_cpu(p);
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
|
@ -667,6 +674,9 @@ const struct sched_class rt_sched_class = {
|
|||
.enqueue_task = enqueue_task_rt,
|
||||
.dequeue_task = dequeue_task_rt,
|
||||
.yield_task = yield_task_rt,
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_rt,
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
.check_preempt_curr = check_preempt_curr_rt,
|
||||
|
||||
|
|
Loading…
Reference in a new issue