sched: reduce balance-tasks overhead

At the moment, balance_tasks() provides low level functionality for both
move_tasks() and move_one_task() (indirectly) via the load_balance()
function (in the sched_class interface) which also provides dual
functionality.  This dual functionality complicates the interfaces and
internal mechanisms and increases the run time overhead of operations that
are called with two run queue locks held.

This patch addresses this issue and reduces the overhead of these
operations.

Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Peter Williams 2007-10-24 18:23:51 +02:00 committed by Ingo Molnar
parent a0f846aa76
commit e1d1484f72
5 changed files with 135 additions and 57 deletions

View file

@ -829,11 +829,14 @@ struct sched_class {
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct rq *busiest, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio);
int (*move_one_task) (struct rq *this_rq, int this_cpu,
struct rq *busiest, struct sched_domain *sd,
enum cpu_idle_type idle);
void (*set_curr_task) (struct rq *rq);
void (*task_tick) (struct rq *rq, struct task_struct *p);
void (*task_new) (struct rq *rq, struct task_struct *p);

View file

@ -838,11 +838,35 @@ struct rq_iterator {
struct task_struct *(*next)(void *);
};
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
int *this_best_prio, struct rq_iterator *iterator);
#ifdef CONFIG_SMP
/*
 * Forward declaration of the CONFIG_SMP balance_tasks(). It is declared
 * ahead of the scheduling-class sources included below, which call it;
 * the definition appears further down and returns the amount of load
 * moved (max_load_move minus the load still remaining to be moved).
 */
static unsigned long
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move, struct sched_domain *sd,
enum cpu_idle_type idle, int *all_pinned,
int *this_best_prio, struct rq_iterator *iterator);
/*
 * Forward declaration of the CONFIG_SMP iter_move_one_task(). The
 * definition further down walks @iterator and returns 1 as soon as one
 * task has been pulled from @busiest to @this_rq, or 0 if none could be.
 */
static int
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle,
struct rq_iterator *iterator);
#else
/*
 * !CONFIG_SMP stub for balance_tasks(): performs no balancing and always
 * returns 0 (no load moved), ignoring all of its arguments.
 */
static inline unsigned long
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move, struct sched_domain *sd,
enum cpu_idle_type idle, int *all_pinned,
int *this_best_prio, struct rq_iterator *iterator)
{
return 0;
}
/*
 * !CONFIG_SMP stub for iter_move_one_task(): never moves a task and
 * always returns 0, ignoring all of its arguments.
 */
static inline int
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle,
struct rq_iterator *iterator)
{
return 0;
}
#endif
#include "sched_stats.h"
#include "sched_idletask.c"
@ -2224,17 +2248,17 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
return 1;
}
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
int *this_best_prio, struct rq_iterator *iterator)
static unsigned long
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move, struct sched_domain *sd,
enum cpu_idle_type idle, int *all_pinned,
int *this_best_prio, struct rq_iterator *iterator)
{
int pulled = 0, pinned = 0, skip_for_load;
struct task_struct *p;
long rem_load_move = max_load_move;
if (max_nr_move == 0 || max_load_move == 0)
if (max_load_move == 0)
goto out;
pinned = 1;
@ -2267,7 +2291,7 @@ next:
* We only want to steal up to the prescribed number of tasks
* and the prescribed amount of weighted load.
*/
if (pulled < max_nr_move && rem_load_move > 0) {
if (rem_load_move > 0) {
if (p->prio < *this_best_prio)
*this_best_prio = p->prio;
p = iterator->next(iterator->arg);
@ -2275,7 +2299,7 @@ next:
}
out:
/*
* Right now, this is the only place pull_task() is called,
* Right now, this is one of only two places pull_task() is called,
* so we can safely collect pull_task() stats here rather than
* inside pull_task().
*/
@ -2283,8 +2307,8 @@ out:
if (all_pinned)
*all_pinned = pinned;
*load_moved = max_load_move - rem_load_move;
return pulled;
return max_load_move - rem_load_move;
}
/*
@ -2306,7 +2330,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
do {
total_load_moved +=
class->load_balance(this_rq, this_cpu, busiest,
ULONG_MAX, max_load_move - total_load_moved,
max_load_move - total_load_moved,
sd, idle, all_pinned, &this_best_prio);
class = class->next;
} while (class && max_load_move > total_load_moved);
@ -2314,6 +2338,32 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
return total_load_moved > 0;
}
/*
 * iter_move_one_task - pull the first migratable task the iterator yields.
 *
 * Walks the tasks produced by @iterator (start() first, then next(), both
 * called with iterator->arg) and pulls the first one that
 * can_migrate_task() accepts from @busiest onto @this_rq / @this_cpu.
 *
 * Returns 1 if a task was pulled, 0 if the iterator ran out of tasks
 * without finding one that could be migrated.
 */
static int
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
		   struct sched_domain *sd, enum cpu_idle_type idle,
		   struct rq_iterator *iterator)
{
	struct task_struct *p;
	int pinned = 0;

	for (p = iterator->start(iterator->arg); p;
	     p = iterator->next(iterator->arg)) {
		/* Skip candidates that may not be migrated to this_cpu. */
		if (!can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned))
			continue;

		pull_task(busiest, p, this_rq, this_cpu);
		/*
		 * Right now, this is only the second place pull_task()
		 * is called, so we can safely collect pull_task()
		 * stats here rather than inside pull_task().
		 */
		schedstat_inc(sd, lb_gained[idle]);
		return 1;
	}

	return 0;
}
/*
* move_one_task tries to move exactly one task from busiest to this_rq, as
* part of active balancing operations within "domain".
@ -2325,12 +2375,9 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
const struct sched_class *class;
int this_best_prio = MAX_PRIO;
for (class = sched_class_highest; class; class = class->next)
if (class->load_balance(this_rq, this_cpu, busiest,
1, ULONG_MAX, sd, idle, NULL,
&this_best_prio))
if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle))
return 1;
return 0;
@ -3267,18 +3314,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
{
}
/* Avoid "used but not defined" warning on UP */
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
int *this_best_prio, struct rq_iterator *iterator)
{
*load_moved = 0;
return 0;
}
#endif
DEFINE_PER_CPU(struct kernel_stat, kstat);

View file

@ -936,12 +936,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
static unsigned long
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
{
struct cfs_rq *busy_cfs_rq;
unsigned long load_moved, total_nr_moved = 0, nr_moved;
long rem_load_move = max_load_move;
struct rq_iterator cfs_rq_iterator;
@ -969,25 +968,47 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
#else
# define maxload rem_load_move
#endif
/* pass busy_cfs_rq argument into
/*
* pass busy_cfs_rq argument into
* load_balance_[start|next]_fair iterators
*/
cfs_rq_iterator.arg = busy_cfs_rq;
nr_moved = balance_tasks(this_rq, this_cpu, busiest,
max_nr_move, maxload, sd, idle, all_pinned,
&load_moved, this_best_prio, &cfs_rq_iterator);
rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
maxload, sd, idle, all_pinned,
this_best_prio,
&cfs_rq_iterator);
total_nr_moved += nr_moved;
max_nr_move -= nr_moved;
rem_load_move -= load_moved;
if (max_nr_move <= 0 || rem_load_move <= 0)
if (rem_load_move <= 0)
break;
}
return max_load_move - rem_load_move;
}
static int
move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
struct cfs_rq *busy_cfs_rq;
struct rq_iterator cfs_rq_iterator;
cfs_rq_iterator.start = load_balance_start_fair;
cfs_rq_iterator.next = load_balance_next_fair;
for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
/*
* pass busy_cfs_rq argument into
* load_balance_[start|next]_fair iterators
*/
cfs_rq_iterator.arg = busy_cfs_rq;
if (iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
&cfs_rq_iterator))
return 1;
}
return 0;
}
/*
* scheduler tick hitting a task of our scheduling class:
*/
@ -1064,6 +1085,7 @@ static const struct sched_class fair_sched_class = {
.put_prev_task = put_prev_task_fair,
.load_balance = load_balance_fair,
.move_one_task = move_one_task_fair,
.set_curr_task = set_curr_task_fair,
.task_tick = task_tick_fair,

View file

@ -39,9 +39,16 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
static unsigned long
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
{
return 0;
}
/*
 * move_one_task hook of the idle scheduling class (installed as
 * .move_one_task in idle_sched_class): never moves a task and always
 * returns 0.
 */
static int
move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
return 0;
}
@ -70,6 +77,7 @@ const struct sched_class idle_sched_class = {
.put_prev_task = put_prev_task_idle,
.load_balance = load_balance_idle,
.move_one_task = move_one_task_idle,
.set_curr_task = set_curr_task_idle,
.task_tick = task_tick_idle,

View file

@ -172,13 +172,11 @@ static struct task_struct *load_balance_next_rt(void *arg)
static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
{
int nr_moved;
struct rq_iterator rt_rq_iterator;
unsigned long load_moved;
rt_rq_iterator.start = load_balance_start_rt;
rt_rq_iterator.next = load_balance_next_rt;
@ -187,11 +185,22 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
*/
rt_rq_iterator.arg = busiest;
nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
max_load_move, sd, idle, all_pinned, &load_moved,
this_best_prio, &rt_rq_iterator);
return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
idle, all_pinned, this_best_prio, &rt_rq_iterator);
}
return load_moved;
static int
move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
struct rq_iterator rt_rq_iterator;
rt_rq_iterator.start = load_balance_start_rt;
rt_rq_iterator.next = load_balance_next_rt;
rt_rq_iterator.arg = busiest;
return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
&rt_rq_iterator);
}
static void task_tick_rt(struct rq *rq, struct task_struct *p)
@ -237,6 +246,7 @@ const struct sched_class rt_sched_class = {
.put_prev_task = put_prev_task_rt,
.load_balance = load_balance_rt,
.move_one_task = move_one_task_rt,
.set_curr_task = set_curr_task_rt,
.task_tick = task_tick_rt,