From 8f1bc385cfbab474db6c27b5af1e439614f3025c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 19 Apr 2008 19:45:00 +0200 Subject: [PATCH] sched: fair: weight calculations In order to level the hierarchy, we need to calculate load based on the root view. That is, each task's load is in the same unit. A / \ B 1 / \ 2 3 To compute 1's load we do: weight(1) -------------- rq_weight(A) To compute 2's load we do: weight(2) weight(B) ------------ * ----------- rq_weight(B) rw_weight(A) This yields load fractions in comparable units. The consequence is that it changes virtual time. We used to have: time_{i} vtime_{i} = ------------ weight_{i} vtime = \Sum vtime_{i} = time / rq_weight. But with the new way of load calculation we get that vtime equals time. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 9 ++--- kernel/sched_fair.c | 95 +++++++++++++++++++++++++++++---------------- 2 files changed, 65 insertions(+), 39 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 3202462109f..6d55dfc56ca 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1320,6 +1320,9 @@ static void __resched_task(struct task_struct *p, int tif_bit) */ #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) +/* + * delta *= weight / lw + */ static unsigned long calc_delta_mine(unsigned long delta_exec, unsigned long weight, struct load_weight *lw) @@ -1342,12 +1345,6 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); } -static inline unsigned long -calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) -{ - return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); -} - static inline void update_load_add(struct load_weight *lw, unsigned long inc) { lw->weight += inc; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ed8ce329899..d72e8b41b3e 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -333,6 +333,34 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, } #endif +/* + * delta *= w / rw + */ +static inline unsigned long +calc_delta_weight(unsigned long delta, struct sched_entity *se) +{ + for_each_sched_entity(se) { + delta = calc_delta_mine(delta, + se->load.weight, &cfs_rq_of(se)->load); + } + + return delta; +} + +/* + * delta *= rw / w + */ +static inline unsigned long +calc_delta_fair(unsigned long delta, struct sched_entity *se) +{ + for_each_sched_entity(se) { + delta = calc_delta_mine(delta, + cfs_rq_of(se)->load.weight, &se->load); + } + + return delta; +} + /* * The idea is to set a period in which each task runs once. * @@ -362,47 +390,54 @@ static u64 __sched_period(unsigned long nr_running) */ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) { - u64 slice = __sched_period(cfs_rq->nr_running); - - for_each_sched_entity(se) { - cfs_rq = cfs_rq_of(se); - - slice *= se->load.weight; - do_div(slice, cfs_rq->load.weight); - } - - - return slice; + return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); } /* * We calculate the vruntime slice of a to be inserted task * - * vs = s/w = p/rw + * vs = s*rw/w = p */ static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) { unsigned long nr_running = cfs_rq->nr_running; - unsigned long weight; - u64 vslice; if (!se->on_rq) nr_running++; - vslice = __sched_period(nr_running); + return __sched_period(nr_running); +} + +/* + * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in + * that it favours >=0 over <0. + * + * -20 | + * | + * 0 --------+------- + * .' + * 19 .' + * + */ +static unsigned long +calc_delta_asym(unsigned long delta, struct sched_entity *se) +{ + struct load_weight lw = { + .weight = NICE_0_LOAD, + .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT) + }; for_each_sched_entity(se) { - cfs_rq = cfs_rq_of(se); + struct load_weight *se_lw = &se->load; - weight = cfs_rq->load.weight; - if (!se->on_rq) - weight += se->load.weight; + if (se->load.weight < NICE_0_LOAD) + se_lw = &lw; - vslice *= NICE_0_LOAD; - do_div(vslice, weight); + delta = calc_delta_mine(delta, + cfs_rq_of(se)->load.weight, se_lw); } - return vslice; + return delta; } /* @@ -419,11 +454,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq, exec_clock, delta_exec); - delta_exec_weighted = delta_exec; - if (unlikely(curr->load.weight != NICE_0_LOAD)) { - delta_exec_weighted = calc_delta_fair(delta_exec_weighted, - &curr->load); - } + delta_exec_weighted = calc_delta_fair(delta_exec, curr); curr->vruntime += delta_exec_weighted; } @@ -632,8 +663,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) /* sleeps upto a single latency don't count. */ if (sched_feat(NEW_FAIR_SLEEPERS)) { if (sched_feat(NORMALIZED_SLEEPER)) - vruntime -= calc_delta_fair(sysctl_sched_latency, - &cfs_rq->load); + vruntime -= calc_delta_weight(sysctl_sched_latency, se); else vruntime -= sysctl_sched_latency; } @@ -1132,11 +1162,10 @@ static unsigned long wakeup_gran(struct sched_entity *se) unsigned long gran = sysctl_sched_wakeup_granularity; /* - * More easily preempt - nice tasks, while not making - * it harder for + nice tasks. + * More easily preempt - nice tasks, while not making it harder for + * + nice tasks. */ - if (unlikely(se->load.weight > NICE_0_LOAD)) - gran = calc_delta_fair(gran, &se->load); + gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); return gran; }