mirror of
https://github.com/adulau/aha.git
synced 2024-12-28 03:36:19 +00:00
sched: Remove reciprocal for cpu_power
It's a source of failure; also, now that cpu_power is dynamic, it's a waste of time. before: <idle>-0 [000] 132.877936: find_busiest_group: avg_load: 0 group_load: 8241 power: 1 after: bash-1689 [001] 137.862151: find_busiest_group: avg_load: 10636288 group_load: 10387 power: 1 [ v2: build fix from Andreas Herrmann ] Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Tested-by: Andreas Herrmann <andreas.herrmann3@amd.com> Acked-by: Andreas Herrmann <andreas.herrmann3@amd.com> Acked-by: Gautham R Shenoy <ego@in.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> LKML-Reference: <20090901083826.425896304@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
d899a789c2
commit
18a3885fc1
2 changed files with 36 additions and 75 deletions
|
@ -860,15 +860,9 @@ struct sched_group {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CPU power of this group, SCHED_LOAD_SCALE being max power for a
|
* CPU power of this group, SCHED_LOAD_SCALE being max power for a
|
||||||
* single CPU. This is read only (except for setup, hotplug CPU).
|
* single CPU.
|
||||||
* Note : Never change cpu_power without recompute its reciprocal
|
|
||||||
*/
|
*/
|
||||||
unsigned int __cpu_power;
|
unsigned int cpu_power;
|
||||||
/*
|
|
||||||
* reciprocal value of cpu_power to avoid expensive divides
|
|
||||||
* (see include/linux/reciprocal_div.h)
|
|
||||||
*/
|
|
||||||
u32 reciprocal_cpu_power;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The CPUs this group covers.
|
* The CPUs this group covers.
|
||||||
|
|
101
kernel/sched.c
101
kernel/sched.c
|
@ -64,7 +64,6 @@
|
||||||
#include <linux/tsacct_kern.h>
|
#include <linux/tsacct_kern.h>
|
||||||
#include <linux/kprobes.h>
|
#include <linux/kprobes.h>
|
||||||
#include <linux/delayacct.h>
|
#include <linux/delayacct.h>
|
||||||
#include <linux/reciprocal_div.h>
|
|
||||||
#include <linux/unistd.h>
|
#include <linux/unistd.h>
|
||||||
#include <linux/pagemap.h>
|
#include <linux/pagemap.h>
|
||||||
#include <linux/hrtimer.h>
|
#include <linux/hrtimer.h>
|
||||||
|
@ -120,30 +119,8 @@
|
||||||
*/
|
*/
|
||||||
#define RUNTIME_INF ((u64)~0ULL)
|
#define RUNTIME_INF ((u64)~0ULL)
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
|
|
||||||
static void double_rq_lock(struct rq *rq1, struct rq *rq2);
|
static void double_rq_lock(struct rq *rq1, struct rq *rq2);
|
||||||
|
|
||||||
/*
|
|
||||||
* Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
|
|
||||||
* Since cpu_power is a 'constant', we can use a reciprocal divide.
|
|
||||||
*/
|
|
||||||
static inline u32 sg_div_cpu_power(const struct sched_group *sg, u32 load)
|
|
||||||
{
|
|
||||||
return reciprocal_divide(load, sg->reciprocal_cpu_power);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Each time a sched group cpu_power is changed,
|
|
||||||
* we must compute its reciprocal value
|
|
||||||
*/
|
|
||||||
static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
|
|
||||||
{
|
|
||||||
sg->__cpu_power += val;
|
|
||||||
sg->reciprocal_cpu_power = reciprocal_value(sg->__cpu_power);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline int rt_policy(int policy)
|
static inline int rt_policy(int policy)
|
||||||
{
|
{
|
||||||
if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
|
if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
|
||||||
|
@ -2335,8 +2312,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Adjust by relative CPU power of the group */
|
/* Adjust by relative CPU power of the group */
|
||||||
avg_load = sg_div_cpu_power(group,
|
avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
|
||||||
avg_load * SCHED_LOAD_SCALE);
|
|
||||||
|
|
||||||
if (local_group) {
|
if (local_group) {
|
||||||
this_load = avg_load;
|
this_load = avg_load;
|
||||||
|
@ -3768,7 +3744,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
|
||||||
unsigned long weight = cpumask_weight(sched_domain_span(sd));
|
unsigned long weight = cpumask_weight(sched_domain_span(sd));
|
||||||
unsigned long power = SCHED_LOAD_SCALE;
|
unsigned long power = SCHED_LOAD_SCALE;
|
||||||
struct sched_group *sdg = sd->groups;
|
struct sched_group *sdg = sd->groups;
|
||||||
unsigned long old = sdg->__cpu_power;
|
|
||||||
|
|
||||||
/* here we could scale based on cpufreq */
|
/* here we could scale based on cpufreq */
|
||||||
|
|
||||||
|
@ -3783,33 +3758,26 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
|
||||||
if (!power)
|
if (!power)
|
||||||
power = 1;
|
power = 1;
|
||||||
|
|
||||||
if (power != old) {
|
sdg->cpu_power = power;
|
||||||
sdg->__cpu_power = power;
|
|
||||||
sdg->reciprocal_cpu_power = reciprocal_value(power);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void update_group_power(struct sched_domain *sd, int cpu)
|
static void update_group_power(struct sched_domain *sd, int cpu)
|
||||||
{
|
{
|
||||||
struct sched_domain *child = sd->child;
|
struct sched_domain *child = sd->child;
|
||||||
struct sched_group *group, *sdg = sd->groups;
|
struct sched_group *group, *sdg = sd->groups;
|
||||||
unsigned long power = sdg->__cpu_power;
|
|
||||||
|
|
||||||
if (!child) {
|
if (!child) {
|
||||||
update_cpu_power(sd, cpu);
|
update_cpu_power(sd, cpu);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
sdg->__cpu_power = 0;
|
sdg->cpu_power = 0;
|
||||||
|
|
||||||
group = child->groups;
|
group = child->groups;
|
||||||
do {
|
do {
|
||||||
sdg->__cpu_power += group->__cpu_power;
|
sdg->cpu_power += group->cpu_power;
|
||||||
group = group->next;
|
group = group->next;
|
||||||
} while (group != child->groups);
|
} while (group != child->groups);
|
||||||
|
|
||||||
if (power != sdg->__cpu_power)
|
|
||||||
sdg->reciprocal_cpu_power = reciprocal_value(sdg->__cpu_power);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -3889,8 +3857,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Adjust by relative CPU power of the group */
|
/* Adjust by relative CPU power of the group */
|
||||||
sgs->avg_load = sg_div_cpu_power(group,
|
sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power;
|
||||||
sgs->group_load * SCHED_LOAD_SCALE);
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -3902,14 +3869,14 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
|
||||||
* normalized nr_running number somewhere that negates
|
* normalized nr_running number somewhere that negates
|
||||||
* the hierarchy?
|
* the hierarchy?
|
||||||
*/
|
*/
|
||||||
avg_load_per_task = sg_div_cpu_power(group,
|
avg_load_per_task = (sum_avg_load_per_task * SCHED_LOAD_SCALE) /
|
||||||
sum_avg_load_per_task * SCHED_LOAD_SCALE);
|
group->cpu_power;
|
||||||
|
|
||||||
if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
|
if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
|
||||||
sgs->group_imb = 1;
|
sgs->group_imb = 1;
|
||||||
|
|
||||||
sgs->group_capacity =
|
sgs->group_capacity =
|
||||||
DIV_ROUND_CLOSEST(group->__cpu_power, SCHED_LOAD_SCALE);
|
DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -3951,7 +3918,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
sds->total_load += sgs.group_load;
|
sds->total_load += sgs.group_load;
|
||||||
sds->total_pwr += group->__cpu_power;
|
sds->total_pwr += group->cpu_power;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In case the child domain prefers tasks go to siblings
|
* In case the child domain prefers tasks go to siblings
|
||||||
|
@ -4016,28 +3983,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
|
||||||
* moving them.
|
* moving them.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
pwr_now += sds->busiest->__cpu_power *
|
pwr_now += sds->busiest->cpu_power *
|
||||||
min(sds->busiest_load_per_task, sds->max_load);
|
min(sds->busiest_load_per_task, sds->max_load);
|
||||||
pwr_now += sds->this->__cpu_power *
|
pwr_now += sds->this->cpu_power *
|
||||||
min(sds->this_load_per_task, sds->this_load);
|
min(sds->this_load_per_task, sds->this_load);
|
||||||
pwr_now /= SCHED_LOAD_SCALE;
|
pwr_now /= SCHED_LOAD_SCALE;
|
||||||
|
|
||||||
/* Amount of load we'd subtract */
|
/* Amount of load we'd subtract */
|
||||||
tmp = sg_div_cpu_power(sds->busiest,
|
tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) /
|
||||||
sds->busiest_load_per_task * SCHED_LOAD_SCALE);
|
sds->busiest->cpu_power;
|
||||||
if (sds->max_load > tmp)
|
if (sds->max_load > tmp)
|
||||||
pwr_move += sds->busiest->__cpu_power *
|
pwr_move += sds->busiest->cpu_power *
|
||||||
min(sds->busiest_load_per_task, sds->max_load - tmp);
|
min(sds->busiest_load_per_task, sds->max_load - tmp);
|
||||||
|
|
||||||
/* Amount of load we'd add */
|
/* Amount of load we'd add */
|
||||||
if (sds->max_load * sds->busiest->__cpu_power <
|
if (sds->max_load * sds->busiest->cpu_power <
|
||||||
sds->busiest_load_per_task * SCHED_LOAD_SCALE)
|
sds->busiest_load_per_task * SCHED_LOAD_SCALE)
|
||||||
tmp = sg_div_cpu_power(sds->this,
|
tmp = (sds->max_load * sds->busiest->cpu_power) /
|
||||||
sds->max_load * sds->busiest->__cpu_power);
|
sds->this->cpu_power;
|
||||||
else
|
else
|
||||||
tmp = sg_div_cpu_power(sds->this,
|
tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) /
|
||||||
sds->busiest_load_per_task * SCHED_LOAD_SCALE);
|
sds->this->cpu_power;
|
||||||
pwr_move += sds->this->__cpu_power *
|
pwr_move += sds->this->cpu_power *
|
||||||
min(sds->this_load_per_task, sds->this_load + tmp);
|
min(sds->this_load_per_task, sds->this_load + tmp);
|
||||||
pwr_move /= SCHED_LOAD_SCALE;
|
pwr_move /= SCHED_LOAD_SCALE;
|
||||||
|
|
||||||
|
@ -4072,8 +4039,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
|
||||||
sds->max_load - sds->busiest_load_per_task);
|
sds->max_load - sds->busiest_load_per_task);
|
||||||
|
|
||||||
/* How much load to actually move to equalise the imbalance */
|
/* How much load to actually move to equalise the imbalance */
|
||||||
*imbalance = min(max_pull * sds->busiest->__cpu_power,
|
*imbalance = min(max_pull * sds->busiest->cpu_power,
|
||||||
(sds->avg_load - sds->this_load) * sds->this->__cpu_power)
|
(sds->avg_load - sds->this_load) * sds->this->cpu_power)
|
||||||
/ SCHED_LOAD_SCALE;
|
/ SCHED_LOAD_SCALE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -4208,7 +4175,7 @@ static unsigned long power_of(int cpu)
|
||||||
if (!group)
|
if (!group)
|
||||||
return SCHED_LOAD_SCALE;
|
return SCHED_LOAD_SCALE;
|
||||||
|
|
||||||
return group->__cpu_power;
|
return group->cpu_power;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -7922,7 +7889,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!group->__cpu_power) {
|
if (!group->cpu_power) {
|
||||||
printk(KERN_CONT "\n");
|
printk(KERN_CONT "\n");
|
||||||
printk(KERN_ERR "ERROR: domain->cpu_power not "
|
printk(KERN_ERR "ERROR: domain->cpu_power not "
|
||||||
"set\n");
|
"set\n");
|
||||||
|
@ -7946,9 +7913,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
||||||
cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
|
cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
|
||||||
|
|
||||||
printk(KERN_CONT " %s", str);
|
printk(KERN_CONT " %s", str);
|
||||||
if (group->__cpu_power != SCHED_LOAD_SCALE) {
|
if (group->cpu_power != SCHED_LOAD_SCALE) {
|
||||||
printk(KERN_CONT " (__cpu_power = %d)",
|
printk(KERN_CONT " (cpu_power = %d)",
|
||||||
group->__cpu_power);
|
group->cpu_power);
|
||||||
}
|
}
|
||||||
|
|
||||||
group = group->next;
|
group = group->next;
|
||||||
|
@ -8233,7 +8200,7 @@ init_sched_build_groups(const struct cpumask *span,
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
cpumask_clear(sched_group_cpus(sg));
|
cpumask_clear(sched_group_cpus(sg));
|
||||||
sg->__cpu_power = 0;
|
sg->cpu_power = 0;
|
||||||
|
|
||||||
for_each_cpu(j, span) {
|
for_each_cpu(j, span) {
|
||||||
if (group_fn(j, cpu_map, NULL, tmpmask) != group)
|
if (group_fn(j, cpu_map, NULL, tmpmask) != group)
|
||||||
|
@ -8491,7 +8458,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
sg_inc_cpu_power(sg, sd->groups->__cpu_power);
|
sg->cpu_power += sd->groups->cpu_power;
|
||||||
}
|
}
|
||||||
sg = sg->next;
|
sg = sg->next;
|
||||||
} while (sg != group_head);
|
} while (sg != group_head);
|
||||||
|
@ -8528,7 +8495,7 @@ static int build_numa_sched_groups(struct s_data *d,
|
||||||
sd->groups = sg;
|
sd->groups = sg;
|
||||||
}
|
}
|
||||||
|
|
||||||
sg->__cpu_power = 0;
|
sg->cpu_power = 0;
|
||||||
cpumask_copy(sched_group_cpus(sg), d->nodemask);
|
cpumask_copy(sched_group_cpus(sg), d->nodemask);
|
||||||
sg->next = sg;
|
sg->next = sg;
|
||||||
cpumask_or(d->covered, d->covered, d->nodemask);
|
cpumask_or(d->covered, d->covered, d->nodemask);
|
||||||
|
@ -8551,7 +8518,7 @@ static int build_numa_sched_groups(struct s_data *d,
|
||||||
"Can not alloc domain group for node %d\n", j);
|
"Can not alloc domain group for node %d\n", j);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
sg->__cpu_power = 0;
|
sg->cpu_power = 0;
|
||||||
cpumask_copy(sched_group_cpus(sg), d->tmpmask);
|
cpumask_copy(sched_group_cpus(sg), d->tmpmask);
|
||||||
sg->next = prev->next;
|
sg->next = prev->next;
|
||||||
cpumask_or(d->covered, d->covered, d->tmpmask);
|
cpumask_or(d->covered, d->covered, d->tmpmask);
|
||||||
|
@ -8629,7 +8596,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
|
||||||
|
|
||||||
child = sd->child;
|
child = sd->child;
|
||||||
|
|
||||||
sd->groups->__cpu_power = 0;
|
sd->groups->cpu_power = 0;
|
||||||
|
|
||||||
if (!child) {
|
if (!child) {
|
||||||
power = SCHED_LOAD_SCALE;
|
power = SCHED_LOAD_SCALE;
|
||||||
|
@ -8645,7 +8612,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
|
||||||
power /= weight;
|
power /= weight;
|
||||||
power >>= SCHED_LOAD_SHIFT;
|
power >>= SCHED_LOAD_SHIFT;
|
||||||
}
|
}
|
||||||
sg_inc_cpu_power(sd->groups, power);
|
sd->groups->cpu_power += power;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8654,7 +8621,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
|
||||||
*/
|
*/
|
||||||
group = child->groups;
|
group = child->groups;
|
||||||
do {
|
do {
|
||||||
sg_inc_cpu_power(sd->groups, group->__cpu_power);
|
sd->groups->cpu_power += group->cpu_power;
|
||||||
group = group->next;
|
group = group->next;
|
||||||
} while (group != child->groups);
|
} while (group != child->groups);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue