mirror of
https://github.com/adulau/aha.git
synced 2024-12-28 19:56:18 +00:00
sched, timers: move calc_load() to scheduler
Dimitri Sivanich noticed that xtime_lock is held write-locked across calc_load(), which iterates over all online CPUs. That can cause long latencies for xtime_lock readers on large SMP systems. The load average calculation is a rough estimate anyway, so there is no real need to protect the readers vs. the update. It's not a problem when the avenrun array is updated while a reader copies the values. Instead of iterating over all online CPUs, let the scheduler_tick code update the number of active tasks shortly before the avenrun update happens. The avenrun update itself is handled by the CPU which calls do_timer(). [ Impact: reduce xtime_lock write-locked section ] Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Peter Zijlstra <peterz@infradead.org>
This commit is contained in:
parent
2ff799d3cf
commit
dce48a84ad
5 changed files with 81 additions and 66 deletions
|
@ -135,8 +135,8 @@ DECLARE_PER_CPU(unsigned long, process_counts);
|
||||||
extern int nr_processes(void);
|
extern int nr_processes(void);
|
||||||
extern unsigned long nr_running(void);
|
extern unsigned long nr_running(void);
|
||||||
extern unsigned long nr_uninterruptible(void);
|
extern unsigned long nr_uninterruptible(void);
|
||||||
extern unsigned long nr_active(void);
|
|
||||||
extern unsigned long nr_iowait(void);
|
extern unsigned long nr_iowait(void);
|
||||||
|
extern void calc_global_load(void);
|
||||||
|
|
||||||
extern unsigned long get_parent_ip(unsigned long addr);
|
extern unsigned long get_parent_ip(unsigned long addr);
|
||||||
|
|
||||||
|
|
|
@ -630,6 +630,10 @@ struct rq {
|
||||||
struct list_head migration_queue;
|
struct list_head migration_queue;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* calc_load related fields */
|
||||||
|
unsigned long calc_load_update;
|
||||||
|
long calc_load_active;
|
||||||
|
|
||||||
#ifdef CONFIG_SCHED_HRTICK
|
#ifdef CONFIG_SCHED_HRTICK
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
int hrtick_csd_pending;
|
int hrtick_csd_pending;
|
||||||
|
@ -1728,6 +1732,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static void calc_load_account_active(struct rq *this_rq);
|
||||||
|
|
||||||
#include "sched_stats.h"
|
#include "sched_stats.h"
|
||||||
#include "sched_idletask.c"
|
#include "sched_idletask.c"
|
||||||
#include "sched_fair.c"
|
#include "sched_fair.c"
|
||||||
|
@ -2856,19 +2862,57 @@ unsigned long nr_iowait(void)
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long nr_active(void)
|
/* Variables and functions for calc_load */
|
||||||
|
static atomic_long_t calc_load_tasks;
|
||||||
|
static unsigned long calc_load_update;
|
||||||
|
unsigned long avenrun[3];
|
||||||
|
EXPORT_SYMBOL(avenrun);
|
||||||
|
|
||||||
|
static unsigned long
|
||||||
|
calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
||||||
{
|
{
|
||||||
unsigned long i, running = 0, uninterruptible = 0;
|
load *= exp;
|
||||||
|
load += active * (FIXED_1 - exp);
|
||||||
|
return load >> FSHIFT;
|
||||||
|
}
|
||||||
|
|
||||||
for_each_online_cpu(i) {
|
/*
|
||||||
running += cpu_rq(i)->nr_running;
|
* calc_global_load - update the avenrun load estimates 10 ticks after the
|
||||||
uninterruptible += cpu_rq(i)->nr_uninterruptible;
|
* CPUs have updated calc_load_tasks.
|
||||||
|
*/
|
||||||
|
void calc_global_load(void)
|
||||||
|
{
|
||||||
|
unsigned long upd = calc_load_update + 10;
|
||||||
|
long active;
|
||||||
|
|
||||||
|
if (time_before(jiffies, upd))
|
||||||
|
return;
|
||||||
|
|
||||||
|
active = atomic_long_read(&calc_load_tasks);
|
||||||
|
active = active > 0 ? active * FIXED_1 : 0;
|
||||||
|
|
||||||
|
avenrun[0] = calc_load(avenrun[0], EXP_1, active);
|
||||||
|
avenrun[1] = calc_load(avenrun[1], EXP_5, active);
|
||||||
|
avenrun[2] = calc_load(avenrun[2], EXP_15, active);
|
||||||
|
|
||||||
|
calc_load_update += LOAD_FREQ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Either called from update_cpu_load() or from a cpu going idle
|
||||||
|
*/
|
||||||
|
static void calc_load_account_active(struct rq *this_rq)
|
||||||
|
{
|
||||||
|
long nr_active, delta;
|
||||||
|
|
||||||
|
nr_active = this_rq->nr_running;
|
||||||
|
nr_active += (long) this_rq->nr_uninterruptible;
|
||||||
|
|
||||||
|
if (nr_active != this_rq->calc_load_active) {
|
||||||
|
delta = nr_active - this_rq->calc_load_active;
|
||||||
|
this_rq->calc_load_active = nr_active;
|
||||||
|
atomic_long_add(delta, &calc_load_tasks);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely((long)uninterruptible < 0))
|
|
||||||
uninterruptible = 0;
|
|
||||||
|
|
||||||
return running + uninterruptible;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2899,6 +2943,11 @@ static void update_cpu_load(struct rq *this_rq)
|
||||||
new_load += scale-1;
|
new_load += scale-1;
|
||||||
this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
|
this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (time_after_eq(jiffies, this_rq->calc_load_update)) {
|
||||||
|
this_rq->calc_load_update += LOAD_FREQ;
|
||||||
|
calc_load_account_active(this_rq);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
@ -7091,6 +7140,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* remove the tasks which were accounted by rq from calc_load_tasks.
|
||||||
|
*/
|
||||||
|
static void calc_global_load_remove(struct rq *rq)
|
||||||
|
{
|
||||||
|
atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
|
||||||
|
}
|
||||||
#endif /* CONFIG_HOTPLUG_CPU */
|
#endif /* CONFIG_HOTPLUG_CPU */
|
||||||
|
|
||||||
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
|
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
|
||||||
|
@ -7325,6 +7382,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||||
/* Update our root-domain */
|
/* Update our root-domain */
|
||||||
rq = cpu_rq(cpu);
|
rq = cpu_rq(cpu);
|
||||||
spin_lock_irqsave(&rq->lock, flags);
|
spin_lock_irqsave(&rq->lock, flags);
|
||||||
|
rq->calc_load_update = calc_load_update;
|
||||||
|
rq->calc_load_active = 0;
|
||||||
if (rq->rd) {
|
if (rq->rd) {
|
||||||
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
|
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
|
||||||
|
|
||||||
|
@ -7364,7 +7423,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||||
cpuset_unlock();
|
cpuset_unlock();
|
||||||
migrate_nr_uninterruptible(rq);
|
migrate_nr_uninterruptible(rq);
|
||||||
BUG_ON(rq->nr_running != 0);
|
BUG_ON(rq->nr_running != 0);
|
||||||
|
calc_global_load_remove(rq);
|
||||||
/*
|
/*
|
||||||
* No need to migrate the tasks: it was best-effort if
|
* No need to migrate the tasks: it was best-effort if
|
||||||
* they didn't take sched_hotcpu_mutex. Just wake up
|
* they didn't take sched_hotcpu_mutex. Just wake up
|
||||||
|
@ -9059,6 +9118,8 @@ void __init sched_init(void)
|
||||||
rq = cpu_rq(i);
|
rq = cpu_rq(i);
|
||||||
spin_lock_init(&rq->lock);
|
spin_lock_init(&rq->lock);
|
||||||
rq->nr_running = 0;
|
rq->nr_running = 0;
|
||||||
|
rq->calc_load_active = 0;
|
||||||
|
rq->calc_load_update = jiffies + LOAD_FREQ;
|
||||||
init_cfs_rq(&rq->cfs, rq);
|
init_cfs_rq(&rq->cfs, rq);
|
||||||
init_rt_rq(&rq->rt, rq);
|
init_rt_rq(&rq->rt, rq);
|
||||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||||
|
@ -9166,6 +9227,9 @@ void __init sched_init(void)
|
||||||
* when this runqueue becomes "idle".
|
* when this runqueue becomes "idle".
|
||||||
*/
|
*/
|
||||||
init_idle(current, smp_processor_id());
|
init_idle(current, smp_processor_id());
|
||||||
|
|
||||||
|
calc_load_update = jiffies + LOAD_FREQ;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* During early bootup we pretend to be a normal task:
|
* During early bootup we pretend to be a normal task:
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -22,7 +22,8 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sy
|
||||||
static struct task_struct *pick_next_task_idle(struct rq *rq)
|
static struct task_struct *pick_next_task_idle(struct rq *rq)
|
||||||
{
|
{
|
||||||
schedstat_inc(rq, sched_goidle);
|
schedstat_inc(rq, sched_goidle);
|
||||||
|
/* adjust the active tasks as we might go into a long sleep */
|
||||||
|
calc_load_account_active(rq);
|
||||||
return rq->idle;
|
return rq->idle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This read-write spinlock protects us from races in SMP while
|
* This read-write spinlock protects us from races in SMP while
|
||||||
* playing with xtime and avenrun.
|
* playing with xtime.
|
||||||
*/
|
*/
|
||||||
__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
|
__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
|
||||||
|
|
||||||
|
|
|
@ -1122,47 +1122,6 @@ void update_process_times(int user_tick)
|
||||||
run_posix_cpu_timers(p);
|
run_posix_cpu_timers(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Nr of active tasks - counted in fixed-point numbers
|
|
||||||
*/
|
|
||||||
static unsigned long count_active_tasks(void)
|
|
||||||
{
|
|
||||||
return nr_active() * FIXED_1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Hmm.. Changed this, as the GNU make sources (load.c) seems to
|
|
||||||
* imply that avenrun[] is the standard name for this kind of thing.
|
|
||||||
* Nothing else seems to be standardized: the fractional size etc
|
|
||||||
* all seem to differ on different machines.
|
|
||||||
*
|
|
||||||
* Requires xtime_lock to access.
|
|
||||||
*/
|
|
||||||
unsigned long avenrun[3];
|
|
||||||
|
|
||||||
EXPORT_SYMBOL(avenrun);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* calc_load - given tick count, update the avenrun load estimates.
|
|
||||||
* This is called while holding a write_lock on xtime_lock.
|
|
||||||
*/
|
|
||||||
static inline void calc_load(unsigned long ticks)
|
|
||||||
{
|
|
||||||
unsigned long active_tasks; /* fixed-point */
|
|
||||||
static int count = LOAD_FREQ;
|
|
||||||
|
|
||||||
count -= ticks;
|
|
||||||
if (unlikely(count < 0)) {
|
|
||||||
active_tasks = count_active_tasks();
|
|
||||||
do {
|
|
||||||
CALC_LOAD(avenrun[0], EXP_1, active_tasks);
|
|
||||||
CALC_LOAD(avenrun[1], EXP_5, active_tasks);
|
|
||||||
CALC_LOAD(avenrun[2], EXP_15, active_tasks);
|
|
||||||
count += LOAD_FREQ;
|
|
||||||
} while (count < 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function runs timers and the timer-tq in bottom half context.
|
* This function runs timers and the timer-tq in bottom half context.
|
||||||
*/
|
*/
|
||||||
|
@ -1186,16 +1145,6 @@ void run_local_timers(void)
|
||||||
softlockup_tick();
|
softlockup_tick();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Called by the timer interrupt. xtime_lock must already be taken
|
|
||||||
* by the timer IRQ!
|
|
||||||
*/
|
|
||||||
static inline void update_times(unsigned long ticks)
|
|
||||||
{
|
|
||||||
update_wall_time();
|
|
||||||
calc_load(ticks);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The 64-bit jiffies value is not atomic - you MUST NOT read it
|
* The 64-bit jiffies value is not atomic - you MUST NOT read it
|
||||||
* without sampling the sequence number in xtime_lock.
|
* without sampling the sequence number in xtime_lock.
|
||||||
|
@ -1205,7 +1154,8 @@ static inline void update_times(unsigned long ticks)
|
||||||
void do_timer(unsigned long ticks)
|
void do_timer(unsigned long ticks)
|
||||||
{
|
{
|
||||||
jiffies_64 += ticks;
|
jiffies_64 += ticks;
|
||||||
update_times(ticks);
|
update_wall_time();
|
||||||
|
calc_global_load();
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __ARCH_WANT_SYS_ALARM
|
#ifdef __ARCH_WANT_SYS_ALARM
|
||||||
|
|
Loading…
Reference in a new issue