mirror of
https://github.com/adulau/aha.git
synced 2024-12-28 19:56:18 +00:00
softlockup: check all tasks in hung_task
Impact: extend the scope of hung-task checks Changed the default value of hung_task_check_count to PID_MAX_LIMIT. hung_task_batch_count added to put an upper bound on the critical section. Every hung_task_batch_count checks, the rcu lock is dropped and re-acquired, so it is never held for too long. Keeping the critical section small minimizes the time preemption is disabled and keeps rcu grace periods small. To prevent following a stale pointer, get_task_struct is called on g and t. To verify that g and t have not been unhashed while outside the critical section, the task states are checked. The design was proposed by Frédéric Weisbecker. Signed-off-by: Mandeep Singh Baines <msb@google.com> Suggested-by: Frédéric Weisbecker <fweisbec@gmail.com> Acked-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
5e54f5986a
commit
ce9dbe244b
1 changed files with 37 additions and 2 deletions
|
@ -17,9 +17,18 @@
|
||||||
#include <linux/sysctl.h>
|
#include <linux/sysctl.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Have a reasonable limit on the number of tasks checked:
|
* The number of tasks checked:
|
||||||
*/
|
*/
|
||||||
unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
|
unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Limit number of tasks checked in a batch.
|
||||||
|
*
|
||||||
|
* This value controls the preemptibility of khungtaskd since preemption
|
||||||
|
* is disabled during the critical section. It also controls the size of
|
||||||
|
* the RCU grace period. So it needs to be upper-bounded.
|
||||||
|
*/
|
||||||
|
#define HUNG_TASK_BATCHING 1024
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Zero means infinite timeout - no checking done:
|
* Zero means infinite timeout - no checking done:
|
||||||
|
@ -109,6 +118,24 @@ static void check_hung_task(struct task_struct *t, unsigned long now,
|
||||||
panic("hung_task: blocked tasks");
|
panic("hung_task: blocked tasks");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To avoid extending the RCU grace period for an unbounded amount of time,
|
||||||
|
* periodically exit the critical section and enter a new one.
|
||||||
|
*
|
||||||
|
* For preemptible RCU it is sufficient to call rcu_read_unlock in order to
|
||||||
|
* exit the grace period. For classic RCU, a reschedule is required.
|
||||||
|
*/
|
||||||
|
static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
|
||||||
|
{
|
||||||
|
/* Take a reference on g and t before the RCU read lock is dropped, so the pointers are not stale afterwards. */
get_task_struct(g);
|
||||||
|
get_task_struct(t);
|
||||||
|
/* Leave the read-side critical section and offer a reschedule before entering a new one. */
rcu_read_unlock();
|
||||||
|
cond_resched();
|
||||||
|
rcu_read_lock();
|
||||||
|
/* Back under rcu_read_lock(): drop the temporary references taken above. */
put_task_struct(t);
|
||||||
|
put_task_struct(g);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
|
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
|
||||||
* a really long time (120 seconds). If that happens, print out
|
* a really long time (120 seconds). If that happens, print out
|
||||||
|
@ -117,6 +144,7 @@ static void check_hung_task(struct task_struct *t, unsigned long now,
|
||||||
static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
||||||
{
|
{
|
||||||
int max_count = sysctl_hung_task_check_count;
|
int max_count = sysctl_hung_task_check_count;
|
||||||
|
int batch_count = HUNG_TASK_BATCHING;
|
||||||
unsigned long now = get_timestamp();
|
unsigned long now = get_timestamp();
|
||||||
struct task_struct *g, *t;
|
struct task_struct *g, *t;
|
||||||
|
|
||||||
|
@ -131,6 +159,13 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
||||||
do_each_thread(g, t) {
|
do_each_thread(g, t) {
|
||||||
if (!--max_count)
|
if (!--max_count)
|
||||||
goto unlock;
|
goto unlock;
|
||||||
|
if (!--batch_count) {
|
||||||
|
batch_count = HUNG_TASK_BATCHING;
|
||||||
|
rcu_lock_break(g, t);
|
||||||
|
/* Exit if t or g was unhashed during refresh. */
|
||||||
|
if (t->state == TASK_DEAD || g->state == TASK_DEAD)
|
||||||
|
goto unlock;
|
||||||
|
}
|
||||||
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
|
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
|
||||||
if (t->state == TASK_UNINTERRUPTIBLE)
|
if (t->state == TASK_UNINTERRUPTIBLE)
|
||||||
check_hung_task(t, now, timeout);
|
check_hung_task(t, now, timeout);
|
||||||
|
|
Loading…
Reference in a new issue