[PATCH] Remove down_write() from taskstats code invoked on the exit() path

In send_cpu_listeners(), which is called on the exit path, a down_write()
was protecting operations like skb_clone() and genlmsg_unicast() that do
GFP_KERNEL allocations.  If the oom-killer decides to kill tasks to satisfy
the allocations, the exit of those tasks could block on the same semaphore.

The down_write() was only needed to allow removal of invalid listeners from
the listener list.  The patch converts the down_write to a down_read and
defers the removal to a separate critical region.  This ensures that even
if the oom-killer is called, no other task's exit is blocked as it can
still acquire another down_read.

Thanks to Andrew Morton & Herbert Xu for pointing out the oom related
pitfalls, and to Chandra Seetharaman for suggesting this fix instead of
using something more complex like RCU.

Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Shailabh Nagar 2006-07-14 00:24:47 -07:00 committed by Linus Torvalds
parent f9fd8914c1
commit bb129994c3

View file

@@ -51,6 +51,7 @@ __read_mostly = {
struct listener { struct listener {
struct list_head list; struct list_head list;
pid_t pid; pid_t pid;
char valid;
}; };
struct listener_list { struct listener_list {
@@ -127,7 +128,7 @@ static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
struct listener *s, *tmp; struct listener *s, *tmp;
struct sk_buff *skb_next, *skb_cur = skb; struct sk_buff *skb_next, *skb_cur = skb;
void *reply = genlmsg_data(genlhdr); void *reply = genlmsg_data(genlhdr);
int rc, ret; int rc, ret, delcount = 0;
rc = genlmsg_end(skb, reply); rc = genlmsg_end(skb, reply);
if (rc < 0) { if (rc < 0) {
@@ -137,7 +138,7 @@ static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
rc = 0; rc = 0;
listeners = &per_cpu(listener_array, cpu); listeners = &per_cpu(listener_array, cpu);
down_write(&listeners->sem); down_read(&listeners->sem);
list_for_each_entry_safe(s, tmp, &listeners->list, list) { list_for_each_entry_safe(s, tmp, &listeners->list, list) {
skb_next = NULL; skb_next = NULL;
if (!list_is_last(&s->list, &listeners->list)) { if (!list_is_last(&s->list, &listeners->list)) {
@@ -150,14 +151,26 @@ static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
} }
ret = genlmsg_unicast(skb_cur, s->pid); ret = genlmsg_unicast(skb_cur, s->pid);
if (ret == -ECONNREFUSED) { if (ret == -ECONNREFUSED) {
list_del(&s->list); s->valid = 0;
kfree(s); delcount++;
rc = ret; rc = ret;
} }
skb_cur = skb_next; skb_cur = skb_next;
} }
up_write(&listeners->sem); up_read(&listeners->sem);
if (!delcount)
return rc;
/* Delete invalidated entries */
down_write(&listeners->sem);
list_for_each_entry_safe(s, tmp, &listeners->list, list) {
if (!s->valid) {
list_del(&s->list);
kfree(s);
}
}
up_write(&listeners->sem);
return rc; return rc;
} }
@@ -290,6 +303,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
goto cleanup; goto cleanup;
s->pid = pid; s->pid = pid;
INIT_LIST_HEAD(&s->list); INIT_LIST_HEAD(&s->list);
s->valid = 1;
listeners = &per_cpu(listener_array, cpu); listeners = &per_cpu(listener_array, cpu);
down_write(&listeners->sem); down_write(&listeners->sem);