net: unix: fix inflight counting bug in garbage collector

Previously I assumed that the receive queues of candidates don't
change during the GC.  This is only half true, nothing can be received
from the queues (see comment in unix_gc()), but buffers could be added
through the other half of the socket pair, which may still have file
descriptors referring to it.

This can result in inc_inflight_move_tail() erronously increasing the
"inflight" counter for a unix socket for which dec_inflight() wasn't
previously called.  This in turn can trigger the "BUG_ON(total_refs <
inflight_refs)" in a later garbage collection run.

Fix this by only manipulating the "inflight" counter for sockets which
are candidates themselves.  Duplicating the file references in
unix_attach_fds() is also needed to prevent a socket becoming a
candidate for GC while the skb that contains it is not yet queued.

Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Miklos Szeredi 2008-11-09 15:23:57 +01:00 committed by Linus Torvalds
parent 058e3739f6
commit 6209344f5a
3 changed files with 62 additions and 19 deletions

View file

@ -54,6 +54,7 @@ struct unix_sock {
atomic_long_t inflight; atomic_long_t inflight;
spinlock_t lock; spinlock_t lock;
unsigned int gc_candidate : 1; unsigned int gc_candidate : 1;
unsigned int gc_maybe_cycle : 1;
wait_queue_head_t peer_wait; wait_queue_head_t peer_wait;
}; };
#define unix_sk(__sk) ((struct unix_sock *)__sk) #define unix_sk(__sk) ((struct unix_sock *)__sk)

View file

@ -1302,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb)
sock_wfree(skb); sock_wfree(skb);
} }
static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{ {
int i; int i;
/*
* Need to duplicate file references for the sake of garbage
* collection. Otherwise a socket in the fps might become a
* candidate for GC while the skb is not yet queued.
*/
UNIXCB(skb).fp = scm_fp_dup(scm->fp);
if (!UNIXCB(skb).fp)
return -ENOMEM;
for (i=scm->fp->count-1; i>=0; i--) for (i=scm->fp->count-1; i>=0; i--)
unix_inflight(scm->fp->fp[i]); unix_inflight(scm->fp->fp[i]);
UNIXCB(skb).fp = scm->fp;
skb->destructor = unix_destruct_fds; skb->destructor = unix_destruct_fds;
scm->fp = NULL; return 0;
} }
/* /*
@ -1368,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto out; goto out;
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
if (siocb->scm->fp) if (siocb->scm->fp) {
unix_attach_fds(siocb->scm, skb); err = unix_attach_fds(siocb->scm, skb);
if (err)
goto out_free;
}
unix_get_secdata(siocb->scm, skb); unix_get_secdata(siocb->scm, skb);
skb_reset_transport_header(skb); skb_reset_transport_header(skb);
@ -1538,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
size = min_t(int, size, skb_tailroom(skb)); size = min_t(int, size, skb_tailroom(skb));
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
if (siocb->scm->fp) if (siocb->scm->fp) {
unix_attach_fds(siocb->scm, skb); err = unix_attach_fds(siocb->scm, skb);
if (err) {
kfree_skb(skb);
goto out_err;
}
}
if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
kfree_skb(skb); kfree_skb(skb);

View file

@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
*/ */
struct sock *sk = unix_get_socket(*fp++); struct sock *sk = unix_get_socket(*fp++);
if (sk) { if (sk) {
struct unix_sock *u = unix_sk(sk);
/*
* Ignore non-candidates, they could
* have been added to the queues after
* starting the garbage collection
*/
if (u->gc_candidate) {
hit = true; hit = true;
func(unix_sk(sk)); func(u);
}
} }
} }
if (hit && hitlist != NULL) { if (hit && hitlist != NULL) {
@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u)
{ {
atomic_long_inc(&u->inflight); atomic_long_inc(&u->inflight);
/* /*
* If this is still a candidate, move it to the end of the * If this still might be part of a cycle, move it to the end
* list, so that it's checked even if it was already passed * of the list, so that it's checked even if it was already
* over * passed over
*/ */
if (u->gc_candidate) if (u->gc_maybe_cycle)
list_move_tail(&u->link, &gc_candidates); list_move_tail(&u->link, &gc_candidates);
} }
@ -267,6 +276,7 @@ void unix_gc(void)
struct unix_sock *next; struct unix_sock *next;
struct sk_buff_head hitlist; struct sk_buff_head hitlist;
struct list_head cursor; struct list_head cursor;
LIST_HEAD(not_cycle_list);
spin_lock(&unix_gc_lock); spin_lock(&unix_gc_lock);
@ -282,10 +292,14 @@ void unix_gc(void)
* *
* Holding unix_gc_lock will protect these candidates from * Holding unix_gc_lock will protect these candidates from
* being detached, and hence from gaining an external * being detached, and hence from gaining an external
* reference. This also means, that since there are no * reference. Since there are no possible receivers, all
* possible receivers, the receive queues of these sockets are * buffers currently on the candidates' queues stay there
* static during the GC, even though the dequeue is done * during the garbage collection.
* before the detach without atomicity guarantees. *
* We also know that no new candidate can be added onto the
* receive queues. Other, non candidate sockets _can_ be
* added to queue, so we must make sure only to touch
* candidates.
*/ */
list_for_each_entry_safe(u, next, &gc_inflight_list, link) { list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
long total_refs; long total_refs;
@ -299,6 +313,7 @@ void unix_gc(void)
if (total_refs == inflight_refs) { if (total_refs == inflight_refs) {
list_move_tail(&u->link, &gc_candidates); list_move_tail(&u->link, &gc_candidates);
u->gc_candidate = 1; u->gc_candidate = 1;
u->gc_maybe_cycle = 1;
} }
} }
@ -325,13 +340,23 @@ void unix_gc(void)
list_move(&cursor, &u->link); list_move(&cursor, &u->link);
if (atomic_long_read(&u->inflight) > 0) { if (atomic_long_read(&u->inflight) > 0) {
list_move_tail(&u->link, &gc_inflight_list); list_move_tail(&u->link, &not_cycle_list);
u->gc_candidate = 0; u->gc_maybe_cycle = 0;
scan_children(&u->sk, inc_inflight_move_tail, NULL); scan_children(&u->sk, inc_inflight_move_tail, NULL);
} }
} }
list_del(&cursor); list_del(&cursor);
/*
* not_cycle_list contains those sockets which do not make up a
* cycle. Restore these to the inflight list.
*/
while (!list_empty(&not_cycle_list)) {
u = list_entry(not_cycle_list.next, struct unix_sock, link);
u->gc_candidate = 0;
list_move_tail(&u->link, &gc_inflight_list);
}
/* /*
* Now gc_candidates contains only garbage. Restore original * Now gc_candidates contains only garbage. Restore original
* inflight counters for these as well, and remove the skbuffs * inflight counters for these as well, and remove the skbuffs