mirror of
https://github.com/adulau/aha.git
synced 2025-01-01 05:36:24 +00:00
net: unix: fix inflight counting bug in garbage collector
Previously I assumed that the receive queues of candidates don't change during the GC. This is only half true, nothing can be received from the queues (see comment in unix_gc()), but buffers could be added through the other half of the socket pair, which may still have file descriptors referring to it. This can result in inc_inflight_move_tail() erronously increasing the "inflight" counter for a unix socket for which dec_inflight() wasn't previously called. This in turn can trigger the "BUG_ON(total_refs < inflight_refs)" in a later garbage collection run. Fix this by only manipulating the "inflight" counter for sockets which are candidates themselves. Duplicating the file references in unix_attach_fds() is also needed to prevent a socket becoming a candidate for GC while the skb that contains it is not yet queued. Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz> CC: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
058e3739f6
commit
6209344f5a
3 changed files with 62 additions and 19 deletions
|
@ -54,6 +54,7 @@ struct unix_sock {
|
||||||
atomic_long_t inflight;
|
atomic_long_t inflight;
|
||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
unsigned int gc_candidate : 1;
|
unsigned int gc_candidate : 1;
|
||||||
|
unsigned int gc_maybe_cycle : 1;
|
||||||
wait_queue_head_t peer_wait;
|
wait_queue_head_t peer_wait;
|
||||||
};
|
};
|
||||||
#define unix_sk(__sk) ((struct unix_sock *)__sk)
|
#define unix_sk(__sk) ((struct unix_sock *)__sk)
|
||||||
|
|
|
@ -1302,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb)
|
||||||
sock_wfree(skb);
|
sock_wfree(skb);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
|
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Need to duplicate file references for the sake of garbage
|
||||||
|
* collection. Otherwise a socket in the fps might become a
|
||||||
|
* candidate for GC while the skb is not yet queued.
|
||||||
|
*/
|
||||||
|
UNIXCB(skb).fp = scm_fp_dup(scm->fp);
|
||||||
|
if (!UNIXCB(skb).fp)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
for (i=scm->fp->count-1; i>=0; i--)
|
for (i=scm->fp->count-1; i>=0; i--)
|
||||||
unix_inflight(scm->fp->fp[i]);
|
unix_inflight(scm->fp->fp[i]);
|
||||||
UNIXCB(skb).fp = scm->fp;
|
|
||||||
skb->destructor = unix_destruct_fds;
|
skb->destructor = unix_destruct_fds;
|
||||||
scm->fp = NULL;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1368,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
|
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
|
||||||
if (siocb->scm->fp)
|
if (siocb->scm->fp) {
|
||||||
unix_attach_fds(siocb->scm, skb);
|
err = unix_attach_fds(siocb->scm, skb);
|
||||||
|
if (err)
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
unix_get_secdata(siocb->scm, skb);
|
unix_get_secdata(siocb->scm, skb);
|
||||||
|
|
||||||
skb_reset_transport_header(skb);
|
skb_reset_transport_header(skb);
|
||||||
|
@ -1538,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
|
||||||
size = min_t(int, size, skb_tailroom(skb));
|
size = min_t(int, size, skb_tailroom(skb));
|
||||||
|
|
||||||
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
|
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
|
||||||
if (siocb->scm->fp)
|
if (siocb->scm->fp) {
|
||||||
unix_attach_fds(siocb->scm, skb);
|
err = unix_attach_fds(siocb->scm, skb);
|
||||||
|
if (err) {
|
||||||
|
kfree_skb(skb);
|
||||||
|
goto out_err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
|
if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
|
||||||
kfree_skb(skb);
|
kfree_skb(skb);
|
||||||
|
|
|
@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
|
||||||
*/
|
*/
|
||||||
struct sock *sk = unix_get_socket(*fp++);
|
struct sock *sk = unix_get_socket(*fp++);
|
||||||
if (sk) {
|
if (sk) {
|
||||||
hit = true;
|
struct unix_sock *u = unix_sk(sk);
|
||||||
func(unix_sk(sk));
|
|
||||||
|
/*
|
||||||
|
* Ignore non-candidates, they could
|
||||||
|
* have been added to the queues after
|
||||||
|
* starting the garbage collection
|
||||||
|
*/
|
||||||
|
if (u->gc_candidate) {
|
||||||
|
hit = true;
|
||||||
|
func(u);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (hit && hitlist != NULL) {
|
if (hit && hitlist != NULL) {
|
||||||
|
@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u)
|
||||||
{
|
{
|
||||||
atomic_long_inc(&u->inflight);
|
atomic_long_inc(&u->inflight);
|
||||||
/*
|
/*
|
||||||
* If this is still a candidate, move it to the end of the
|
* If this still might be part of a cycle, move it to the end
|
||||||
* list, so that it's checked even if it was already passed
|
* of the list, so that it's checked even if it was already
|
||||||
* over
|
* passed over
|
||||||
*/
|
*/
|
||||||
if (u->gc_candidate)
|
if (u->gc_maybe_cycle)
|
||||||
list_move_tail(&u->link, &gc_candidates);
|
list_move_tail(&u->link, &gc_candidates);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -267,6 +276,7 @@ void unix_gc(void)
|
||||||
struct unix_sock *next;
|
struct unix_sock *next;
|
||||||
struct sk_buff_head hitlist;
|
struct sk_buff_head hitlist;
|
||||||
struct list_head cursor;
|
struct list_head cursor;
|
||||||
|
LIST_HEAD(not_cycle_list);
|
||||||
|
|
||||||
spin_lock(&unix_gc_lock);
|
spin_lock(&unix_gc_lock);
|
||||||
|
|
||||||
|
@ -282,10 +292,14 @@ void unix_gc(void)
|
||||||
*
|
*
|
||||||
* Holding unix_gc_lock will protect these candidates from
|
* Holding unix_gc_lock will protect these candidates from
|
||||||
* being detached, and hence from gaining an external
|
* being detached, and hence from gaining an external
|
||||||
* reference. This also means, that since there are no
|
* reference. Since there are no possible receivers, all
|
||||||
* possible receivers, the receive queues of these sockets are
|
* buffers currently on the candidates' queues stay there
|
||||||
* static during the GC, even though the dequeue is done
|
* during the garbage collection.
|
||||||
* before the detach without atomicity guarantees.
|
*
|
||||||
|
* We also know that no new candidate can be added onto the
|
||||||
|
* receive queues. Other, non candidate sockets _can_ be
|
||||||
|
* added to queue, so we must make sure only to touch
|
||||||
|
* candidates.
|
||||||
*/
|
*/
|
||||||
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
|
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
|
||||||
long total_refs;
|
long total_refs;
|
||||||
|
@ -299,6 +313,7 @@ void unix_gc(void)
|
||||||
if (total_refs == inflight_refs) {
|
if (total_refs == inflight_refs) {
|
||||||
list_move_tail(&u->link, &gc_candidates);
|
list_move_tail(&u->link, &gc_candidates);
|
||||||
u->gc_candidate = 1;
|
u->gc_candidate = 1;
|
||||||
|
u->gc_maybe_cycle = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -325,13 +340,23 @@ void unix_gc(void)
|
||||||
list_move(&cursor, &u->link);
|
list_move(&cursor, &u->link);
|
||||||
|
|
||||||
if (atomic_long_read(&u->inflight) > 0) {
|
if (atomic_long_read(&u->inflight) > 0) {
|
||||||
list_move_tail(&u->link, &gc_inflight_list);
|
list_move_tail(&u->link, ¬_cycle_list);
|
||||||
u->gc_candidate = 0;
|
u->gc_maybe_cycle = 0;
|
||||||
scan_children(&u->sk, inc_inflight_move_tail, NULL);
|
scan_children(&u->sk, inc_inflight_move_tail, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
list_del(&cursor);
|
list_del(&cursor);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* not_cycle_list contains those sockets which do not make up a
|
||||||
|
* cycle. Restore these to the inflight list.
|
||||||
|
*/
|
||||||
|
while (!list_empty(¬_cycle_list)) {
|
||||||
|
u = list_entry(not_cycle_list.next, struct unix_sock, link);
|
||||||
|
u->gc_candidate = 0;
|
||||||
|
list_move_tail(&u->link, &gc_inflight_list);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now gc_candidates contains only garbage. Restore original
|
* Now gc_candidates contains only garbage. Restore original
|
||||||
* inflight counters for these as well, and remove the skbuffs
|
* inflight counters for these as well, and remove the skbuffs
|
||||||
|
|
Loading…
Reference in a new issue