pi-futex: fix exit races and locking problems
1. New entries can be added to tsk->pi_state_list after the task has
   completed exit_pi_state_list(). The result is memory leakage and
   deadlocks.

2. handle_mm_fault() is called under a spinlock. The result is obvious.

3. A self-inflicted deadlock inside glibc: sometimes futex_lock_pi()
   returns -ESRCH when it is not expected, and glibc enters a
   for(;;) sleep() loop to simulate the deadlock. This problem is quite
   obvious and I think the patch is right, though it looks like each
   "if" in futex_lock_pi() got some stupid special-case "else if". :-)

4. Sometimes futex_lock_pi() returns -EDEADLK when nobody holds the
   lock. The reason is also obvious (see the comment in the patch), but
   a correct fix is far beyond my comprehension. I guess someone has
   already seen this; the chunk:

	if (rt_mutex_trylock(&q.pi_state->pi_mutex))
		ret = 0;

   is obviously from the same opera. But it does not work, because the
   rtmutex is really taken at this point: wake_futex_pi() of the
   previous owner reassigned it to us. My fix works, but it looks very
   stupid. I would think about removing the shift of ownership in
   wake_futex_pi() and doing all the work in the context of the process
   taking the lock.

From: Thomas Gleixner <tglx@linutronix.de>

Fix 1) Avoid the tasklist-lock variant of the exit-race fix by adding
an additional state transition to the exit code. This also fixes the
issue where a task with recursive segfaults is unable to release its
futexes.

Fix 2) Clean up the lookup_pi_state() failure path and solve the
-ESRCH problem for good.

Fix 3) Solve the fixup_pi_state_owner() problem, which needs to do the
fixup in the lock-protected section, by using the in-atomic userspace
access functions. This also removes the ugly lock drop / unqueue inside
fixup_pi_state_owner().

Fix 4) Fix a stale lock in the error path of futex_wake_pi().

Some error checks were added for verification. The -EDEADLK problem is
solved by the rtmutex fixups.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
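For context, a minimal sketch of the user-space side these fixes matter
for: glibc's PI-mutex fast path does an atomic 0 -> TID transition in
user space and enters the kernel with FUTEX_LOCK_PI only on contention;
the spurious -ESRCH described in point 3 comes back from that call, and
glibc has no sane recovery. This sketch is illustrative only - the
helper names are made up; only the futex(2) opcodes and the syscall
interface are real:

	/* pi_lock_demo.c - hypothetical illustration, not part of this patch. */
	#include <linux/futex.h>	/* FUTEX_LOCK_PI, FUTEX_UNLOCK_PI */
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <stdint.h>
	#include <stdatomic.h>
	#include <stdio.h>
	#include <errno.h>

	static long sys_futex(uint32_t *uaddr, int op)
	{
		return syscall(SYS_futex, uaddr, op, 0, NULL, NULL, 0);
	}

	/* Fast path: uncontended 0 -> TID in user space; the kernel is only
	 * entered on contention. FUTEX_LOCK_PI must look up the owner task
	 * named by the TID stored in the futex word - the -ESRCH races fixed
	 * by this commit are exactly that lookup failing against an exiting
	 * owner. */
	static void pi_lock(_Atomic uint32_t *futex, uint32_t tid)
	{
		uint32_t expected = 0;

		if (atomic_compare_exchange_strong(futex, &expected, tid))
			return;
		while (sys_futex((uint32_t *)futex, FUTEX_LOCK_PI) < 0) {
			if (errno != EINTR) {
				perror("FUTEX_LOCK_PI"); /* no sane recovery here */
				return;
			}
		}
	}

	static void pi_unlock(_Atomic uint32_t *futex, uint32_t tid)
	{
		uint32_t expected = tid;

		/* Fast path: TID -> 0 when no waiters are queued. */
		if (atomic_compare_exchange_strong(futex, &expected, 0))
			return;
		sys_futex((uint32_t *)futex, FUTEX_UNLOCK_PI);
	}

	int main(void)
	{
		static _Atomic uint32_t futex;
		uint32_t tid = (uint32_t)syscall(SYS_gettid);

		pi_lock(&futex, tid);
		puts("PI futex locked");
		pi_unlock(&futex, tid);
		return 0;
	}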
parent 1a539a8728
commit 778e9a9c3e

3 changed files with 183 additions and 111 deletions
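Fix 3 above depends on the kernel's "in-atomic" user-access idiom: with
page faults disabled, a faulting access fails immediately instead of
sleeping in handle_mm_fault() while a spinlock is held (problem 2). A
minimal sketch of the idiom, essentially what get_futex_value_locked()
in the diff below does (the helper name here is hypothetical;
pagefault_disable(), pagefault_enable() and __copy_from_user_inatomic()
are the real kernel primitives of that era):

	/*
	 * Read a u32 from user space while it is not valid to sleep, e.g.
	 * under the futex hash bucket lock. A page fault makes the copy
	 * fail fast with -EFAULT; the caller drops its locks and retries
	 * the access with faulting allowed.
	 */
	static int get_user_u32_atomic(u32 *dest, u32 __user *from)
	{
		int ret;

		pagefault_disable();
		ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
		pagefault_enable();

		return ret ? -EFAULT : 0;
	}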
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1162,6 +1162,7 @@ static inline void put_task_struct(struct task_struct *t)
 					/* Not implemented yet, only for 486*/
 #define PF_STARTING	0x00000002	/* being created */
 #define PF_EXITING	0x00000004	/* getting shut down */
+#define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
 #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
 #define PF_DUMPCORE	0x00000200	/* dumped core */
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -892,13 +892,29 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		printk(KERN_ALERT
 			"Fixing recursive fault but reboot is needed!\n");
+		/*
+		 * We can do this unlocked here. The futex code uses
+		 * this flag just to verify whether the pi state
+		 * cleanup has been done or not. In the worst case it
+		 * loops once more. We pretend that the cleanup was
+		 * done as there is no way to return. Either the
+		 * OWNER_DIED bit is set by now or we push the blocked
+		 * task into the wait for ever nirwana as well.
+		 */
+		tsk->flags |= PF_EXITPIDONE;
 		if (tsk->io_context)
 			exit_io_context();
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule();
 	}
 
+	/*
+	 * tsk->flags are checked in the futex code to protect against
+	 * an exiting task cleaning up the robust pi futexes.
+	 */
+	spin_lock_irq(&tsk->pi_lock);
 	tsk->flags |= PF_EXITING;
+	spin_unlock_irq(&tsk->pi_lock);
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -912,7 +928,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	}
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
- 		hrtimer_cancel(&tsk->signal->real_timer);
+		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
 	}
 	acct_collect(code, group_dead);
@@ -965,6 +981,12 @@ fastcall NORET_TYPE void do_exit(long code)
 	 * Make sure we are holding no locks:
 	 */
 	debug_check_no_locks_held(tsk);
+	/*
+	 * We can do this unlocked here. The futex code uses this flag
+	 * just to verify whether the pi state cleanup has been done
+	 * or not. In the worst case it loops once more.
+	 */
+	tsk->flags |= PF_EXITPIDONE;
 
 	if (tsk->io_context)
 		exit_io_context();
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -430,10 +430,6 @@ static struct task_struct * futex_find_get_task(pid_t pid)
 			p = NULL;
 		goto out_unlock;
 	}
-	if (p->exit_state != 0) {
-		p = NULL;
-		goto out_unlock;
-	}
 	get_task_struct(p);
 out_unlock:
 	rcu_read_unlock();
@@ -502,7 +498,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	struct futex_q *this, *next;
 	struct plist_head *head;
 	struct task_struct *p;
-	pid_t pid;
+	pid_t pid = uval & FUTEX_TID_MASK;
 
 	head = &hb->chain;
 
@@ -520,6 +516,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 				return -EINVAL;
 
 			WARN_ON(!atomic_read(&pi_state->refcount));
+			WARN_ON(pid && pi_state->owner &&
+				pi_state->owner->pid != pid);
 
 			atomic_inc(&pi_state->refcount);
 			*ps = pi_state;
@@ -530,15 +528,33 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 
 	/*
 	 * We are the first waiter - try to look up the real owner and attach
-	 * the new pi_state to it, but bail out when the owner died bit is set
-	 * and TID = 0:
+	 * the new pi_state to it, but bail out when TID = 0
 	 */
-	pid = uval & FUTEX_TID_MASK;
-	if (!pid && (uval & FUTEX_OWNER_DIED))
+	if (!pid)
 		return -ESRCH;
 	p = futex_find_get_task(pid);
-	if (!p)
-		return -ESRCH;
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
+	/*
+	 * We need to look at the task state flags to figure out,
+	 * whether the task is exiting. To protect against the do_exit
+	 * change of the task flags, we do this protected by
+	 * p->pi_lock:
+	 */
+	spin_lock_irq(&p->pi_lock);
+	if (unlikely(p->flags & PF_EXITING)) {
+		/*
+		 * The task is on the way out. When PF_EXITPIDONE is
+		 * set, we know that the task has finished the
+		 * cleanup:
+		 */
+		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+
+		spin_unlock_irq(&p->pi_lock);
+		put_task_struct(p);
+		return ret;
+	}
 
 	pi_state = alloc_pi_state();
 
@@ -551,7 +567,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	/* Store the key for possible exit cleanups: */
 	pi_state->key = *key;
 
-	spin_lock_irq(&p->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
@@ -618,6 +633,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	 * preserve the owner died bit.)
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
+		int ret = 0;
+
 		newval = FUTEX_WAITERS | new_owner->pid;
 		/* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
@@ -625,10 +642,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 		pagefault_disable();
 		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
 		pagefault_enable();
+
 		if (curval == -EFAULT)
-			return -EFAULT;
+			ret = -EFAULT;
 		if (curval != uval)
-			return -EINVAL;
+			ret = -EINVAL;
+		if (ret) {
+			spin_unlock(&pi_state->pi_mutex.wait_lock);
+			return ret;
+		}
 	}
 
 	spin_lock_irq(&pi_state->owner->pi_lock);
@@ -1174,7 +1196,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
 #ifdef CONFIG_DEBUG_PI_LIST
 			this->list.plist.lock = &hb2->lock;
 #endif
- 		}
+		}
 		this->key = key2;
 		get_futex_key_refs(&key2);
 		drop_count++;
@@ -1326,12 +1348,10 @@ static void unqueue_me_pi(struct futex_q *q)
 /*
  * Fixup the pi_state owner with current.
  *
- * The cur->mm semaphore must be held, it is released at return of this
- * function.
+ * Must be called with hash bucket lock held and mm->sem held for non
+ * private futexes.
  */
-static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
-				struct futex_q *q,
-				struct futex_hash_bucket *hb,
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 				struct task_struct *curr)
 {
 	u32 newtid = curr->pid | FUTEX_WAITERS;
@@ -1355,23 +1375,24 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
 	list_add(&pi_state->list, &curr->pi_state_list);
 	spin_unlock_irq(&curr->pi_lock);
 
-	/* Unqueue and drop the lock */
-	unqueue_me_pi(q);
-	if (fshared)
-		up_read(fshared);
 	/*
 	 * We own it, so we have to replace the pending owner
 	 * TID. This must be atomic as we have to preserve the
 	 * owner died bit here.
 	 */
-	ret = get_user(uval, uaddr);
+	ret = get_futex_value_locked(&uval, uaddr);
 
 	while (!ret) {
 		newval = (uval & FUTEX_OWNER_DIED) | newtid;
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
 
+		pagefault_disable();
 		curval = futex_atomic_cmpxchg_inatomic(uaddr,
 						       uval, newval);
+		pagefault_enable();
 
 		if (curval == -EFAULT)
 			ret = -EFAULT;
 		if (curval == uval)
 			break;
 		uval = curval;
@@ -1553,10 +1574,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 			 */
 			uaddr = q.pi_state->key.uaddr;
 
-			/* mmap_sem and hash_bucket lock are unlocked at
-			   return of this function */
-			ret = fixup_pi_state_owner(uaddr, fshared,
-						   &q, hb, curr);
+			ret = fixup_pi_state_owner(uaddr, &q, curr);
 		} else {
 			/*
 			 * Catch the rare case, where the lock was released
@@ -1567,12 +1585,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 			if (rt_mutex_trylock(&q.pi_state->pi_mutex))
 				ret = 0;
 		}
-		/* Unqueue and drop the lock */
-		unqueue_me_pi(&q);
-		if (fshared)
-			up_read(fshared);
 	}
 
+	/* Unqueue and drop the lock */
+	unqueue_me_pi(&q);
+	if (fshared)
+		up_read(fshared);
+
 	debug_rt_mutex_free_waiter(&q.waiter);
 
 	return ret;
@@ -1688,7 +1707,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	struct futex_hash_bucket *hb;
 	u32 uval, newval, curval;
 	struct futex_q q;
-	int ret, lock_held, attempt = 0;
+	int ret, lock_taken, ownerdied = 0, attempt = 0;
 
 	if (refill_pi_state_cache())
 		return -ENOMEM;
@@ -1709,10 +1728,11 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(ret != 0))
 		goto out_release_sem;
 
+ retry_unlocked:
 	hb = queue_lock(&q, -1, NULL);
 
  retry_locked:
-	lock_held = 0;
+	ret = lock_taken = 0;
 
 	/*
 	 * To avoid races, we attempt to take the lock here again
@@ -1728,43 +1748,44 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(curval == -EFAULT))
 		goto uaddr_faulted;
 
-	/* We own the lock already */
+	/*
+	 * Detect deadlocks. In case of REQUEUE_PI this is a valid
+	 * situation and we return success to user space.
+	 */
 	if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
-		if (!detect && 0)
-			force_sig(SIGKILL, current);
-		/*
-		 * Normally, this check is done in user space.
-		 * In case of requeue, the owner may attempt to lock this futex,
-		 * even if the ownership has already been given by the previous
-		 * waker.
-		 * In the usual case, this is a case of deadlock, but not in case
-		 * of REQUEUE_PI.
-		 */
 		if (!(curval & FUTEX_WAITER_REQUEUED))
 			ret = -EDEADLK;
 		goto out_unlock_release_sem;
 	}
 
 	/*
-	 * Surprise - we got the lock. Just return
-	 * to userspace:
+	 * Surprise - we got the lock. Just return to userspace:
 	 */
 	if (unlikely(!curval))
 		goto out_unlock_release_sem;
 
 	uval = curval;
 
 	/*
-	 * In case of a requeue, check if there already is an owner
-	 * If not, just take the futex.
+	 * Set the WAITERS flag, so the owner will know it has someone
+	 * to wake at next unlock
 	 */
-	if ((curval & FUTEX_WAITER_REQUEUED) && !(curval & FUTEX_TID_MASK)) {
-		/* set current as futex owner */
-		newval = curval | current->pid;
-		lock_held = 1;
-	} else
-		/* Set the WAITERS flag, so the owner will know it has someone
-		   to wake at next unlock */
-		newval = curval | FUTEX_WAITERS;
+	newval = curval | FUTEX_WAITERS;
+
+	/*
+	 * There are two cases, where a futex might have no owner (the
+	 * owner TID is 0): OWNER_DIED or REQUEUE. We take over the
+	 * futex in this case. We also do an unconditional take over,
+	 * when the owner of the futex died.
+	 *
+	 * This is safe as we are protected by the hash bucket lock !
+	 */
+	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
+		/* Keep the OWNER_DIED and REQUEUE bits */
+		newval = (curval & ~FUTEX_TID_MASK) | current->pid;
+		ownerdied = 0;
+		lock_taken = 1;
+	}
 
 	pagefault_disable();
 	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -1775,8 +1796,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(curval != uval))
 		goto retry_locked;
 
-	if (lock_held) {
-		set_pi_futex_owner(hb, &q.key, curr);
+	/*
+	 * We took the lock due to requeue or owner died take over.
+	 */
+	if (unlikely(lock_taken)) {
+		/* For requeue we need to fixup the pi_futex */
+		if (curval & FUTEX_WAITER_REQUEUED)
+			set_pi_futex_owner(hb, &q.key, curr);
 		goto out_unlock_release_sem;
 	}
 
@@ -1787,34 +1813,40 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
 
 	if (unlikely(ret)) {
-		/*
-		 * There were no waiters and the owner task lookup
-		 * failed. When the OWNER_DIED bit is set, then we
-		 * know that this is a robust futex and we actually
-		 * take the lock. This is safe as we are protected by
-		 * the hash bucket lock. We also set the waiters bit
-		 * unconditionally here, to simplify glibc handling of
-		 * multiple tasks racing to acquire the lock and
-		 * cleanup the problems which were left by the dead
-		 * owner.
-		 */
-		if (curval & FUTEX_OWNER_DIED) {
-			uval = newval;
-			newval = current->pid |
-				FUTEX_OWNER_DIED | FUTEX_WAITERS;
+		switch (ret) {
 
-			pagefault_disable();
-			curval = futex_atomic_cmpxchg_inatomic(uaddr,
-							       uval, newval);
-			pagefault_enable();
+		case -EAGAIN:
+			/*
+			 * Task is exiting and we just wait for the
+			 * exit to complete.
+			 */
+			queue_unlock(&q, hb);
+			if (fshared)
+				up_read(fshared);
+			cond_resched();
+			goto retry;
 
-			if (unlikely(curval == -EFAULT))
+		case -ESRCH:
+			/*
+			 * No owner found for this futex. Check if the
+			 * OWNER_DIED bit is set to figure out whether
+			 * this is a robust futex or not.
+			 */
+			if (get_futex_value_locked(&curval, uaddr))
 				goto uaddr_faulted;
-			if (unlikely(curval != uval))
+
+			/*
+			 * We simply start over in case of a robust
+			 * futex. The code above will take the futex
+			 * and return happy.
+			 */
+			if (curval & FUTEX_OWNER_DIED) {
+				ownerdied = 1;
 				goto retry_locked;
-			ret = 0;
+			}
+		default:
+			goto out_unlock_release_sem;
 		}
-		goto out_unlock_release_sem;
 	}
 
 	/*
@@ -1845,31 +1877,42 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		down_read(fshared);
 	spin_lock(q.lock_ptr);
 
-	/*
-	 * Got the lock. We might not be the anticipated owner if we
-	 * did a lock-steal - fix up the PI-state in that case.
-	 */
-	if (!ret && q.pi_state->owner != curr)
-		/* mmap_sem is unlocked at return of this function */
-		ret = fixup_pi_state_owner(uaddr, fshared, &q, hb, curr);
-	else {
+	if (!ret) {
+		/*
+		 * Got the lock. We might not be the anticipated owner
+		 * if we did a lock-steal - fix up the PI-state in
+		 * that case:
+		 */
+		if (q.pi_state->owner != curr)
+			ret = fixup_pi_state_owner(uaddr, &q, curr);
+	} else {
 		/*
 		 * Catch the rare case, where the lock was released
-		 * when we were on the way back before we locked
-		 * the hash bucket.
+		 * when we were on the way back before we locked the
+		 * hash bucket.
 		 */
-		if (ret && q.pi_state->owner == curr) {
-			if (rt_mutex_trylock(&q.pi_state->pi_mutex))
-				ret = 0;
+		if (q.pi_state->owner == curr &&
+		    rt_mutex_trylock(&q.pi_state->pi_mutex)) {
+			ret = 0;
+		} else {
+			/*
+			 * Paranoia check. If we did not take the lock
+			 * in the trylock above, then we should not be
+			 * the owner of the rtmutex, neither the real
+			 * nor the pending one:
+			 */
+			if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
+				printk(KERN_ERR "futex_lock_pi: ret = %d "
+				       "pi-mutex: %p pi-state %p\n", ret,
+				       q.pi_state->pi_mutex.owner,
+				       q.pi_state->owner);
 		}
-		/* Unqueue and drop the lock */
-		unqueue_me_pi(&q);
-		if (fshared)
-			up_read(fshared);
 	}
 
-	if (!detect && ret == -EDEADLK && 0)
-		force_sig(SIGKILL, current);
+	/* Unqueue and drop the lock */
+	unqueue_me_pi(&q);
+	if (fshared)
+		up_read(fshared);
 
 	return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
@@ -1887,16 +1930,19 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 * non-atomically. Therefore, if get_user below is not
 	 * enough, we need to handle the fault ourselves, while
 	 * still holding the mmap_sem.
+	 *
+	 * ... and hb->lock. :-) --ANK
 	 */
+	queue_unlock(&q, hb);
+
 	if (attempt++) {
 		ret = futex_handle_fault((unsigned long)uaddr, fshared,
 					 attempt);
 		if (ret)
-			goto out_unlock_release_sem;
-		goto retry_locked;
+			goto out_release_sem;
+		goto retry_unlocked;
 	}
 
-	queue_unlock(&q, hb);
-
 	if (fshared)
 		up_read(fshared);
@@ -1940,9 +1986,9 @@ retry:
 		goto out;
 
 	hb = hash_futex(&key);
+retry_unlocked:
 	spin_lock(&hb->lock);
 
-retry_locked:
 	/*
 	 * To avoid races, try to do the TID -> 0 atomic transition
 	 * again. If it succeeds then we can return without waking
@@ -2005,16 +2051,19 @@ pi_faulted:
 	 * non-atomically. Therefore, if get_user below is not
 	 * enough, we need to handle the fault ourselves, while
 	 * still holding the mmap_sem.
+	 *
+	 * ... and hb->lock. --ANK
 	 */
+	spin_unlock(&hb->lock);
+
 	if (attempt++) {
 		ret = futex_handle_fault((unsigned long)uaddr, fshared,
 					 attempt);
 		if (ret)
-			goto out_unlock;
-		goto retry_locked;
+			goto out;
+		goto retry_unlocked;
 	}
 
-	spin_unlock(&hb->lock);
-
 	if (fshared)
 		up_read(fshared);