Merge branches 'core/debug', 'core/futexes', 'core/locking', 'core/rcu', 'core/signal', 'core/urgent' and 'core/xen' into core/core

This commit is contained in:
Ingo Molnar 2008-11-24 17:44:55 +01:00
28 changed files with 315 additions and 267 deletions

View file

@ -71,35 +71,50 @@ Look at the current lock statistics:
# less /proc/lock_stat # less /proc/lock_stat
01 lock_stat version 0.2 01 lock_stat version 0.3
02 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 02 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total 03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total
04 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 04 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
05 05
06 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 06 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34
07 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 07 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88
08 -------------------------- 08 ---------------
09 &inode->i_data.tree_lock 0 [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190 09 &mm->mmap_sem 487 [<ffffffff8053491f>] do_page_fault+0x466/0x928
10 10 &mm->mmap_sem 179 [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
11 ............................................................................................................................................................................................... 11 &mm->mmap_sem 279 [<ffffffff80210a57>] sys_mmap+0x75/0xce
12 12 &mm->mmap_sem 76 [<ffffffff802a490b>] sys_munmap+0x32/0x59
13 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 13 ---------------
14 ----------- 14 &mm->mmap_sem 270 [<ffffffff80210a57>] sys_mmap+0x75/0xce
15 dcache_lock 180 [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230 15 &mm->mmap_sem 431 [<ffffffff8053491f>] do_page_fault+0x466/0x928
16 dcache_lock 165 [<ffffffff802c002a>] d_alloc+0x15a/0x210 16 &mm->mmap_sem 138 [<ffffffff802a490b>] sys_munmap+0x32/0x59
17 dcache_lock 33 [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70 17 &mm->mmap_sem 145 [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
18 dcache_lock 1 [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130 18
19 ...............................................................................................................................................................................................
20
21 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41
22 -----------
23 dcache_lock 179 [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
24 dcache_lock 113 [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
25 dcache_lock 99 [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
26 dcache_lock 104 [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
27 -----------
28 dcache_lock 192 [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
29 dcache_lock 98 [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
30 dcache_lock 72 [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
31 dcache_lock 112 [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
This excerpt shows the first two lock class statistics. Line 01 shows the This excerpt shows the first two lock class statistics. Line 01 shows the
output version - each time the format changes this will be updated. Lines 02-04 output version - each time the format changes this will be updated. Lines 02-04
show the header with column descriptions. Lines 05-10 and 13-18 show the actual show the header with column descriptions. Lines 05-18 and 20-31 show the actual
statistics. These statistics come in two parts; the actual stats separated by a statistics. These statistics come in two parts; the actual stats separated by a
short separator (lines 08, 14) from the contention points. short separator (lines 08, 13) from the contention points.
The first lock (05-10) is a read/write lock, and shows two lines above the The first lock (05-18) is a read/write lock, and shows two lines above the
short separator. The contention points don't match the column descriptors, short separator. The contention points don't match the column descriptors,
they have two: contentions and [<IP>] symbol. they have two: contentions and [<IP>] symbol. The second set of contention
points are the points we're contending with.
The integer part of the time values is in us.
View the top contending locks: View the top contending locks:

View file

@ -11,21 +11,21 @@ extern int get_signals(void);
extern void block_signals(void); extern void block_signals(void);
extern void unblock_signals(void); extern void unblock_signals(void);
#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ #define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \
(flags) = get_signals(); } while(0) (flags) = get_signals(); } while(0)
#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ #define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \
set_signals(flags); } while(0) set_signals(flags); } while(0)
#define local_irq_save(flags) do { local_save_flags(flags); \ #define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \
local_irq_disable(); } while(0) raw_local_irq_disable(); } while(0)
#define local_irq_enable() unblock_signals() #define raw_local_irq_enable() unblock_signals()
#define local_irq_disable() block_signals() #define raw_local_irq_disable() block_signals()
#define irqs_disabled() \ #define irqs_disabled() \
({ \ ({ \
unsigned long flags; \ unsigned long flags; \
local_save_flags(flags); \ raw_local_save_flags(flags); \
(flags == 0); \ (flags == 0); \
}) })

View file

@ -157,6 +157,7 @@ extern int __get_user_bad(void);
int __ret_gu; \ int __ret_gu; \
unsigned long __val_gu; \ unsigned long __val_gu; \
__chk_user_ptr(ptr); \ __chk_user_ptr(ptr); \
might_fault(); \
switch (sizeof(*(ptr))) { \ switch (sizeof(*(ptr))) { \
case 1: \ case 1: \
__get_user_x(1, __ret_gu, __val_gu, ptr); \ __get_user_x(1, __ret_gu, __val_gu, ptr); \
@ -241,6 +242,7 @@ extern void __put_user_8(void);
int __ret_pu; \ int __ret_pu; \
__typeof__(*(ptr)) __pu_val; \ __typeof__(*(ptr)) __pu_val; \
__chk_user_ptr(ptr); \ __chk_user_ptr(ptr); \
might_fault(); \
__pu_val = x; \ __pu_val = x; \
switch (sizeof(*(ptr))) { \ switch (sizeof(*(ptr))) { \
case 1: \ case 1: \

View file

@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
static __always_inline unsigned long __must_check static __always_inline unsigned long __must_check
__copy_to_user(void __user *to, const void *from, unsigned long n) __copy_to_user(void __user *to, const void *from, unsigned long n)
{ {
might_sleep(); might_fault();
return __copy_to_user_inatomic(to, from, n); return __copy_to_user_inatomic(to, from, n);
} }
static __always_inline unsigned long static __always_inline unsigned long
@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
static __always_inline unsigned long static __always_inline unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n) __copy_from_user(void *to, const void __user *from, unsigned long n)
{ {
might_sleep(); might_fault();
if (__builtin_constant_p(n)) { if (__builtin_constant_p(n)) {
unsigned long ret; unsigned long ret;
@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
static __always_inline unsigned long __copy_from_user_nocache(void *to, static __always_inline unsigned long __copy_from_user_nocache(void *to,
const void __user *from, unsigned long n) const void __user *from, unsigned long n)
{ {
might_sleep(); might_fault();
if (__builtin_constant_p(n)) { if (__builtin_constant_p(n)) {
unsigned long ret; unsigned long ret;

View file

@ -29,6 +29,8 @@ static __always_inline __must_check
int __copy_from_user(void *dst, const void __user *src, unsigned size) int __copy_from_user(void *dst, const void __user *src, unsigned size)
{ {
int ret = 0; int ret = 0;
might_fault();
if (!__builtin_constant_p(size)) if (!__builtin_constant_p(size))
return copy_user_generic(dst, (__force void *)src, size); return copy_user_generic(dst, (__force void *)src, size);
switch (size) { switch (size) {
@ -71,6 +73,8 @@ static __always_inline __must_check
int __copy_to_user(void __user *dst, const void *src, unsigned size) int __copy_to_user(void __user *dst, const void *src, unsigned size)
{ {
int ret = 0; int ret = 0;
might_fault();
if (!__builtin_constant_p(size)) if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, src, size); return copy_user_generic((__force void *)dst, src, size);
switch (size) { switch (size) {
@ -113,6 +117,8 @@ static __always_inline __must_check
int __copy_in_user(void __user *dst, const void __user *src, unsigned size) int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
{ {
int ret = 0; int ret = 0;
might_fault();
if (!__builtin_constant_p(size)) if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, return copy_user_generic((__force void *)dst,
(__force void *)src, size); (__force void *)src, size);

View file

@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
#define __do_strncpy_from_user(dst, src, count, res) \ #define __do_strncpy_from_user(dst, src, count, res) \
do { \ do { \
int __d0, __d1, __d2; \ int __d0, __d1, __d2; \
might_sleep(); \ might_fault(); \
__asm__ __volatile__( \ __asm__ __volatile__( \
" testl %1,%1\n" \ " testl %1,%1\n" \
" jz 2f\n" \ " jz 2f\n" \
@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user);
#define __do_clear_user(addr,size) \ #define __do_clear_user(addr,size) \
do { \ do { \
int __d0; \ int __d0; \
might_sleep(); \ might_fault(); \
__asm__ __volatile__( \ __asm__ __volatile__( \
"0: rep; stosl\n" \ "0: rep; stosl\n" \
" movl %2,%0\n" \ " movl %2,%0\n" \
@ -155,7 +155,7 @@ do { \
unsigned long unsigned long
clear_user(void __user *to, unsigned long n) clear_user(void __user *to, unsigned long n)
{ {
might_sleep(); might_fault();
if (access_ok(VERIFY_WRITE, to, n)) if (access_ok(VERIFY_WRITE, to, n))
__do_clear_user(to, n); __do_clear_user(to, n);
return n; return n;
@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n)
unsigned long mask = -__addr_ok(s); unsigned long mask = -__addr_ok(s);
unsigned long res, tmp; unsigned long res, tmp;
might_sleep(); might_fault();
__asm__ __volatile__( __asm__ __volatile__(
" testl %0, %0\n" " testl %0, %0\n"

View file

@ -15,7 +15,7 @@
#define __do_strncpy_from_user(dst,src,count,res) \ #define __do_strncpy_from_user(dst,src,count,res) \
do { \ do { \
long __d0, __d1, __d2; \ long __d0, __d1, __d2; \
might_sleep(); \ might_fault(); \
__asm__ __volatile__( \ __asm__ __volatile__( \
" testq %1,%1\n" \ " testq %1,%1\n" \
" jz 2f\n" \ " jz 2f\n" \
@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
unsigned long __clear_user(void __user *addr, unsigned long size) unsigned long __clear_user(void __user *addr, unsigned long size)
{ {
long __d0; long __d0;
might_sleep(); might_fault();
/* no memory constraint because it doesn't change any memory gcc knows /* no memory constraint because it doesn't change any memory gcc knows
about */ about */
asm volatile( asm volatile(

View file

@ -17,7 +17,7 @@ extern int debug_locks_off(void);
({ \ ({ \
int __ret = 0; \ int __ret = 0; \
\ \
if (unlikely(c)) { \ if (!oops_in_progress && unlikely(c)) { \
if (debug_locks_off() && !debug_locks_silent) \ if (debug_locks_off() && !debug_locks_silent) \
WARN_ON(1); \ WARN_ON(1); \
__ret = 1; \ __ret = 1; \

View file

@ -164,6 +164,8 @@ union futex_key {
} both; } both;
}; };
/* Initializer for a union futex_key: a compound literal whose both.ptr is NULL. */
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
#ifdef CONFIG_FUTEX #ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr); extern void exit_robust_list(struct task_struct *curr);
extern void exit_pi_state_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr);

View file

@ -141,6 +141,15 @@ extern int _cond_resched(void);
(__x < 0) ? -__x : __x; \ (__x < 0) ? -__x : __x; \
}) })
/*
 * might_fault(): annotation for code paths that may fault.
 *
 * With CONFIG_PROVE_LOCKING only the prototype is provided here; the
 * definition is out of line and not visible in this header.  Without
 * CONFIG_PROVE_LOCKING it is an inline wrapper that just calls
 * might_sleep().
 */
#ifdef CONFIG_PROVE_LOCKING
void might_fault(void);
#else
static inline void might_fault(void)
{
might_sleep();
}
#endif
extern struct atomic_notifier_head panic_notifier_list; extern struct atomic_notifier_head panic_notifier_list;
extern long (*panic_blink)(long time); extern long (*panic_blink)(long time);
NORET_TYPE void panic(const char * fmt, ...) NORET_TYPE void panic(const char * fmt, ...)
@ -188,6 +197,8 @@ extern unsigned long long memparse(const char *ptr, char **retptr);
extern int core_kernel_text(unsigned long addr); extern int core_kernel_text(unsigned long addr);
extern int __kernel_text_address(unsigned long addr); extern int __kernel_text_address(unsigned long addr);
extern int kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr);
extern int func_ptr_is_kernel_text(void *ptr);
struct pid; struct pid;
extern struct pid *session_of_pgrp(struct pid *pgrp); extern struct pid *session_of_pgrp(struct pid *pgrp);

View file

@ -73,6 +73,8 @@ struct lock_class_key {
struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
}; };
/* Number of entries in the contention_point[] and contending_point[] arrays of struct lock_class. */
#define LOCKSTAT_POINTS 4
/* /*
* The lock-class itself: * The lock-class itself:
*/ */
@ -119,7 +121,8 @@ struct lock_class {
int name_version; int name_version;
#ifdef CONFIG_LOCK_STAT #ifdef CONFIG_LOCK_STAT
unsigned long contention_point[4]; unsigned long contention_point[LOCKSTAT_POINTS];
unsigned long contending_point[LOCKSTAT_POINTS];
#endif #endif
}; };
@ -144,6 +147,7 @@ enum bounce_type {
struct lock_class_stats { struct lock_class_stats {
unsigned long contention_point[4]; unsigned long contention_point[4];
unsigned long contending_point[4];
struct lock_time read_waittime; struct lock_time read_waittime;
struct lock_time write_waittime; struct lock_time write_waittime;
struct lock_time read_holdtime; struct lock_time read_holdtime;
@ -165,6 +169,7 @@ struct lockdep_map {
const char *name; const char *name;
#ifdef CONFIG_LOCK_STAT #ifdef CONFIG_LOCK_STAT
int cpu; int cpu;
unsigned long ip;
#endif #endif
}; };
@ -356,7 +361,7 @@ struct lock_class_key { };
#ifdef CONFIG_LOCK_STAT #ifdef CONFIG_LOCK_STAT
extern void lock_contended(struct lockdep_map *lock, unsigned long ip); extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
extern void lock_acquired(struct lockdep_map *lock); extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
#define LOCK_CONTENDED(_lock, try, lock) \ #define LOCK_CONTENDED(_lock, try, lock) \
do { \ do { \
@ -364,13 +369,13 @@ do { \
lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock_contended(&(_lock)->dep_map, _RET_IP_); \
lock(_lock); \ lock(_lock); \
} \ } \
lock_acquired(&(_lock)->dep_map); \ lock_acquired(&(_lock)->dep_map, _RET_IP_); \
} while (0) } while (0)
#else /* CONFIG_LOCK_STAT */ #else /* CONFIG_LOCK_STAT */
#define lock_contended(lockdep_map, ip) do {} while (0) #define lock_contended(lockdep_map, ip) do {} while (0)
#define lock_acquired(lockdep_map) do {} while (0) #define lock_acquired(lockdep_map, ip) do {} while (0)
#define LOCK_CONTENDED(_lock, try, lock) \ #define LOCK_CONTENDED(_lock, try, lock) \
lock(_lock) lock(_lock)
@ -481,4 +486,22 @@ static inline void print_irqtrace_events(struct task_struct *curr)
# define lock_map_release(l) do { } while (0) # define lock_map_release(l) do { } while (0)
#endif #endif
/*
 * might_lock() / might_lock_read()
 *
 * With CONFIG_PROVE_LOCKING: type-check that (lock)->dep_map is a
 * struct lockdep_map, then issue lock_acquire() immediately followed by
 * lock_release() on that dep_map.  Only the dep_map is used; the lock
 * itself is never taken.
 *
 * The two variants differ only in the fourth lock_acquire() argument
 * (0 vs 1) -- presumably the "read" flag; confirm against the
 * lock_acquire() prototype.
 *
 * Without CONFIG_PROVE_LOCKING both expand to an empty statement.
 */
#ifdef CONFIG_PROVE_LOCKING
# define might_lock(lock) \
do { \
typecheck(struct lockdep_map *, &(lock)->dep_map); \
lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \
lock_release(&(lock)->dep_map, 0, _THIS_IP_); \
} while (0)
# define might_lock_read(lock) \
do { \
typecheck(struct lockdep_map *, &(lock)->dep_map); \
lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \
lock_release(&(lock)->dep_map, 0, _THIS_IP_); \
} while (0)
#else
# define might_lock(lock) do { } while (0)
# define might_lock_read(lock) do { } while (0)
#endif
#endif /* __LINUX_LOCKDEP_H */ #endif /* __LINUX_LOCKDEP_H */

View file

@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
/* /*
* NOTE: mutex_trylock() follows the spin_trylock() convention, * NOTE: mutex_trylock() follows the spin_trylock() convention,
* not the down_trylock() convention! * not the down_trylock() convention!
*
* Returns 1 if the mutex has been acquired successfully, and 0 on contention.
*/ */
extern int mutex_trylock(struct mutex *lock); extern int mutex_trylock(struct mutex *lock);
extern void mutex_unlock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock);

View file

@ -41,7 +41,7 @@
#include <linux/seqlock.h> #include <linux/seqlock.h>
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ #define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */
#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

View file

@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
\ \
set_fs(KERNEL_DS); \ set_fs(KERNEL_DS); \
pagefault_disable(); \ pagefault_disable(); \
ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \
pagefault_enable(); \ pagefault_enable(); \
set_fs(old_fs); \ set_fs(old_fs); \
ret; \ ret; \

View file

@ -1321,10 +1321,10 @@ static int wait_task_zombie(struct task_struct *p, int options,
* group, which consolidates times for all threads in the * group, which consolidates times for all threads in the
* group including the group leader. * group including the group leader.
*/ */
thread_group_cputime(p, &cputime);
spin_lock_irq(&p->parent->sighand->siglock); spin_lock_irq(&p->parent->sighand->siglock);
psig = p->parent->signal; psig = p->parent->signal;
sig = p->signal; sig = p->signal;
thread_group_cputime(p, &cputime);
psig->cutime = psig->cutime =
cputime_add(psig->cutime, cputime_add(psig->cutime,
cputime_add(cputime.utime, cputime_add(cputime.utime,

View file

@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr)
return 1; return 1;
return module_text_address(addr) != NULL; return module_text_address(addr) != NULL;
} }
/*
 * Function pointers on some architectures (PPC64, IA64) are not code
 * addresses but tokens referring to data that holds the real function
 * address.  Resolve such a descriptor first, then test whether the
 * resulting address belongs to kernel text.
 *
 * Returns 1 if the resolved address is core kernel text or module text,
 * 0 otherwise.
 */
int func_ptr_is_kernel_text(void *ptr)
{
	unsigned long text_addr =
		(unsigned long) dereference_function_descriptor(ptr);

	/* Core kernel text is checked first; module text is the fallback. */
	return core_kernel_text(text_addr) ||
	       module_text_address(text_addr) != NULL;
}

View file

@ -122,24 +122,6 @@ struct futex_hash_bucket {
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
/*
 * For a shared futex, take mm->mmap_sem for reading.
 * A NULL @fshared means there is nothing to take.
 */
static inline void futex_lock_mm(struct rw_semaphore *fshared)
{
	if (!fshared)
		return;

	down_read(fshared);
}
/*
 * For a shared futex, release the read side of mm->mmap_sem.
 * A NULL @fshared means nothing was taken, so nothing is released.
 */
static inline void futex_unlock_mm(struct rw_semaphore *fshared)
{
	if (!fshared)
		return;

	up_read(fshared);
}
/* /*
* We hash on the keys returned from get_futex_key (see below). * We hash on the keys returned from get_futex_key (see below).
*/ */
@ -161,6 +143,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
&& key1->both.offset == key2->both.offset); && key1->both.offset == key2->both.offset);
} }
/*
 * Pin the resource a futex key refers to: the inode for an inode-based
 * key, the mm for an mm-based key.  A key whose both.ptr is NULL refers
 * to nothing and is left alone.
 *
 * Can be called while holding spinlocks.
 */
static void get_futex_key_refs(union futex_key *key)
{
	if (key->both.ptr) {
		switch (key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED)) {
		case FUT_OFF_MMSHARED:
			atomic_inc(&key->private.mm->mm_count);
			break;
		case FUT_OFF_INODE:
			atomic_inc(&key->shared.inode->i_count);
			break;
		default:
			/* neither flag alone is set: no reference to take */
			break;
		}
	}
}
/*
 * Release the reference on the resource a futex key refers to: iput()
 * for an inode-based key, mmdrop() for an mm-based key.  A key whose
 * both.ptr is NULL refers to nothing and is left alone.
 *
 * The hash bucket spinlock must not be held.
 */
static void drop_futex_key_refs(union futex_key *key)
{
	if (key->both.ptr) {
		switch (key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED)) {
		case FUT_OFF_MMSHARED:
			mmdrop(key->private.mm);
			break;
		case FUT_OFF_INODE:
			iput(key->shared.inode);
			break;
		default:
			/* neither flag alone is set: no reference to drop */
			break;
		}
	}
}
/** /**
* get_futex_key - Get parameters which are the keys for a futex. * get_futex_key - Get parameters which are the keys for a futex.
* @uaddr: virtual address of the futex * @uaddr: virtual address of the futex
@ -179,12 +200,10 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
* For other futexes, it points to &current->mm->mmap_sem and * For other futexes, it points to &current->mm->mmap_sem and
* caller must have taken the reader lock, but NOT any spinlocks. * caller must have taken the reader lock, but NOT any spinlocks.
*/ */
static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
union futex_key *key)
{ {
unsigned long address = (unsigned long)uaddr; unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct page *page; struct page *page;
int err; int err;
@ -208,100 +227,50 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
return -EFAULT; return -EFAULT;
key->private.mm = mm; key->private.mm = mm;
key->private.address = address; key->private.address = address;
get_futex_key_refs(key);
return 0; return 0;
} }
/*
* The futex is hashed differently depending on whether
* it's in a shared or private mapping. So check vma first.
*/
vma = find_extend_vma(mm, address);
if (unlikely(!vma))
return -EFAULT;
/* again:
* Permissions. err = get_user_pages_fast(address, 1, 0, &page);
*/ if (err < 0)
if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) return err;
return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
lock_page(page);
if (!page->mapping) {
unlock_page(page);
put_page(page);
goto again;
}
/* /*
* Private mappings are handled in a simple way. * Private mappings are handled in a simple way.
* *
* NOTE: When userspace waits on a MAP_SHARED mapping, even if * NOTE: When userspace waits on a MAP_SHARED mapping, even if
* it's a read-only handle, it's expected that futexes attach to * it's a read-only handle, it's expected that futexes attach to
* the object not the particular process. Therefore we use * the object not the particular process.
* VM_MAYSHARE here, not VM_SHARED which is restricted to shared
* mappings of _writable_ handles.
*/ */
if (likely(!(vma->vm_flags & VM_MAYSHARE))) { if (PageAnon(page)) {
key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
key->private.mm = mm; key->private.mm = mm;
key->private.address = address; key->private.address = address;
return 0; } else {
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.inode = page->mapping->host;
key->shared.pgoff = page->index;
} }
/* get_futex_key_refs(key);
* Linear file mappings are also simple.
*/
key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
+ vma->vm_pgoff);
return 0;
}
/* unlock_page(page);
* We could walk the page table to read the non-linear put_page(page);
* pte, and get the page index without fetching the page return 0;
* from swap. But that's a lot of code to duplicate here
* for a rare case, so we simply fetch the page.
*/
err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
if (err >= 0) {
key->shared.pgoff =
page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
put_page(page);
return 0;
}
return err;
} }
/* static inline
* Take a reference to the resource addressed by a key. void put_futex_key(int fshared, union futex_key *key)
* Can be called while holding spinlocks.
*
*/
static void get_futex_key_refs(union futex_key *key)
{ {
if (key->both.ptr == NULL) drop_futex_key_refs(key);
return;
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
atomic_inc(&key->shared.inode->i_count);
break;
case FUT_OFF_MMSHARED:
atomic_inc(&key->private.mm->mm_count);
break;
}
}
/*
* Drop a reference to the resource addressed by a key.
* The hash bucket spinlock must not be held.
*/
static void drop_futex_key_refs(union futex_key *key)
{
if (!key->both.ptr)
return;
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
iput(key->shared.inode);
break;
case FUT_OFF_MMSHARED:
mmdrop(key->private.mm);
break;
}
} }
static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@ -328,10 +297,8 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from)
/* /*
* Fault handling. * Fault handling.
* if fshared is non NULL, current->mm->mmap_sem is already held
*/ */
static int futex_handle_fault(unsigned long address, static int futex_handle_fault(unsigned long address, int attempt)
struct rw_semaphore *fshared, int attempt)
{ {
struct vm_area_struct * vma; struct vm_area_struct * vma;
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
@ -340,8 +307,7 @@ static int futex_handle_fault(unsigned long address,
if (attempt > 2) if (attempt > 2)
return ret; return ret;
if (!fshared) down_read(&mm->mmap_sem);
down_read(&mm->mmap_sem);
vma = find_vma(mm, address); vma = find_vma(mm, address);
if (vma && address >= vma->vm_start && if (vma && address >= vma->vm_start &&
(vma->vm_flags & VM_WRITE)) { (vma->vm_flags & VM_WRITE)) {
@ -361,8 +327,7 @@ static int futex_handle_fault(unsigned long address,
current->min_flt++; current->min_flt++;
} }
} }
if (!fshared) up_read(&mm->mmap_sem);
up_read(&mm->mmap_sem);
return ret; return ret;
} }
@ -385,6 +350,7 @@ static int refill_pi_state_cache(void)
/* pi_mutex gets initialized later */ /* pi_mutex gets initialized later */
pi_state->owner = NULL; pi_state->owner = NULL;
atomic_set(&pi_state->refcount, 1); atomic_set(&pi_state->refcount, 1);
pi_state->key = FUTEX_KEY_INIT;
current->pi_state_cache = pi_state; current->pi_state_cache = pi_state;
@ -462,7 +428,7 @@ void exit_pi_state_list(struct task_struct *curr)
struct list_head *next, *head = &curr->pi_state_list; struct list_head *next, *head = &curr->pi_state_list;
struct futex_pi_state *pi_state; struct futex_pi_state *pi_state;
struct futex_hash_bucket *hb; struct futex_hash_bucket *hb;
union futex_key key; union futex_key key = FUTEX_KEY_INIT;
if (!futex_cmpxchg_enabled) if (!futex_cmpxchg_enabled)
return; return;
@ -719,20 +685,17 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
* Wake up all waiters hashed on the physical page that is mapped * Wake up all waiters hashed on the physical page that is mapped
* to this virtual address: * to this virtual address:
*/ */
static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
int nr_wake, u32 bitset)
{ {
struct futex_hash_bucket *hb; struct futex_hash_bucket *hb;
struct futex_q *this, *next; struct futex_q *this, *next;
struct plist_head *head; struct plist_head *head;
union futex_key key; union futex_key key = FUTEX_KEY_INIT;
int ret; int ret;
if (!bitset) if (!bitset)
return -EINVAL; return -EINVAL;
futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &key); ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0)) if (unlikely(ret != 0))
goto out; goto out;
@ -760,7 +723,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
spin_unlock(&hb->lock); spin_unlock(&hb->lock);
out: out:
futex_unlock_mm(fshared); put_futex_key(fshared, &key);
return ret; return ret;
} }
@ -769,19 +732,16 @@ out:
* to this virtual address: * to this virtual address:
*/ */
static int static int
futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
u32 __user *uaddr2,
int nr_wake, int nr_wake2, int op) int nr_wake, int nr_wake2, int op)
{ {
union futex_key key1, key2; union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
struct futex_hash_bucket *hb1, *hb2; struct futex_hash_bucket *hb1, *hb2;
struct plist_head *head; struct plist_head *head;
struct futex_q *this, *next; struct futex_q *this, *next;
int ret, op_ret, attempt = 0; int ret, op_ret, attempt = 0;
retryfull: retryfull:
futex_lock_mm(fshared);
ret = get_futex_key(uaddr1, fshared, &key1); ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0)) if (unlikely(ret != 0))
goto out; goto out;
@ -826,18 +786,12 @@ retry:
*/ */
if (attempt++) { if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr2, ret = futex_handle_fault((unsigned long)uaddr2,
fshared, attempt); attempt);
if (ret) if (ret)
goto out; goto out;
goto retry; goto retry;
} }
/*
* If we would have faulted, release mmap_sem,
* fault it in and start all over again.
*/
futex_unlock_mm(fshared);
ret = get_user(dummy, uaddr2); ret = get_user(dummy, uaddr2);
if (ret) if (ret)
return ret; return ret;
@ -873,7 +827,8 @@ retry:
if (hb1 != hb2) if (hb1 != hb2)
spin_unlock(&hb2->lock); spin_unlock(&hb2->lock);
out: out:
futex_unlock_mm(fshared); put_futex_key(fshared, &key2);
put_futex_key(fshared, &key1);
return ret; return ret;
} }
@ -882,19 +837,16 @@ out:
* Requeue all waiters hashed on one physical page to another * Requeue all waiters hashed on one physical page to another
* physical page. * physical page.
*/ */
static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
u32 __user *uaddr2,
int nr_wake, int nr_requeue, u32 *cmpval) int nr_wake, int nr_requeue, u32 *cmpval)
{ {
union futex_key key1, key2; union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
struct futex_hash_bucket *hb1, *hb2; struct futex_hash_bucket *hb1, *hb2;
struct plist_head *head1; struct plist_head *head1;
struct futex_q *this, *next; struct futex_q *this, *next;
int ret, drop_count = 0; int ret, drop_count = 0;
retry: retry:
futex_lock_mm(fshared);
ret = get_futex_key(uaddr1, fshared, &key1); ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0)) if (unlikely(ret != 0))
goto out; goto out;
@ -917,12 +869,6 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
if (hb1 != hb2) if (hb1 != hb2)
spin_unlock(&hb2->lock); spin_unlock(&hb2->lock);
/*
* If we would have faulted, release mmap_sem, fault
* it in and start all over again.
*/
futex_unlock_mm(fshared);
ret = get_user(curval, uaddr1); ret = get_user(curval, uaddr1);
if (!ret) if (!ret)
@ -974,7 +920,8 @@ out_unlock:
drop_futex_key_refs(&key1); drop_futex_key_refs(&key1);
out: out:
futex_unlock_mm(fshared); put_futex_key(fshared, &key2);
put_futex_key(fshared, &key1);
return ret; return ret;
} }
@ -1096,8 +1043,7 @@ static void unqueue_me_pi(struct futex_q *q)
* private futexes. * private futexes.
*/ */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
struct task_struct *newowner, struct task_struct *newowner, int fshared)
struct rw_semaphore *fshared)
{ {
u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state; struct futex_pi_state *pi_state = q->pi_state;
@ -1176,7 +1122,7 @@ retry:
handle_fault: handle_fault:
spin_unlock(q->lock_ptr); spin_unlock(q->lock_ptr);
ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++); ret = futex_handle_fault((unsigned long)uaddr, attempt++);
spin_lock(q->lock_ptr); spin_lock(q->lock_ptr);
@ -1200,7 +1146,7 @@ handle_fault:
static long futex_wait_restart(struct restart_block *restart); static long futex_wait_restart(struct restart_block *restart);
static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, static int futex_wait(u32 __user *uaddr, int fshared,
u32 val, ktime_t *abs_time, u32 bitset) u32 val, ktime_t *abs_time, u32 bitset)
{ {
struct task_struct *curr = current; struct task_struct *curr = current;
@ -1218,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
q.pi_state = NULL; q.pi_state = NULL;
q.bitset = bitset; q.bitset = bitset;
retry: retry:
futex_lock_mm(fshared); q.key = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr, fshared, &q.key); ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0)) if (unlikely(ret != 0))
goto out_release_sem; goto out_release_sem;
@ -1251,12 +1196,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
if (unlikely(ret)) { if (unlikely(ret)) {
queue_unlock(&q, hb); queue_unlock(&q, hb);
/*
* If we would have faulted, release mmap_sem, fault it in and
* start all over again.
*/
futex_unlock_mm(fshared);
ret = get_user(uval, uaddr); ret = get_user(uval, uaddr);
if (!ret) if (!ret)
@ -1270,12 +1209,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
/* Only actually queue if *uaddr contained val. */ /* Only actually queue if *uaddr contained val. */
queue_me(&q, hb); queue_me(&q, hb);
/*
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
futex_unlock_mm(fshared);
/* /*
* There might have been scheduling since the queue_me(), as we * There might have been scheduling since the queue_me(), as we
* cannot hold a spinlock across the get_user() in case it * cannot hold a spinlock across the get_user() in case it
@ -1363,7 +1296,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
queue_unlock(&q, hb); queue_unlock(&q, hb);
out_release_sem: out_release_sem:
futex_unlock_mm(fshared); put_futex_key(fshared, &q.key);
return ret; return ret;
} }
@ -1371,13 +1304,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
static long futex_wait_restart(struct restart_block *restart) static long futex_wait_restart(struct restart_block *restart)
{ {
u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
struct rw_semaphore *fshared = NULL; int fshared = 0;
ktime_t t; ktime_t t;
t.tv64 = restart->futex.time; t.tv64 = restart->futex.time;
restart->fn = do_no_restart_syscall; restart->fn = do_no_restart_syscall;
if (restart->futex.flags & FLAGS_SHARED) if (restart->futex.flags & FLAGS_SHARED)
fshared = &current->mm->mmap_sem; fshared = 1;
return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
restart->futex.bitset); restart->futex.bitset);
} }
@ -1389,7 +1322,7 @@ static long futex_wait_restart(struct restart_block *restart)
* if there are waiters then it will block, it does PI, etc. (Due to * if there are waiters then it will block, it does PI, etc. (Due to
* races the kernel might see a 0 value of the futex too.) * races the kernel might see a 0 value of the futex too.)
*/ */
static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, static int futex_lock_pi(u32 __user *uaddr, int fshared,
int detect, ktime_t *time, int trylock) int detect, ktime_t *time, int trylock)
{ {
struct hrtimer_sleeper timeout, *to = NULL; struct hrtimer_sleeper timeout, *to = NULL;
@ -1412,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
q.pi_state = NULL; q.pi_state = NULL;
retry: retry:
futex_lock_mm(fshared); q.key = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr, fshared, &q.key); ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0)) if (unlikely(ret != 0))
goto out_release_sem; goto out_release_sem;
@ -1502,7 +1434,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
* exit to complete. * exit to complete.
*/ */
queue_unlock(&q, hb); queue_unlock(&q, hb);
futex_unlock_mm(fshared);
cond_resched(); cond_resched();
goto retry; goto retry;
@ -1534,12 +1465,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
*/ */
queue_me(&q, hb); queue_me(&q, hb);
/*
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
futex_unlock_mm(fshared);
WARN_ON(!q.pi_state); WARN_ON(!q.pi_state);
/* /*
* Block on the PI mutex: * Block on the PI mutex:
@ -1552,7 +1477,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
ret = ret ? 0 : -EWOULDBLOCK; ret = ret ? 0 : -EWOULDBLOCK;
} }
futex_lock_mm(fshared);
spin_lock(q.lock_ptr); spin_lock(q.lock_ptr);
if (!ret) { if (!ret) {
@ -1618,7 +1542,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
/* Unqueue and drop the lock */ /* Unqueue and drop the lock */
unqueue_me_pi(&q); unqueue_me_pi(&q);
futex_unlock_mm(fshared);
if (to) if (to)
destroy_hrtimer_on_stack(&to->timer); destroy_hrtimer_on_stack(&to->timer);
@ -1628,7 +1551,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
queue_unlock(&q, hb); queue_unlock(&q, hb);
out_release_sem: out_release_sem:
futex_unlock_mm(fshared); put_futex_key(fshared, &q.key);
if (to) if (to)
destroy_hrtimer_on_stack(&to->timer); destroy_hrtimer_on_stack(&to->timer);
return ret; return ret;
@ -1645,15 +1568,12 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
queue_unlock(&q, hb); queue_unlock(&q, hb);
if (attempt++) { if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, fshared, ret = futex_handle_fault((unsigned long)uaddr, attempt);
attempt);
if (ret) if (ret)
goto out_release_sem; goto out_release_sem;
goto retry_unlocked; goto retry_unlocked;
} }
futex_unlock_mm(fshared);
ret = get_user(uval, uaddr); ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT)) if (!ret && (uval != -EFAULT))
goto retry; goto retry;
@ -1668,13 +1588,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
* This is the in-kernel slowpath: we look up the PI state (if any), * This is the in-kernel slowpath: we look up the PI state (if any),
* and do the rt-mutex unlock. * and do the rt-mutex unlock.
*/ */
static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) static int futex_unlock_pi(u32 __user *uaddr, int fshared)
{ {
struct futex_hash_bucket *hb; struct futex_hash_bucket *hb;
struct futex_q *this, *next; struct futex_q *this, *next;
u32 uval; u32 uval;
struct plist_head *head; struct plist_head *head;
union futex_key key; union futex_key key = FUTEX_KEY_INIT;
int ret, attempt = 0; int ret, attempt = 0;
retry: retry:
@ -1685,10 +1605,6 @@ retry:
*/ */
if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
return -EPERM; return -EPERM;
/*
* First take all the futex related locks:
*/
futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &key); ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0)) if (unlikely(ret != 0))
@ -1747,7 +1663,7 @@ retry_unlocked:
out_unlock: out_unlock:
spin_unlock(&hb->lock); spin_unlock(&hb->lock);
out: out:
futex_unlock_mm(fshared); put_futex_key(fshared, &key);
return ret; return ret;
@ -1763,16 +1679,13 @@ pi_faulted:
spin_unlock(&hb->lock); spin_unlock(&hb->lock);
if (attempt++) { if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, fshared, ret = futex_handle_fault((unsigned long)uaddr, attempt);
attempt);
if (ret) if (ret)
goto out; goto out;
uval = 0; uval = 0;
goto retry_unlocked; goto retry_unlocked;
} }
futex_unlock_mm(fshared);
ret = get_user(uval, uaddr); ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT)) if (!ret && (uval != -EFAULT))
goto retry; goto retry;
@ -1898,8 +1811,7 @@ retry:
* PI futexes happens in exit_pi_state(): * PI futexes happens in exit_pi_state():
*/ */
if (!pi && (uval & FUTEX_WAITERS)) if (!pi && (uval & FUTEX_WAITERS))
futex_wake(uaddr, &curr->mm->mmap_sem, 1, futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
FUTEX_BITSET_MATCH_ANY);
} }
return 0; return 0;
} }
@ -1995,10 +1907,10 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
{ {
int ret = -ENOSYS; int ret = -ENOSYS;
int cmd = op & FUTEX_CMD_MASK; int cmd = op & FUTEX_CMD_MASK;
struct rw_semaphore *fshared = NULL; int fshared = 0;
if (!(op & FUTEX_PRIVATE_FLAG)) if (!(op & FUTEX_PRIVATE_FLAG))
fshared = &current->mm->mmap_sem; fshared = 1;
switch (cmd) { switch (cmd) {
case FUTEX_WAIT: case FUTEX_WAIT:

View file

@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
#ifdef CONFIG_LOCK_STAT #ifdef CONFIG_LOCK_STAT
static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
static int lock_contention_point(struct lock_class *class, unsigned long ip) static int lock_point(unsigned long points[], unsigned long ip)
{ {
int i; int i;
for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { for (i = 0; i < LOCKSTAT_POINTS; i++) {
if (class->contention_point[i] == 0) { if (points[i] == 0) {
class->contention_point[i] = ip; points[i] = ip;
break; break;
} }
if (class->contention_point[i] == ip) if (points[i] == ip)
break; break;
} }
@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class)
for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
stats.contention_point[i] += pcs->contention_point[i]; stats.contention_point[i] += pcs->contention_point[i];
for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
stats.contending_point[i] += pcs->contending_point[i];
lock_time_add(&pcs->read_waittime, &stats.read_waittime); lock_time_add(&pcs->read_waittime, &stats.read_waittime);
lock_time_add(&pcs->write_waittime, &stats.write_waittime); lock_time_add(&pcs->write_waittime, &stats.write_waittime);
@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class)
memset(cpu_stats, 0, sizeof(struct lock_class_stats)); memset(cpu_stats, 0, sizeof(struct lock_class_stats));
} }
memset(class->contention_point, 0, sizeof(class->contention_point)); memset(class->contention_point, 0, sizeof(class->contention_point));
memset(class->contending_point, 0, sizeof(class->contending_point));
} }
static struct lock_class_stats *get_lock_stats(struct lock_class *class) static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@ -2999,7 +3003,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
struct held_lock *hlock, *prev_hlock; struct held_lock *hlock, *prev_hlock;
struct lock_class_stats *stats; struct lock_class_stats *stats;
unsigned int depth; unsigned int depth;
int i, point; int i, contention_point, contending_point;
depth = curr->lockdep_depth; depth = curr->lockdep_depth;
if (DEBUG_LOCKS_WARN_ON(!depth)) if (DEBUG_LOCKS_WARN_ON(!depth))
@ -3023,18 +3027,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
found_it: found_it:
hlock->waittime_stamp = sched_clock(); hlock->waittime_stamp = sched_clock();
point = lock_contention_point(hlock_class(hlock), ip); contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
contending_point = lock_point(hlock_class(hlock)->contending_point,
lock->ip);
stats = get_lock_stats(hlock_class(hlock)); stats = get_lock_stats(hlock_class(hlock));
if (point < ARRAY_SIZE(stats->contention_point)) if (contention_point < LOCKSTAT_POINTS)
stats->contention_point[point]++; stats->contention_point[contention_point]++;
if (contending_point < LOCKSTAT_POINTS)
stats->contending_point[contending_point]++;
if (lock->cpu != smp_processor_id()) if (lock->cpu != smp_processor_id())
stats->bounces[bounce_contended + !!hlock->read]++; stats->bounces[bounce_contended + !!hlock->read]++;
put_lock_stats(stats); put_lock_stats(stats);
} }
static void static void
__lock_acquired(struct lockdep_map *lock) __lock_acquired(struct lockdep_map *lock, unsigned long ip)
{ {
struct task_struct *curr = current; struct task_struct *curr = current;
struct held_lock *hlock, *prev_hlock; struct held_lock *hlock, *prev_hlock;
@ -3083,6 +3091,7 @@ found_it:
put_lock_stats(stats); put_lock_stats(stats);
lock->cpu = cpu; lock->cpu = cpu;
lock->ip = ip;
} }
void lock_contended(struct lockdep_map *lock, unsigned long ip) void lock_contended(struct lockdep_map *lock, unsigned long ip)
@ -3104,7 +3113,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
} }
EXPORT_SYMBOL_GPL(lock_contended); EXPORT_SYMBOL_GPL(lock_contended);
void lock_acquired(struct lockdep_map *lock) void lock_acquired(struct lockdep_map *lock, unsigned long ip)
{ {
unsigned long flags; unsigned long flags;
@ -3117,7 +3126,7 @@ void lock_acquired(struct lockdep_map *lock)
raw_local_irq_save(flags); raw_local_irq_save(flags);
check_flags(flags); check_flags(flags);
current->lockdep_recursion = 1; current->lockdep_recursion = 1;
__lock_acquired(lock); __lock_acquired(lock, ip);
current->lockdep_recursion = 0; current->lockdep_recursion = 0;
raw_local_irq_restore(flags); raw_local_irq_restore(flags);
} }
@ -3276,10 +3285,10 @@ void __init lockdep_info(void)
{ {
printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH);
printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS);
printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES);
printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS);
printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE);

View file

@ -470,11 +470,12 @@ static void seq_line(struct seq_file *m, char c, int offset, int length)
static void snprint_time(char *buf, size_t bufsiz, s64 nr) static void snprint_time(char *buf, size_t bufsiz, s64 nr)
{ {
unsigned long rem; s64 div;
s32 rem;
nr += 5; /* for display rounding */ nr += 5; /* for display rounding */
rem = do_div(nr, 1000); /* XXX: do_div_signed */ div = div_s64_rem(nr, 1000, &rem);
snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10); snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10);
} }
static void seq_time(struct seq_file *m, s64 time) static void seq_time(struct seq_file *m, s64 time)
@ -556,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
if (stats->read_holdtime.nr) if (stats->read_holdtime.nr)
namelen += 2; namelen += 2;
for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { for (i = 0; i < LOCKSTAT_POINTS; i++) {
char sym[KSYM_SYMBOL_LEN]; char sym[KSYM_SYMBOL_LEN];
char ip[32]; char ip[32];
@ -573,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
stats->contention_point[i], stats->contention_point[i],
ip, sym); ip, sym);
} }
for (i = 0; i < LOCKSTAT_POINTS; i++) {
char sym[KSYM_SYMBOL_LEN];
char ip[32];
if (class->contending_point[i] == 0)
break;
if (!i)
seq_line(m, '-', 40-namelen, namelen);
sprint_symbol(sym, class->contending_point[i]);
snprintf(ip, sizeof(ip), "[<%p>]",
(void *)class->contending_point[i]);
seq_printf(m, "%40s %14lu %29s %s\n", name,
stats->contending_point[i],
ip, sym);
}
if (i) { if (i) {
seq_puts(m, "\n"); seq_puts(m, "\n");
seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
@ -582,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
static void seq_header(struct seq_file *m) static void seq_header(struct seq_file *m)
{ {
seq_printf(m, "lock_stat version 0.2\n"); seq_printf(m, "lock_stat version 0.3\n");
seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
"%14s %14s\n", "%14s %14s\n",

View file

@ -59,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init);
* We also put the fastpath first in the kernel image, to make sure the * We also put the fastpath first in the kernel image, to make sure the
* branch is predicted by the CPU as default-untaken. * branch is predicted by the CPU as default-untaken.
*/ */
static void noinline __sched static __used noinline void __sched
__mutex_lock_slowpath(atomic_t *lock_count); __mutex_lock_slowpath(atomic_t *lock_count);
/*** /***
@ -96,7 +96,7 @@ void inline __sched mutex_lock(struct mutex *lock)
EXPORT_SYMBOL(mutex_lock); EXPORT_SYMBOL(mutex_lock);
#endif #endif
static noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
/*** /***
* mutex_unlock - release the mutex * mutex_unlock - release the mutex
@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
} }
done: done:
lock_acquired(&lock->dep_map); lock_acquired(&lock->dep_map, ip);
/* got the lock - rejoice! */ /* got the lock - rejoice! */
mutex_remove_waiter(lock, &waiter, task_thread_info(task)); mutex_remove_waiter(lock, &waiter, task_thread_info(task));
debug_mutex_set_owner(lock, task_thread_info(task)); debug_mutex_set_owner(lock, task_thread_info(task));
@ -268,7 +268,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
/* /*
* Release the lock, slowpath: * Release the lock, slowpath:
*/ */
static noinline void static __used noinline void
__mutex_unlock_slowpath(atomic_t *lock_count) __mutex_unlock_slowpath(atomic_t *lock_count)
{ {
__mutex_unlock_common_slowpath(lock_count, 1); __mutex_unlock_common_slowpath(lock_count, 1);
@ -313,7 +313,7 @@ int __sched mutex_lock_killable(struct mutex *lock)
} }
EXPORT_SYMBOL(mutex_lock_killable); EXPORT_SYMBOL(mutex_lock_killable);
static noinline void __sched static __used noinline void __sched
__mutex_lock_slowpath(atomic_t *lock_count) __mutex_lock_slowpath(atomic_t *lock_count)
{ {
struct mutex *lock = container_of(lock_count, struct mutex, count); struct mutex *lock = container_of(lock_count, struct mutex, count);

View file

@ -82,6 +82,14 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
while (nb && nr_to_call) { while (nb && nr_to_call) {
next_nb = rcu_dereference(nb->next); next_nb = rcu_dereference(nb->next);
#ifdef CONFIG_DEBUG_NOTIFIERS
if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
WARN(1, "Invalid notifier called!");
nb = next_nb;
continue;
}
#endif
ret = nb->notifier_call(nb, val, v); ret = nb->notifier_call(nb, val, v);
if (nr_calls) if (nr_calls)

View file

@ -58,21 +58,21 @@ void thread_group_cputime(
struct task_struct *tsk, struct task_struct *tsk,
struct task_cputime *times) struct task_cputime *times)
{ {
struct signal_struct *sig; struct task_cputime *totals, *tot;
int i; int i;
struct task_cputime *tot;
sig = tsk->signal; totals = tsk->signal->cputime.totals;
if (unlikely(!sig) || !sig->cputime.totals) { if (!totals) {
times->utime = tsk->utime; times->utime = tsk->utime;
times->stime = tsk->stime; times->stime = tsk->stime;
times->sum_exec_runtime = tsk->se.sum_exec_runtime; times->sum_exec_runtime = tsk->se.sum_exec_runtime;
return; return;
} }
times->stime = times->utime = cputime_zero; times->stime = times->utime = cputime_zero;
times->sum_exec_runtime = 0; times->sum_exec_runtime = 0;
for_each_possible_cpu(i) { for_each_possible_cpu(i) {
tot = per_cpu_ptr(tsk->signal->cputime.totals, i); tot = per_cpu_ptr(totals, i);
times->utime = cputime_add(times->utime, tot->utime); times->utime = cputime_add(times->utime, tot->utime);
times->stime = cputime_add(times->stime, tot->stime); times->stime = cputime_add(times->stime, tot->stime);
times->sum_exec_runtime += tot->sum_exec_runtime; times->sum_exec_runtime += tot->sum_exec_runtime;

View file

@ -191,7 +191,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
/* OK, time to rat on our buddy... */ /* OK, time to rat on our buddy... */
printk(KERN_ERR "RCU detected CPU stalls:"); printk(KERN_ERR "INFO: RCU detected CPU stalls:");
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
if (cpu_isset(cpu, rcp->cpumask)) if (cpu_isset(cpu, rcp->cpumask))
printk(" %d", cpu); printk(" %d", cpu);
@ -204,7 +204,7 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp)
{ {
unsigned long flags; unsigned long flags;
printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
smp_processor_id(), jiffies, smp_processor_id(), jiffies,
jiffies - rcp->gp_start); jiffies - rcp->gp_start);
dump_stack(); dump_stack();

View file

@ -4202,7 +4202,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
if (p == rq->idle) { if (p == rq->idle) {
p->stime = cputime_add(p->stime, steal); p->stime = cputime_add(p->stime, steal);
account_group_system_time(p, steal);
if (atomic_read(&rq->nr_iowait) > 0) if (atomic_read(&rq->nr_iowait) > 0)
cpustat->iowait = cputime64_add(cpustat->iowait, tmp); cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
else else
@ -4338,7 +4337,7 @@ void __kprobes sub_preempt_count(int val)
/* /*
* Underflow? * Underflow?
*/ */
if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
return; return;
/* /*
* Is the spinlock portion underflowing? * Is the spinlock portion underflowing?

View file

@ -164,7 +164,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
/* /*
* Zero means infinite timeout - no checking done: * Zero means infinite timeout - no checking done:
*/ */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
unsigned long __read_mostly sysctl_hung_task_warnings = 10; unsigned long __read_mostly sysctl_hung_task_warnings = 10;

View file

@ -858,8 +858,8 @@ void do_sys_times(struct tms *tms)
struct task_cputime cputime; struct task_cputime cputime;
cputime_t cutime, cstime; cputime_t cutime, cstime;
spin_lock_irq(&current->sighand->siglock);
thread_group_cputime(current, &cputime); thread_group_cputime(current, &cputime);
spin_lock_irq(&current->sighand->siglock);
cutime = current->signal->cutime; cutime = current->signal->cutime;
cstime = current->signal->cstime; cstime = current->signal->cstime;
spin_unlock_irq(&current->sighand->siglock); spin_unlock_irq(&current->sighand->siglock);

View file

@ -545,6 +545,16 @@ config DEBUG_SG
If unsure, say N. If unsure, say N.
config DEBUG_NOTIFIERS
bool "Debug notifier call chains"
depends on DEBUG_KERNEL
help
Enable this to turn on sanity checking for notifier call chains.
This is most useful for kernel developers to make sure that
modules properly unregister themselves from notifier chains.
This is a relatively cheap check but if you care about maximum
performance, say N.
config FRAME_POINTER config FRAME_POINTER
bool "Compile the kernel with frame pointers" bool "Compile the kernel with frame pointers"
depends on DEBUG_KERNEL && \ depends on DEBUG_KERNEL && \

View file

@ -3049,3 +3049,18 @@ void print_vma_addr(char *prefix, unsigned long ip)
} }
up_read(&current->mm->mmap_sem); up_read(&current->mm->mmap_sem);
} }
#ifdef CONFIG_PROVE_LOCKING
void might_fault(void)
{
might_sleep();
/*
* it would be nicer only to annotate paths which are not under
* pagefault_disable, however that requires a larger audit and
* providing helpers like get_user_atomic.
*/
if (!in_atomic() && current->mm)
might_lock_read(&current->mm->mmap_sem);
}
EXPORT_SYMBOL(might_fault);
#endif