mutex: speed up generic mutex implementations
- Atomic operations which both modify the variable and return something imply
  full smp memory barriers before and after the memory operations involved
  (a failing atomic_cmpxchg, atomic_add_unless, etc. doesn't imply a barrier
  because it doesn't modify the target). See Documentation/atomic_ops.txt.
  So remove the extra barriers and branches.

- All architectures support atomic_cmpxchg. This has no relation to
  __HAVE_ARCH_CMPXCHG. We can just take the atomic_cmpxchg path unconditionally.

This reduces a simple single-threaded fastpath lock+unlock test from 590 cycles
to 203 cycles on a ppc970 system.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5a439c5657
commit a8ddac7e53

2 changed files with 3 additions and 32 deletions
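As an aside, the barrier argument above can be illustrated outside the kernel. The sketch below is a rough user-space analogue of the simplified fastpaths using C11 <stdatomic.h>; the names (count, fastpath_lock, slowpath_lock, etc.) are invented for illustration and this is not the kernel code. It demonstrates the point the patch relies on: a sequentially consistent read-modify-write (atomic_fetch_sub, atomic_fetch_add, atomic_compare_exchange_strong) already orders memory on both sides, so no separate fence is needed, and compare-and-swap is always available, so no #ifdef fallback is needed either.

/* User-space sketch only -- the kernel operates on atomic_t and calls
 * out-of-line fail_fn() slowpaths instead of these stubs. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int count = 1;    /* 1: unlocked, 0: locked, <0: contended */

static void slowpath_lock(void)   { /* a real mutex would block here */ }
static void slowpath_unlock(void) { /* a real mutex would wake a waiter here */ }

static void fastpath_lock(void)
{
        /* fetch_sub returns the old value; the seq_cst RMW itself is the barrier */
        if (atomic_fetch_sub(&count, 1) - 1 < 0)
                slowpath_lock();
}

static void fastpath_unlock(void)
{
        if (atomic_fetch_add(&count, 1) + 1 <= 0)
                slowpath_unlock();
}

static int fastpath_trylock(void)
{
        int expected = 1;

        /* compare-and-swap taken unconditionally; no arch-specific fallback */
        return atomic_compare_exchange_strong(&count, &expected, 0);
}

int main(void)
{
        fastpath_lock();
        printf("trylock while held: %d\n", fastpath_trylock());  /* prints 0 */
        fastpath_unlock();
        printf("trylock when free:  %d\n", fastpath_trylock());  /* prints 1 */
        return 0;
}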
--- a/include/asm-generic/mutex-dec.h
+++ b/include/asm-generic/mutex-dec.h
@@ -22,8 +22,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
        if (unlikely(atomic_dec_return(count) < 0))
                fail_fn(count);
-       else
-               smp_mb();
 }
 
 /**
@@ -41,10 +39,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
        if (unlikely(atomic_dec_return(count) < 0))
                return fail_fn(count);
-       else {
-               smp_mb();
-               return 0;
-       }
+       return 0;
 }
 
 /**
@@ -63,7 +58,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-       smp_mb();
        if (unlikely(atomic_inc_return(count) <= 0))
                fail_fn(count);
 }
@@ -88,25 +82,9 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-       /*
-        * We have two variants here. The cmpxchg based one is the best one
-        * because it never induce a false contention state. It is included
-        * here because architectures using the inc/dec algorithms over the
-        * xchg ones are much more likely to support cmpxchg natively.
-        *
-        * If not we fall back to the spinlock based variant - that is
-        * just as efficient (and simpler) as a 'destructive' probing of
-        * the mutex state would be.
-        */
-#ifdef __HAVE_ARCH_CMPXCHG
-       if (likely(atomic_cmpxchg(count, 1, 0) == 1)) {
-               smp_mb();
+       if (likely(atomic_cmpxchg(count, 1, 0) == 1))
                return 1;
-       }
        return 0;
-#else
-       return fail_fn(count);
-#endif
 }
 
 #endif
--- a/include/asm-generic/mutex-xchg.h
+++ b/include/asm-generic/mutex-xchg.h
@@ -27,8 +27,6 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
        if (unlikely(atomic_xchg(count, 0) != 1))
                fail_fn(count);
-       else
-               smp_mb();
 }
 
 /**
@@ -46,10 +44,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
        if (unlikely(atomic_xchg(count, 0) != 1))
                return fail_fn(count);
-       else {
-               smp_mb();
-               return 0;
-       }
+       return 0;
 }
 
 /**
@@ -67,7 +62,6 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-       smp_mb();
        if (unlikely(atomic_xchg(count, 1) != 0))
                fail_fn(count);
 }
@@ -110,7 +104,6 @@ __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
                if (prev < 0)
                        prev = 0;
        }
-       smp_mb();
 
        return prev;
 }