f595ec964d
We have a few instances of the open-coded iterative div/mod loop, used when we don't expect the dividend to be much bigger than the divisor. Unfortunately, modern versions of gcc tend to strength-"reduce" this into a full mod operation, which isn't necessarily any faster, and which doesn't exist at all if gcc implements it as a libgcc call (the kernel doesn't link against libgcc).

The workaround is to put a dummy asm statement in the loop to prevent gcc from performing the transformation.

This patch creates a single implementation of this loop, and uses it to replace the open-coded versions I know about.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Cc: Christian Kujau <lists@nerdbynature.de>
Cc: Robert Hancock <hancockr@shaw.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
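For illustration (a hypothetical caller, not code taken from the patch itself), an open-coded conversion loop such as

	while (nsec >= NSEC_PER_SEC) {
		nsec -= NSEC_PER_SEC;
		sec++;
	}

can now be replaced with a single call to the new helper:

	sec += iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);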
/*
 * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
 *
 * Based on former do_div() implementation from asm-parisc/div64.h:
 *	Copyright (C) 1999 Hewlett-Packard Co
 *	Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * Generic C version of 64bit/32bit division and modulo, with
 * 64bit result and 32bit remainder.
 *
 * The fast case for (n>>32 == 0) is handled inline by do_div().
 *
 * Code generated for this function might be very inefficient
 * for some CPUs. __div64_32() can be overridden by linking arch-specific
 * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S.
 */

#include <linux/module.h>
#include <linux/math64.h>

/* Not needed on 64bit architectures */
#if BITS_PER_LONG == 32

uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
{
	uint64_t rem = *n;
	uint64_t b = base;
	uint64_t res, d = 1;
	uint32_t high = rem >> 32;

	/* Reduce the thing a bit first */
	res = 0;
	if (high >= base) {
		high /= base;
		res = (uint64_t) high << 32;
		rem -= (uint64_t) (high*base) << 32;
	}

	while ((int64_t)b > 0 && b < rem) {
		b = b+b;
		d = d+d;
	}

	do {
		if (rem >= b) {
			rem -= b;
			res += d;
		}
		b >>= 1;
		d >>= 1;
	} while (d);

	*n = res;
	return rem;
}

EXPORT_SYMBOL(__div64_32);
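
/*
 * Illustrative usage (not part of the original file): __div64_32() is
 * normally reached through the do_div() macro, which divides a 64-bit
 * value in place and returns the 32-bit remainder:
 *
 *	u64 ns = 1000000123;
 *	u32 rem = do_div(ns, 1000000000);
 *
 * Afterwards ns == 1 and rem == 123; note that the base must fit in
 * 32 bits, and the fast path (upper 32 bits of ns already zero) never
 * calls into this function at all.
 */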

#ifndef div_s64_rem
s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
{
	u64 quotient;

	if (dividend < 0) {
		quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
		*remainder = -*remainder;
		if (divisor > 0)
			quotient = -quotient;
	} else {
		quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
		if (divisor < 0)
			quotient = -quotient;
	}
	return quotient;
}
EXPORT_SYMBOL(div_s64_rem);
#endif
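
/*
 * Illustrative note (not part of the original file): as the sign
 * handling above shows, the quotient is truncated towards zero and the
 * remainder takes the sign of the dividend. For example,
 *
 *	s32 rem;
 *	s64 q = div_s64_rem(-7, 2, &rem);
 *
 * yields q == -3 and rem == -1, so dividend == divisor * q + rem holds.
 */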

/* 64bit divisor, dividend and result. dynamic precision */
#ifndef div64_u64
u64 div64_u64(u64 dividend, u64 divisor)
{
	u32 high, d;

	high = divisor >> 32;
	if (high) {
		unsigned int shift = fls(high);

		d = divisor >> shift;
		dividend >>= shift;
	} else
		d = divisor;

	return div_u64(dividend, d);
}
EXPORT_SYMBOL(div64_u64);
#endif
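
/*
 * Illustrative note (not part of the original file): when the divisor
 * fits in 32 bits the result is exact, e.g.
 *
 *	u64 q = div64_u64(10000000000ULL, 3);
 *
 * yields q == 3333333333. When the divisor has bits set above bit 31,
 * both operands are pre-shifted until the divisor fits in 32 bits;
 * this is the "dynamic precision" trade-off noted above, and the
 * quotient may then be slightly off because of the truncated low bits.
 */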

#endif /* BITS_PER_LONG == 32 */

/*
 * Iterative div/mod for use when dividend is not expected to be much
 * bigger than divisor.
 */
u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
{
	u32 ret = 0;

	while (dividend >= divisor) {
		/* The following asm() prevents the compiler from
		   optimising this loop into a modulo operation. */
		asm("" : "+rm"(dividend));

		dividend -= divisor;
		ret++;
	}

	*remainder = dividend;

	return ret;
}
EXPORT_SYMBOL(iter_div_u64_rem);
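
/*
 * Illustrative usage (not part of the original file): splitting a
 * nanosecond count that is only expected to span a few seconds:
 *
 *	u64 nsec = 2000000123;
 *	u64 rem;
 *	u32 sec = iter_div_u64_rem(nsec, 1000000000, &rem);
 *
 * yields sec == 2 and rem == 123; each iteration costs only a compare
 * and a subtract, which beats a full 64-bit division when the quotient
 * is known to be small.
 */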