aha/lib/find_next_bit.c
Alexander van Heukelum 64970b68d2 x86, generic: optimize find_next_(zero_)bit for small constant-size bitmaps
This moves an optimization for searching constant-sized small
bitmaps form x86_64-specific to generic code.

On an i386 defconfig (the x86#testing one), the size of vmlinux hardly
changes with this applied. I have observed only four places where this
optimization avoids a call into find_next_bit:

In the functions return_unused_surplus_pages, alloc_fresh_huge_page,
and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap.
In __next_cpu a call is avoided for a 32-bit bitmap. That's it.

On x86_64, 52 locations are optimized with a minimal increase in
code size:

Current #testing defconfig:
	146 x bsf, 27 x find_next_*bit
   text    data     bss     dec     hex filename
   5392637  846592  724424 6963653  6a41c5 vmlinux

After removing the x86_64 specific optimization for find_next_*bit:
	94 x bsf, 79 x find_next_*bit
   text    data     bss     dec     hex filename
   5392358  846592  724424 6963374  6a40ae vmlinux

After this patch (making the optimization generic):
	146 x bsf, 27 x find_next_*bit
   text    data     bss     dec     hex filename
   5392396  846592  724424 6963412  6a40d4 vmlinux

[ tglx@linutronix.de: build fixes ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-04-26 19:21:16 +02:00

219 lines
5.1 KiB
C

/* find_next_bit.c: fallback find next bit implementation
*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/bitops.h>
#include <linux/module.h>
#include <asm/types.h>
#include <asm/byteorder.h>
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
/*
* Find the next set bit in a memory region.
*/
unsigned long __find_next_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp;
if (offset >= size)
return size;
size -= result;
offset %= BITS_PER_LONG;
if (offset) {
tmp = *(p++);
tmp &= (~0UL << offset);
if (size < BITS_PER_LONG)
goto found_first;
if (tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG-1)) {
if ((tmp = *(p++)))
goto found_middle;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = *p;
found_first:
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
return result + __ffs(tmp);
}
EXPORT_SYMBOL(__find_next_bit);
/*
* This implementation of find_{first,next}_zero_bit was stolen from
* Linus' asm-alpha/bitops.h.
*/
unsigned long __find_next_zero_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp;
if (offset >= size)
return size;
size -= result;
offset %= BITS_PER_LONG;
if (offset) {
tmp = *(p++);
tmp |= ~0UL >> (BITS_PER_LONG - offset);
if (size < BITS_PER_LONG)
goto found_first;
if (~tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG-1)) {
if (~(tmp = *(p++)))
goto found_middle;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = *p;
found_first:
tmp |= ~0UL << size;
if (tmp == ~0UL) /* Are any bits zero? */
return result + size; /* Nope. */
found_middle:
return result + ffz(tmp);
}
EXPORT_SYMBOL(__find_next_zero_bit);
#ifdef __BIG_ENDIAN
/* include/linux/byteorder does not support "unsigned long" type */
static inline unsigned long ext2_swabp(const unsigned long * x)
{
#if BITS_PER_LONG == 64
return (unsigned long) __swab64p((u64 *) x);
#elif BITS_PER_LONG == 32
return (unsigned long) __swab32p((u32 *) x);
#else
#error BITS_PER_LONG not defined
#endif
}
/* include/linux/byteorder doesn't support "unsigned long" type */
static inline unsigned long ext2_swab(const unsigned long y)
{
#if BITS_PER_LONG == 64
return (unsigned long) __swab64((u64) y);
#elif BITS_PER_LONG == 32
return (unsigned long) __swab32((u32) y);
#else
#error BITS_PER_LONG not defined
#endif
}
unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned
long size, unsigned long offset)
{
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp;
if (offset >= size)
return size;
size -= result;
offset &= (BITS_PER_LONG - 1UL);
if (offset) {
tmp = ext2_swabp(p++);
tmp |= (~0UL >> (BITS_PER_LONG - offset));
if (size < BITS_PER_LONG)
goto found_first;
if (~tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG - 1)) {
if (~(tmp = *(p++)))
goto found_middle_swap;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = ext2_swabp(p);
found_first:
tmp |= ~0UL << size;
if (tmp == ~0UL) /* Are any bits zero? */
return result + size; /* Nope. Skip ffz */
found_middle:
return result + ffz(tmp);
found_middle_swap:
return result + ffz(ext2_swab(tmp));
}
EXPORT_SYMBOL(generic_find_next_zero_le_bit);
unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned
long size, unsigned long offset)
{
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp;
if (offset >= size)
return size;
size -= result;
offset &= (BITS_PER_LONG - 1UL);
if (offset) {
tmp = ext2_swabp(p++);
tmp &= (~0UL << offset);
if (size < BITS_PER_LONG)
goto found_first;
if (tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG - 1)) {
tmp = *(p++);
if (tmp)
goto found_middle_swap;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = ext2_swabp(p);
found_first:
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
return result + __ffs(tmp);
found_middle_swap:
return result + __ffs(ext2_swab(tmp));
}
EXPORT_SYMBOL(generic_find_next_le_bit);
#endif /* __BIG_ENDIAN */