mirror of
https://github.com/adulau/aha.git
synced 2024-12-27 11:16:11 +00:00
64970b68d2
This moves an optimization for searching constant-sized small bitmaps form x86_64-specific to generic code. On an i386 defconfig (the x86#testing one), the size of vmlinux hardly changes with this applied. I have observed only four places where this optimization avoids a call into find_next_bit: In the functions return_unused_surplus_pages, alloc_fresh_huge_page, and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap. In __next_cpu a call is avoided for a 32-bit bitmap. That's it. On x86_64, 52 locations are optimized with a minimal increase in code size: Current #testing defconfig: 146 x bsf, 27 x find_next_*bit text data bss dec hex filename 5392637 846592 724424 6963653 6a41c5 vmlinux After removing the x86_64 specific optimization for find_next_*bit: 94 x bsf, 79 x find_next_*bit text data bss dec hex filename 5392358 846592 724424 6963374 6a40ae vmlinux After this patch (making the optimization generic): 146 x bsf, 27 x find_next_*bit text data bss dec hex filename 5392396 846592 724424 6963412 6a40d4 vmlinux [ tglx@linutronix.de: build fixes ] Signed-off-by: Ingo Molnar <mingo@elte.hu>
219 lines
5.1 KiB
C
219 lines
5.1 KiB
C
/* find_next_bit.c: fallback find next bit implementation
|
|
*
|
|
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#include <linux/bitops.h>
|
|
#include <linux/module.h>
|
|
#include <asm/types.h>
|
|
#include <asm/byteorder.h>
|
|
|
|
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
|
|
|
|
/*
|
|
* Find the next set bit in a memory region.
|
|
*/
|
|
unsigned long __find_next_bit(const unsigned long *addr,
|
|
unsigned long size, unsigned long offset)
|
|
{
|
|
const unsigned long *p = addr + BITOP_WORD(offset);
|
|
unsigned long result = offset & ~(BITS_PER_LONG-1);
|
|
unsigned long tmp;
|
|
|
|
if (offset >= size)
|
|
return size;
|
|
size -= result;
|
|
offset %= BITS_PER_LONG;
|
|
if (offset) {
|
|
tmp = *(p++);
|
|
tmp &= (~0UL << offset);
|
|
if (size < BITS_PER_LONG)
|
|
goto found_first;
|
|
if (tmp)
|
|
goto found_middle;
|
|
size -= BITS_PER_LONG;
|
|
result += BITS_PER_LONG;
|
|
}
|
|
while (size & ~(BITS_PER_LONG-1)) {
|
|
if ((tmp = *(p++)))
|
|
goto found_middle;
|
|
result += BITS_PER_LONG;
|
|
size -= BITS_PER_LONG;
|
|
}
|
|
if (!size)
|
|
return result;
|
|
tmp = *p;
|
|
|
|
found_first:
|
|
tmp &= (~0UL >> (BITS_PER_LONG - size));
|
|
if (tmp == 0UL) /* Are any bits set? */
|
|
return result + size; /* Nope. */
|
|
found_middle:
|
|
return result + __ffs(tmp);
|
|
}
|
|
EXPORT_SYMBOL(__find_next_bit);
|
|
|
|
/*
|
|
* This implementation of find_{first,next}_zero_bit was stolen from
|
|
* Linus' asm-alpha/bitops.h.
|
|
*/
|
|
unsigned long __find_next_zero_bit(const unsigned long *addr,
|
|
unsigned long size, unsigned long offset)
|
|
{
|
|
const unsigned long *p = addr + BITOP_WORD(offset);
|
|
unsigned long result = offset & ~(BITS_PER_LONG-1);
|
|
unsigned long tmp;
|
|
|
|
if (offset >= size)
|
|
return size;
|
|
size -= result;
|
|
offset %= BITS_PER_LONG;
|
|
if (offset) {
|
|
tmp = *(p++);
|
|
tmp |= ~0UL >> (BITS_PER_LONG - offset);
|
|
if (size < BITS_PER_LONG)
|
|
goto found_first;
|
|
if (~tmp)
|
|
goto found_middle;
|
|
size -= BITS_PER_LONG;
|
|
result += BITS_PER_LONG;
|
|
}
|
|
while (size & ~(BITS_PER_LONG-1)) {
|
|
if (~(tmp = *(p++)))
|
|
goto found_middle;
|
|
result += BITS_PER_LONG;
|
|
size -= BITS_PER_LONG;
|
|
}
|
|
if (!size)
|
|
return result;
|
|
tmp = *p;
|
|
|
|
found_first:
|
|
tmp |= ~0UL << size;
|
|
if (tmp == ~0UL) /* Are any bits zero? */
|
|
return result + size; /* Nope. */
|
|
found_middle:
|
|
return result + ffz(tmp);
|
|
}
|
|
EXPORT_SYMBOL(__find_next_zero_bit);
|
|
|
|
#ifdef __BIG_ENDIAN
|
|
|
|
/* include/linux/byteorder does not support "unsigned long" type */
|
|
static inline unsigned long ext2_swabp(const unsigned long * x)
|
|
{
|
|
#if BITS_PER_LONG == 64
|
|
return (unsigned long) __swab64p((u64 *) x);
|
|
#elif BITS_PER_LONG == 32
|
|
return (unsigned long) __swab32p((u32 *) x);
|
|
#else
|
|
#error BITS_PER_LONG not defined
|
|
#endif
|
|
}
|
|
|
|
/* include/linux/byteorder doesn't support "unsigned long" type */
|
|
static inline unsigned long ext2_swab(const unsigned long y)
|
|
{
|
|
#if BITS_PER_LONG == 64
|
|
return (unsigned long) __swab64((u64) y);
|
|
#elif BITS_PER_LONG == 32
|
|
return (unsigned long) __swab32((u32) y);
|
|
#else
|
|
#error BITS_PER_LONG not defined
|
|
#endif
|
|
}
|
|
|
|
unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned
|
|
long size, unsigned long offset)
|
|
{
|
|
const unsigned long *p = addr + BITOP_WORD(offset);
|
|
unsigned long result = offset & ~(BITS_PER_LONG - 1);
|
|
unsigned long tmp;
|
|
|
|
if (offset >= size)
|
|
return size;
|
|
size -= result;
|
|
offset &= (BITS_PER_LONG - 1UL);
|
|
if (offset) {
|
|
tmp = ext2_swabp(p++);
|
|
tmp |= (~0UL >> (BITS_PER_LONG - offset));
|
|
if (size < BITS_PER_LONG)
|
|
goto found_first;
|
|
if (~tmp)
|
|
goto found_middle;
|
|
size -= BITS_PER_LONG;
|
|
result += BITS_PER_LONG;
|
|
}
|
|
|
|
while (size & ~(BITS_PER_LONG - 1)) {
|
|
if (~(tmp = *(p++)))
|
|
goto found_middle_swap;
|
|
result += BITS_PER_LONG;
|
|
size -= BITS_PER_LONG;
|
|
}
|
|
if (!size)
|
|
return result;
|
|
tmp = ext2_swabp(p);
|
|
found_first:
|
|
tmp |= ~0UL << size;
|
|
if (tmp == ~0UL) /* Are any bits zero? */
|
|
return result + size; /* Nope. Skip ffz */
|
|
found_middle:
|
|
return result + ffz(tmp);
|
|
|
|
found_middle_swap:
|
|
return result + ffz(ext2_swab(tmp));
|
|
}
|
|
|
|
EXPORT_SYMBOL(generic_find_next_zero_le_bit);
|
|
|
|
unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned
|
|
long size, unsigned long offset)
|
|
{
|
|
const unsigned long *p = addr + BITOP_WORD(offset);
|
|
unsigned long result = offset & ~(BITS_PER_LONG - 1);
|
|
unsigned long tmp;
|
|
|
|
if (offset >= size)
|
|
return size;
|
|
size -= result;
|
|
offset &= (BITS_PER_LONG - 1UL);
|
|
if (offset) {
|
|
tmp = ext2_swabp(p++);
|
|
tmp &= (~0UL << offset);
|
|
if (size < BITS_PER_LONG)
|
|
goto found_first;
|
|
if (tmp)
|
|
goto found_middle;
|
|
size -= BITS_PER_LONG;
|
|
result += BITS_PER_LONG;
|
|
}
|
|
|
|
while (size & ~(BITS_PER_LONG - 1)) {
|
|
tmp = *(p++);
|
|
if (tmp)
|
|
goto found_middle_swap;
|
|
result += BITS_PER_LONG;
|
|
size -= BITS_PER_LONG;
|
|
}
|
|
if (!size)
|
|
return result;
|
|
tmp = ext2_swabp(p);
|
|
found_first:
|
|
tmp &= (~0UL >> (BITS_PER_LONG - size));
|
|
if (tmp == 0UL) /* Are any bits set? */
|
|
return result + size; /* Nope. */
|
|
found_middle:
|
|
return result + __ffs(tmp);
|
|
|
|
found_middle_swap:
|
|
return result + __ffs(ext2_swab(tmp));
|
|
}
|
|
EXPORT_SYMBOL(generic_find_next_le_bit);
|
|
#endif /* __BIG_ENDIAN */
|