mirror of
https://github.com/adulau/aha.git
synced 2024-12-27 19:26:25 +00:00
x86: change x86 to use generic find_next_bit
The versions with inline assembly are in fact slower on the machines I
tested them on (in userspace) (Athlon XP 2800+, p4-like Xeon 2.8GHz, AMD
Opteron 270). The i386 version needed a fix similar to commit 06024f21 to
avoid crashing the benchmark.
Benchmark using: gcc -fomit-frame-pointer -Os. For each bitmap size
1...512, for each possible bitmap with one bit set, for each possible
offset: find the position of the first set bit at or after that offset (if
you follow ;)). Times include setup of the bitmap and checking of the
results.
               Athlon     Xeon       Opteron 32/64bit
x86-specific:  0m3.692s   0m2.820s   0m3.196s / 0m2.480s
generic:       0m2.622s   0m1.662s   0m2.100s / 0m1.572s
If the bitmap size is not a multiple of BITS_PER_LONG, and no set
(cleared) bit is found, the x86-specific find_next_bit (find_next_zero_bit)
returns a value outside of the range [0, size]. The generic version always returns
exactly size. The generic version also uses unsigned long everywhere,
while the x86 versions use a mishmash of int, unsigned (int), long and
unsigned long.
Using the generic version does give a slightly bigger kernel, though.
defconfig:     text     data    bss     dec      hex     filename
x86-specific:  4738555  481232  626688  5846475  5935cb  vmlinux (32 bit)
generic:       4738621  481232  626688  5846541  59360d  vmlinux (32 bit)
x86-specific:  5392395  846568  724424  6963387  6a40bb  vmlinux (64 bit)
generic:       5392458  846568  724424  6963450  6a40fa  vmlinux (64 bit)
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
18e413f719
commit
6fd92b63d0
8 changed files with 12 additions and 157 deletions
|
@ -77,6 +77,9 @@ config GENERIC_BUG
|
|||
def_bool y
|
||||
depends on BUG
|
||||
|
||||
config GENERIC_FIND_NEXT_BIT
|
||||
def_bool y
|
||||
|
||||
config GENERIC_HWEIGHT
|
||||
def_bool y
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ lib-y += memcpy_$(BITS).o
|
|||
ifeq ($(CONFIG_X86_32),y)
|
||||
lib-y += checksum_32.o
|
||||
lib-y += strstr_32.o
|
||||
lib-y += bitops_32.o semaphore_32.o string_32.o
|
||||
lib-y += semaphore_32.o string_32.o
|
||||
|
||||
lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
|
||||
else
|
||||
|
|
|
@ -1,70 +0,0 @@
|
|||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/**
|
||||
* find_next_bit - find the next set bit in a memory region
|
||||
* @addr: The address to base the search on
|
||||
* @offset: The bitnumber to start searching at
|
||||
* @size: The maximum size to search
|
||||
*/
|
||||
int find_next_bit(const unsigned long *addr, int size, int offset)
|
||||
{
|
||||
const unsigned long *p = addr + (offset >> 5);
|
||||
int set = 0, bit = offset & 31, res;
|
||||
|
||||
if (bit) {
|
||||
/*
|
||||
* Look for nonzero in the first 32 bits:
|
||||
*/
|
||||
__asm__("bsfl %1,%0\n\t"
|
||||
"jne 1f\n\t"
|
||||
"movl $32, %0\n"
|
||||
"1:"
|
||||
: "=r" (set)
|
||||
: "r" (*p >> bit));
|
||||
if (set < (32 - bit))
|
||||
return set + offset;
|
||||
set = 32 - bit;
|
||||
p++;
|
||||
}
|
||||
/*
|
||||
* No set bit yet, search remaining full words for a bit
|
||||
*/
|
||||
res = find_first_bit (p, size - 32 * (p - addr));
|
||||
return (offset + set + res);
|
||||
}
|
||||
EXPORT_SYMBOL(find_next_bit);
|
||||
|
||||
/**
|
||||
* find_next_zero_bit - find the first zero bit in a memory region
|
||||
* @addr: The address to base the search on
|
||||
* @offset: The bitnumber to start searching at
|
||||
* @size: The maximum size to search
|
||||
*/
|
||||
int find_next_zero_bit(const unsigned long *addr, int size, int offset)
|
||||
{
|
||||
const unsigned long *p = addr + (offset >> 5);
|
||||
int set = 0, bit = offset & 31, res;
|
||||
|
||||
if (bit) {
|
||||
/*
|
||||
* Look for zero in the first 32 bits.
|
||||
*/
|
||||
__asm__("bsfl %1,%0\n\t"
|
||||
"jne 1f\n\t"
|
||||
"movl $32, %0\n"
|
||||
"1:"
|
||||
: "=r" (set)
|
||||
: "r" (~(*p >> bit)));
|
||||
if (set < (32 - bit))
|
||||
return set + offset;
|
||||
set = 32 - bit;
|
||||
p++;
|
||||
}
|
||||
/*
|
||||
* No zero yet, search remaining full bytes for a zero
|
||||
*/
|
||||
res = find_first_zero_bit(p, size - 32 * (p - addr));
|
||||
return (offset + set + res);
|
||||
}
|
||||
EXPORT_SYMBOL(find_next_zero_bit);
|
|
@ -1,9 +1,7 @@
|
|||
#include <linux/bitops.h>
|
||||
|
||||
#undef find_first_zero_bit
|
||||
#undef find_next_zero_bit
|
||||
#undef find_first_bit
|
||||
#undef find_next_bit
|
||||
|
||||
static inline long
|
||||
__find_first_zero_bit(const unsigned long * addr, unsigned long size)
|
||||
|
@ -57,39 +55,6 @@ long find_first_zero_bit(const unsigned long * addr, unsigned long size)
|
|||
return __find_first_zero_bit (addr, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* find_next_zero_bit - find the next zero bit in a memory region
|
||||
* @addr: The address to base the search on
|
||||
* @offset: The bitnumber to start searching at
|
||||
* @size: The maximum size to search
|
||||
*/
|
||||
long find_next_zero_bit (const unsigned long * addr, long size, long offset)
|
||||
{
|
||||
const unsigned long * p = addr + (offset >> 6);
|
||||
unsigned long set = 0;
|
||||
unsigned long res, bit = offset&63;
|
||||
|
||||
if (bit) {
|
||||
/*
|
||||
* Look for zero in first word
|
||||
*/
|
||||
asm("bsfq %1,%0\n\t"
|
||||
"cmoveq %2,%0"
|
||||
: "=r" (set)
|
||||
: "r" (~(*p >> bit)), "r"(64L));
|
||||
if (set < (64 - bit))
|
||||
return set + offset;
|
||||
set = 64 - bit;
|
||||
p++;
|
||||
}
|
||||
/*
|
||||
* No zero yet, search remaining full words for a zero
|
||||
*/
|
||||
res = __find_first_zero_bit (p, size - 64 * (p - addr));
|
||||
|
||||
return (offset + set + res);
|
||||
}
|
||||
|
||||
static inline long
|
||||
__find_first_bit(const unsigned long * addr, unsigned long size)
|
||||
{
|
||||
|
@ -136,40 +101,7 @@ long find_first_bit(const unsigned long * addr, unsigned long size)
|
|||
return __find_first_bit(addr,size);
|
||||
}
|
||||
|
||||
/**
|
||||
* find_next_bit - find the first set bit in a memory region
|
||||
* @addr: The address to base the search on
|
||||
* @offset: The bitnumber to start searching at
|
||||
* @size: The maximum size to search
|
||||
*/
|
||||
long find_next_bit(const unsigned long * addr, long size, long offset)
|
||||
{
|
||||
const unsigned long * p = addr + (offset >> 6);
|
||||
unsigned long set = 0, bit = offset & 63, res;
|
||||
|
||||
if (bit) {
|
||||
/*
|
||||
* Look for nonzero in the first 64 bits:
|
||||
*/
|
||||
asm("bsfq %1,%0\n\t"
|
||||
"cmoveq %2,%0\n\t"
|
||||
: "=r" (set)
|
||||
: "r" (*p >> bit), "r" (64L));
|
||||
if (set < (64 - bit))
|
||||
return set + offset;
|
||||
set = 64 - bit;
|
||||
p++;
|
||||
}
|
||||
/*
|
||||
* No set bit yet, search remaining full words for a bit
|
||||
*/
|
||||
res = __find_first_bit (p, size - 64 * (p - addr));
|
||||
return (offset + set + res);
|
||||
}
|
||||
|
||||
#include <linux/module.h>
|
||||
|
||||
EXPORT_SYMBOL(find_next_bit);
|
||||
EXPORT_SYMBOL(find_first_bit);
|
||||
EXPORT_SYMBOL(find_first_zero_bit);
|
||||
EXPORT_SYMBOL(find_next_zero_bit);
|
||||
|
|
|
@ -306,6 +306,12 @@ static int test_bit(int nr, const volatile unsigned long *addr);
|
|||
#undef BIT_ADDR
|
||||
#undef ADDR
|
||||
|
||||
unsigned long find_next_bit(const unsigned long *addr,
|
||||
unsigned long size, unsigned long offset);
|
||||
unsigned long find_next_zero_bit(const unsigned long *addr,
|
||||
unsigned long size, unsigned long offset);
|
||||
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
# include "bitops_32.h"
|
||||
#else
|
||||
|
|
|
@ -39,14 +39,6 @@ static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
|
|||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* find_next_zero_bit - find the first zero bit in a memory region
|
||||
* @addr: The address to base the search on
|
||||
* @offset: The bit number to start searching at
|
||||
* @size: The maximum size to search
|
||||
*/
|
||||
int find_next_zero_bit(const unsigned long *addr, int size, int offset);
|
||||
|
||||
/**
|
||||
* __ffs - find first bit in word.
|
||||
* @word: The word to search
|
||||
|
@ -82,14 +74,6 @@ static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
|
|||
return x;
|
||||
}
|
||||
|
||||
/**
|
||||
* find_next_bit - find the first set bit in a memory region
|
||||
* @addr: The address to base the search on
|
||||
* @offset: The bit number to start searching at
|
||||
* @size: The maximum size to search
|
||||
*/
|
||||
int find_next_bit(const unsigned long *addr, int size, int offset);
|
||||
|
||||
/**
|
||||
* ffz - find first zero in word.
|
||||
* @word: The word to search
|
||||
|
|
|
@ -6,9 +6,7 @@
|
|||
*/
|
||||
|
||||
extern long find_first_zero_bit(const unsigned long *addr, unsigned long size);
|
||||
extern long find_next_zero_bit(const unsigned long *addr, long size, long offset);
|
||||
extern long find_first_bit(const unsigned long *addr, unsigned long size);
|
||||
extern long find_next_bit(const unsigned long *addr, long size, long offset);
|
||||
|
||||
/* return index of first bet set in val or max when no bit is set */
|
||||
static inline long __scanbit(unsigned long val, unsigned long max)
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
#include <asm/byteorder.h>
|
||||
|
||||
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
|
||||
#undef find_next_bit
|
||||
#undef find_next_zero_bit
|
||||
|
||||
/**
|
||||
* find_next_bit - find the next set bit in a memory region
|
||||
|
|
Loading…
Reference in a new issue