Merge branch 'slub-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/christoph/vm

* 'slub-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/christoph/vm:
  SLUB: fix checkpatch warnings
  Use non atomic unlock
  SLUB: Support for performance statistics
  SLUB: Alternate fast paths using cmpxchg_local
  SLUB: Use unique end pointer for each slab page.
  SLUB: Deal with annoying gcc warning on kfree()
This commit is contained in:
Linus Torvalds 2008-02-07 18:22:29 -08:00
commit c00f08d705
6 changed files with 457 additions and 63 deletions

View file

@ -32,6 +32,13 @@ struct slabinfo {
int sanity_checks, slab_size, store_user, trace;
int order, poison, reclaim_account, red_zone;
unsigned long partial, objects, slabs;
unsigned long alloc_fastpath, alloc_slowpath;
unsigned long free_fastpath, free_slowpath;
unsigned long free_frozen, free_add_partial, free_remove_partial;
unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill;
unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
unsigned long deactivate_to_head, deactivate_to_tail;
unsigned long deactivate_remote_frees;
int numa[MAX_NODES];
int numa_partial[MAX_NODES];
} slabinfo[MAX_SLABS];
@ -64,8 +71,10 @@ int show_inverted = 0;
int show_single_ref = 0;
int show_totals = 0;
int sort_size = 0;
int sort_active = 0;
int set_debug = 0;
int show_ops = 0;
int show_activity = 0;
/* Debug options */
int sanity = 0;
@ -93,8 +102,10 @@ void usage(void)
printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n"
"slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
"-a|--aliases Show aliases\n"
"-A|--activity Most active slabs first\n"
"-d<options>|--debug=<options> Set/Clear Debug options\n"
"-e|--empty Show empty slabs\n"
"-D|--display-active Switch line format to activity\n"
"-e|--empty Show empty slabs\n"
"-f|--first-alias Show first alias\n"
"-h|--help Show usage information\n"
"-i|--inverted Inverted list\n"
@ -281,8 +292,11 @@ int line = 0;
void first_line(void)
{
printf("Name Objects Objsize Space "
"Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n");
if (show_activity)
printf("Name Objects Alloc Free %%Fast\n");
else
printf("Name Objects Objsize Space "
"Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n");
}
/*
@ -309,6 +323,12 @@ unsigned long slab_size(struct slabinfo *s)
return s->slabs * (page_size << s->order);
}
unsigned long slab_activity(struct slabinfo *s)
{
return s->alloc_fastpath + s->free_fastpath +
s->alloc_slowpath + s->free_slowpath;
}
void slab_numa(struct slabinfo *s, int mode)
{
int node;
@ -392,6 +412,71 @@ const char *onoff(int x)
return "Off";
}
void slab_stats(struct slabinfo *s)
{
unsigned long total_alloc;
unsigned long total_free;
unsigned long total;
if (!s->alloc_slab)
return;
total_alloc = s->alloc_fastpath + s->alloc_slowpath;
total_free = s->free_fastpath + s->free_slowpath;
if (!total_alloc)
return;
printf("\n");
printf("Slab Perf Counter Alloc Free %%Al %%Fr\n");
printf("--------------------------------------------------\n");
printf("Fastpath %8lu %8lu %3lu %3lu\n",
s->alloc_fastpath, s->free_fastpath,
s->alloc_fastpath * 100 / total_alloc,
s->free_fastpath * 100 / total_free);
printf("Slowpath %8lu %8lu %3lu %3lu\n",
total_alloc - s->alloc_fastpath, s->free_slowpath,
(total_alloc - s->alloc_fastpath) * 100 / total_alloc,
s->free_slowpath * 100 / total_free);
printf("Page Alloc %8lu %8lu %3lu %3lu\n",
s->alloc_slab, s->free_slab,
s->alloc_slab * 100 / total_alloc,
s->free_slab * 100 / total_free);
printf("Add partial %8lu %8lu %3lu %3lu\n",
s->deactivate_to_head + s->deactivate_to_tail,
s->free_add_partial,
(s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc,
s->free_add_partial * 100 / total_free);
printf("Remove partial %8lu %8lu %3lu %3lu\n",
s->alloc_from_partial, s->free_remove_partial,
s->alloc_from_partial * 100 / total_alloc,
s->free_remove_partial * 100 / total_free);
printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
s->deactivate_remote_frees, s->free_frozen,
s->deactivate_remote_frees * 100 / total_alloc,
s->free_frozen * 100 / total_free);
printf("Total %8lu %8lu\n\n", total_alloc, total_free);
if (s->cpuslab_flush)
printf("Flushes %8lu\n", s->cpuslab_flush);
if (s->alloc_refill)
printf("Refill %8lu\n", s->alloc_refill);
total = s->deactivate_full + s->deactivate_empty +
s->deactivate_to_head + s->deactivate_to_tail;
if (total)
printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) "
"ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n",
s->deactivate_full, (s->deactivate_full * 100) / total,
s->deactivate_empty, (s->deactivate_empty * 100) / total,
s->deactivate_to_head, (s->deactivate_to_head * 100) / total,
s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
}
void report(struct slabinfo *s)
{
if (strcmp(s->name, "*") == 0)
@ -430,6 +515,7 @@ void report(struct slabinfo *s)
ops(s);
show_tracking(s);
slab_numa(s, 1);
slab_stats(s);
}
void slabcache(struct slabinfo *s)
@ -479,13 +565,27 @@ void slabcache(struct slabinfo *s)
*p++ = 'T';
*p = 0;
printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
s->name, s->objects, s->object_size, size_str, dist_str,
s->objs_per_slab, s->order,
s->slabs ? (s->partial * 100) / s->slabs : 100,
s->slabs ? (s->objects * s->object_size * 100) /
(s->slabs * (page_size << s->order)) : 100,
flags);
if (show_activity) {
unsigned long total_alloc;
unsigned long total_free;
total_alloc = s->alloc_fastpath + s->alloc_slowpath;
total_free = s->free_fastpath + s->free_slowpath;
printf("%-21s %8ld %8ld %8ld %3ld %3ld \n",
s->name, s->objects,
total_alloc, total_free,
total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
total_free ? (s->free_fastpath * 100 / total_free) : 0);
}
else
printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
s->name, s->objects, s->object_size, size_str, dist_str,
s->objs_per_slab, s->order,
s->slabs ? (s->partial * 100) / s->slabs : 100,
s->slabs ? (s->objects * s->object_size * 100) /
(s->slabs * (page_size << s->order)) : 100,
flags);
}
/*
@ -892,6 +992,8 @@ void sort_slabs(void)
if (sort_size)
result = slab_size(s1) < slab_size(s2);
else if (sort_active)
result = slab_activity(s1) < slab_activity(s2);
else
result = strcasecmp(s1->name, s2->name);
@ -1074,6 +1176,23 @@ void read_slab_dir(void)
free(t);
slab->store_user = get_obj("store_user");
slab->trace = get_obj("trace");
slab->alloc_fastpath = get_obj("alloc_fastpath");
slab->alloc_slowpath = get_obj("alloc_slowpath");
slab->free_fastpath = get_obj("free_fastpath");
slab->free_slowpath = get_obj("free_slowpath");
slab->free_frozen= get_obj("free_frozen");
slab->free_add_partial = get_obj("free_add_partial");
slab->free_remove_partial = get_obj("free_remove_partial");
slab->alloc_from_partial = get_obj("alloc_from_partial");
slab->alloc_slab = get_obj("alloc_slab");
slab->alloc_refill = get_obj("alloc_refill");
slab->free_slab = get_obj("free_slab");
slab->cpuslab_flush = get_obj("cpuslab_flush");
slab->deactivate_full = get_obj("deactivate_full");
slab->deactivate_empty = get_obj("deactivate_empty");
slab->deactivate_to_head = get_obj("deactivate_to_head");
slab->deactivate_to_tail = get_obj("deactivate_to_tail");
slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
chdir("..");
if (slab->name[0] == ':')
alias_targets++;
@ -1124,7 +1243,9 @@ void output_slabs(void)
struct option opts[] = {
{ "aliases", 0, NULL, 'a' },
{ "activity", 0, NULL, 'A' },
{ "debug", 2, NULL, 'd' },
{ "display-activity", 0, NULL, 'D' },
{ "empty", 0, NULL, 'e' },
{ "first-alias", 0, NULL, 'f' },
{ "help", 0, NULL, 'h' },
@ -1149,7 +1270,7 @@ int main(int argc, char *argv[])
page_size = getpagesize();
while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS",
while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS",
opts, NULL)) != -1)
switch (c) {
case '1':
@ -1158,11 +1279,17 @@ int main(int argc, char *argv[])
case 'a':
show_alias = 1;
break;
case 'A':
sort_active = 1;
break;
case 'd':
set_debug = 1;
if (!debug_opt_scan(optarg))
fatal("Invalid debug option '%s'\n", optarg);
break;
case 'D':
show_activity = 1;
break;
case 'e':
show_empty = 1;
break;

View file

@ -52,6 +52,10 @@ config HAVE_LATENCYTOP_SUPPORT
config SEMAPHORE_SLEEPERS
def_bool y
config FAST_CMPXCHG_LOCAL
bool
default y
config MMU
def_bool y

View file

@ -64,7 +64,10 @@ struct page {
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
spinlock_t ptl;
#endif
struct kmem_cache *slab; /* SLUB: Pointer to slab */
struct {
struct kmem_cache *slab; /* SLUB: Pointer to slab */
void *end; /* SLUB: end marker */
};
struct page *first_page; /* Compound tail pages */
};
union {

View file

@ -11,12 +11,35 @@
#include <linux/workqueue.h>
#include <linux/kobject.h>
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
FREE_FASTPATH, /* Free to cpu slub */
FREE_SLOWPATH, /* Freeing not to cpu slab */
FREE_FROZEN, /* Freeing to frozen slab */
FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
FREE_REMOVE_PARTIAL, /* Freeing removes last object */
ALLOC_FROM_PARTIAL, /* Cpu slab acquired from partial list */
ALLOC_SLAB, /* Cpu slab acquired from page allocator */
ALLOC_REFILL, /* Refill cpu slab from slab freelist */
FREE_SLAB, /* Slab freed to the page allocator */
CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
NR_SLUB_STAT_ITEMS };
struct kmem_cache_cpu {
void **freelist; /* Pointer to first free per cpu object */
struct page *page; /* The slab from which we are allocating */
int node; /* The node of the page (or -1 for debug) */
unsigned int offset; /* Freepointer offset (in word units) */
unsigned int objsize; /* Size of an object (from kmem_cache) */
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
struct kmem_cache_node {

View file

@ -205,6 +205,19 @@ config SLUB_DEBUG_ON
off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
"slub_debug=-".
config SLUB_STATS
default n
bool "Enable SLUB performance statistics"
depends on SLUB
help
SLUB statistics are useful to debug SLUBs allocation behavior in
order find ways to optimize the allocator. This should never be
enabled for production use since keeping statistics slows down
the allocator by a few percentage points. The slabinfo command
supports the determination of the most active slabs to figure
out which slabs are relevant to a particular load.
Try running: slabinfo -DA
config DEBUG_PREEMPT
bool "Debug preemptible kernel"
depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64)

326
mm/slub.c
View file

@ -149,6 +149,13 @@ static inline void ClearSlabDebug(struct page *page)
/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST
/*
* Currently fastpath is not supported if preemption is enabled.
*/
#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT)
#define SLUB_FASTPATH
#endif
#if PAGE_SHIFT <= 12
/*
@ -243,6 +250,7 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
@ -251,8 +259,16 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
{
kfree(s);
}
#endif
static inline void stat(struct kmem_cache_cpu *c, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
c->stat[si]++;
#endif
}
/********************************************************************
* Core slab cache functions
*******************************************************************/
@ -280,15 +296,32 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
#endif
}
/*
* The end pointer in a slab is special. It points to the first object in the
* slab but has bit 0 set to mark it.
*
* Note that SLUB relies on page_mapping returning NULL for pages with bit 0
* in the mapping set.
*/
static inline int is_end(void *addr)
{
return (unsigned long)addr & PAGE_MAPPING_ANON;
}
void *slab_address(struct page *page)
{
return page->end - PAGE_MAPPING_ANON;
}
static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, const void *object)
{
void *base;
if (!object)
if (object == page->end)
return 1;
base = page_address(page);
base = slab_address(page);
if (object < base || object >= base + s->objects * s->size ||
(object - base) % s->size) {
return 0;
@ -321,7 +354,8 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
/* Scan freelist */
#define for_each_free_object(__p, __s, __free) \
for (__p = (__free); __p; __p = get_freepointer((__s), __p))
for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\
__p))
/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@ -473,7 +507,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
unsigned int off; /* Offset of last byte */
u8 *addr = page_address(page);
u8 *addr = slab_address(page);
print_tracking(s, p);
@ -651,7 +685,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
if (!(s->flags & SLAB_POISON))
return 1;
start = page_address(page);
start = slab_address(page);
end = start + (PAGE_SIZE << s->order);
length = s->objects * s->size;
remainder = end - (start + length);
@ -685,9 +719,10 @@ static int check_object(struct kmem_cache *s, struct page *page,
endobject, red, s->inuse - s->objsize))
return 0;
} else {
if ((s->flags & SLAB_POISON) && s->objsize < s->inuse)
check_bytes_and_report(s, page, p, "Alignment padding", endobject,
POISON_INUSE, s->inuse - s->objsize);
if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
check_bytes_and_report(s, page, p, "Alignment padding",
endobject, POISON_INUSE, s->inuse - s->objsize);
}
}
if (s->flags & SLAB_POISON) {
@ -718,7 +753,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
* of the free objects in this slab. May cause
* another error because the object count is now wrong.
*/
set_freepointer(s, p, NULL);
set_freepointer(s, p, page->end);
return 0;
}
return 1;
@ -752,18 +787,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
void *fp = page->freelist;
void *object = NULL;
while (fp && nr <= s->objects) {
while (fp != page->end && nr <= s->objects) {
if (fp == search)
return 1;
if (!check_valid_pointer(s, page, fp)) {
if (object) {
object_err(s, page, object,
"Freechain corrupt");
set_freepointer(s, object, NULL);
set_freepointer(s, object, page->end);
break;
} else {
slab_err(s, page, "Freepointer corrupt");
page->freelist = NULL;
page->freelist = page->end;
page->inuse = s->objects;
slab_fix(s, "Freelist cleared");
return 0;
@ -869,7 +904,7 @@ bad:
*/
slab_fix(s, "Marking all objects used");
page->inuse = s->objects;
page->freelist = NULL;
page->freelist = page->end;
}
return 0;
}
@ -894,11 +929,10 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
return 0;
if (unlikely(s != page->slab)) {
if (!PageSlab(page))
if (!PageSlab(page)) {
slab_err(s, page, "Attempt to free object(0x%p) "
"outside of slab", object);
else
if (!page->slab) {
} else if (!page->slab) {
printk(KERN_ERR
"SLUB <none>: no slab for object 0x%p.\n",
object);
@ -910,7 +944,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
}
/* Special debug activities for freeing objects */
if (!SlabFrozen(page) && !page->freelist)
if (!SlabFrozen(page) && page->freelist == page->end)
remove_full(s, page);
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
@ -1007,7 +1041,7 @@ static unsigned long kmem_cache_flags(unsigned long objsize,
*/
if (slub_debug && (!slub_debug_slabs ||
strncmp(slub_debug_slabs, name,
strlen(slub_debug_slabs)) == 0))
strlen(slub_debug_slabs)) == 0))
flags |= slub_debug;
}
@ -1102,6 +1136,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
SetSlabDebug(page);
start = page_address(page);
page->end = start + 1;
if (unlikely(s->flags & SLAB_POISON))
memset(start, POISON_INUSE, PAGE_SIZE << s->order);
@ -1113,7 +1148,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
last = p;
}
setup_object(s, page, last);
set_freepointer(s, last, NULL);
set_freepointer(s, last, page->end);
page->freelist = start;
page->inuse = 0;
@ -1129,7 +1164,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
void *p;
slab_pad_check(s, page);
for_each_object(p, s, page_address(page))
for_each_object(p, s, slab_address(page))
check_object(s, page, p, 0);
ClearSlabDebug(page);
}
@ -1139,6 +1174,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-pages);
page->mapping = NULL;
__free_pages(page, s->order);
}
@ -1183,7 +1219,7 @@ static __always_inline void slab_lock(struct page *page)
static __always_inline void slab_unlock(struct page *page)
{
bit_spin_unlock(PG_locked, &page->flags);
__bit_spin_unlock(PG_locked, &page->flags);
}
static __always_inline int slab_trylock(struct page *page)
@ -1294,8 +1330,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
get_cycles() % 1024 > s->remote_node_defrag_ratio)
return NULL;
zonelist = &NODE_DATA(slab_node(current->mempolicy))
->node_zonelists[gfp_zone(flags)];
zonelist = &NODE_DATA(
slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)];
for (z = zonelist->zones; *z; z++) {
struct kmem_cache_node *n;
@ -1337,17 +1373,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
ClearSlabFrozen(page);
if (page->inuse) {
if (page->freelist)
if (page->freelist != page->end) {
add_partial(n, page, tail);
else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
add_full(n, page);
stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
} else {
stat(c, DEACTIVATE_FULL);
if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
add_full(n, page);
}
slab_unlock(page);
} else {
stat(c, DEACTIVATE_EMPTY);
if (n->nr_partial < MIN_PARTIAL) {
/*
* Adding an empty slab to the partial slabs in order
@ -1361,6 +1402,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
slab_unlock(page);
} else {
slab_unlock(page);
stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
discard_slab(s, page);
}
}
@ -1373,12 +1415,19 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
struct page *page = c->page;
int tail = 1;
if (c->freelist)
stat(c, DEACTIVATE_REMOTE_FREES);
/*
* Merge cpu freelist into freelist. Typically we get here
* because both freelists are empty. So this is unlikely
* to occur.
*
* We need to use _is_end here because deactivate slab may
* be called for a debug slab. Then c->freelist may contain
* a dummy pointer.
*/
while (unlikely(c->freelist)) {
while (unlikely(!is_end(c->freelist))) {
void **object;
tail = 0; /* Hot objects. Put the slab first */
@ -1398,6 +1447,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
stat(c, CPUSLAB_FLUSH);
slab_lock(c->page);
deactivate_slab(s, c);
}
@ -1469,16 +1519,21 @@ static void *__slab_alloc(struct kmem_cache *s,
{
void **object;
struct page *new;
#ifdef SLUB_FASTPATH
unsigned long flags;
local_irq_save(flags);
#endif
if (!c->page)
goto new_slab;
slab_lock(c->page);
if (unlikely(!node_match(c, node)))
goto another_slab;
stat(c, ALLOC_REFILL);
load_freelist:
object = c->page->freelist;
if (unlikely(!object))
if (unlikely(object == c->page->end))
goto another_slab;
if (unlikely(SlabDebug(c->page)))
goto debug;
@ -1486,9 +1541,15 @@ load_freelist:
object = c->page->freelist;
c->freelist = object[c->offset];
c->page->inuse = s->objects;
c->page->freelist = NULL;
c->page->freelist = c->page->end;
c->node = page_to_nid(c->page);
unlock_out:
slab_unlock(c->page);
stat(c, ALLOC_SLOWPATH);
out:
#ifdef SLUB_FASTPATH
local_irq_restore(flags);
#endif
return object;
another_slab:
@ -1498,6 +1559,7 @@ new_slab:
new = get_partial(s, gfpflags, node);
if (new) {
c->page = new;
stat(c, ALLOC_FROM_PARTIAL);
goto load_freelist;
}
@ -1511,6 +1573,7 @@ new_slab:
if (new) {
c = get_cpu_slab(s, smp_processor_id());
stat(c, ALLOC_SLAB);
if (c->page)
flush_slab(s, c);
slab_lock(new);
@ -1518,7 +1581,8 @@ new_slab:
c->page = new;
goto load_freelist;
}
return NULL;
object = NULL;
goto out;
debug:
object = c->page->freelist;
if (!alloc_debug_processing(s, c->page, object, addr))
@ -1527,8 +1591,7 @@ debug:
c->page->inuse++;
c->page->freelist = object[c->offset];
c->node = -1;
slab_unlock(c->page);
return object;
goto unlock_out;
}
/*
@ -1545,20 +1608,50 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
gfp_t gfpflags, int node, void *addr)
{
void **object;
unsigned long flags;
struct kmem_cache_cpu *c;
/*
* The SLUB_FASTPATH path is provisional and is currently disabled if the
* kernel is compiled with preemption or if the arch does not support
* fast cmpxchg operations. There are a couple of coming changes that will
* simplify matters and allow preemption. Ultimately we may end up making
* SLUB_FASTPATH the default.
*
* 1. The introduction of the per cpu allocator will avoid array lookups
* through get_cpu_slab(). A special register can be used instead.
*
* 2. The introduction of per cpu atomic operations (cpu_ops) means that
* we can realize the logic here entirely with per cpu atomics. The
* per cpu atomic ops will take care of the preemption issues.
*/
#ifdef SLUB_FASTPATH
c = get_cpu_slab(s, raw_smp_processor_id());
do {
object = c->freelist;
if (unlikely(is_end(object) || !node_match(c, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
break;
}
stat(c, ALLOC_FASTPATH);
} while (cmpxchg_local(&c->freelist, object, object[c->offset])
!= object);
#else
unsigned long flags;
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
if (unlikely(!c->freelist || !node_match(c, node)))
if (unlikely(is_end(c->freelist) || !node_match(c, node)))
object = __slab_alloc(s, gfpflags, node, addr, c);
else {
object = c->freelist;
c->freelist = object[c->offset];
stat(c, ALLOC_FASTPATH);
}
local_irq_restore(flags);
#endif
if (unlikely((gfpflags & __GFP_ZERO) && object))
memset(object, 0, c->objsize);
@ -1593,7 +1686,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
{
void *prior;
void **object = (void *)x;
struct kmem_cache_cpu *c;
#ifdef SLUB_FASTPATH
unsigned long flags;
local_irq_save(flags);
#endif
c = get_cpu_slab(s, raw_smp_processor_id());
stat(c, FREE_SLOWPATH);
slab_lock(page);
if (unlikely(SlabDebug(page)))
@ -1603,8 +1704,10 @@ checks_ok:
page->freelist = object;
page->inuse--;
if (unlikely(SlabFrozen(page)))
if (unlikely(SlabFrozen(page))) {
stat(c, FREE_FROZEN);
goto out_unlock;
}
if (unlikely(!page->inuse))
goto slab_empty;
@ -1614,21 +1717,31 @@ checks_ok:
* was not on the partial list before
* then add it.
*/
if (unlikely(!prior))
if (unlikely(prior == page->end)) {
add_partial(get_node(s, page_to_nid(page)), page, 1);
stat(c, FREE_ADD_PARTIAL);
}
out_unlock:
slab_unlock(page);
#ifdef SLUB_FASTPATH
local_irq_restore(flags);
#endif
return;
slab_empty:
if (prior)
if (prior != page->end) {
/*
* Slab still on the partial list.
*/
remove_partial(s, page);
stat(c, FREE_REMOVE_PARTIAL);
}
slab_unlock(page);
stat(c, FREE_SLAB);
#ifdef SLUB_FASTPATH
local_irq_restore(flags);
#endif
discard_slab(s, page);
return;
@ -1653,19 +1766,49 @@ static __always_inline void slab_free(struct kmem_cache *s,
struct page *page, void *x, void *addr)
{
void **object = (void *)x;
unsigned long flags;
struct kmem_cache_cpu *c;
#ifdef SLUB_FASTPATH
void **freelist;
c = get_cpu_slab(s, raw_smp_processor_id());
debug_check_no_locks_freed(object, s->objsize);
do {
freelist = c->freelist;
barrier();
/*
* If the compiler would reorder the retrieval of c->page to
* come before c->freelist then an interrupt could
* change the cpu slab before we retrieve c->freelist. We
* could be matching on a page no longer active and put the
* object onto the freelist of the wrong slab.
*
* On the other hand: If we already have the freelist pointer
* then any change of cpu_slab will cause the cmpxchg to fail
* since the freelist pointers are unique per slab.
*/
if (unlikely(page != c->page || c->node < 0)) {
__slab_free(s, page, x, addr, c->offset);
break;
}
object[c->offset] = freelist;
stat(c, FREE_FASTPATH);
} while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
#else
unsigned long flags;
local_irq_save(flags);
debug_check_no_locks_freed(object, s->objsize);
c = get_cpu_slab(s, smp_processor_id());
if (likely(page == c->page && c->node >= 0)) {
object[c->offset] = c->freelist;
c->freelist = object;
stat(c, FREE_FASTPATH);
} else
__slab_free(s, page, x, addr, c->offset);
local_irq_restore(flags);
#endif
}
void kmem_cache_free(struct kmem_cache *s, void *x)
@ -1842,7 +1985,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
struct kmem_cache_cpu *c)
{
c->page = NULL;
c->freelist = NULL;
c->freelist = (void *)PAGE_MAPPING_ANON;
c->node = 0;
c->offset = s->offset / sizeof(void *);
c->objsize = s->objsize;
@ -2446,7 +2589,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
goto unlock_out;
realsize = kmalloc_caches[index].objsize;
text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize),
text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
(unsigned int)realsize);
s = kmalloc(kmem_size, flags & ~SLUB_DMA);
if (!s || !text || !kmem_cache_open(s, flags, text,
@ -2601,6 +2745,7 @@ EXPORT_SYMBOL(ksize);
void kfree(const void *x)
{
struct page *page;
void *object = (void *)x;
if (unlikely(ZERO_OR_NULL_PTR(x)))
return;
@ -2610,7 +2755,7 @@ void kfree(const void *x)
put_page(page);
return;
}
slab_free(page->slab, page, (void *)x, __builtin_return_address(0));
slab_free(page->slab, page, object, __builtin_return_address(0));
}
EXPORT_SYMBOL(kfree);
@ -2896,7 +3041,8 @@ void __init kmem_cache_init(void)
#endif
printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
printk(KERN_INFO
"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
" CPUs=%d, Nodes=%d\n",
caches, cache_line_size(),
slub_min_order, slub_max_order, slub_min_objects,
@ -3063,7 +3209,7 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
}
static struct notifier_block __cpuinitdata slab_notifier = {
&slab_cpuup_callback, NULL, 0
.notifier_call = slab_cpuup_callback
};
#endif
@ -3104,7 +3250,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
unsigned long *map)
{
void *p;
void *addr = page_address(page);
void *addr = slab_address(page);
if (!check_slab(s, page) ||
!on_freelist(s, page, NULL))
@ -3221,8 +3367,9 @@ static void resiliency_test(void)
p = kzalloc(32, GFP_KERNEL);
p[32 + sizeof(void *)] = 0x34;
printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
" 0x34 -> -0x%p\n", p);
printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
" 0x34 -> -0x%p\n", p);
printk(KERN_ERR
"If allocated object is overwritten then not detectable\n\n");
validate_slab_cache(kmalloc_caches + 5);
p = kzalloc(64, GFP_KERNEL);
@ -3230,7 +3377,8 @@ static void resiliency_test(void)
*p = 0x56;
printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
p);
printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
printk(KERN_ERR
"If allocated object is overwritten then not detectable\n\n");
validate_slab_cache(kmalloc_caches + 6);
printk(KERN_ERR "\nB. Corruption after free\n");
@ -3243,7 +3391,8 @@ static void resiliency_test(void)
p = kzalloc(256, GFP_KERNEL);
kfree(p);
p[50] = 0x9a;
printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
p);
validate_slab_cache(kmalloc_caches + 8);
p = kzalloc(512, GFP_KERNEL);
@ -3384,7 +3533,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
static void process_slab(struct loc_track *t, struct kmem_cache *s,
struct page *page, enum track_item alloc)
{
void *addr = page_address(page);
void *addr = slab_address(page);
DECLARE_BITMAP(map, s->objects);
void *p;
@ -3872,6 +4021,62 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
SLAB_ATTR(remote_node_defrag_ratio);
#endif
#ifdef CONFIG_SLUB_STATS
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
unsigned long sum = 0;
int cpu;
int len;
int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
if (!data)
return -ENOMEM;
for_each_online_cpu(cpu) {
unsigned x = get_cpu_slab(s, cpu)->stat[si];
data[cpu] = x;
sum += x;
}
len = sprintf(buf, "%lu", sum);
for_each_online_cpu(cpu) {
if (data[cpu] && len < PAGE_SIZE - 20)
len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]);
}
kfree(data);
return len + sprintf(buf + len, "\n");
}
#define STAT_ATTR(si, text) \
static ssize_t text##_show(struct kmem_cache *s, char *buf) \
{ \
return show_stat(s, buf, si); \
} \
SLAB_ATTR_RO(text); \
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
#endif
static struct attribute *slab_attrs[] = {
&slab_size_attr.attr,
&object_size_attr.attr,
@ -3901,6 +4106,25 @@ static struct attribute *slab_attrs[] = {
#endif
#ifdef CONFIG_NUMA
&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
&alloc_fastpath_attr.attr,
&alloc_slowpath_attr.attr,
&free_fastpath_attr.attr,
&free_slowpath_attr.attr,
&free_frozen_attr.attr,
&free_add_partial_attr.attr,
&free_remove_partial_attr.attr,
&alloc_from_partial_attr.attr,
&alloc_slab_attr.attr,
&alloc_refill_attr.attr,
&free_slab_attr.attr,
&cpuslab_flush_attr.attr,
&deactivate_full_attr.attr,
&deactivate_empty_attr.attr,
&deactivate_to_head_attr.attr,
&deactivate_to_tail_attr.attr,
&deactivate_remote_frees_attr.attr,
#endif
NULL
};