diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c4c8f2e1dd1..d7d77d4a402 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -63,6 +63,20 @@ void unregister_memory_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_memory_notifier); +static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain); + +int register_memory_isolate_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&memory_isolate_chain, nb); +} +EXPORT_SYMBOL(register_memory_isolate_notifier); + +void unregister_memory_isolate_notifier(struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&memory_isolate_chain, nb); +} +EXPORT_SYMBOL(unregister_memory_isolate_notifier); + /* * register_memory - Setup a sysfs device for a memory block */ @@ -157,6 +171,11 @@ int memory_notify(unsigned long val, void *v) return blocking_notifier_call_chain(&memory_chain, val, v); } +int memory_isolate_notify(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&memory_isolate_chain, val, v); +} + /* * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is * OK to have direct references to sparsemem variables in here. diff --git a/include/linux/memory.h b/include/linux/memory.h index 37fa19b34ef..1adfe779eb9 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -50,6 +50,19 @@ struct memory_notify { int status_change_nid; }; +/* + * During pageblock isolation, count the number of pages within the + * range [start_pfn, start_pfn + nr_pages) which are owned by code + * in the notifier chain. + */ +#define MEM_ISOLATE_COUNT (1<<0) + +struct memory_isolate_notify { + unsigned long start_pfn; /* Start of range to check */ + unsigned int nr_pages; /* # pages in range to check */ + unsigned int pages_found; /* # pages owned found by callbacks */ +}; + struct notifier_block; struct mem_section; @@ -76,14 +89,28 @@ static inline int memory_notify(unsigned long val, void *v) { return 0; } +static inline int register_memory_isolate_notifier(struct notifier_block *nb) +{ + return 0; +} +static inline void unregister_memory_isolate_notifier(struct notifier_block *nb) +{ +} +static inline int memory_isolate_notify(unsigned long val, void *v) +{ + return 0; +} #else extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); +extern int register_memory_isolate_notifier(struct notifier_block *nb); +extern void unregister_memory_isolate_notifier(struct notifier_block *nb); extern int register_new_memory(int, struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); +extern int memory_isolate_notify(unsigned long val, void *v); extern struct memory_block *find_memory_block(struct mem_section *); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION< #include #include +#include #include #include @@ -5008,23 +5009,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, int set_migratetype_isolate(struct page *page) { struct zone *zone; - unsigned long flags; + struct page *curr_page; + unsigned long flags, pfn, iter; + unsigned long immobile = 0; + struct memory_isolate_notify arg; + int notifier_ret; int ret = -EBUSY; int zone_idx; zone = page_zone(page); zone_idx = zone_idx(zone); + spin_lock_irqsave(&zone->lock, flags); - /* - * In future, more migrate types will be able to be isolation target. - */ - if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE && - zone_idx != ZONE_MOVABLE) + if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE || + zone_idx == ZONE_MOVABLE) { + ret = 0; goto out; - set_pageblock_migratetype(page, MIGRATE_ISOLATE); - move_freepages_block(zone, page, MIGRATE_ISOLATE); - ret = 0; + } + + pfn = page_to_pfn(page); + arg.start_pfn = pfn; + arg.nr_pages = pageblock_nr_pages; + arg.pages_found = 0; + + /* + * It may be possible to isolate a pageblock even if the + * migratetype is not MIGRATE_MOVABLE. The memory isolation + * notifier chain is used by balloon drivers to return the + * number of pages in a range that are held by the balloon + * driver to shrink memory. If all the pages are accounted for + * by balloons, are free, or on the LRU, isolation can continue. + * Later, for example, when memory hotplug notifier runs, these + * pages reported as "can be isolated" should be isolated(freed) + * by the balloon driver through the memory notifier chain. + */ + notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg); + notifier_ret = notifier_to_errno(notifier_ret); + if (notifier_ret || !arg.pages_found) + goto out; + + for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) { + if (!pfn_valid_within(pfn)) + continue; + + curr_page = pfn_to_page(iter); + if (!page_count(curr_page) || PageLRU(curr_page)) + continue; + + immobile++; + } + + if (arg.pages_found == immobile) + ret = 0; + out: + if (!ret) { + set_pageblock_migratetype(page, MIGRATE_ISOLATE); + move_freepages_block(zone, page, MIGRATE_ISOLATE); + } + spin_unlock_irqrestore(&zone->lock, flags); if (!ret) drain_all_pages();