From 216773a787c3c46ef26bf1742c1fdba37d26be45 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 14 Feb 2009 01:59:06 +0100 Subject: [PATCH 1/9] Consolidate driver_probe_done() loops into one place there's a few places that currently loop over driver_probe_done(), and I'm about to add another one. This patch abstracts it into a helper to reduce duplication. Signed-off-by: Arjan van de Ven Signed-off-by: Rafael J. Wysocki Cc: Len Brown Acked-by: Greg KH Signed-off-by: Linus Torvalds --- drivers/base/dd.c | 17 +++++++++++++++++ include/linux/device.h | 2 ++ init/do_mounts.c | 13 +++++++++---- init/do_mounts_md.c | 5 +++-- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 315bed8d5e7..13523123910 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -18,9 +18,11 @@ */ #include +#include #include #include #include +#include #include "base.h" #include "power/power.h" @@ -167,6 +169,21 @@ int driver_probe_done(void) return 0; } +/** + * wait_for_device_probe + * Wait for device probing to be completed. + * + * Note: this function polls at 100 msec intervals. + */ +int wait_for_device_probe(void) +{ + /* wait for the known devices to complete their probing */ + while (driver_probe_done() != 0) + msleep(100); + async_synchronize_full(); + return 0; +} + /** * driver_probe_device - attempt to bind device & driver together * @drv: driver to bind a device to diff --git a/include/linux/device.h b/include/linux/device.h index 45e5b1921fb..47f343c7bdd 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -147,6 +147,8 @@ extern void put_driver(struct device_driver *drv); extern struct device_driver *driver_find(const char *name, struct bus_type *bus); extern int driver_probe_done(void); +extern int wait_for_device_probe(void); + /* sysfs interface for exporting driver attributes */ diff --git a/init/do_mounts.c b/init/do_mounts.c index 708105e163d..8d4ff5afc1d 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -370,10 +370,14 @@ void __init prepare_namespace(void) ssleep(root_delay); } - /* wait for the known devices to complete their probing */ - while (driver_probe_done() != 0) - msleep(100); - async_synchronize_full(); + /* + * wait for the known devices to complete their probing + * + * Note: this is a potential source of long boot delays. + * For example, it is not atypical to wait 5 seconds here + * for the touchpad of a laptop to initialize. + */ + wait_for_device_probe(); md_run_setup(); @@ -399,6 +403,7 @@ void __init prepare_namespace(void) while (driver_probe_done() != 0 || (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0) msleep(100); + async_synchronize_full(); } is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR; diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index ff95e319288..9bdddbcb3d6 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -281,8 +281,9 @@ static void __init autodetect_raid(void) */ printk(KERN_INFO "md: Waiting for all devices to be available before autodetect\n"); printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n"); - while (driver_probe_done() < 0) - msleep(100); + + wait_for_device_probe(); + fd = sys_open("/dev/md0", 0, 0); if (fd >= 0) { sys_ioctl(fd, RAID_AUTORUN, raid_autopart); From eed3ee08292d821282169708e5e8e89a0d0a0c63 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 14 Feb 2009 02:00:19 +0100 Subject: [PATCH 2/9] PM/resume: wait for device probing to finish the resume code does not currently wait for device probing to finish. Even without async function calls this is dicey and not correct, but with async function calls during the boot sequence this is going to get hit more... This patch adds the synchronization using the newly introduced helper. Signed-off-by: Arjan van de Ven Signed-off-by: Rafael J. Wysocki Cc: Len Brown Acked-by: Greg KH Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 432ee575c9e..7b40e94b1d4 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -594,6 +594,12 @@ static int software_resume(void) int error; unsigned int flags; + /* + * If the user said "noresume".. bail out early. + */ + if (noresume) + return 0; + /* * name_to_dev_t() below takes a sysfs buffer mutex when sysfs * is configured into the kernel. Since the regular hibernate @@ -610,6 +616,11 @@ static int software_resume(void) mutex_unlock(&pm_mutex); return -ENOENT; } + /* + * Some device discovery might still be in progress; we need + * to wait for this to finish. + */ + wait_for_device_probe(); swsusp_resume_device = name_to_dev_t(resume_file); pr_debug("PM: Resume from partition %s\n", resume_file); } else { From a1bb7d61233ba5fb5cd865f907a9ddcc8f8c02bd Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Sat, 14 Feb 2009 02:01:14 +0100 Subject: [PATCH 3/9] PM/hibernate: fix "swap breaks after hibernation failures" http://bugzilla.kernel.org/show_bug.cgi?id=12239 The image writing code dropped a reference to the current swap device. This doesn't show up if the hibernation succeeds - because it doesn't affect the image which gets resumed. But it means multiple _failed_ hibernations end up freeing the swap device while it is still use! swsusp_write() finds the block device for the swap file using swap_type_of(). It then uses blkdev_get() / blkdev_put() to open and close the block device. Unfortunately, blkdev_get() assumes ownership of the inode of the block_device passed to it. So blkdev_put() calls iput() on the inode. This is by design and other callers expect this behaviour. The fix is for swap_type_of() to take a reference on the inode using bdget(). Signed-off-by: Alan Jenkins Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Signed-off-by: Linus Torvalds --- mm/swapfile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 7e6304dfafa..312fafe0ab6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -635,7 +635,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) if (!bdev) { if (bdev_p) - *bdev_p = sis->bdev; + *bdev_p = bdget(sis->bdev->bd_dev); spin_unlock(&swap_lock); return i; @@ -647,7 +647,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) struct swap_extent, list); if (se->start_block == offset) { if (bdev_p) - *bdev_p = sis->bdev; + *bdev_p = bdget(sis->bdev->bd_dev); spin_unlock(&swap_lock); bdput(bdev); From 09664fda48c5dd63277f1f42888ca9d5dca6037a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 14 Feb 2009 02:02:16 +0100 Subject: [PATCH 4/9] PM: fix build for CONFIG_PM unset Compilation of kprobes.c with CONFIG_PM unset is broken due to some broken config dependncies. Fix that. Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Reported-by: Ingo Molnar Tested-by: Masami Hiramatsu Signed-off-by: Linus Torvalds --- kernel/Makefile | 1 + kernel/power/Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/Makefile b/kernel/Makefile index 170a9213c1b..e4791b3ba55 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_PM) += power/ +obj-$(CONFIG_FREEZER) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o diff --git a/kernel/power/Makefile b/kernel/power/Makefile index d7a10167a25..720ea4f781b 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -3,7 +3,7 @@ ifeq ($(CONFIG_PM_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif -obj-y := main.o +obj-$(CONFIG_PM) += main.o obj-$(CONFIG_PM_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o From 3049103ddfc9aac111916bd2f39ac6976c431517 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 14 Feb 2009 02:03:08 +0100 Subject: [PATCH 5/9] swsusp: dont fiddle with swappiness sc.swappiness is not used in the swsusp memory shrinking path, do not set it. Signed-off-by: Johannes Weiner Reviewed-by: KOSAKI Motohiro Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Signed-off-by: Linus Torvalds --- mm/vmscan.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 9a27c44aa32..550e8695070 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2112,7 +2112,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages) .may_swap = 0, .swap_cluster_max = nr_pages, .may_writepage = 1, - .swappiness = vm_swappiness, .isolate_pages = isolate_pages_global, }; @@ -2146,10 +2145,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages) int prio; /* Force reclaiming mapped pages in the passes #3 and #4 */ - if (pass > 2) { + if (pass > 2) sc.may_swap = 1; - sc.swappiness = 100; - } for (prio = DEF_PRIORITY; prio >= 0; prio--) { unsigned long nr_to_scan = nr_pages - ret; From 0cb57258fe01e9b21076b6a15b6aec7a24168228 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 14 Feb 2009 02:04:10 +0100 Subject: [PATCH 6/9] swsusp: clean up shrink_all_zones() Move local variables to innermost possible scopes and use local variables to cache calculations/reads done more than once. No change in functionality (intended). Signed-off-by: Johannes Weiner Reviewed-by: KOSAKI Motohiro Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Acked-by: Pavel Machek Signed-off-by: Linus Torvalds --- mm/vmscan.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 550e8695070..6177e3bcd66 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2057,31 +2057,31 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, int pass, struct scan_control *sc) { struct zone *zone; - unsigned long nr_to_scan, ret = 0; - enum lru_list l; + unsigned long ret = 0; for_each_zone(zone) { + enum lru_list l; if (!populated_zone(zone)) continue; - if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) continue; for_each_evictable_lru(l) { + enum zone_stat_item ls = NR_LRU_BASE + l; + unsigned long lru_pages = zone_page_state(zone, ls); + /* For pass = 0, we don't shrink the active list */ - if (pass == 0 && - (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE)) + if (pass == 0 && (l == LRU_ACTIVE_ANON || + l == LRU_ACTIVE_FILE)) continue; - zone->lru[l].nr_scan += - (zone_page_state(zone, NR_LRU_BASE + l) - >> prio) + 1; + zone->lru[l].nr_scan += (lru_pages >> prio) + 1; if (zone->lru[l].nr_scan >= nr_pages || pass > 3) { + unsigned long nr_to_scan; + zone->lru[l].nr_scan = 0; - nr_to_scan = min(nr_pages, - zone_page_state(zone, - NR_LRU_BASE + l)); + nr_to_scan = min(nr_pages, lru_pages); ret += shrink_list(l, nr_to_scan, zone, sc, prio); if (ret >= nr_pages) @@ -2089,7 +2089,6 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, } } } - return ret; } From ebae2604f2c3693717d9dc687c84578f0526480c Mon Sep 17 00:00:00 2001 From: Andrey Borzenkov Date: Sat, 14 Feb 2009 02:05:14 +0100 Subject: [PATCH 7/9] PM: Fix pm_notifiers during user mode hibernation Snapshot device is opened with O_RDONLY during suspend and O_WRONLY durig resume. Make sure we also call notifiers with correct parameter telling them what we are really doing. Signed-off-by: Andrey Borzenkov Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Acked-by: Pavel Machek Signed-off-by: Linus Torvalds --- kernel/power/user.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/power/user.c b/kernel/power/user.c index 005b93d839b..6c85359364f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -95,15 +95,15 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); - if (error) - pm_notifier_call_chain(PM_POST_RESTORE); - } else { - data->swap = -1; - data->mode = O_WRONLY; error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); if (error) pm_notifier_call_chain(PM_POST_HIBERNATION); + } else { + data->swap = -1; + data->mode = O_WRONLY; + error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + if (error) + pm_notifier_call_chain(PM_POST_RESTORE); } if (error) atomic_inc(&snapshot_device_available); From b090f9fa53d51c8a33370071de9e391919ee1fa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Sat, 14 Feb 2009 02:06:17 +0100 Subject: [PATCH 8/9] PM: Wait for console in resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoids later waking up to a blinking cursor if the device woke up and returned to sleep before the console switch happened. Signed-off-by: Brian Swetland Signed-off-by: Arve Hjønnevåg Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Signed-off-by: Linus Torvalds --- kernel/power/console.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/power/console.c b/kernel/power/console.c index b8628be2a46..a3961b205de 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -78,6 +78,12 @@ void pm_restore_console(void) } set_console(orig_fgconsole); release_console_sem(); + + if (vt_waitactive(orig_fgconsole)) { + pr_debug("Resume: Can't switch VCs."); + return; + } + kmsg_redirect = orig_kmsg; } #endif From 403f307576396f3362fbb65af190885b6036c72c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Sat, 14 Feb 2009 02:07:24 +0100 Subject: [PATCH 9/9] PM: Fix suspend_console and resume_console to use only one semaphore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a race where a thread acquires the console while the console is suspended, and the console is resumed before this thread releases it. In this case, the secondary console semaphore would be left locked, and the primary semaphore would be released twice. This in turn would cause the console switch on suspend or resume to hang forever. Note that suspend_console does not actually lock the console for clients that use acquire_console_sem, it only locks it for clients that use try_acquire_console_sem. If we change suspend_console to fully lock the console, then the kernel may deadlock on suspend. One client of try_acquire_console_sem is acquire_console_semaphore_for_printk, which uses it to prevent printk from using the console while it is suspended. Signed-off-by: Arve Hjønnevåg Signed-off-by: Rafael J. Wysocki Cc: Len Brown Cc: Greg KH Signed-off-by: Linus Torvalds --- kernel/printk.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/printk.c b/kernel/printk.c index 69188f226a9..e3602d0755b 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -73,7 +73,6 @@ EXPORT_SYMBOL(oops_in_progress); * driver system. */ static DECLARE_MUTEX(console_sem); -static DECLARE_MUTEX(secondary_console_sem); struct console *console_drivers; EXPORT_SYMBOL_GPL(console_drivers); @@ -891,12 +890,14 @@ void suspend_console(void) printk("Suspending console(s) (use no_console_suspend to debug)\n"); acquire_console_sem(); console_suspended = 1; + up(&console_sem); } void resume_console(void) { if (!console_suspend_enabled) return; + down(&console_sem); console_suspended = 0; release_console_sem(); } @@ -912,11 +913,9 @@ void resume_console(void) void acquire_console_sem(void) { BUG_ON(in_interrupt()); - if (console_suspended) { - down(&secondary_console_sem); - return; - } down(&console_sem); + if (console_suspended) + return; console_locked = 1; console_may_schedule = 1; } @@ -926,6 +925,10 @@ int try_acquire_console_sem(void) { if (down_trylock(&console_sem)) return -1; + if (console_suspended) { + up(&console_sem); + return -1; + } console_locked = 1; console_may_schedule = 0; return 0; @@ -979,7 +982,7 @@ void release_console_sem(void) unsigned wake_klogd = 0; if (console_suspended) { - up(&secondary_console_sem); + up(&console_sem); return; }