From 30996f40bffe73f05abb92a4cec254befa8cecf7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 Oct 2009 11:03:39 +0200 Subject: [PATCH 01/16] cfq-iosched: fix issue with rq-rq merging and fifo list ordering cfq uses rq->start_time as the fifo indicator, but that field may get modified prior to cfq doing it's fifo list adjustment when a request gets merged with another request. This can cause the fifo list to become unordered. Reported-by: Corrado Zoccolo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 9c4b679908f..e7f9e4e3a27 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -827,8 +827,10 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, * reposition in fifo if next is older than rq */ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && - time_before(next->start_time, rq->start_time)) + time_before(rq_fifo_time(next), rq_fifo_time(rq))) { list_move(&rq->queuelist, &next->queuelist); + rq_set_fifo_time(rq, rq_fifo_time(next)); + } cfq_remove_request(next); } @@ -1129,9 +1131,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) */ static struct request *cfq_check_fifo(struct cfq_queue *cfqq) { - struct cfq_data *cfqd = cfqq->cfqd; - struct request *rq; - int fifo; + struct request *rq = NULL; if (cfq_cfqq_fifo_expire(cfqq)) return NULL; @@ -1141,13 +1141,11 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq) if (list_empty(&cfqq->fifo)) return NULL; - fifo = cfq_cfqq_sync(cfqq); rq = rq_entry_fifo(cfqq->fifo.next); - - if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) + if (time_before(jiffies, rq_fifo_time(rq))) rq = NULL; - cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq); + cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); return rq; } @@ -2130,6 +2128,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) cfq_add_rq_rb(rq); + rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_rq_enqueued(cfqd, cfqq, rq); From 48e025e63ac908ed6ec5394a294f4ecd510a7476 Mon Sep 17 00:00:00 2001 From: Corrado Zoccolo Date: Mon, 5 Oct 2009 08:49:23 +0200 Subject: [PATCH 02/16] cfq-iosched: fix possible problem with jiffies wraparound The RR service tree is indexed by a key that is relative to current jiffies. This can cause problems on jiffies wraparound. The patch fixes it using time_before comparison, and changing the add_front path to use a relative number, too. Signed-off-by: Corrado Zoccolo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e7f9e4e3a27..ae14cbaf9d0 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -512,8 +512,11 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; rb_key += cfqq->slice_resid; cfqq->slice_resid = 0; - } else - rb_key = 0; + } else { + rb_key = -HZ; + __cfqq = cfq_rb_first(&cfqd->service_tree); + rb_key += __cfqq ? __cfqq->rb_key : jiffies; + } if (!RB_EMPTY_NODE(&cfqq->rb_node)) { /* @@ -547,7 +550,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, n = &(*p)->rb_left; else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq)) n = &(*p)->rb_right; - else if (rb_key < __cfqq->rb_key) + else if (time_before(rb_key, __cfqq->rb_key)) n = &(*p)->rb_left; else n = &(*p)->rb_right; From 23e018a1b083ecb4b8bb2fb43d58e7c19b5d7959 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 Oct 2009 08:52:35 +0200 Subject: [PATCH 03/16] block: get rid of kblock_schedule_delayed_work() It was briefly introduced to allow CFQ to to delayed scheduling, but we ended up removing that feature again. So lets kill the function and export, and just switch CFQ back to the normal work schedule since it is now passing in a '0' delay from all call sites. Signed-off-by: Jens Axboe --- block/blk-core.c | 8 -------- block/cfq-iosched.c | 24 +++++++++++------------- include/linux/blkdev.h | 4 ---- 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 81f34311659..73ecbed0260 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2492,14 +2492,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) } EXPORT_SYMBOL(kblockd_schedule_work); -int kblockd_schedule_delayed_work(struct request_queue *q, - struct delayed_work *work, - unsigned long delay) -{ - return queue_delayed_work(kblockd_workqueue, work, delay); -} -EXPORT_SYMBOL(kblockd_schedule_delayed_work); - int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ae14cbaf9d0..690ebd96dc4 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -150,7 +150,7 @@ struct cfq_data { * idle window management */ struct timer_list idle_slice_timer; - struct delayed_work unplug_work; + struct work_struct unplug_work; struct cfq_queue *active_queue; struct cfq_io_context *active_cic; @@ -268,13 +268,11 @@ static inline int cfq_bio_sync(struct bio *bio) * scheduler run of queue, if there are requests pending and no one in the * driver that will restart queueing */ -static inline void cfq_schedule_dispatch(struct cfq_data *cfqd, - unsigned long delay) +static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) { if (cfqd->busy_queues) { cfq_log(cfqd, "schedule dispatch"); - kblockd_schedule_delayed_work(cfqd->queue, &cfqd->unplug_work, - delay); + kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); } } @@ -1400,7 +1398,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) if (unlikely(cfqd->active_queue == cfqq)) { __cfq_slice_expired(cfqd, cfqq, 0); - cfq_schedule_dispatch(cfqd, 0); + cfq_schedule_dispatch(cfqd); } kmem_cache_free(cfq_pool, cfqq); @@ -1495,7 +1493,7 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) { if (unlikely(cfqq == cfqd->active_queue)) { __cfq_slice_expired(cfqd, cfqq, 0); - cfq_schedule_dispatch(cfqd, 0); + cfq_schedule_dispatch(cfqd); } cfq_put_queue(cfqq); @@ -2213,7 +2211,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) } if (!rq_in_driver(cfqd)) - cfq_schedule_dispatch(cfqd, 0); + cfq_schedule_dispatch(cfqd); } /* @@ -2343,7 +2341,7 @@ queue_fail: if (cic) put_io_context(cic->ioc); - cfq_schedule_dispatch(cfqd, 0); + cfq_schedule_dispatch(cfqd); spin_unlock_irqrestore(q->queue_lock, flags); cfq_log(cfqd, "set_request fail"); return 1; @@ -2352,7 +2350,7 @@ queue_fail: static void cfq_kick_queue(struct work_struct *work) { struct cfq_data *cfqd = - container_of(work, struct cfq_data, unplug_work.work); + container_of(work, struct cfq_data, unplug_work); struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); @@ -2406,7 +2404,7 @@ static void cfq_idle_slice_timer(unsigned long data) expire: cfq_slice_expired(cfqd, timed_out); out_kick: - cfq_schedule_dispatch(cfqd, 0); + cfq_schedule_dispatch(cfqd); out_cont: spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } @@ -2414,7 +2412,7 @@ out_cont: static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) { del_timer_sync(&cfqd->idle_slice_timer); - cancel_delayed_work_sync(&cfqd->unplug_work); + cancel_work_sync(&cfqd->unplug_work); } static void cfq_put_async_queues(struct cfq_data *cfqd) @@ -2496,7 +2494,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->idle_slice_timer.function = cfq_idle_slice_timer; cfqd->idle_slice_timer.data = (unsigned long) cfqd; - INIT_DELAYED_WORK(&cfqd->unplug_work, cfq_kick_queue); + INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); cfqd->cfq_quantum = cfq_quantum; cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25119041e03..221cecd86bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1172,11 +1172,7 @@ static inline void put_dev_sector(Sector p) } struct work_struct; -struct delayed_work; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); -int kblockd_schedule_delayed_work(struct request_queue *q, - struct delayed_work *work, - unsigned long delay); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) From 316d315bffa4026f28085f6b24ebcebede370ac7 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Tue, 6 Oct 2009 20:16:55 +0200 Subject: [PATCH 04/16] block: Seperate read and write statistics of in_flight requests v2 Commit a9327cac440be4d8333bba975cbbf76045096275 added seperate read and write statistics of in_flight requests. And exported the number of read and write requests in progress seperately through sysfs. But Corrado Zoccolo reported getting strange output from "iostat -kx 2". Global values for service time and utilization were garbage. For interval values, utilization was always 100%, and service time is higher than normal. So this was reverted by commit 0f78ab9899e9d6acb09d5465def618704255963b The problem was in part_round_stats_single(), I missed the following: if (now == part->stamp) return; - if (part->in_flight) { + if (part_in_flight(part)) { __part_stat_add(cpu, part, time_in_queue, part_in_flight(part) * (now - part->stamp)); __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); With this chunk included, the reported regression gets fixed. Signed-off-by: Nikanth Karthikesan -- Signed-off-by: Jens Axboe --- block/blk-core.c | 8 ++++---- block/blk-merge.c | 2 +- block/genhd.c | 4 +++- drivers/md/dm.c | 16 ++++++++++------ fs/partitions/check.c | 12 +++++++++++- include/linux/genhd.h | 21 ++++++++++++++------- 6 files changed, 43 insertions(+), 20 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 73ecbed0260..ac0fa10f8fa 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -70,7 +70,7 @@ static void drive_stat_acct(struct request *rq, int new_io) part_stat_inc(cpu, part, merges[rw]); else { part_round_stats(cpu, part); - part_inc_in_flight(part); + part_inc_in_flight(part, rw); } part_stat_unlock(); @@ -1030,9 +1030,9 @@ static void part_round_stats_single(int cpu, struct hd_struct *part, if (now == part->stamp) return; - if (part->in_flight) { + if (part_in_flight(part)) { __part_stat_add(cpu, part, time_in_queue, - part->in_flight * (now - part->stamp)); + part_in_flight(part) * (now - part->stamp)); __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); } part->stamp = now; @@ -1739,7 +1739,7 @@ static void blk_account_io_done(struct request *req) part_stat_inc(cpu, part, ios[rw]); part_stat_add(cpu, part, ticks[rw], duration); part_round_stats(cpu, part); - part_dec_in_flight(part); + part_dec_in_flight(part, rw); part_stat_unlock(); } diff --git a/block/blk-merge.c b/block/blk-merge.c index b0de8574fdc..99cb5cf1f44 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -351,7 +351,7 @@ static void blk_account_io_merge(struct request *req) part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); part_round_stats(cpu, part); - part_dec_in_flight(part); + part_dec_in_flight(part, rq_data_dir(req)); part_stat_unlock(); } diff --git a/block/genhd.c b/block/genhd.c index 5a0861da324..517e4332cb3 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -869,6 +869,7 @@ static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); +static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); @@ -888,6 +889,7 @@ static struct attribute *disk_attrs[] = { &dev_attr_alignment_offset.attr, &dev_attr_capability.attr, &dev_attr_stat.attr, + &dev_attr_inflight.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif @@ -1053,7 +1055,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) part_stat_read(hd, merges[1]), (unsigned long long)part_stat_read(hd, sectors[1]), jiffies_to_msecs(part_stat_read(hd, ticks[1])), - hd->in_flight, + part_in_flight(hd), jiffies_to_msecs(part_stat_read(hd, io_ticks)), jiffies_to_msecs(part_stat_read(hd, time_in_queue)) ); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 23e76fe0d35..376f1ab48a2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -130,7 +130,7 @@ struct mapped_device { /* * A list of ios that arrived while we were suspended. */ - atomic_t pending; + atomic_t pending[2]; wait_queue_head_t wait; struct work_struct work; struct bio_list deferred; @@ -453,13 +453,14 @@ static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; int cpu; + int rw = bio_data_dir(io->bio); io->start_time = jiffies; cpu = part_stat_lock(); part_round_stats(cpu, &dm_disk(md)->part0); part_stat_unlock(); - dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); + dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]); } static void end_io_acct(struct dm_io *io) @@ -479,8 +480,9 @@ static void end_io_acct(struct dm_io *io) * After this is decremented the bio must not be touched if it is * a barrier. */ - dm_disk(md)->part0.in_flight = pending = - atomic_dec_return(&md->pending); + dm_disk(md)->part0.in_flight[rw] = pending = + atomic_dec_return(&md->pending[rw]); + pending += atomic_read(&md->pending[rw^0x1]); /* nudge anyone waiting on suspend queue */ if (!pending) @@ -1785,7 +1787,8 @@ static struct mapped_device *alloc_dev(int minor) if (!md->disk) goto bad_disk; - atomic_set(&md->pending, 0); + atomic_set(&md->pending[0], 0); + atomic_set(&md->pending[1], 0); init_waitqueue_head(&md->wait); INIT_WORK(&md->work, dm_wq_work); init_waitqueue_head(&md->eventq); @@ -2088,7 +2091,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) break; } spin_unlock_irqrestore(q->queue_lock, flags); - } else if (!atomic_read(&md->pending)) + } else if (!atomic_read(&md->pending[0]) && + !atomic_read(&md->pending[1])) break; if (interruptible == TASK_INTERRUPTIBLE && diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f38fee0311a..7b685e10cba 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -248,11 +248,19 @@ ssize_t part_stat_show(struct device *dev, part_stat_read(p, merges[WRITE]), (unsigned long long)part_stat_read(p, sectors[WRITE]), jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), - p->in_flight, + part_in_flight(p), jiffies_to_msecs(part_stat_read(p, io_ticks)), jiffies_to_msecs(part_stat_read(p, time_in_queue))); } +ssize_t part_inflight_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + + return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]); +} + #ifdef CONFIG_FAIL_MAKE_REQUEST ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -281,6 +289,7 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); +static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); @@ -292,6 +301,7 @@ static struct attribute *part_attrs[] = { &dev_attr_size.attr, &dev_attr_alignment_offset.attr, &dev_attr_stat.attr, + &dev_attr_inflight.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7beaa21b388..297df45ffd0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -98,7 +98,7 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - int in_flight; + int in_flight[2]; #ifdef CONFIG_SMP struct disk_stats *dkstats; #else @@ -322,18 +322,23 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct hd_struct *part) +static inline void part_inc_in_flight(struct hd_struct *part, int rw) { - part->in_flight++; + part->in_flight[rw]++; if (part->partno) - part_to_disk(part)->part0.in_flight++; + part_to_disk(part)->part0.in_flight[rw]++; } -static inline void part_dec_in_flight(struct hd_struct *part) +static inline void part_dec_in_flight(struct hd_struct *part, int rw) { - part->in_flight--; + part->in_flight[rw]--; if (part->partno) - part_to_disk(part)->part0.in_flight--; + part_to_disk(part)->part0.in_flight[rw]--; +} + +static inline int part_in_flight(struct hd_struct *part) +{ + return part->in_flight[0] + part->in_flight[1]; } /* block/blk-core.c */ @@ -546,6 +551,8 @@ extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t part_inflight_show(struct device *dev, + struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); From 1b59dd511b9a36d4be3c01d7c7024aeec36dc651 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Oct 2009 20:19:02 +0200 Subject: [PATCH 05/16] block: use proper BLK_RW_ASYNC in blk_queue_start_tag() Makes it easier to read than the 0. Signed-off-by: Jens Axboe --- block/blk-tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-tag.c b/block/blk-tag.c index 2e5cfeb5933..6b0f52c2096 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -359,7 +359,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) max_depth -= 2; if (!max_depth) max_depth = 1; - if (q->in_flight[0] > max_depth) + if (q->in_flight[BLK_RW_ASYNC] > max_depth) return 1; } From 0b182d617eb50762b483658dd6dd9a9fbcb25758 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Oct 2009 20:49:37 +0200 Subject: [PATCH 06/16] cfq-iosched: abstract out the 'may this cfqq dispatch' logic Makes the whole thing easier to read, cfq_dispatch_requests() was a bit messy before. Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 121 ++++++++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 54 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 690ebd96dc4..5c3cee93329 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1247,67 +1247,21 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) return dispatched; } -/* - * Dispatch a request from cfqq, moving them to the request queue - * dispatch list. - */ -static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) +static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - struct request *rq; - - BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); - - /* - * follow expired path, else get first next available - */ - rq = cfq_check_fifo(cfqq); - if (!rq) - rq = cfqq->next_rq; - - /* - * insert request into driver dispatch list - */ - cfq_dispatch_insert(cfqd->queue, rq); - - if (!cfqd->active_cic) { - struct cfq_io_context *cic = RQ_CIC(rq); - - atomic_long_inc(&cic->ioc->refcount); - cfqd->active_cic = cic; - } -} - -/* - * Find the cfqq that we need to service and move a request from that to the - * dispatch list - */ -static int cfq_dispatch_requests(struct request_queue *q, int force) -{ - struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_queue *cfqq; unsigned int max_dispatch; - if (!cfqd->busy_queues) - return 0; - - if (unlikely(force)) - return cfq_forced_dispatch(cfqd); - - cfqq = cfq_select_queue(cfqd); - if (!cfqq) - return 0; - /* * Drain async requests before we start sync IO */ if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC]) - return 0; + return false; /* * If this is an async queue and we have sync IO in flight, let it wait */ if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) - return 0; + return false; max_dispatch = cfqd->cfq_quantum; if (cfq_class_idle(cfqq)) @@ -1321,13 +1275,13 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) * idle queue must always only have a single IO in flight */ if (cfq_class_idle(cfqq)) - return 0; + return false; /* * We have other queues, don't allow more IO from this one */ if (cfqd->busy_queues > 1) - return 0; + return false; /* * Sole queue user, allow bigger slice @@ -1351,13 +1305,72 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) max_dispatch = depth; } - if (cfqq->dispatched >= max_dispatch) + /* + * If we're below the current max, allow a dispatch + */ + return cfqq->dispatched < max_dispatch; +} + +/* + * Dispatch a request from cfqq, moving them to the request queue + * dispatch list. + */ +static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + struct request *rq; + + BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); + + if (!cfq_may_dispatch(cfqd, cfqq)) + return false; + + /* + * follow expired path, else get first next available + */ + rq = cfq_check_fifo(cfqq); + if (!rq) + rq = cfqq->next_rq; + + /* + * insert request into driver dispatch list + */ + cfq_dispatch_insert(cfqd->queue, rq); + + if (!cfqd->active_cic) { + struct cfq_io_context *cic = RQ_CIC(rq); + + atomic_long_inc(&cic->ioc->refcount); + cfqd->active_cic = cic; + } + + return true; +} + +/* + * Find the cfqq that we need to service and move a request from that to the + * dispatch list + */ +static int cfq_dispatch_requests(struct request_queue *q, int force) +{ + struct cfq_data *cfqd = q->elevator->elevator_data; + struct cfq_queue *cfqq; + + if (!cfqd->busy_queues) + return 0; + + if (unlikely(force)) + return cfq_forced_dispatch(cfqd); + + cfqq = cfq_select_queue(cfqd); + if (!cfqq) return 0; /* - * Dispatch a request from this cfqq + * Dispatch a request from this cfqq, if it is allowed */ - cfq_dispatch_request(cfqd, cfqq); + if (!cfq_dispatch_request(cfqd, cfqq)) + return 0; + cfqq->slice_dispatch++; cfq_clear_cfqq_must_dispatch(cfqq); From b9c8946b192397394a0ccd4fcecb31bc060f79f8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Oct 2009 20:53:44 +0200 Subject: [PATCH 07/16] cfq-iosched: fix the slice residual sign We should subtract the slice residual from the rb tree key, since a negative residual count indicates that the cfqq overran its slice the last time. Hence we want to add the overrun time, to position it a bit further away in the service tree. Reported-by: Corrado Zoccolo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5c3cee93329..4ab33d8a20b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -507,8 +507,14 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, } else rb_key += jiffies; } else if (!add_front) { + /* + * Get our rb key offset. Subtract any residual slice + * value carried from last service. A negative resid + * count indicates slice overrun, and this should position + * the next service time further away in the tree. + */ rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; - rb_key += cfqq->slice_resid; + rb_key -= cfqq->slice_resid; cfqq->slice_resid = 0; } else { rb_key = -HZ; From ec60e4f6749daf535329dac571293cf19c627aff Mon Sep 17 00:00:00 2001 From: Corrado Zoccolo Date: Wed, 7 Oct 2009 19:51:54 +0200 Subject: [PATCH 08/16] cfq-iosched: fix think time allowed for seekers CFQ enables idle only for processes that think less than the allowed idle time. Since idle time is lower for seeky queues, we should use the correct value in the comparison. Signed-off-by: Corrado Zoccolo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4ab33d8a20b..b35cc56dfd9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1995,7 +1995,10 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic))) enable_idle = 0; else if (sample_valid(cic->ttime_samples)) { - if (cic->ttime_mean > cfqd->cfq_slice_idle) + unsigned int slice_idle = cfqd->cfq_slice_idle; + if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic)) + slice_idle = msecs_to_jiffies(CFQ_MIN_TT); + if (cic->ttime_mean > slice_idle) enable_idle = 0; else enable_idle = 1; From a6151c3a5c8e1ff5a28450bc8d6a99a2a0add0a7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 Oct 2009 20:02:57 +0200 Subject: [PATCH 09/16] cfq-iosched: apply bool value where we return 0/1 Saves 16 bytes of text, woohoo. But the more important point is that it makes the code more readable when returning bool for 0/1 cases. Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 68 +++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index b35cc56dfd9..a592afcf1e6 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -230,7 +230,7 @@ CFQ_CFQQ_FNS(coop); blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) static void cfq_dispatch_insert(struct request_queue *, struct request *); -static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, +static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, struct io_context *, gfp_t); static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, struct io_context *); @@ -241,27 +241,24 @@ static inline int rq_in_driver(struct cfq_data *cfqd) } static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, - int is_sync) + bool is_sync) { - return cic->cfqq[!!is_sync]; + return cic->cfqq[is_sync]; } static inline void cic_set_cfqq(struct cfq_io_context *cic, - struct cfq_queue *cfqq, int is_sync) + struct cfq_queue *cfqq, bool is_sync) { - cic->cfqq[!!is_sync] = cfqq; + cic->cfqq[is_sync] = cfqq; } /* * We regard a request as SYNC, if it's either a read or has the SYNC bit * set (in which case it could also be direct WRITE). */ -static inline int cfq_bio_sync(struct bio *bio) +static inline bool cfq_bio_sync(struct bio *bio) { - if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO)) - return 1; - - return 0; + return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO); } /* @@ -288,7 +285,7 @@ static int cfq_queue_empty(struct request_queue *q) * if a queue is marked sync and has sync io queued. A sync queue with async * io only, should not get full sync slice length. */ -static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync, +static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, unsigned short prio) { const int base_slice = cfqd->cfq_slice[sync]; @@ -316,7 +313,7 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) * isn't valid until the first request from the dispatch is activated * and the slice time set. */ -static inline int cfq_slice_used(struct cfq_queue *cfqq) +static inline bool cfq_slice_used(struct cfq_queue *cfqq) { if (cfq_cfqq_slice_new(cfqq)) return 0; @@ -491,7 +488,7 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, * we will service the queues. */ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, - int add_front) + bool add_front) { struct rb_node **p, *parent; struct cfq_queue *__cfqq; @@ -853,7 +850,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, * Disallow merge of a sync bio into an async request. */ if (cfq_bio_sync(bio) && !rq_is_sync(rq)) - return 0; + return false; /* * Lookup the cfqq that this bio will be queued with. Allow @@ -861,13 +858,10 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, */ cic = cfq_cic_lookup(cfqd, current->io_context); if (!cic) - return 0; + return false; cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); - if (cfqq == RQ_CFQQ(rq)) - return 1; - - return 0; + return cfqq == RQ_CFQQ(rq); } static void __cfq_set_active_queue(struct cfq_data *cfqd, @@ -895,7 +889,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, */ static void __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, - int timed_out) + bool timed_out) { cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); @@ -923,7 +917,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, } } -static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out) +static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) { struct cfq_queue *cfqq = cfqd->active_queue; @@ -1035,7 +1029,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, */ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, struct cfq_queue *cur_cfqq, - int probe) + bool probe) { struct cfq_queue *cfqq; @@ -1676,7 +1670,7 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc) } static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, - pid_t pid, int is_sync) + pid_t pid, bool is_sync) { RB_CLEAR_NODE(&cfqq->rb_node); RB_CLEAR_NODE(&cfqq->p_node); @@ -1696,7 +1690,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, } static struct cfq_queue * -cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, +cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, gfp_t gfp_mask) { struct cfq_queue *cfqq, *new_cfqq = NULL; @@ -1760,7 +1754,7 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) } static struct cfq_queue * -cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, +cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, gfp_t gfp_mask) { const int ioprio = task_ioprio(ioc); @@ -2017,7 +2011,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, * Check if new_cfqq should preempt the currently active queue. Return 0 for * no or if we aren't sure, a 1 will cause a preempt. */ -static int +static bool cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, struct request *rq) { @@ -2025,48 +2019,48 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, cfqq = cfqd->active_queue; if (!cfqq) - return 0; + return false; if (cfq_slice_used(cfqq)) - return 1; + return true; if (cfq_class_idle(new_cfqq)) - return 0; + return false; if (cfq_class_idle(cfqq)) - return 1; + return true; /* * if the new request is sync, but the currently running queue is * not, let the sync request have priority. */ if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) - return 1; + return true; /* * So both queues are sync. Let the new request get disk time if * it's a metadata request and the current queue is doing regular IO. */ if (rq_is_meta(rq) && !cfqq->meta_pending) - return 1; + return false; /* * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. */ if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) - return 1; + return true; if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) - return 0; + return false; /* * if this request is as-good as one we would expect from the * current cfqq, let it preempt */ if (cfq_rq_close(cfqd, rq)) - return 1; + return true; - return 0; + return false; } /* @@ -2331,7 +2325,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_io_context *cic; const int rw = rq_data_dir(rq); - const int is_sync = rq_is_sync(rq); + const bool is_sync = rq_is_sync(rq); struct cfq_queue *cfqq; unsigned long flags; From 355b659c87432a4e76160640625c47fcf9174e8d Mon Sep 17 00:00:00 2001 From: Corrado Zoccolo Date: Thu, 8 Oct 2009 08:43:32 +0200 Subject: [PATCH 10/16] cfq-iosched: avoid probable slice overrun when idling If the average think time is larger than the remaining time slice for any given queue, don't allow it to idle. A succesful idle also means that we need to dispatch and complete a request, so if we don't even have time left for the idle process, we would overrun the slice in any case. Signed-off-by: Corrado Zoccolo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index a592afcf1e6..069a61017c0 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1093,6 +1093,15 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) if (!cic || !atomic_read(&cic->ioc->nr_tasks)) return; + /* + * If our average think time is larger than the remaining time + * slice, then don't idle. This avoids overrunning the allotted + * time slice. + */ + if (sample_valid(cic->ttime_samples) && + (cfqq->slice_end - jiffies < cic->ttime_mean)) + return; + cfq_mark_cfqq_wait_request(cfqq); /* From 8c279598585e4992a41016bb973993ed15888cb3 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Fri, 9 Oct 2009 08:48:08 +0200 Subject: [PATCH 11/16] elv_iosched_store(): fix strstrip() misuse elv_iosched_store() ignore the return value of strstrip(). It makes small inconsistent behavior. This patch fixes it. ==================================== # cd /sys/block/{blockdev}/queue case1: # echo "anticipatory" > scheduler # cat scheduler noop [anticipatory] deadline cfq case2: # echo "anticipatory " > scheduler # cat scheduler noop [anticipatory] deadline cfq case3: # echo " anticipatory" > scheduler bash: echo: write error: Invalid argument ==================================== # cd /sys/block/{blockdev}/queue case1: # echo "anticipatory" > scheduler # cat scheduler noop [anticipatory] deadline cfq case2: # echo "anticipatory " > scheduler # cat scheduler noop [anticipatory] deadline cfq case3: # echo " anticipatory" > scheduler noop [anticipatory] deadline cfq Cc: Li Zefan Cc: Jens Axboe Signed-off-by: KOSAKI Motohiro Signed-off-by: Jens Axboe --- block/elevator.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index 1975b619c86..a847046c6e5 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1059,9 +1059,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, return count; strlcpy(elevator_name, name, sizeof(elevator_name)); - strstrip(elevator_name); - - e = elevator_get(elevator_name); + e = elevator_get(strstrip(elevator_name)); if (!e) { printk(KERN_ERR "elevator: type %s not found\n", elevator_name); return -EINVAL; From d25105e8911bff1dbd68e387f12901c5b1a15fe8 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 9 Oct 2009 12:40:42 +0200 Subject: [PATCH 12/16] writeback: account IO throttling wait as iowait It makes sense to do IOWAIT when someone is blocked due to IO throttle, as suggested by Kame and Peter. There is an old comment for not doing IOWAIT on throttle, however it has been mismatching the code for a long time. If we stop accounting IOWAIT for 2.6.32, it could be an undesirable behavior change. So restore the io_schedule. CC: KAMEZAWA Hiroyuki CC: Peter Zijlstra Signed-off-by: Wu Fengguang Signed-off-by: Jens Axboe --- kernel/sched.c | 3 --- mm/page-writeback.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 1535f3884b8..074f753f744 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6720,9 +6720,6 @@ EXPORT_SYMBOL(yield); /* * This task is about to go to sleep on IO. Increment rq->nr_iowait so * that process accounting knows that this is a task in IO wait state. - * - * But don't do that if it is a deliberate, throttling IO wait (this task - * has set its backing_dev_info: the queue against which it should throttle) */ void __sched io_schedule(void) { diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a3b14090b1f..2c5d79236ea 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -566,7 +566,8 @@ static void balance_dirty_pages(struct address_space *mapping, if (pages_written >= write_chunk) break; /* We've done our duty */ - schedule_timeout_interruptible(pause); + __set_current_state(TASK_INTERRUPTIBLE); + io_schedule_timeout(pause); /* * Increase the delay for each loop, up to our previous From 961515f613f26b7958c56c5c71061a8231e02be7 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 9 Oct 2009 13:01:27 +0200 Subject: [PATCH 13/16] writeback: kill space in debugfs item name The space is not script friendly, kill it. Signed-off-by: Wu Fengguang Signed-off-by: Jens Axboe --- mm/backing-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 3d3accb1f80..5a37e205571 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -92,7 +92,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) "BdiDirtyThresh: %8lu kB\n" "DirtyThresh: %8lu kB\n" "BackgroundThresh: %8lu kB\n" - "WriteBack threads:%8lu\n" + "WritebackThreads: %8lu\n" "b_dirty: %8lu\n" "b_io: %8lu\n" "b_more_io: %8lu\n" From c7ebf0657b1f47d85aee8349ed6345d940d7232a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 Oct 2009 08:20:47 +0200 Subject: [PATCH 14/16] blk-settings: fix function parameter kernel-doc notation Fix kernel-doc notation in blk-settings.c::blk_queue_max_discard_sectors(). Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index e0695bca702..66d4aa8799b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -242,7 +242,7 @@ EXPORT_SYMBOL(blk_queue_max_hw_sectors); /** * blk_queue_max_discard_sectors - set max sectors for a single discard * @q: the request queue for the device - * @max_discard: maximum number of sectors to discard + * @max_discard_sectors: maximum number of sectors to discard **/ void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors) From 2cfa948c9ea8681e831743a3bb61157f590fa8d8 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 13 Oct 2009 09:18:22 +0200 Subject: [PATCH 15/16] cciss: Fix multiple calls to pci_release_regions Fix multiple calls to pci_release_regions. If cciss_pci_init fails, it already does any necessary call to pci_release_regions, so this does not need to be done again in cciss_init_one in that case. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index fb5be2d95d5..52f53c134c5 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4254,7 +4254,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, mutex_init(&hba[i]->busy_shutting_down); if (cciss_pci_init(hba[i], pdev) != 0) - goto clean0; + goto clean_no_release_regions; sprintf(hba[i]->devname, "cciss%d", i); hba[i]->ctlr = i; @@ -4391,13 +4391,14 @@ clean2: clean1: cciss_destroy_hba_sysfs_entry(hba[i]); clean0: + pci_release_regions(pdev); +clean_no_release_regions: hba[i]->busy_initializing = 0; /* * Deliberately omit pci_disable_device(): it does something nasty to * Smart Array controllers that pci_enable_device does not undo */ - pci_release_regions(pdev); pci_set_drvdata(pdev, NULL); free_hba(i); return -1; From 2ec24ff1d1875defa742c76c9c7d74dca06b7e1f Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 13 Oct 2009 09:18:22 +0200 Subject: [PATCH 16/16] cciss: Add cciss_allow_hpsa module parameter Add cciss_allow_hpsa module parameter. This parameter causes the cciss driver to ignore any Smart Array devices known to be supported by the hpsa driver. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 74 ++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 52f53c134c5..6399e5090df 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -68,6 +68,12 @@ MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400" MODULE_VERSION("3.6.20"); MODULE_LICENSE("GPL"); +static int cciss_allow_hpsa; +module_param(cciss_allow_hpsa, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(cciss_allow_hpsa, + "Prevent cciss driver from accessing hardware known to be " + " supported by the hpsa driver"); + #include "cciss_cmd.h" #include "cciss.h" #include @@ -101,8 +107,6 @@ static const struct pci_device_id cciss_pci_device_id[] = { {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B}, - {PCI_VENDOR_ID_HP, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0}, {0,} }; @@ -123,8 +127,6 @@ static struct board_type products[] = { {0x409D0E11, "Smart Array 6400 EM", &SA5_access}, {0x40910E11, "Smart Array 6i", &SA5_access}, {0x3225103C, "Smart Array P600", &SA5_access}, - {0x3223103C, "Smart Array P800", &SA5_access}, - {0x3234103C, "Smart Array P400", &SA5_access}, {0x3235103C, "Smart Array P400i", &SA5_access}, {0x3211103C, "Smart Array E200i", &SA5_access}, {0x3212103C, "Smart Array E200", &SA5_access}, @@ -132,6 +134,10 @@ static struct board_type products[] = { {0x3214103C, "Smart Array E200i", &SA5_access}, {0x3215103C, "Smart Array E200i", &SA5_access}, {0x3237103C, "Smart Array E500", &SA5_access}, +/* controllers below this line are also supported by the hpsa driver. */ +#define HPSA_BOUNDARY 0x3223103C + {0x3223103C, "Smart Array P800", &SA5_access}, + {0x3234103C, "Smart Array P400", &SA5_access}, {0x323D103C, "Smart Array P700m", &SA5_access}, {0x3241103C, "Smart Array P212", &SA5_access}, {0x3243103C, "Smart Array P410", &SA5_access}, @@ -140,7 +146,6 @@ static struct board_type products[] = { {0x3249103C, "Smart Array P812", &SA5_access}, {0x324A103C, "Smart Array P712m", &SA5_access}, {0x324B103C, "Smart Array P711m", &SA5_access}, - {0xFFFF103C, "Unknown Smart Array", &SA5_access}, }; /* How long to wait (in milliseconds) for board to go into simple mode */ @@ -3754,7 +3759,27 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) __u64 cfg_offset; __u32 cfg_base_addr; __u64 cfg_base_addr_index; - int i, err; + int i, prod_index, err; + + subsystem_vendor_id = pdev->subsystem_vendor; + subsystem_device_id = pdev->subsystem_device; + board_id = (((__u32) (subsystem_device_id << 16) & 0xffff0000) | + subsystem_vendor_id); + + for (i = 0; i < ARRAY_SIZE(products); i++) { + /* Stand aside for hpsa driver on request */ + if (cciss_allow_hpsa && products[i].board_id == HPSA_BOUNDARY) + return -ENODEV; + if (board_id == products[i].board_id) + break; + } + prod_index = i; + if (prod_index == ARRAY_SIZE(products)) { + dev_warn(&pdev->dev, + "unrecognized board ID: 0x%08lx, ignoring.\n", + (unsigned long) board_id); + return -ENODEV; + } /* check to see if controller has been disabled */ /* BEFORE trying to enable it */ @@ -3778,11 +3803,6 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) return err; } - subsystem_vendor_id = pdev->subsystem_vendor; - subsystem_device_id = pdev->subsystem_device; - board_id = (((__u32) (subsystem_device_id << 16) & 0xffff0000) | - subsystem_vendor_id); - #ifdef CCISS_DEBUG printk("command = %x\n", command); printk("irq = %x\n", pdev->irq); @@ -3868,14 +3888,9 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) * leave a little room for ioctl calls. */ c->max_commands = readl(&(c->cfgtable->CmdsOutMax)); - for (i = 0; i < ARRAY_SIZE(products); i++) { - if (board_id == products[i].board_id) { - c->product_name = products[i].product_name; - c->access = *(products[i].access); - c->nr_cmds = c->max_commands - 4; - break; - } - } + c->product_name = products[prod_index].product_name; + c->access = *(products[prod_index].access); + c->nr_cmds = c->max_commands - 4; if ((readb(&c->cfgtable->Signature[0]) != 'C') || (readb(&c->cfgtable->Signature[1]) != 'I') || (readb(&c->cfgtable->Signature[2]) != 'S') || @@ -3884,27 +3899,6 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) err = -ENODEV; goto err_out_free_res; } - /* We didn't find the controller in our list. We know the - * signature is valid. If it's an HP device let's try to - * bind to the device and fire it up. Otherwise we bail. - */ - if (i == ARRAY_SIZE(products)) { - if (subsystem_vendor_id == PCI_VENDOR_ID_HP) { - c->product_name = products[i-1].product_name; - c->access = *(products[i-1].access); - c->nr_cmds = c->max_commands - 4; - printk(KERN_WARNING "cciss: This is an unknown " - "Smart Array controller.\n" - "cciss: Please update to the latest driver " - "available from www.hp.com.\n"); - } else { - printk(KERN_WARNING "cciss: Sorry, I don't know how" - " to access the Smart Array controller %08lx\n" - , (unsigned long)board_id); - err = -ENODEV; - goto err_out_free_res; - } - } #ifdef CONFIG_X86 { /* Need to enable prefetch in the SCSI core for 6400 in x86 */