From c3270e577c18b3d0e984c3371493205a4807db9d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 24 Apr 2008 12:52:20 +0200 Subject: [PATCH 01/16] relay: fix splice problem Splice isn't always incrementing the ppos correctly, which broke relay splice. Signed-off-by: Tom Zanussi Signed-off-by: Jens Axboe --- fs/splice.c | 2 +- kernel/relay.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index eeb1a86a701..633f58ebfb7 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1075,7 +1075,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ret = splice_direct_to_actor(in, &sd, direct_splice_actor); if (ret > 0) - *ppos += ret; + *ppos = sd.pos; return ret; } diff --git a/kernel/relay.c b/kernel/relay.c index d6204a48581..dc873fba90d 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1162,7 +1162,7 @@ static ssize_t relay_file_splice_read(struct file *in, ret = 0; spliced = 0; - while (len) { + while (len && !spliced) { ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); if (ret < 0) break; From 1afb20f30151dd4160877c827f5b7203f98627fb Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 25 Apr 2008 12:26:28 +0200 Subject: [PATCH 02/16] block: make rq_init() do a full memset() This requires moving rq_init() from get_request() to blk_alloc_request(). The upside is that we can now require an rq_init() from any path that wishes to hand the request to the block layer. rq_init() will be exported for the code that uses struct request without blk_get_request. This is a preparation for large command support, which needs to initialize struct request in a proper way (that is, just doing a memset() will not work). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-barrier.c | 7 +------ block/blk-core.c | 30 ++++-------------------------- 2 files changed, 5 insertions(+), 32 deletions(-) diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 55c5f1fc4f1..722140ac175 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -143,10 +143,8 @@ static void queue_flush(struct request_queue *q, unsigned which) end_io = post_flush_end_io; } - rq->cmd_flags = REQ_HARDBARRIER; rq_init(q, rq); - rq->elevator_private = NULL; - rq->elevator_private2 = NULL; + rq->cmd_flags = REQ_HARDBARRIER; rq->rq_disk = q->bar_rq.rq_disk; rq->end_io = end_io; q->prepare_flush_fn(q, rq); @@ -167,14 +165,11 @@ static inline struct request *start_ordered(struct request_queue *q, blkdev_dequeue_request(rq); q->orig_bar_rq = rq; rq = &q->bar_rq; - rq->cmd_flags = 0; rq_init(q, rq); if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) rq->cmd_flags |= REQ_RW; if (q->ordered & QUEUE_ORDERED_FUA) rq->cmd_flags |= REQ_FUA; - rq->elevator_private = NULL; - rq->elevator_private2 = NULL; init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; diff --git a/block/blk-core.c b/block/blk-core.c index 2a438a93f72..e447799256d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -107,40 +107,18 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) } EXPORT_SYMBOL(blk_get_backing_dev_info); -/* - * We can't just memset() the structure, since the allocation path - * already stored some information in the request. 
- */
 void rq_init(struct request_queue *q, struct request *rq)
 {
+	memset(rq, 0, sizeof(*rq));
+
 	INIT_LIST_HEAD(&rq->queuelist);
 	INIT_LIST_HEAD(&rq->donelist);
 	rq->q = q;
 	rq->sector = rq->hard_sector = (sector_t) -1;
-	rq->nr_sectors = rq->hard_nr_sectors = 0;
-	rq->current_nr_sectors = rq->hard_cur_sectors = 0;
-	rq->bio = rq->biotail = NULL;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
-	rq->rq_disk = NULL;
-	rq->nr_phys_segments = 0;
-	rq->nr_hw_segments = 0;
-	rq->ioprio = 0;
-	rq->special = NULL;
-	rq->buffer = NULL;
 	rq->tag = -1;
-	rq->errors = 0;
 	rq->ref_count = 1;
-	rq->cmd_len = 0;
-	memset(rq->cmd, 0, sizeof(rq->cmd));
-	rq->data_len = 0;
-	rq->extra_len = 0;
-	rq->sense_len = 0;
-	rq->data = NULL;
-	rq->sense = NULL;
-	rq->end_io = NULL;
-	rq->end_io_data = NULL;
-	rq->next_rq = NULL;
 }

 static void req_bio_endio(struct request *rq, struct bio *bio,
@@ -607,6 +585,8 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
 	if (!rq)
 		return NULL;

+	rq_init(q, rq);
+
 	/*
 	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
 	 * see bio.h and blkdev.h
@@ -789,8 +769,6 @@ rq_starved:
 	if (ioc_batching(q, ioc))
 		ioc->nr_batch_requests--;

-	rq_init(q, rq);
-
 	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
 out:
 	return rq;

From 31e103c595c0fa0d23eea5a4168362fba4c5ba62 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven
Date: Fri, 25 Apr 2008 12:46:20 +0200
Subject: [PATCH 03/16] ps3disk: Remove superfluous cast

As ps3disk is a ppc64-only driver, sector_t is equal to unsigned long,
and the cast is not needed.

Reuse in another (possibly 32-bit) driver is protected by the safety
net called `compiler warning' (with the cast, it may silently truncate
to 32-bit). If sector_t ever changes, we will get a compiler warning
as well (with the cast, we won't).

Signed-off-by: Geert Uytterhoeven
Acked-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 drivers/block/ps3disk.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 7483f947f0e..78e9ea73a31 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -102,8 +102,7 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev,
 		dev_dbg(&dev->sbd.core,
 			"%s:%u: bio %u: %u segs %u sectors from %lu\n",
 			__func__, __LINE__, i, bio_segments(iter.bio),
-			bio_sectors(iter.bio),
-			(unsigned long)iter.bio->bi_sector);
+			bio_sectors(iter.bio), iter.bio->bi_sector);

 		size = bvec->bv_len;
 		buf = bvec_kmap_irq(bvec, &flags);

From 657e93be356f51888f56a58d2b374caefbf2fe86 Mon Sep 17 00:00:00 2001
From: Adrian Bunk
Date: Fri, 25 Apr 2008 12:46:58 +0200
Subject: [PATCH 04/16] unexport blk_max_pfn

blk_max_pfn can now be unexported.

Signed-off-by: Adrian Bunk
Signed-off-by: Jens Axboe
---
 block/blk-settings.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5713f7e5cbd..77b51dc37a3 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -14,7 +14,6 @@ unsigned long blk_max_low_pfn;
 EXPORT_SYMBOL(blk_max_low_pfn);

 unsigned long blk_max_pfn;
-EXPORT_SYMBOL(blk_max_pfn);

 /**
  * blk_queue_prep_rq - set a prepare_request function for queue

From 68154e90c9d1492d570671ae181d9a8f8530da55 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Fri, 25 Apr 2008 12:47:50 +0200
Subject: [PATCH 05/16] block: add dma alignment and padding support to blk_rq_map_kern

This patch adds bio_copy_kern, similar to bio_copy_user. blk_rq_map_kern
uses bio_copy_kern instead of bio_map_kern if necessary.
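In short, the decision between the two reduces to an alignment test;
condensed from the blk_rq_map_kern() change below:

	unsigned long alignment = queue_dma_alignment(q) | q->dma_pad_mask;

	if (((unsigned long)kbuf & alignment) || (len & alignment))
		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);	/* bounce copy */
	else
		bio = bio_map_kern(q, kbuf, len, gfp_mask);		/* direct mapping */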
bio_copy_kern uses temporary pages and the bi_end_io callback frees
these pages. bio_copy_kern saves the original kernel buffer at
bio->bi_private; it doesn't use something like struct bio_map_data to
store the information about the caller.

Signed-off-by: FUJITA Tomonori
Cc: Tejun Heo
Signed-off-by: Jens Axboe
---
 block/blk-map.c     | 21 ++++++++++-
 fs/bio.c            | 90 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/bio.h |  2 +
 3 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/block/blk-map.c b/block/blk-map.c
index 3c942bd6422..0b1af5a3537 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -255,10 +255,18 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
  * @kbuf:	the kernel buffer
  * @len:	length of user data
  * @gfp_mask:	memory allocation flags
+ *
+ * Description:
+ *    Data will be mapped directly if possible. Otherwise a bounce
+ *    buffer is used.
  */
 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 		    unsigned int len, gfp_t gfp_mask)
 {
+	unsigned long kaddr;
+	unsigned int alignment;
+	int reading = rq_data_dir(rq) == READ;
+	int do_copy = 0;
 	struct bio *bio;

 	if (len > (q->max_hw_sectors << 9))
@@ -266,13 +274,24 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	if (!len || !kbuf)
 		return -EINVAL;

-	bio = bio_map_kern(q, kbuf, len, gfp_mask);
+	kaddr = (unsigned long)kbuf;
+	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
+	do_copy = ((kaddr & alignment) || (len & alignment));
+
+	if (do_copy)
+		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
+	else
+		bio = bio_map_kern(q, kbuf, len, gfp_mask);
+
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);

 	if (rq_data_dir(rq) == WRITE)
 		bio->bi_rw |= (1 << BIO_RW);

+	if (do_copy)
+		rq->cmd_flags |= REQ_COPY_USER;
+
 	blk_rq_bio_prep(q, rq, bio);
 	blk_queue_bounce(q, &rq->bio);
 	rq->buffer = rq->data = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index 6e0b6f66df0..799f86deff2 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -937,6 +937,95 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 	return ERR_PTR(-EINVAL);
 }

+static void bio_copy_kern_endio(struct bio *bio, int err)
+{
+	struct bio_vec *bvec;
+	const int read = bio_data_dir(bio) == READ;
+	char *p = bio->bi_private;
+	int i;
+
+	__bio_for_each_segment(bvec, bio, i, 0) {
+		char *addr = page_address(bvec->bv_page);
+
+		if (read && !err)
+			memcpy(p, addr, bvec->bv_len);
+
+		__free_page(bvec->bv_page);
+		p += bvec->bv_len;
+	}
+
+	bio_put(bio);
+}
+
+/**
+ *	bio_copy_kern	-	copy kernel address into bio
+ *	@q: the struct request_queue for the bio
+ *	@data: pointer to buffer to copy
+ *	@len: length in bytes
+ *	@gfp_mask: allocation flags for bio and page allocation
+ *
+ *	copy the kernel address into a bio suitable for io to a block
+ *	device. Returns an error pointer in case of error.
+ */ +struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, + gfp_t gfp_mask, int reading) +{ + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; + const int nr_pages = end - start; + struct bio *bio; + struct bio_vec *bvec; + int i, ret; + + bio = bio_alloc(gfp_mask, nr_pages); + if (!bio) + return ERR_PTR(-ENOMEM); + + while (len) { + struct page *page; + unsigned int bytes = PAGE_SIZE; + + if (bytes > len) + bytes = len; + + page = alloc_page(q->bounce_gfp | gfp_mask); + if (!page) { + ret = -ENOMEM; + goto cleanup; + } + + if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { + ret = -EINVAL; + goto cleanup; + } + + len -= bytes; + } + + if (!reading) { + void *p = data; + + bio_for_each_segment(bvec, bio, i) { + char *addr = page_address(bvec->bv_page); + + memcpy(addr, p, bvec->bv_len); + p += bvec->bv_len; + } + } + + bio->bi_private = data; + bio->bi_end_io = bio_copy_kern_endio; + return bio; +cleanup: + bio_for_each_segment(bvec, bio, i) + __free_page(bvec->bv_page); + + bio_put(bio); + + return ERR_PTR(ret); +} + /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions * for performing direct-IO in BIOs. @@ -1273,6 +1362,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); EXPORT_SYMBOL(bio_map_user); EXPORT_SYMBOL(bio_unmap_user); EXPORT_SYMBOL(bio_map_kern); +EXPORT_SYMBOL(bio_copy_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); EXPORT_SYMBOL(bio_split_pool); diff --git a/include/linux/bio.h b/include/linux/bio.h index d259690863f..61c15eaf3fb 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -324,6 +324,8 @@ extern struct bio *bio_map_user_iov(struct request_queue *, extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); +extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, + gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); From 75ad23bc0fcb4f992a5d06982bf0857ab1738e9e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 29 Apr 2008 14:48:33 +0200 Subject: [PATCH 06/16] block: make queue flags non-atomic We can save some atomic ops in the IO path, if we clearly define the rules of how to modify the queue flags. 
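To make the rules concrete: queue_flag_set() and queue_flag_clear()
warn unless q->queue_lock is held, while the *_unlocked variants are
reserved for paths where no concurrency is possible. A sketch of a
hypothetical caller:

	spin_lock_irq(q->queue_lock);
	queue_flag_set(QUEUE_FLAG_STOPPED, q);	/* lock held, checked helper */
	spin_unlock_irq(q->queue_lock);

	/* queue still private to its creator, nobody else can race: */
	queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);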
Signed-off-by: Jens Axboe --- block/blk-core.c | 39 ++++++++++++++++++++----------- block/blk-merge.c | 6 ++--- block/blk-settings.c | 2 +- block/blk-tag.c | 8 +++---- block/elevator.c | 13 ++++++++--- drivers/block/loop.c | 2 +- drivers/block/ub.c | 2 +- drivers/md/dm-table.c | 7 ++++-- drivers/md/md.c | 3 ++- drivers/scsi/scsi_debug.c | 2 +- drivers/scsi/scsi_lib.c | 29 +++++++++++++---------- drivers/scsi/scsi_transport_sas.c | 3 +-- include/linux/blkdev.h | 33 ++++++++++++++++++++++---- 13 files changed, 101 insertions(+), 48 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index e447799256d..d2f23ec5ebf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -198,7 +198,8 @@ void blk_plug_device(struct request_queue *q) if (blk_queue_stopped(q)) return; - if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { + __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); } @@ -213,9 +214,10 @@ int blk_remove_plug(struct request_queue *q) { WARN_ON(!irqs_disabled()); - if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) return 0; + queue_flag_clear(QUEUE_FLAG_PLUGGED, q); del_timer(&q->unplug_timer); return 1; } @@ -311,15 +313,16 @@ void blk_start_queue(struct request_queue *q) { WARN_ON(!irqs_disabled()); - clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_STOPPED, q); /* * one level of recursion is ok and is much faster than kicking * the unplug handling */ - if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + queue_flag_set(QUEUE_FLAG_REENTER, q); q->request_fn(q); - clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_REENTER, q); } else { blk_plug_device(q); kblockd_schedule_work(&q->unplug_work); @@ -344,7 +347,7 @@ EXPORT_SYMBOL(blk_start_queue); void blk_stop_queue(struct request_queue *q) { blk_remove_plug(q); - set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_STOPPED, q); } EXPORT_SYMBOL(blk_stop_queue); @@ -373,11 +376,8 @@ EXPORT_SYMBOL(blk_sync_queue); * blk_run_queue - run a single device queue * @q: The queue to run */ -void blk_run_queue(struct request_queue *q) +void __blk_run_queue(struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); blk_remove_plug(q); /* @@ -385,15 +385,28 @@ void blk_run_queue(struct request_queue *q) * handling reinvoke the handler shortly if we already got there. 
*/ if (!elv_queue_empty(q)) { - if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + queue_flag_set(QUEUE_FLAG_REENTER, q); q->request_fn(q); - clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_REENTER, q); } else { blk_plug_device(q); kblockd_schedule_work(&q->unplug_work); } } +} +EXPORT_SYMBOL(__blk_run_queue); +/** + * blk_run_queue - run a single device queue + * @q: The queue to run + */ +void blk_run_queue(struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); @@ -406,7 +419,7 @@ void blk_put_queue(struct request_queue *q) void blk_cleanup_queue(struct request_queue *q) { mutex_lock(&q->sysfs_lock); - set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); + queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); mutex_unlock(&q->sysfs_lock); if (q->elevator) diff --git a/block/blk-merge.c b/block/blk-merge.c index b5c5c4a9e3f..73b23562af2 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -55,7 +55,7 @@ void blk_recalc_rq_segments(struct request *rq) if (!rq->bio) return; - cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); + cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); hw_seg_size = seg_size = 0; phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; rq_for_each_segment(bv, rq, iter) { @@ -128,7 +128,7 @@ EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { - if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) + if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) return 0; if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) @@ -175,7 +175,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, int nsegs, cluster; nsegs = 0; - cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); + cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); /* * for each bio in rq diff --git a/block/blk-settings.c b/block/blk-settings.c index 77b51dc37a3..6089384ab06 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -287,7 +287,7 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) t->max_segment_size = min(t->max_segment_size, b->max_segment_size); t->hardsect_size = max(t->hardsect_size, b->hardsect_size); if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) - clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); + queue_flag_clear(QUEUE_FLAG_CLUSTER, t); } EXPORT_SYMBOL(blk_queue_stack_limits); diff --git a/block/blk-tag.c b/block/blk-tag.c index 4780a46ce23..e176ddbe599 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -70,7 +70,7 @@ void __blk_queue_free_tags(struct request_queue *q) __blk_free_tags(bqt); q->queue_tags = NULL; - q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); + queue_flag_clear(QUEUE_FLAG_QUEUED, q); } /** @@ -98,7 +98,7 @@ EXPORT_SYMBOL(blk_free_tags); **/ void blk_queue_free_tags(struct request_queue *q) { - clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_QUEUED, q); } EXPORT_SYMBOL(blk_queue_free_tags); @@ -188,7 +188,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth, rc = blk_queue_resize_tags(q, depth); if (rc) return rc; - set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_QUEUED, q); return 0; } else atomic_inc(&tags->refcnt); @@ -197,7 +197,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth, * assign it, all done */ q->queue_tags = 
tags; - q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); + queue_flag_set(QUEUE_FLAG_QUEUED, q); INIT_LIST_HEAD(&q->tag_busy_list); return 0; fail: diff --git a/block/elevator.c b/block/elevator.c index 88318c38360..e8a90fe2342 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1070,7 +1070,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) */ spin_lock_irq(q->queue_lock); - set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); elv_drain_elevator(q); @@ -1104,7 +1104,10 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * finally exit old elevator and turn off BYPASS. */ elevator_exit(old_elevator); - clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + spin_lock_irq(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); + spin_unlock_irq(q->queue_lock); + return 1; fail_register: @@ -1115,7 +1118,11 @@ fail_register: elevator_exit(e); q->elevator = old_elevator; elv_register_queue(q); - clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + + spin_lock_irq(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); + spin_unlock_irq(q->queue_lock); + return 0; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f7f163557aa..d3a25b027ff 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -546,7 +546,7 @@ static void loop_unplug(struct request_queue *q) { struct loop_device *lo = q->queuedata; - clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); + queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q); blk_run_address_space(lo->lo_backing_file->f_mapping); } diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 27bfe72aab5..e322cce8c12 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -2399,7 +2399,7 @@ static void ub_disconnect(struct usb_interface *intf) del_gendisk(lun->disk); /* * I wish I could do: - * set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); + * queue_flag_set(QUEUE_FLAG_DEAD, q); * As it is, we rely on our internal poisoning and let * the upper levels to spin furiously failing all the I/O. */ diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 51be5334421..73326e7c54b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -873,10 +873,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) q->max_hw_sectors = t->limits.max_hw_sectors; q->seg_boundary_mask = t->limits.seg_boundary_mask; q->bounce_pfn = t->limits.bounce_pfn; + /* XXX: the below will probably go bug. must ensure there can be no + * concurrency on queue_flags, and use the unlocked versions... 
+ */ if (t->limits.no_cluster) - q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER); + queue_flag_clear(QUEUE_FLAG_CLUSTER, q); else - q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER); + queue_flag_set(QUEUE_FLAG_CLUSTER, q); } diff --git a/drivers/md/md.c b/drivers/md/md.c index 87620b705be..acd716b657b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -282,7 +282,8 @@ static mddev_t * mddev_find(dev_t unit) kfree(new); return NULL; } - set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags); + /* Can be unlocked because the queue is new: no concurrency */ + queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue); blk_queue_make_request(new->queue, md_fail_request); diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 07103c399fe..f6600bfb5bd 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1773,7 +1773,7 @@ static int scsi_debug_slave_alloc(struct scsi_device *sdp) if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %u>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); - set_bit(QUEUE_FLAG_BIDI, &sdp->request_queue->queue_flags); + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue); return 0; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 67f412bb497..d545ad1cf47 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -536,6 +536,9 @@ static void scsi_run_queue(struct request_queue *q) !shost->host_blocked && !shost->host_self_blocked && !((shost->can_queue > 0) && (shost->host_busy >= shost->can_queue))) { + + int flagset; + /* * As long as shost is accepting commands and we have * starved queues, call blk_run_queue. scsi_request_fn @@ -549,19 +552,20 @@ static void scsi_run_queue(struct request_queue *q) sdev = list_entry(shost->starved_list.next, struct scsi_device, starved_entry); list_del_init(&sdev->starved_entry); - spin_unlock_irqrestore(shost->host_lock, flags); + spin_unlock(shost->host_lock); + spin_lock(sdev->request_queue->queue_lock); + flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && + !test_bit(QUEUE_FLAG_REENTER, + &sdev->request_queue->queue_flags); + if (flagset) + queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); + __blk_run_queue(sdev->request_queue); + if (flagset) + queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); + spin_unlock(sdev->request_queue->queue_lock); - if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && - !test_and_set_bit(QUEUE_FLAG_REENTER, - &sdev->request_queue->queue_flags)) { - blk_run_queue(sdev->request_queue); - clear_bit(QUEUE_FLAG_REENTER, - &sdev->request_queue->queue_flags); - } else - blk_run_queue(sdev->request_queue); - - spin_lock_irqsave(shost->host_lock, flags); + spin_lock(shost->host_lock); if (unlikely(!list_empty(&sdev->starved_entry))) /* * sdev lost a race, and was put back on the @@ -1585,8 +1589,9 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segment_size(q, dma_get_max_seg_size(dev)); + /* New queue, no concurrency on queue_flags */ if (!shost->use_clustering) - clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); + queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); /* * set a reasonable default alignment on word boundaries: the diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 7899e3dda9b..f4461d35ffb 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -248,8 +248,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) else 
q->queuedata = shost; - set_bit(QUEUE_FLAG_BIDI, &q->queue_flags); - + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c5065e3d2ca..8ca481cd7d7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -408,6 +408,30 @@ struct request_queue #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ +static inline void queue_flag_set_unlocked(unsigned int flag, + struct request_queue *q) +{ + __set_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_set(unsigned int flag, struct request_queue *q) +{ + WARN_ON_ONCE(!spin_is_locked(q->queue_lock)); + __set_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_clear_unlocked(unsigned int flag, + struct request_queue *q) +{ + __clear_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) +{ + WARN_ON_ONCE(!spin_is_locked(q->queue_lock)); + __clear_bit(flag, &q->queue_flags); +} + enum { /* * Hardbarrier is supported with one of the following methods. @@ -496,17 +520,17 @@ static inline int blk_queue_full(struct request_queue *q, int rw) static inline void blk_set_queue_full(struct request_queue *q, int rw) { if (rw == READ) - set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_READFULL, q); else - set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_WRITEFULL, q); } static inline void blk_clear_queue_full(struct request_queue *q, int rw) { if (rw == READ) - clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_READFULL, q); else - clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_WRITEFULL, q); } @@ -626,6 +650,7 @@ extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); +extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); From 72ed0bf60ade8d2cc1f58276cb16add0af2c3e25 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 09:49:05 +0200 Subject: [PATCH 07/16] block/elevator.c:elv_rq_merge_ok() mustn't be inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the following build error with UML and gcc 4.3: <-- snip --> ... 
CC block/elevator.o /home/bunk/linux/kernel-2.6/git/linux-2.6/block/elevator.c: In function ‘elv_merge’: /home/bunk/linux/kernel-2.6/git/linux-2.6/block/elevator.c:73: sorry, unimplemented: inlining failed in call to ‘elv_rq_merge_ok’: function body not available /home/bunk/linux/kernel-2.6/git/linux-2.6/block/elevator.c:103: sorry, unimplemented: called from here /home/bunk/linux/kernel-2.6/git/linux-2.6/block/elevator.c:73: sorry, unimplemented: inlining failed in call to ‘elv_rq_merge_ok’: function body not available /home/bunk/linux/kernel-2.6/git/linux-2.6/block/elevator.c:495: sorry, unimplemented: called from here make[2]: *** [block/elevator.o] Error 1 make[1]: *** [block] Error 2 <-- snip --> Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- block/elevator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/elevator.c b/block/elevator.c index e8a90fe2342..7253fa05db0 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -69,7 +69,7 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) /* * can we safely merge with this request? */ -inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) +int elv_rq_merge_ok(struct request *rq, struct bio *bio) { if (!rq_mergeable(rq)) return 0; From 6f6a036e6e061563efecb61505fc4cc3ca415f80 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 09:49:06 +0200 Subject: [PATCH 08/16] block/blk-barrier.c:blk_ordered_cur_seq() mustn't be inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the following build error with UML and gcc 4.3: <-- snip --> ... CC block/blk-barrier.o /home/bunk/linux/kernel-2.6/git/linux-2.6/block/blk-barrier.c: In function ‘blk_do_ordered’: /home/bunk/linux/kernel-2.6/git/linux-2.6/block/blk-barrier.c:57: sorry, unimplemented: inlining failed in call to ‘blk_ordered_cur_seq’: function body not available /home/bunk/linux/kernel-2.6/git/linux-2.6/block/blk-barrier.c:252: sorry, unimplemented: called from here /home/bunk/linux/kernel-2.6/git/linux-2.6/block/blk-barrier.c:57: sorry, unimplemented: inlining failed in call to ‘blk_ordered_cur_seq’: function body not available /home/bunk/linux/kernel-2.6/git/linux-2.6/block/blk-barrier.c:253: sorry, unimplemented: called from here make[2]: *** [block/blk-barrier.o] Error 1 <-- snip --> Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- block/blk-barrier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 722140ac175..47127ba09e4 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -53,7 +53,7 @@ EXPORT_SYMBOL(blk_queue_ordered); /* * Cache flushing for ordered writes handling */ -inline unsigned blk_ordered_cur_seq(struct request_queue *q) +unsigned blk_ordered_cur_seq(struct request_queue *q) { if (!q->ordseq) return 0; From 4917fa292558593d36b2880977ea206f7727dbe5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 29 Apr 2008 09:54:35 +0200 Subject: [PATCH 09/16] block: no need to initialize rq->cmd in prepare_flush_fn hook The block layer initializes rq->cmd (queue_flush calls rq_init) so prepare_flush_fn hooks don't need to do that. The purpose of this patch is to remove sizeof(rq->cmd), as a preparation for large command support, which changes rq->cmd from the static array to a pointer. sizeof(rq->cmd) will not make sense. 
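With that in place, a prepare_flush_fn hook only fills in what it
actually needs. A minimal sketch of a hypothetical driver hook after
this patch:

	static void mydrv_prepare_flush(struct request_queue *q, struct request *rq)
	{
		/* rq->cmd was already zeroed by rq_init() in queue_flush() */
		rq->cmd_type = REQ_TYPE_FLUSH;
	}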
Signed-off-by: FUJITA Tomonori Cc: Geert Uytterhoeven Cc: James Bottomley Cc: Jens Axboe Signed-off-by: Jens Axboe --- drivers/block/ps3disk.c | 1 - drivers/scsi/sd.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 78e9ea73a31..d797e209951 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -405,7 +405,6 @@ static void ps3disk_prepare_flush(struct request_queue *q, struct request *req) dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); - memset(req->cmd, 0, sizeof(req->cmd)); req->cmd_type = REQ_TYPE_FLUSH; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3cea17dd5db..01cefbb2d53 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -860,7 +860,6 @@ static int sd_sync_cache(struct scsi_disk *sdkp) static void sd_prepare_flush(struct request_queue *q, struct request *rq) { - memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->timeout = SD_TIMEOUT; rq->cmd[0] = SYNCHRONIZE_CACHE; From 992b5bceee447a32ef2d617730ae0d03c063eedd Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 29 Apr 2008 09:54:36 +0200 Subject: [PATCH 10/16] block: no need to initialize rq->cmd with blk_get_request blk_get_request initializes rq->cmd (rq_init does) so the users don't need to do that. The purpose of this patch is to remove sizeof(rq->cmd) and &rq->cmd, as a preparation for large command support, which changes rq->cmd from the static array to a pointer. sizeof(rq->cmd) will not make sense and &rq->cmd won't work. Signed-off-by: FUJITA Tomonori Cc: James Bottomley Cc: Alasdair G Kergon Cc: Jens Axboe Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 3 --- drivers/block/pktcdvd.c | 2 -- drivers/cdrom/cdrom.c | 1 - drivers/md/dm-emc.c | 2 -- drivers/md/dm-mpath-hp-sw.c | 1 - drivers/md/dm-mpath-rdac.c | 1 - 6 files changed, 10 deletions(-) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index a2c3a936ebf..ffa3720e6ca 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -217,8 +217,6 @@ EXPORT_SYMBOL_GPL(blk_verify_command); static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, struct sg_io_hdr *hdr, int has_write_perm) { - memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ - if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; if (blk_verify_command(rq->cmd, has_write_perm)) @@ -531,7 +529,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, rq->data_len = 0; rq->extra_len = 0; rq->timeout = BLK_DEFAULT_SG_TIMEOUT; - memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = cmd; rq->cmd[4] = data; rq->cmd_len = 6; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 18feb1c7c33..3b806c9fb00 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -776,8 +776,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->cmd_len = COMMAND_SIZE(cgc->cmd[0]); memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE); - if (sizeof(rq->cmd) > CDROM_PACKET_SIZE) - memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE); rq->timeout = 60*HZ; rq->cmd_type = REQ_TYPE_BLOCK_PC; diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index ac3829030ac..69f26eb6415 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2194,7 +2194,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, if (ret) break; - memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = GPCMD_READ_CD; rq->cmd[1] = 1 << 2; rq->cmd[2] = (lba 
>> 24) & 0xff;
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 6b91b9ab1d4..3ea5ad4b780 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -110,8 +110,6 @@ static struct request *get_failover_req(struct emc_handler *h,
 	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
 	rq->sense_len = 0;

-	memset(&rq->cmd, 0, BLK_MAX_CDB);
-
 	rq->timeout = EMC_FAILOVER_TIMEOUT;
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
index 204bf42c944..b63a0ab37c5 100644
--- a/drivers/md/dm-mpath-hp-sw.c
+++ b/drivers/md/dm-mpath-hp-sw.c
@@ -137,7 +137,6 @@ static struct request *hp_sw_get_request(struct dm_path *path)
 	req->sense = h->sense;
 	memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);

-	memset(&req->cmd, 0, BLK_MAX_CDB);
 	req->cmd[0] = START_STOP;
 	req->cmd[4] = 1;
 	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c
index e04eb5c697f..95e77734880 100644
--- a/drivers/md/dm-mpath-rdac.c
+++ b/drivers/md/dm-mpath-rdac.c
@@ -284,7 +284,6 @@ static struct request *get_rdac_req(struct rdac_handler *h,
 		return NULL;
 	}

-	memset(&rq->cmd, 0, BLK_MAX_CDB);
 	rq->sense = h->sense;
 	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
 	rq->sense_len = 0;

From 2a4aa30c5f967eb6ae874c67fa6fceeee84815f9 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Tue, 29 Apr 2008 09:54:36 +0200
Subject: [PATCH 11/16] block: rename and export rq_init()

This renames rq_init() to blk_rq_init() and exports it. Any path that
hands the request to the block layer needs to call it to initialize
the request.

This is a preparation for large command support, which needs to
initialize the request in a proper way (that is, just doing a memset()
will not work).
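A sketch of the expected usage, mirroring the conversions in the
following patches (an on-stack request with no queue association):

	struct request rq;

	blk_rq_init(NULL, &rq);		/* q may be NULL for a detached request */
	rq.cmd_type = REQ_TYPE_SPECIAL;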
Signed-off-by: FUJITA Tomonori
Cc: Jens Axboe
Signed-off-by: Jens Axboe
---
 block/blk-barrier.c    | 4 ++--
 block/blk-core.c       | 5 +++--
 block/blk.h            | 1 -
 include/linux/blkdev.h | 1 +
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 47127ba09e4..66e55288178 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -143,7 +143,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
 		end_io = post_flush_end_io;
 	}

-	rq_init(q, rq);
+	blk_rq_init(q, rq);
 	rq->cmd_flags = REQ_HARDBARRIER;
 	rq->rq_disk = q->bar_rq.rq_disk;
 	rq->end_io = end_io;
@@ -165,7 +165,7 @@ static inline struct request *start_ordered(struct request_queue *q,
 	blkdev_dequeue_request(rq);
 	q->orig_bar_rq = rq;
 	rq = &q->bar_rq;
-	rq_init(q, rq);
+	blk_rq_init(q, rq);
 	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
 		rq->cmd_flags |= REQ_RW;
 	if (q->ordered & QUEUE_ORDERED_FUA)
diff --git a/block/blk-core.c b/block/blk-core.c
index d2f23ec5ebf..fe0d1390b74 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -107,7 +107,7 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
 }
 EXPORT_SYMBOL(blk_get_backing_dev_info);

-void rq_init(struct request_queue *q, struct request *rq)
+void blk_rq_init(struct request_queue *q, struct request *rq)
 {
 	memset(rq, 0, sizeof(*rq));

@@ -120,6 +120,7 @@ void rq_init(struct request_queue *q, struct request *rq)
 	rq->tag = -1;
 	rq->ref_count = 1;
 }
+EXPORT_SYMBOL(blk_rq_init);

 static void req_bio_endio(struct request *rq, struct bio *bio,
 			  unsigned int nbytes, int error)
@@ -598,7 +599,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
 	if (!rq)
 		return NULL;

-	rq_init(q, rq);
+	blk_rq_init(q, rq);

 	/*
 	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
diff --git a/block/blk.h b/block/blk.h
index ec9120fb789..59776ab4742 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -10,7 +10,6 @@ extern struct kmem_cache *blk_requestq_cachep;

 extern struct kobj_type blk_queue_ktype;

-void rq_init(struct request_queue *q, struct request *rq);
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8ca481cd7d7..d17032c347c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -607,6 +607,7 @@ extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 extern void register_disk(struct gendisk *dev);
 extern void generic_make_request(struct bio *bio);
+extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
 extern void blk_end_sync_rq(struct request *rq, int error);

From 4f54eec8311c3325888c29ce8e4496daf4dbe624 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Tue, 29 Apr 2008 09:54:37 +0200
Subject: [PATCH 12/16] block: use blk_rq_init() to initialize the request

Any path that hands a request to the block layer needs to call
blk_rq_init() to initialize the request.

This is a preparation for large command support, which needs to
initialize the request in a proper way (that is, just doing a memset()
will not work).
Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 1 + drivers/block/paride/pd.c | 4 +--- drivers/scsi/scsi_error.c | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 60cc54368b6..f75bda16a1f 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -537,6 +537,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file, switch (cmd) { case NBD_DISCONNECT: printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name); + blk_rq_init(NULL, &sreq); sreq.cmd_type = REQ_TYPE_SPECIAL; nbd_cmd(&sreq) = NBD_CMD_DISC; /* diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index df819f8a95a..570f3b70dce 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -716,10 +716,8 @@ static int pd_special_command(struct pd_unit *disk, struct request rq; int err = 0; - memset(&rq, 0, sizeof(rq)); - rq.errors = 0; + blk_rq_init(NULL, &rq); rq.rq_disk = disk->gd; - rq.ref_count = 1; rq.end_io_data = &wait; rq.end_io = blk_end_sync_rq; blk_insert_request(disk->gd->queue, &rq, 0, func); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 221f31e36d2..1eaba6cd80f 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1771,6 +1771,7 @@ scsi_reset_provider(struct scsi_device *dev, int flag) unsigned long flags; int rtn; + blk_rq_init(NULL, &req); scmd->request = &req; memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout)); From e7b241a7715d2a0885f779f5baa63711d71b1d75 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 29 Apr 2008 09:54:38 +0200 Subject: [PATCH 13/16] ide: use blk_rq_init() to initialize the request This converts ide to use blk_rq_init to initialize the request. This is a preparation for large command support, which needs to initialize the request in a proper way (that is, just doing a memset() will not work). 
Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Cc: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- drivers/ide/ide-io.c | 3 +-- drivers/ide/ide-tape.c | 2 +- drivers/ide/ide-taskfile.c | 3 +-- drivers/ide/ide.c | 4 ++-- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 788783da902..696525342e9 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -1550,8 +1550,7 @@ irqreturn_t ide_intr (int irq, void *dev_id) void ide_init_drive_cmd (struct request *rq) { - memset(rq, 0, sizeof(*rq)); - rq->ref_count = 1; + blk_rq_init(NULL, rq); } EXPORT_SYMBOL(ide_init_drive_cmd); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 54a43b04460..1e1f26331a2 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -662,7 +662,7 @@ static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc) static void idetape_init_rq(struct request *rq, u8 cmd) { - memset(rq, 0, sizeof(*rq)); + blk_rq_init(NULL, rq); rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd[0] = cmd; } diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 9a846a0cd5a..0c908ca3ff7 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -494,8 +494,7 @@ int ide_raw_taskfile(ide_drive_t *drive, ide_task_t *task, u8 *buf, u16 nsect) { struct request rq; - memset(&rq, 0, sizeof(rq)); - rq.ref_count = 1; + blk_rq_init(NULL, &rq); rq.cmd_type = REQ_TYPE_ATA_TASKFILE; rq.buffer = buf; diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 999584c03d9..c758dcb13b1 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -564,7 +564,7 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg) if (!(drive->dn % 2)) ide_acpi_get_timing(hwif); - memset(&rq, 0, sizeof(rq)); + blk_rq_init(NULL, &rq); memset(&rqpm, 0, sizeof(rqpm)); memset(&args, 0, sizeof(args)); rq.cmd_type = REQ_TYPE_PM_SUSPEND; @@ -602,7 +602,7 @@ static int generic_ide_resume(struct device *dev) ide_acpi_exec_tfs(drive); - memset(&rq, 0, sizeof(rq)); + blk_rq_init(NULL, &rq); memset(&rqpm, 0, sizeof(rqpm)); memset(&args, 0, sizeof(args)); rq.cmd_type = REQ_TYPE_PM_RESUME; From d34c87e4ba3d1857f80a65179e81a18705a31656 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 29 Apr 2008 14:37:52 +0200 Subject: [PATCH 14/16] block: replace sizeof(rq->cmd) with BLK_MAX_CDB This is a preparation for changing rq->cmd from the static array to a pointer. 
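The underlying issue: once rq->cmd becomes a pointer (see the next
patch), sizeof(rq->cmd) silently turns into the size of a pointer
rather than the size of the 16-byte CDB array, while BLK_MAX_CDB keeps
saying what is meant:

	memset(rq->cmd, 0, sizeof(rq->cmd));	/* after the change: 4 or 8 bytes only */
	memset(rq->cmd, 0, BLK_MAX_CDB);	/* always the full CDB length */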
Signed-off-by: FUJITA Tomonori Cc: Boaz Harrosh Cc: Bartlomiej Zolnierkiewicz Cc: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- drivers/ide/ide-cd.c | 4 ++-- drivers/ide/ide-cd_verbose.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index fe0d1390b74..e6fdb288be6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -173,7 +173,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg) if (blk_pc_request(rq)) { printk(KERN_INFO " cdb: "); - for (bit = 0; bit < sizeof(rq->cmd); bit++) + for (bit = 0; bit < BLK_MAX_CDB; bit++) printk("%02x ", rq->cmd[bit]); printk("\n"); } diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index fe9df38f62c..68e7f19dc03 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -782,7 +782,7 @@ static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) sector_div(frame, queue_hardsect_size(drive->queue) >> SECTOR_BITS); - memset(rq->cmd, 0, sizeof(rq->cmd)); + memset(rq->cmd, 0, BLK_MAX_CDB); rq->cmd[0] = GPCMD_SEEK; put_unaligned(cpu_to_be32(frame), (unsigned int *) &rq->cmd[2]); @@ -1694,7 +1694,7 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) long block = (long)rq->hard_sector / (hard_sect >> 9); unsigned long blocks = rq->hard_nr_sectors / (hard_sect >> 9); - memset(rq->cmd, 0, sizeof(rq->cmd)); + memset(rq->cmd, 0, BLK_MAX_CDB); if (rq_data_dir(rq) == READ) rq->cmd[0] = GPCMD_READ_10; diff --git a/drivers/ide/ide-cd_verbose.c b/drivers/ide/ide-cd_verbose.c index 6ed7ca07133..6490a2dea96 100644 --- a/drivers/ide/ide-cd_verbose.c +++ b/drivers/ide/ide-cd_verbose.c @@ -326,7 +326,7 @@ void ide_cd_log_error(const char *name, struct request *failed_command, printk(KERN_ERR " The failed \"%s\" packet command " "was: \n \"", s); - for (i = 0; i < sizeof(failed_command->cmd); i++) + for (i = 0; i < BLK_MAX_CDB; i++) printk(KERN_CONT "%02x ", failed_command->cmd[i]); printk(KERN_CONT "\"\n"); } From d7e3c3249ef23b4617393c69fe464765b4ff1645 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 29 Apr 2008 09:54:39 +0200 Subject: [PATCH 15/16] block: add large command support This patch changes rq->cmd from the static array to a pointer to support large commands. We rarely handle large commands. So for optimization, a struct request still has a static array for a command. rq_init sets rq->cmd pointer to the static array. Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + include/linux/blkdev.h | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index e6fdb288be6..5d09f8c5602 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -117,6 +117,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->sector = rq->hard_sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); + rq->cmd = rq->__cmd; rq->tag = -1; rq->ref_count = 1; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d17032c347c..08df1ea8bac 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -215,8 +215,9 @@ struct request { /* * when request is used as a packet command carrier */ - unsigned int cmd_len; - unsigned char cmd[BLK_MAX_CDB]; + unsigned short cmd_len; + unsigned char __cmd[BLK_MAX_CDB]; + unsigned char *cmd; unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ From ac9fafa1243640349aa481adf473db283a695766 Mon Sep 17 00:00:00 2001 From: "Alan D. 
Brunelle"
Date: Tue, 29 Apr 2008 14:44:19 +0200
Subject: [PATCH 16/16] block: Skip I/O merges when disabled

The block I/O + elevator + I/O scheduler code spends a lot of time
trying to merge I/Os -- rightfully so under "normal" circumstances.
However, if one were to know that the incoming I/O stream was /very/
random in nature, the cycles are wasted.

This patch adds a per-request_queue tunable that (when set) disables
merge attempts (beyond the simple one-hit cache check), thus freeing up
a non-trivial number of CPU cycles.

Signed-off-by: Alan D. Brunelle
Signed-off-by: Jens Axboe
---
 block/blk-sysfs.c      | 26 ++++++++++++++++++++++++++
 block/elevator.c       |  3 +++
 include/linux/blkdev.h |  2 ++
 3 files changed, 31 insertions(+)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index fc41d83be22..e85c4013e8a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -135,6 +135,25 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_hw_sectors_kb, (page));
 }

+static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(blk_queue_nomerges(q), page);
+}
+
+static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
+				    size_t count)
+{
+	unsigned long nm;
+	ssize_t ret = queue_var_store(&nm, page, count);
+
+	if (nm)
+		set_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
+	else
+		clear_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
+
+	return ret;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -170,6 +189,12 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = {
 	.show = queue_hw_sector_size_show,
 };

+static struct queue_sysfs_entry queue_nomerges_entry = {
+	.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_nomerges_show,
+	.store = queue_nomerges_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -177,6 +202,7 @@ static struct attribute *default_attrs[] = {
 	&queue_max_sectors_entry.attr,
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
+	&queue_nomerges_entry.attr,
 	NULL,
 };

diff --git a/block/elevator.c b/block/elevator.c
index 7253fa05db0..ac5310ef827 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -488,6 +488,9 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 		}
 	}

+	if (blk_queue_nomerges(q))
+		return ELEVATOR_NO_MERGE;
+
 	/*
 	 * See if our hash lookup can find a potential backmerge.
 	 */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 08df1ea8bac..c09696a90d6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -408,6 +408,7 @@ struct request_queue
 #define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
 #define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
+#define QUEUE_FLAG_NOMERGES    10	/* disable merge attempts */

 static inline void queue_flag_set_unlocked(unsigned int flag,
 					   struct request_queue *q)
@@ -476,6 +477,7 @@ enum {
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
+#define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)

 #define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
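The tunable lives in the queue's sysfs directory, e.g.:

	echo 1 > /sys/block/<dev>/queue/nomerges	# skip merge attempts
	echo 0 > /sys/block/<dev>/queue/nomerges	# restore default merging

Note that queue_nomerges_store() flips the flag with set_bit()/clear_bit()
rather than the queue_flag_*() helpers introduced earlier in this series.
A stricter variant (a possible follow-up, not part of this patch) would
hold the queue lock:

	spin_lock_irq(q->queue_lock);
	if (nm)
		queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	else
		queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
	spin_unlock_irq(q->queue_lock);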