diff --git a/block/blk-core.c b/block/blk-core.c index c3df30cfb3f..10e8a64a5a5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1770,8 +1770,6 @@ static void end_that_request_last(struct request *req, int error) { struct gendisk *disk = req->rq_disk; - blk_delete_timer(req); - if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); @@ -1781,6 +1779,8 @@ static void end_that_request_last(struct request *req, int error) if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); + blk_delete_timer(req); + /* * Account IO completion. bar_rq isn't accounted as a normal * IO on queueing nor completion. Accounting the containing diff --git a/block/blk-merge.c b/block/blk-merge.c index 8681cd6f991..b92f5b0866b 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -222,27 +222,6 @@ new_segment: } EXPORT_SYMBOL(blk_rq_map_sg); -static inline int ll_new_mergeable(struct request_queue *q, - struct request *req, - struct bio *bio) -{ - int nr_phys_segs = bio_phys_segments(q, bio); - - if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { - req->cmd_flags |= REQ_NOMERGE; - if (req == q->last_merge) - q->last_merge = NULL; - return 0; - } - - /* - * A hw segment is just getting larger, bump just the phys - * counter. - */ - req->nr_phys_segments += nr_phys_segs; - return 1; -} - static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 972a63f848f..69185ea9fae 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -75,14 +75,7 @@ void blk_delete_timer(struct request *req) { struct request_queue *q = req->q; - /* - * Nothing to detach - */ - if (!q->rq_timed_out_fn || !req->deadline) - return; - list_del_init(&req->timeout_list); - if (list_empty(&q->timeout_list)) del_timer(&q->timeout); } @@ -142,7 +135,7 @@ void blk_rq_timed_out_timer(unsigned long data) } if (next_set && !list_empty(&q->timeout_list)) - mod_timer(&q->timeout, round_jiffies(next)); + mod_timer(&q->timeout, round_jiffies_up(next)); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -198,17 +191,10 @@ void blk_add_timer(struct request *req) /* * If the timer isn't already pending or this timeout is earlier - * than an existing one, modify the timer. Round to next nearest + * than an existing one, modify the timer. Round up to next nearest * second. */ - expiry = round_jiffies(req->deadline); - - /* - * We use ->deadline == 0 to detect whether a timer was added or - * not, so just increase to next jiffy for that specific case - */ - if (unlikely(!req->deadline)) - req->deadline = 1; + expiry = round_jiffies_up(req->deadline); if (!timer_pending(&q->timeout) || time_before(expiry, q->timeout.expires)) diff --git a/block/elevator.c b/block/elevator.c index 59173a69ebd..9ac82dde99d 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -773,12 +773,6 @@ struct request *elv_next_request(struct request_queue *q) */ rq->cmd_flags |= REQ_STARTED; blk_add_trace_rq(q, rq, BLK_TA_ISSUE); - - /* - * We are now handing the request to the hardware, - * add the timeout handler - */ - blk_add_timer(rq); } if (!q->boundary_rq || q->boundary_rq == rq) { @@ -850,6 +844,12 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq) */ if (blk_account_rq(rq)) q->in_flight++; + + /* + * We are now handing the request to the hardware, add the + * timeout handler. + */ + blk_add_timer(rq); } EXPORT_SYMBOL(elv_dequeue_request); diff --git a/fs/block_dev.c b/fs/block_dev.c index 88a776fa0ef..db831efbdbb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -986,7 +986,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; - struct hd_struct *part = NULL; int ret; int partno; int perm = 0; @@ -1004,24 +1003,25 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return ret; } - ret = -ENXIO; - lock_kernel(); + ret = -ENXIO; disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) goto out_unlock_kernel; - part = disk_get_part(disk, partno); - if (!part) - goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; - bdev->bd_part = part; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; + + ret = -ENXIO; + bdev->bd_part = disk_get_part(disk, partno); + if (!bdev->bd_part) + goto out_clear; + if (disk->fops->open) { ret = disk->fops->open(bdev, mode); if (ret) @@ -1049,18 +1049,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_contains = whole; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; + bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || - !part || !part->nr_sects) { + !bdev->bd_part || !bdev->bd_part->nr_sects) { ret = -ENXIO; goto out_clear; } - bd_set_size(bdev, (loff_t)part->nr_sects << 9); + bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } } else { - disk_put_part(part); put_disk(disk); module_put(disk->fops->owner); - part = NULL; disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { @@ -1080,6 +1079,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return 0; out_clear: + disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; @@ -1091,7 +1091,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) out_unlock_kernel: unlock_kernel(); - disk_put_part(part); if (disk) module_put(disk->fops->owner); put_disk(disk); diff --git a/include/linux/bio.h b/include/linux/bio.h index 1c91a176b9a..6a642098e5c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -236,12 +236,16 @@ static inline void *bio_data(struct bio *bio) #define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) #define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx) +/* Default implementation of BIOVEC_PHYS_MERGEABLE */ +#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + /* * allow arch override, for eg virtualized architectures (put in asm/io.h) */ #ifndef BIOVEC_PHYS_MERGEABLE #define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + __BIOVEC_PHYS_MERGEABLE(vec1, vec2) #endif #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ diff --git a/include/linux/timer.h b/include/linux/timer.h index d4ba79248a2..daf9685b861 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -186,4 +186,9 @@ unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); +unsigned long __round_jiffies_up(unsigned long j, int cpu); +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu); +unsigned long round_jiffies_up(unsigned long j); +unsigned long round_jiffies_up_relative(unsigned long j); + #endif diff --git a/kernel/smp.c b/kernel/smp.c index f362a855377..75c8dde58c5 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -51,10 +51,6 @@ static void csd_flag_wait(struct call_single_data *data) { /* Wait for response */ do { - /* - * We need to see the flags store in the IPI handler - */ - smp_mb(); if (!(data->flags & CSD_FLAG_WAIT)) break; cpu_relax(); @@ -76,6 +72,11 @@ static void generic_exec_single(int cpu, struct call_single_data *data) list_add_tail(&data->list, &dst->list); spin_unlock_irqrestore(&dst->lock, flags); + /* + * Make the list addition visible before sending the ipi. + */ + smp_mb(); + if (ipi) arch_send_call_function_single_ipi(cpu); @@ -157,7 +158,7 @@ void generic_smp_call_function_single_interrupt(void) * Need to see other stores to list head for checking whether * list is empty without holding q->lock */ - smp_mb(); + smp_read_barrier_depends(); while (!list_empty(&q->list)) { unsigned int data_flags; @@ -191,7 +192,7 @@ void generic_smp_call_function_single_interrupt(void) /* * See comment on outer loop */ - smp_mb(); + smp_read_barrier_depends(); } } @@ -370,6 +371,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, list_add_tail_rcu(&data->csd.list, &call_function_queue); spin_unlock_irqrestore(&call_function_lock, flags); + /* + * Make the list addition visible before sending the ipi. + */ + smp_mb(); + /* Send a message to all CPUs in the map */ arch_send_call_function_ipi(mask); diff --git a/kernel/timer.c b/kernel/timer.c index 56becf373c5..dbd50fabe4c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -112,6 +112,44 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base) tbase_get_deferrable(timer->base)); } +static unsigned long round_jiffies_common(unsigned long j, int cpu, + bool force_up) +{ + int rem; + unsigned long original = j; + + /* + * We don't want all cpus firing their timers at once hitting the + * same lock or cachelines, so we skew each extra cpu with an extra + * 3 jiffies. This 3 jiffies came originally from the mm/ code which + * already did this. + * The skew is done by adding 3*cpunr, then round, then subtract this + * extra offset again. + */ + j += cpu * 3; + + rem = j % HZ; + + /* + * If the target jiffie is just after a whole second (which can happen + * due to delays of the timer irq, long irq off times etc etc) then + * we should round down to the whole second, not up. Use 1/4th second + * as cutoff for this rounding as an extreme upper bound for this. + * But never round down if @force_up is set. + */ + if (rem < HZ/4 && !force_up) /* round down */ + j = j - rem; + else /* round up */ + j = j - rem + HZ; + + /* now that we have rounded, subtract the extra skew again */ + j -= cpu * 3; + + if (j <= jiffies) /* rounding ate our timeout entirely; */ + return original; + return j; +} + /** * __round_jiffies - function to round jiffies to a full second * @j: the time in (absolute) jiffies that should be rounded @@ -134,38 +172,7 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base) */ unsigned long __round_jiffies(unsigned long j, int cpu) { - int rem; - unsigned long original = j; - - /* - * We don't want all cpus firing their timers at once hitting the - * same lock or cachelines, so we skew each extra cpu with an extra - * 3 jiffies. This 3 jiffies came originally from the mm/ code which - * already did this. - * The skew is done by adding 3*cpunr, then round, then subtract this - * extra offset again. - */ - j += cpu * 3; - - rem = j % HZ; - - /* - * If the target jiffie is just after a whole second (which can happen - * due to delays of the timer irq, long irq off times etc etc) then - * we should round down to the whole second, not up. Use 1/4th second - * as cutoff for this rounding as an extreme upper bound for this. - */ - if (rem < HZ/4) /* round down */ - j = j - rem; - else /* round up */ - j = j - rem + HZ; - - /* now that we have rounded, subtract the extra skew again */ - j -= cpu * 3; - - if (j <= jiffies) /* rounding ate our timeout entirely; */ - return original; - return j; + return round_jiffies_common(j, cpu, false); } EXPORT_SYMBOL_GPL(__round_jiffies); @@ -191,13 +198,10 @@ EXPORT_SYMBOL_GPL(__round_jiffies); */ unsigned long __round_jiffies_relative(unsigned long j, int cpu) { - /* - * In theory the following code can skip a jiffy in case jiffies - * increments right between the addition and the later subtraction. - * However since the entire point of this function is to use approximate - * timeouts, it's entirely ok to not handle that. - */ - return __round_jiffies(j + jiffies, cpu) - jiffies; + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, false) - j0; } EXPORT_SYMBOL_GPL(__round_jiffies_relative); @@ -218,7 +222,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative); */ unsigned long round_jiffies(unsigned long j) { - return __round_jiffies(j, raw_smp_processor_id()); + return round_jiffies_common(j, raw_smp_processor_id(), false); } EXPORT_SYMBOL_GPL(round_jiffies); @@ -243,6 +247,71 @@ unsigned long round_jiffies_relative(unsigned long j) } EXPORT_SYMBOL_GPL(round_jiffies_relative); +/** + * __round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up(unsigned long j, int cpu) +{ + return round_jiffies_common(j, cpu, true); +} +EXPORT_SYMBOL_GPL(__round_jiffies_up); + +/** + * __round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) +{ + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, true) - j0; +} +EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); + +/** + * round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * This is the same as round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up(unsigned long j) +{ + return round_jiffies_common(j, raw_smp_processor_id(), true); +} +EXPORT_SYMBOL_GPL(round_jiffies_up); + +/** + * round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * + * This is the same as round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up_relative(unsigned long j) +{ + return __round_jiffies_up_relative(j, raw_smp_processor_id()); +} +EXPORT_SYMBOL_GPL(round_jiffies_up_relative); + static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer)