diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 1ceceb334d5..3b3f77c4f24 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -292,7 +292,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) int cpu; if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { - bio_endio(bio, -EOPNOTSUPP); + md_barrier_request(mddev, bio); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index d79a4064979..569f25183db 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -213,12 +213,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio) return 0; } rcu_read_lock(); - if (mddev->suspended) { + if (mddev->suspended || mddev->barrier) { DEFINE_WAIT(__wait); for (;;) { prepare_to_wait(&mddev->sb_wait, &__wait, TASK_UNINTERRUPTIBLE); - if (!mddev->suspended) + if (!mddev->suspended && !mddev->barrier) break; rcu_read_unlock(); schedule(); @@ -260,10 +260,110 @@ static void mddev_resume(mddev_t *mddev) int mddev_congested(mddev_t *mddev, int bits) { + if (mddev->barrier) + return 1; return mddev->suspended; } EXPORT_SYMBOL(mddev_congested); +/* + * Generic barrier handling for md + */ + +#define POST_REQUEST_BARRIER ((void*)1) + +static void md_end_barrier(struct bio *bio, int err) +{ + mdk_rdev_t *rdev = bio->bi_private; + mddev_t *mddev = rdev->mddev; + if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER) + set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags); + + rdev_dec_pending(rdev, mddev); + + if (atomic_dec_and_test(&mddev->flush_pending)) { + if (mddev->barrier == POST_REQUEST_BARRIER) { + /* This was a post-request barrier */ + mddev->barrier = NULL; + wake_up(&mddev->sb_wait); + } else + /* The pre-request barrier has finished */ + schedule_work(&mddev->barrier_work); + } + bio_put(bio); +} + +static void submit_barriers(mddev_t *mddev) +{ + mdk_rdev_t *rdev; + + rcu_read_lock(); + list_for_each_entry_rcu(rdev, &mddev->disks, same_set) + if (rdev->raid_disk >= 0 && + !test_bit(Faulty, &rdev->flags)) { + /* Take two references, one is dropped + * when request finishes, one after + * we reclaim rcu_read_lock + */ + struct bio *bi; + atomic_inc(&rdev->nr_pending); + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + bi = bio_alloc(GFP_KERNEL, 0); + bi->bi_end_io = md_end_barrier; + bi->bi_private = rdev; + bi->bi_bdev = rdev->bdev; + atomic_inc(&mddev->flush_pending); + submit_bio(WRITE_BARRIER, bi); + rcu_read_lock(); + rdev_dec_pending(rdev, mddev); + } + rcu_read_unlock(); +} + +static void md_submit_barrier(struct work_struct *ws) +{ + mddev_t *mddev = container_of(ws, mddev_t, barrier_work); + struct bio *bio = mddev->barrier; + + atomic_set(&mddev->flush_pending, 1); + + if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) + bio_endio(bio, -EOPNOTSUPP); + else if (bio->bi_size == 0) + /* an empty barrier - all done */ + bio_endio(bio, 0); + else { + bio->bi_rw &= ~(1<pers->make_request(mddev->queue, bio)) + generic_make_request(bio); + mddev->barrier = POST_REQUEST_BARRIER; + submit_barriers(mddev); + } + if (atomic_dec_and_test(&mddev->flush_pending)) { + mddev->barrier = NULL; + wake_up(&mddev->sb_wait); + } +} + +void md_barrier_request(mddev_t *mddev, struct bio *bio) +{ + spin_lock_irq(&mddev->write_lock); + wait_event_lock_irq(mddev->sb_wait, + !mddev->barrier, + mddev->write_lock, /*nothing*/); + mddev->barrier = bio; + spin_unlock_irq(&mddev->write_lock); + + atomic_set(&mddev->flush_pending, 1); + INIT_WORK(&mddev->barrier_work, md_submit_barrier); + + submit_barriers(mddev); + + if (atomic_dec_and_test(&mddev->flush_pending)) + schedule_work(&mddev->barrier_work); +} +EXPORT_SYMBOL(md_barrier_request); static inline mddev_t *mddev_get(mddev_t *mddev) { @@ -371,6 +471,7 @@ static mddev_t * mddev_find(dev_t unit) atomic_set(&new->openers, 0); atomic_set(&new->active_io, 0); spin_lock_init(&new->write_lock); + atomic_set(&new->flush_pending, 0); init_waitqueue_head(&new->sb_wait); init_waitqueue_head(&new->recovery_wait); new->reshape_position = MaxSector; diff --git a/drivers/md/md.h b/drivers/md/md.h index 87430fea287..cb036868a9e 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -292,6 +292,17 @@ struct mddev_s struct mutex bitmap_mutex; struct list_head all_mddevs; + + /* Generic barrier handling. + * If there is a pending barrier request, all other + * writes are blocked while the devices are flushed. + * The last to finish a flush schedules a worker to + * submit the barrier request (without the barrier flag), + * then submit more flush requests. + */ + struct bio *barrier; + atomic_t flush_pending; + struct work_struct barrier_work; }; @@ -432,6 +443,7 @@ extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); extern int mddev_congested(mddev_t *mddev, int bits); +extern void md_barrier_request(mddev_t *mddev, struct bio *bio); extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, sector_t sector, int size, struct page *page); extern void md_super_wait(mddev_t *mddev); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index ee7646f974a..cbc0a99f379 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -145,7 +145,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) int cpu; if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { - bio_endio(bio, -EOPNOTSUPP); + md_barrier_request(mddev, bio); return 0; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index d3a4ce06015..122d07af5b5 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -453,7 +453,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio) int cpu; if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { - bio_endio(bio, -EOPNOTSUPP); + md_barrier_request(mddev, bio); return 0; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c2cb7b87b44..2fbf867f8b3 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -804,7 +804,7 @@ static int make_request(struct request_queue *q, struct bio * bio) mdk_rdev_t *blocked_rdev; if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { - bio_endio(bio, -EOPNOTSUPP); + md_barrier_request(mddev, bio); return 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 34cb065f6d6..8c9395f2028 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3865,7 +3865,13 @@ static int make_request(struct request_queue *q, struct bio * bi) int cpu, remaining; if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) { - bio_endio(bi, -EOPNOTSUPP); + /* Drain all pending writes. We only really need + * to ensure they have been submitted, but this is + * easier. + */ + mddev->pers->quiesce(mddev, 1); + mddev->pers->quiesce(mddev, 0); + md_barrier_request(mddev, bi); return 0; }