block: change the request allocation/congestion logic to be sync/async based

Index the request list counters, wait queues and the queue full/congested
state by sync vs async instead of by read vs write. This makes sure that a
sync request never has to wait on async IO.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Jens Axboe 2009-04-06 14:48:01 +02:00 committed by Linus Torvalds
parent 0221c81b1b
commit 1faa16d228
6 changed files with 100 additions and 82 deletions

View file

@ -484,11 +484,11 @@ static int blk_init_free_list(struct request_queue *q)
{ {
struct request_list *rl = &q->rq; struct request_list *rl = &q->rq;
rl->count[READ] = rl->count[WRITE] = 0; rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
rl->starved[READ] = rl->starved[WRITE] = 0; rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
rl->elvpriv = 0; rl->elvpriv = 0;
init_waitqueue_head(&rl->wait[READ]); init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
init_waitqueue_head(&rl->wait[WRITE]); init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
mempool_free_slab, request_cachep, q->node); mempool_free_slab, request_cachep, q->node);
@ -699,18 +699,18 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
ioc->last_waited = jiffies; ioc->last_waited = jiffies;
} }
static void __freed_request(struct request_queue *q, int rw) static void __freed_request(struct request_queue *q, int sync)
{ {
struct request_list *rl = &q->rq; struct request_list *rl = &q->rq;
if (rl->count[rw] < queue_congestion_off_threshold(q)) if (rl->count[sync] < queue_congestion_off_threshold(q))
blk_clear_queue_congested(q, rw); blk_clear_queue_congested(q, sync);
if (rl->count[rw] + 1 <= q->nr_requests) { if (rl->count[sync] + 1 <= q->nr_requests) {
if (waitqueue_active(&rl->wait[rw])) if (waitqueue_active(&rl->wait[sync]))
wake_up(&rl->wait[rw]); wake_up(&rl->wait[sync]);
blk_clear_queue_full(q, rw); blk_clear_queue_full(q, sync);
} }
} }
@ -718,18 +718,18 @@ static void __freed_request(struct request_queue *q, int rw)
* A request has just been released. Account for it, update the full and * A request has just been released. Account for it, update the full and
* congestion status, wake up any waiters. Called under q->queue_lock. * congestion status, wake up any waiters. Called under q->queue_lock.
*/ */
static void freed_request(struct request_queue *q, int rw, int priv) static void freed_request(struct request_queue *q, int sync, int priv)
{ {
struct request_list *rl = &q->rq; struct request_list *rl = &q->rq;
rl->count[rw]--; rl->count[sync]--;
if (priv) if (priv)
rl->elvpriv--; rl->elvpriv--;
__freed_request(q, rw); __freed_request(q, sync);
if (unlikely(rl->starved[rw ^ 1])) if (unlikely(rl->starved[sync ^ 1]))
__freed_request(q, rw ^ 1); __freed_request(q, sync ^ 1);
} }
/* /*
@ -743,15 +743,15 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
struct request *rq = NULL; struct request *rq = NULL;
struct request_list *rl = &q->rq; struct request_list *rl = &q->rq;
struct io_context *ioc = NULL; struct io_context *ioc = NULL;
const int rw = rw_flags & 0x01; const bool is_sync = rw_is_sync(rw_flags) != 0;
int may_queue, priv; int may_queue, priv;
may_queue = elv_may_queue(q, rw_flags); may_queue = elv_may_queue(q, rw_flags);
if (may_queue == ELV_MQUEUE_NO) if (may_queue == ELV_MQUEUE_NO)
goto rq_starved; goto rq_starved;
if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
if (rl->count[rw]+1 >= q->nr_requests) { if (rl->count[is_sync]+1 >= q->nr_requests) {
ioc = current_io_context(GFP_ATOMIC, q->node); ioc = current_io_context(GFP_ATOMIC, q->node);
/* /*
* The queue will fill after this allocation, so set * The queue will fill after this allocation, so set
@ -759,9 +759,9 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* This process will be allowed to complete a batch of * This process will be allowed to complete a batch of
* requests, others will be blocked. * requests, others will be blocked.
*/ */
if (!blk_queue_full(q, rw)) { if (!blk_queue_full(q, is_sync)) {
ioc_set_batching(q, ioc); ioc_set_batching(q, ioc);
blk_set_queue_full(q, rw); blk_set_queue_full(q, is_sync);
} else { } else {
if (may_queue != ELV_MQUEUE_MUST if (may_queue != ELV_MQUEUE_MUST
&& !ioc_batching(q, ioc)) { && !ioc_batching(q, ioc)) {
@ -774,7 +774,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
} }
} }
} }
blk_set_queue_congested(q, rw); blk_set_queue_congested(q, is_sync);
} }
/* /*
@ -782,11 +782,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* limit of requests, otherwise we could have thousands of requests * limit of requests, otherwise we could have thousands of requests
* allocated with any setting of ->nr_requests * allocated with any setting of ->nr_requests
*/ */
if (rl->count[rw] >= (3 * q->nr_requests / 2)) if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
goto out; goto out;
rl->count[rw]++; rl->count[is_sync]++;
rl->starved[rw] = 0; rl->starved[is_sync] = 0;
priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
if (priv) if (priv)
@ -804,7 +804,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* wait queue, but this is pretty rare. * wait queue, but this is pretty rare.
*/ */
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
freed_request(q, rw, priv); freed_request(q, is_sync, priv);
/* /*
* in the very unlikely event that allocation failed and no * in the very unlikely event that allocation failed and no
@ -814,8 +814,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* rq mempool into READ and WRITE * rq mempool into READ and WRITE
*/ */
rq_starved: rq_starved:
if (unlikely(rl->count[rw] == 0)) if (unlikely(rl->count[is_sync] == 0))
rl->starved[rw] = 1; rl->starved[is_sync] = 1;
goto out; goto out;
} }
@ -829,7 +829,7 @@ rq_starved:
if (ioc_batching(q, ioc)) if (ioc_batching(q, ioc))
ioc->nr_batch_requests--; ioc->nr_batch_requests--;
trace_block_getrq(q, bio, rw); trace_block_getrq(q, bio, rw_flags & 1);
out: out:
return rq; return rq;
} }
@ -843,7 +843,7 @@ out:
static struct request *get_request_wait(struct request_queue *q, int rw_flags, static struct request *get_request_wait(struct request_queue *q, int rw_flags,
struct bio *bio) struct bio *bio)
{ {
const int rw = rw_flags & 0x01; const bool is_sync = rw_is_sync(rw_flags) != 0;
struct request *rq; struct request *rq;
rq = get_request(q, rw_flags, bio, GFP_NOIO); rq = get_request(q, rw_flags, bio, GFP_NOIO);
@ -852,10 +852,10 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
struct io_context *ioc; struct io_context *ioc;
struct request_list *rl = &q->rq; struct request_list *rl = &q->rq;
prepare_to_wait_exclusive(&rl->wait[rw], &wait, prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
trace_block_sleeprq(q, bio, rw); trace_block_sleeprq(q, bio, rw_flags & 1);
__generic_unplug_device(q); __generic_unplug_device(q);
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
@ -871,7 +871,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
ioc_set_batching(q, ioc); ioc_set_batching(q, ioc);
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
finish_wait(&rl->wait[rw], &wait); finish_wait(&rl->wait[is_sync], &wait);
rq = get_request(q, rw_flags, bio, GFP_NOIO); rq = get_request(q, rw_flags, bio, GFP_NOIO);
}; };
@ -1070,14 +1070,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
* it didn't come out of our reserved rq pools * it didn't come out of our reserved rq pools
*/ */
if (req->cmd_flags & REQ_ALLOCED) { if (req->cmd_flags & REQ_ALLOCED) {
int rw = rq_data_dir(req); int is_sync = rq_is_sync(req) != 0;
int priv = req->cmd_flags & REQ_ELVPRIV; int priv = req->cmd_flags & REQ_ELVPRIV;
BUG_ON(!list_empty(&req->queuelist)); BUG_ON(!list_empty(&req->queuelist));
BUG_ON(!hlist_unhashed(&req->hash)); BUG_ON(!hlist_unhashed(&req->hash));
blk_free_request(q, req); blk_free_request(q, req);
freed_request(q, rw, priv); freed_request(q, is_sync, priv);
} }
} }
EXPORT_SYMBOL_GPL(__blk_put_request); EXPORT_SYMBOL_GPL(__blk_put_request);

View file

@ -48,28 +48,28 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
q->nr_requests = nr; q->nr_requests = nr;
blk_queue_congestion_threshold(q); blk_queue_congestion_threshold(q);
if (rl->count[READ] >= queue_congestion_on_threshold(q)) if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
blk_set_queue_congested(q, READ); blk_set_queue_congested(q, BLK_RW_SYNC);
else if (rl->count[READ] < queue_congestion_off_threshold(q)) else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
blk_clear_queue_congested(q, READ); blk_clear_queue_congested(q, BLK_RW_SYNC);
if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
blk_set_queue_congested(q, WRITE); blk_set_queue_congested(q, BLK_RW_ASYNC);
else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
blk_clear_queue_congested(q, WRITE); blk_clear_queue_congested(q, BLK_RW_ASYNC);
if (rl->count[READ] >= q->nr_requests) { if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
blk_set_queue_full(q, READ); blk_set_queue_full(q, BLK_RW_SYNC);
} else if (rl->count[READ]+1 <= q->nr_requests) { } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) {
blk_clear_queue_full(q, READ); blk_clear_queue_full(q, BLK_RW_SYNC);
wake_up(&rl->wait[READ]); wake_up(&rl->wait[BLK_RW_SYNC]);
} }
if (rl->count[WRITE] >= q->nr_requests) { if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
blk_set_queue_full(q, WRITE); blk_set_queue_full(q, BLK_RW_ASYNC);
} else if (rl->count[WRITE]+1 <= q->nr_requests) { } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) {
blk_clear_queue_full(q, WRITE); blk_clear_queue_full(q, BLK_RW_ASYNC);
wake_up(&rl->wait[WRITE]); wake_up(&rl->wait[BLK_RW_ASYNC]);
} }
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
return ret; return ret;

View file

@ -677,7 +677,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
} }
if (unplug_it && blk_queue_plugged(q)) { if (unplug_it && blk_queue_plugged(q)) {
int nrq = q->rq.count[READ] + q->rq.count[WRITE] int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
- q->in_flight; - q->in_flight;
if (nrq >= q->unplug_thresh) if (nrq >= q->unplug_thresh)

View file

@ -24,8 +24,8 @@ struct dentry;
*/ */
enum bdi_state { enum bdi_state {
BDI_pdflush, /* A pdflush thread is working this device */ BDI_pdflush, /* A pdflush thread is working this device */
BDI_write_congested, /* The write queue is getting full */ BDI_async_congested, /* The async (write) queue is getting full */
BDI_read_congested, /* The read queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */
BDI_unused, /* Available bits start here */ BDI_unused, /* Available bits start here */
}; };
@ -215,18 +215,18 @@ static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits)
static inline int bdi_read_congested(struct backing_dev_info *bdi) static inline int bdi_read_congested(struct backing_dev_info *bdi)
{ {
return bdi_congested(bdi, 1 << BDI_read_congested); return bdi_congested(bdi, 1 << BDI_sync_congested);
} }
static inline int bdi_write_congested(struct backing_dev_info *bdi) static inline int bdi_write_congested(struct backing_dev_info *bdi)
{ {
return bdi_congested(bdi, 1 << BDI_write_congested); return bdi_congested(bdi, 1 << BDI_async_congested);
} }
static inline int bdi_rw_congested(struct backing_dev_info *bdi) static inline int bdi_rw_congested(struct backing_dev_info *bdi)
{ {
return bdi_congested(bdi, (1 << BDI_read_congested)| return bdi_congested(bdi, (1 << BDI_sync_congested) |
(1 << BDI_write_congested)); (1 << BDI_async_congested));
} }
void clear_bdi_congested(struct backing_dev_info *bdi, int rw); void clear_bdi_congested(struct backing_dev_info *bdi, int rw);

View file

@ -38,6 +38,10 @@ struct request;
typedef void (rq_end_io_fn)(struct request *, int); typedef void (rq_end_io_fn)(struct request *, int);
struct request_list { struct request_list {
/*
* count[], starved[], and wait[] are indexed by
* BLK_RW_SYNC/BLK_RW_ASYNC
*/
int count[2]; int count[2];
int starved[2]; int starved[2];
int elvpriv; int elvpriv;
@ -66,6 +70,11 @@ enum rq_cmd_type_bits {
REQ_TYPE_ATA_PC, REQ_TYPE_ATA_PC,
}; };
enum {
BLK_RW_ASYNC = 0,
BLK_RW_SYNC = 1,
};
/* /*
* For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
* sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
@ -103,7 +112,7 @@ enum rq_flag_bits {
__REQ_QUIET, /* don't worry about errors */ __REQ_QUIET, /* don't worry about errors */
__REQ_PREEMPT, /* set for "ide_preempt" requests */ __REQ_PREEMPT, /* set for "ide_preempt" requests */
__REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_ORDERED_COLOR, /* is before or after barrier */
__REQ_RW_SYNC, /* request is sync (O_DIRECT) */ __REQ_RW_SYNC, /* request is sync (sync write or read) */
__REQ_ALLOCED, /* request came from our alloc pool */ __REQ_ALLOCED, /* request came from our alloc pool */
__REQ_RW_META, /* metadata io request */ __REQ_RW_META, /* metadata io request */
__REQ_COPY_USER, /* contains copies of user pages */ __REQ_COPY_USER, /* contains copies of user pages */
@ -438,8 +447,8 @@ struct request_queue
#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
#define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
#define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
#define QUEUE_FLAG_DEAD 5 /* queue being torn down */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */
#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
#define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
@ -611,32 +620,41 @@ enum {
#define rq_data_dir(rq) ((rq)->cmd_flags & 1) #define rq_data_dir(rq) ((rq)->cmd_flags & 1)
/* /*
* We regard a request as sync, if it's a READ or a SYNC write. * We regard a request as sync, if either a read or a sync write
*/ */
#define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) static inline bool rw_is_sync(unsigned int rw_flags)
{
return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC);
}
static inline bool rq_is_sync(struct request *rq)
{
return rw_is_sync(rq->cmd_flags);
}
#define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META)
static inline int blk_queue_full(struct request_queue *q, int rw) static inline int blk_queue_full(struct request_queue *q, int sync)
{ {
if (rw == READ) if (sync)
return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags);
return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags);
} }
static inline void blk_set_queue_full(struct request_queue *q, int rw) static inline void blk_set_queue_full(struct request_queue *q, int sync)
{ {
if (rw == READ) if (sync)
queue_flag_set(QUEUE_FLAG_READFULL, q); queue_flag_set(QUEUE_FLAG_SYNCFULL, q);
else else
queue_flag_set(QUEUE_FLAG_WRITEFULL, q); queue_flag_set(QUEUE_FLAG_ASYNCFULL, q);
} }
static inline void blk_clear_queue_full(struct request_queue *q, int rw) static inline void blk_clear_queue_full(struct request_queue *q, int sync)
{ {
if (rw == READ) if (sync)
queue_flag_clear(QUEUE_FLAG_READFULL, q); queue_flag_clear(QUEUE_FLAG_SYNCFULL, q);
else else
queue_flag_clear(QUEUE_FLAG_WRITEFULL, q); queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q);
} }

View file

@ -284,12 +284,12 @@ static wait_queue_head_t congestion_wqh[2] = {
}; };
void clear_bdi_congested(struct backing_dev_info *bdi, int rw) void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{ {
enum bdi_state bit; enum bdi_state bit;
wait_queue_head_t *wqh = &congestion_wqh[rw]; wait_queue_head_t *wqh = &congestion_wqh[sync];
bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; bit = sync ? BDI_sync_congested : BDI_async_congested;
clear_bit(bit, &bdi->state); clear_bit(bit, &bdi->state);
smp_mb__after_clear_bit(); smp_mb__after_clear_bit();
if (waitqueue_active(wqh)) if (waitqueue_active(wqh))
@ -297,11 +297,11 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int rw)
} }
EXPORT_SYMBOL(clear_bdi_congested); EXPORT_SYMBOL(clear_bdi_congested);
void set_bdi_congested(struct backing_dev_info *bdi, int rw) void set_bdi_congested(struct backing_dev_info *bdi, int sync)
{ {
enum bdi_state bit; enum bdi_state bit;
bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; bit = sync ? BDI_sync_congested : BDI_async_congested;
set_bit(bit, &bdi->state); set_bit(bit, &bdi->state);
} }
EXPORT_SYMBOL(set_bdi_congested); EXPORT_SYMBOL(set_bdi_congested);