blkio: Introduce the root service tree for cfq groups

o So far we just had one cfq_group in cfq_data. To create space for more than
  one cfq_group, we need to have a service tree of groups where all the groups
  can be queued if they have active cfq queues backlogged in these.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
This commit is contained in:
Vivek Goyal 2009-12-03 12:59:41 -05:00 committed by Jens Axboe
parent f04a642463
commit 1fa8f6d68b

View file

@ -66,6 +66,7 @@ static DEFINE_SPINLOCK(ioc_gone_lock);
#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
#define sample_valid(samples) ((samples) > 80) #define sample_valid(samples) ((samples) > 80)
#define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)
/* /*
* Most of our rbtree usage is for sorting with min extraction, so * Most of our rbtree usage is for sorting with min extraction, so
@ -77,8 +78,9 @@ struct cfq_rb_root {
struct rb_root rb; struct rb_root rb;
struct rb_node *left; struct rb_node *left;
unsigned count; unsigned count;
u64 min_vdisktime;
}; };
#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, } #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
/* /*
* Per process-grouping structure * Per process-grouping structure
@ -156,6 +158,16 @@ enum wl_type_t {
/* This is per cgroup per device grouping structure */ /* This is per cgroup per device grouping structure */
struct cfq_group { struct cfq_group {
/* group service_tree member */
struct rb_node rb_node;
/* group service_tree key */
u64 vdisktime;
bool on_st;
/* number of cfqq currently on this group */
int nr_cfqq;
/* /*
* rr lists of queues with requests, onle rr for each priority class. * rr lists of queues with requests, onle rr for each priority class.
* Counts are embedded in the cfq_rb_root * Counts are embedded in the cfq_rb_root
@ -169,6 +181,8 @@ struct cfq_group {
*/ */
struct cfq_data { struct cfq_data {
struct request_queue *queue; struct request_queue *queue;
/* Root service tree for cfq_groups */
struct cfq_rb_root grp_service_tree;
struct cfq_group root_group; struct cfq_group root_group;
/* /*
@ -251,6 +265,9 @@ static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
enum wl_type_t type, enum wl_type_t type,
struct cfq_data *cfqd) struct cfq_data *cfqd)
{ {
if (!cfqg)
return NULL;
if (prio == IDLE_WORKLOAD) if (prio == IDLE_WORKLOAD)
return &cfqg->service_tree_idle; return &cfqg->service_tree_idle;
@ -589,6 +606,17 @@ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
return NULL; return NULL;
} }
static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root)
{
if (!root->left)
root->left = rb_first(&root->rb);
if (root->left)
return rb_entry_cfqg(root->left);
return NULL;
}
static void rb_erase_init(struct rb_node *n, struct rb_root *root) static void rb_erase_init(struct rb_node *n, struct rb_root *root)
{ {
rb_erase(n, root); rb_erase(n, root);
@ -640,6 +668,83 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio)); cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
} }
static inline s64
cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg)
{
return cfqg->vdisktime - st->min_vdisktime;
}
static void
__cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
{
struct rb_node **node = &st->rb.rb_node;
struct rb_node *parent = NULL;
struct cfq_group *__cfqg;
s64 key = cfqg_key(st, cfqg);
int left = 1;
while (*node != NULL) {
parent = *node;
__cfqg = rb_entry_cfqg(parent);
if (key < cfqg_key(st, __cfqg))
node = &parent->rb_left;
else {
node = &parent->rb_right;
left = 0;
}
}
if (left)
st->left = &cfqg->rb_node;
rb_link_node(&cfqg->rb_node, parent, node);
rb_insert_color(&cfqg->rb_node, &st->rb);
}
static void
cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
struct cfq_rb_root *st = &cfqd->grp_service_tree;
struct cfq_group *__cfqg;
struct rb_node *n;
cfqg->nr_cfqq++;
if (cfqg->on_st)
return;
/*
* Currently put the group at the end. Later implement something
* so that groups get lesser vtime based on their weights, so that
* if group does not loose all if it was not continously backlogged.
*/
n = rb_last(&st->rb);
if (n) {
__cfqg = rb_entry_cfqg(n);
cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
} else
cfqg->vdisktime = st->min_vdisktime;
__cfq_group_service_tree_add(st, cfqg);
cfqg->on_st = true;
}
static void
cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
struct cfq_rb_root *st = &cfqd->grp_service_tree;
BUG_ON(cfqg->nr_cfqq < 1);
cfqg->nr_cfqq--;
/* If there are other cfq queues under this group, don't delete it */
if (cfqg->nr_cfqq)
return;
cfqg->on_st = false;
if (!RB_EMPTY_NODE(&cfqg->rb_node))
cfq_rb_erase(&cfqg->rb_node, st);
}
/* /*
* The cfqd->service_trees holds all pending cfq_queue's that have * The cfqd->service_trees holds all pending cfq_queue's that have
* requests waiting to be processed. It is sorted in the order that * requests waiting to be processed. It is sorted in the order that
@ -722,6 +827,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
rb_link_node(&cfqq->rb_node, parent, p); rb_link_node(&cfqq->rb_node, parent, p);
rb_insert_color(&cfqq->rb_node, &service_tree->rb); rb_insert_color(&cfqq->rb_node, &service_tree->rb);
service_tree->count++; service_tree->count++;
cfq_group_service_tree_add(cfqd, cfqq->cfqg);
} }
static struct cfq_queue * static struct cfq_queue *
@ -832,6 +938,7 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
cfqq->p_root = NULL; cfqq->p_root = NULL;
} }
cfq_group_service_tree_del(cfqd, cfqq->cfqg);
BUG_ON(!cfqd->busy_queues); BUG_ON(!cfqd->busy_queues);
cfqd->busy_queues--; cfqd->busy_queues--;
} }
@ -1108,6 +1215,9 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
if (!cfqd->rq_queued) if (!cfqd->rq_queued)
return NULL; return NULL;
/* There is nothing to dispatch */
if (!service_tree)
return NULL;
if (RB_EMPTY_ROOT(&service_tree->rb)) if (RB_EMPTY_ROOT(&service_tree->rb))
return NULL; return NULL;
return cfq_rb_first(service_tree); return cfq_rb_first(service_tree);
@ -1477,6 +1587,12 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
unsigned count; unsigned count;
struct cfq_rb_root *st; struct cfq_rb_root *st;
if (!cfqg) {
cfqd->serving_prio = IDLE_WORKLOAD;
cfqd->workload_expires = jiffies + 1;
return;
}
/* Choose next priority. RT > BE > IDLE */ /* Choose next priority. RT > BE > IDLE */
if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd)) if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
cfqd->serving_prio = RT_WORKLOAD; cfqd->serving_prio = RT_WORKLOAD;
@ -1535,10 +1651,21 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfqd->noidle_tree_requires_idle = false; cfqd->noidle_tree_requires_idle = false;
} }
static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
{
struct cfq_rb_root *st = &cfqd->grp_service_tree;
if (RB_EMPTY_ROOT(&st->rb))
return NULL;
return cfq_rb_first_group(st);
}
static void cfq_choose_cfqg(struct cfq_data *cfqd) static void cfq_choose_cfqg(struct cfq_data *cfqd)
{ {
cfqd->serving_group = &cfqd->root_group; struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
choose_service_tree(cfqd, &cfqd->root_group);
cfqd->serving_group = cfqg;
choose_service_tree(cfqd, cfqg);
} }
/* /*
@ -3014,10 +3141,14 @@ static void *cfq_init_queue(struct request_queue *q)
if (!cfqd) if (!cfqd)
return NULL; return NULL;
/* Init root service tree */
cfqd->grp_service_tree = CFQ_RB_ROOT;
/* Init root group */ /* Init root group */
cfqg = &cfqd->root_group; cfqg = &cfqd->root_group;
for_each_cfqg_st(cfqg, i, j, st) for_each_cfqg_st(cfqg, i, j, st)
*st = CFQ_RB_ROOT; *st = CFQ_RB_ROOT;
RB_CLEAR_NODE(&cfqg->rb_node);
/* /*
* Not strictly needed (since RB_ROOT just clears the node and we * Not strictly needed (since RB_ROOT just clears the node and we