CFQ: move think time check variables to a separate struct
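
The new container for the think time statistics is not visible in the
hunks below; a sketch inferred from the fields this diff references
(last_end_request, ttime_total, ttime_samples and ttime_mean, all
handled as unsigned long) would look roughly like:

	struct cfq_ttime {
		unsigned long last_end_request;

		unsigned long ttime_total;
		unsigned long ttime_samples;
		unsigned long ttime_mean;
	};

Embedding this struct in struct cfq_io_context (accessed as cic->ttime
throughout) lets the update logic be factored into a helper,
__cfq_update_io_thinktime(), that operates on the struct alone.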
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3c7b537bf9081f9f43b90c7cdcd9a95073f1c02b..56255599a0222358e3233b10948f524b879a45a0 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -129,14 +129,12 @@ struct cfq_queue {
        unsigned long slice_end;
        long slice_resid;
 
-       /* pending metadata requests */
-       int meta_pending;
        /* number of requests that are on the dispatch list or inside driver */
        int dispatched;
 
        /* io prio of this group */
        unsigned short ioprio, org_ioprio;
-       unsigned short ioprio_class, org_ioprio_class;
+       unsigned short ioprio_class;
 
        pid_t pid;
 
@@ -670,9 +668,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
        if (rq_is_sync(rq1) != rq_is_sync(rq2))
                return rq_is_sync(rq1) ? rq1 : rq2;
 
-       if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
-               return rq1->cmd_flags & REQ_META ? rq1 : rq2;
-
        s1 = blk_rq_pos(rq1);
        s2 = blk_rq_pos(rq2);
 
@@ -988,9 +983,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 
        cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
                                        st->min_vdisktime);
-       cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
-                       " sect=%u", used_sl, cfqq->slice_dispatch, charge,
-                       iops_mode(cfqd), cfqq->nr_sectors);
+       cfq_log_cfqq(cfqq->cfqd, cfqq,
+                    "sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
+                    used_sl, cfqq->slice_dispatch, charge,
+                    iops_mode(cfqd), cfqq->nr_sectors);
        cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
                                          unaccounted_sl);
        cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
@@ -1004,8 +1000,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
        return NULL;
 }
 
-void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
-                                       unsigned int weight)
+static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
+                                         unsigned int weight)
 {
        struct cfq_group *cfqg = cfqg_of_blkg(blkg);
        cfqg->new_weight = weight;
@@ -1234,7 +1230,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
  * it should not be NULL as even if elevator was exiting, cgroup deletion
  * path got to it first.
  */
-void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
+static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
 {
        unsigned long  flags;
        struct cfq_data *cfqd = key;
@@ -1501,16 +1497,11 @@ static void cfq_add_rq_rb(struct request *rq)
 {
        struct cfq_queue *cfqq = RQ_CFQQ(rq);
        struct cfq_data *cfqd = cfqq->cfqd;
-       struct request *__alias, *prev;
+       struct request *prev;
 
        cfqq->queued[rq_is_sync(rq)]++;
 
-       /*
-        * looks a little odd, but the first insert might return an alias.
-        * if that happens, put the alias on the dispatch list
-        */
-       while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
-               cfq_dispatch_insert(cfqd->queue, __alias);
+       elv_rb_add(&cfqq->sort_list, rq);
 
        if (!cfq_cfqq_on_rr(cfqq))
                cfq_add_cfqq_rr(cfqd, cfqq);
@@ -1597,10 +1588,6 @@ static void cfq_remove_request(struct request *rq)
        cfqq->cfqd->rq_queued--;
        cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
                                        rq_data_dir(rq), rq_is_sync(rq));
-       if (rq->cmd_flags & REQ_META) {
-               WARN_ON(!cfqq->meta_pending);
-               cfqq->meta_pending--;
-       }
 }
 
 static int cfq_merge(struct request_queue *q, struct request **req,
@@ -2021,10 +2008,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
         * slice, then don't idle. This avoids overrunning the allotted
         * time slice.
         */
-       if (sample_valid(cic->ttime_samples) &&
-           (cfqq->slice_end - jiffies < cic->ttime_mean)) {
-               cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
-                               cic->ttime_mean);
+       if (sample_valid(cic->ttime.ttime_samples) &&
+           (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
+               cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
+                            cic->ttime.ttime_mean);
                return;
        }
 
@@ -2772,8 +2759,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
        smp_wmb();
        cic->key = cfqd_dead_key(cfqd);
 
-       if (ioc->ioc_data == cic)
+       rcu_read_lock();
+       if (rcu_dereference(ioc->ioc_data) == cic) {
+               rcu_read_unlock();
+               spin_lock(&ioc->lock);
                rcu_assign_pointer(ioc->ioc_data, NULL);
+               spin_unlock(&ioc->lock);
+       } else
+               rcu_read_unlock();
 
        if (cic->cfqq[BLK_RW_ASYNC]) {
                cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
@@ -2826,7 +2819,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
        cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
                                                        cfqd->queue->node);
        if (cic) {
-               cic->last_end_request = jiffies;
+               cic->ttime.last_end_request = jiffies;
                INIT_LIST_HEAD(&cic->queue_list);
                INIT_HLIST_NODE(&cic->cic_list);
                cic->dtor = cfq_free_io_context;
@@ -2876,7 +2869,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
         * elevate the priority of this queue
         */
        cfqq->org_ioprio = cfqq->ioprio;
-       cfqq->org_ioprio_class = cfqq->ioprio_class;
        cfq_clear_cfqq_prio_changed(cfqq);
 }
 
@@ -3080,7 +3072,8 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
 
        spin_lock_irqsave(&ioc->lock, flags);
 
-       BUG_ON(ioc->ioc_data == cic);
+       BUG_ON(rcu_dereference_check(ioc->ioc_data,
+               lockdep_is_held(&ioc->lock)) == cic);
 
        radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
        hlist_del_rcu(&cic->cic_list);
@@ -3213,14 +3206,22 @@ err:
 }
 
 static void
-cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
+__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
 {
-       unsigned long elapsed = jiffies - cic->last_end_request;
-       unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
+       unsigned long elapsed = jiffies - ttime->last_end_request;
+       elapsed = min(elapsed, 2UL * slice_idle);
 
-       cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
-       cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
-       cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
+       ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
+       ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
+       ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
+}
+
+static void
+cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+       struct cfq_io_context *cic)
+{
+       if (cfq_cfqq_sync(cfqq))
+               __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
 }
 
 static void
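
The helper introduced above maintains a fixed-point exponential moving
average: both counters decay by 7/8 per sample, elapsed contributions are
scaled by 256, and ttime_mean = (ttime_total + 128) / ttime_samples is the
rounded, decayed average of recent think times. A standalone userspace
sketch of the arithmetic (the elapsed values are made up):

	#include <stdio.h>

	int main(void)
	{
		/* Same update rules as __cfq_update_io_thinktime(). */
		unsigned long samples = 0, total = 0, mean;
		unsigned long elapsed[] = { 4, 4, 8, 2, 4 }; /* hypothetical think times, in jiffies */
		int i;

		for (i = 0; i < 5; i++) {
			samples = (7 * samples + 256) / 8;          /* converges toward 256 */
			total = (7 * total + 256 * elapsed[i]) / 8; /* elapsed scaled by 256 */
			mean = (total + 128) / samples;             /* rounded decayed mean */
			printf("elapsed=%lu mean=%lu\n", elapsed[i], mean);
		}
		return 0;
	}
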
@@ -3269,8 +3270,8 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
            (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
                enable_idle = 0;
-       else if (sample_valid(cic->ttime_samples)) {
-               if (cic->ttime_mean > cfqd->cfq_slice_idle)
+       else if (sample_valid(cic->ttime.ttime_samples)) {
+               if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
                        enable_idle = 0;
                else
                        enable_idle = 1;
@@ -3331,13 +3332,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
            RB_EMPTY_ROOT(&cfqq->sort_list))
                return true;
 
-       /*
-        * So both queues are sync. Let the new request get disk time if
-        * it's a metadata request and the current queue is doing regular IO.
-        */
-       if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
-               return true;
-
        /*
         * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
         */
@@ -3402,10 +3396,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        struct cfq_io_context *cic = RQ_CIC(rq);
 
        cfqd->rq_queued++;
-       if (rq->cmd_flags & REQ_META)
-               cfqq->meta_pending++;
 
-       cfq_update_io_thinktime(cfqd, cic);
+       cfq_update_io_thinktime(cfqd, cfqq, cic);
        cfq_update_io_seektime(cfqd, cfqq, rq);
        cfq_update_idle_window(cfqd, cfqq, cic);
 
@@ -3516,8 +3508,8 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
                return true;
 
        /* if slice left is less than think time, wait busy */
-       if (cic && sample_valid(cic->ttime_samples)
-           && (cfqq->slice_end - jiffies < cic->ttime_mean))
+       if (cic && sample_valid(cic->ttime.ttime_samples)
+           && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
                return true;
 
        /*
@@ -3558,7 +3550,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
        if (sync) {
-               RQ_CIC(rq)->last_end_request = now;
+               RQ_CIC(rq)->ttime.last_end_request = now;
                if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
                        cfqd->last_delayed_sync = now;
        }
@@ -3608,30 +3600,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
                cfq_schedule_dispatch(cfqd);
 }
 
-/*
- * we temporarily boost lower priority queues if they are holding fs exclusive
- * resources. they are boosted to normal prio (CLASS_BE/4)
- */
-static void cfq_prio_boost(struct cfq_queue *cfqq)
-{
-       if (has_fs_excl()) {
-               /*
-                * boost idle prio on transactions that would lock out other
-                * users of the filesystem
-                */
-               if (cfq_class_idle(cfqq))
-                       cfqq->ioprio_class = IOPRIO_CLASS_BE;
-               if (cfqq->ioprio > IOPRIO_NORM)
-                       cfqq->ioprio = IOPRIO_NORM;
-       } else {
-               /*
-                * unboost the queue (if needed)
-                */
-               cfqq->ioprio_class = cfqq->org_ioprio_class;
-               cfqq->ioprio = cfqq->org_ioprio;
-       }
-}
-
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 {
        if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -3662,7 +3630,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
        cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
        if (cfqq) {
                cfq_init_prio_data(cfqq, cic->ioc);
-               cfq_prio_boost(cfqq);
 
                return __cfq_may_queue(cfqq);
        }