diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index fd7080e..064f4fb 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -27,6 +27,12 @@ static const int cfq_slice_sync = HZ / 10;
 static int cfq_slice_async = HZ / 25;
 static const int cfq_slice_async_rq = 2;
 static int cfq_slice_idle = HZ / 125;
+static int cfq_target_latency = HZ * 3/10; /* 300 ms */
+static int cfq_hist_divisor = 4;
+/*
+ * Number of times that other workloads can be scheduled before async
+ */
+static const unsigned int cfq_async_penalty = 4;
 
 /*
  * offset from end of service tree
@@ -36,7 +42,7 @@ static int cfq_slice_idle = HZ / 125;
 /*
  * below this threshold, we consider thinktime immediate
  */
-#define CFQ_MIN_TT		(2)
+#define CFQ_MIN_TT		(1)
 
 #define CFQ_SLICE_SCALE		(5)
 #define CFQ_HW_QUEUE_MIN	(5)
@@ -67,8 +73,9 @@ static DEFINE_SPINLOCK(ioc_gone_lock);
 struct cfq_rb_root {
 	struct rb_root rb;
 	struct rb_node *left;
+	unsigned count;
 };
-#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }
+#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, 0, }
 
 /*
  * Per process-grouping structure
@@ -113,6 +120,21 @@ struct cfq_queue {
 	unsigned short ioprio_class, org_ioprio_class;
 
 	pid_t pid;
+
+	struct cfq_rb_root *service_tree;
+	struct cfq_io_context *cic;
+};
+
+enum wl_prio_t {
+	IDLE_WL = -1,
+	BE_WL = 0,
+	RT_WL = 1
+};
+
+enum wl_type_t {
+	ASYNC_WL = 0,
+	SYNC_NOIDLE_WL = 1,
+	SYNC_WL = 2
 };
 
 /*
@@ -124,7 +146,13 @@ struct cfq_data {
 	/*
 	 * rr list of queues with requests and the count of them
 	 */
-	struct cfq_rb_root service_tree;
+	struct cfq_rb_root service_trees[2][3];
+	struct cfq_rb_root service_tree_idle;
+
+	enum wl_prio_t serving_prio;
+	enum wl_type_t serving_type;
+	unsigned long workload_expires;
+	unsigned int async_starved;
 
 	/*
 	 * Each priority tree is sorted by next_request position. These
@@ -134,14 +162,11 @@ struct cfq_data {
 	struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
 	unsigned int busy_queues;
-	/*
-	 * Used to track any pending rt requests so we can pre-empt current
-	 * non-RT cfqq in service when this value is non-zero.
-	 */
-	unsigned int busy_rt_queues;
+	unsigned int busy_queues_avg[2];
 
-	int rq_in_driver;
+	int rq_in_driver[2];
 	int sync_flight;
+	int reads_delayed;
 
 	/*
 	 * queue-depth detection
@@ -178,6 +203,9 @@ struct cfq_data {
 	unsigned int cfq_slice[2];
 	unsigned int cfq_slice_async_rq;
 	unsigned int cfq_slice_idle;
+	unsigned int cfq_target_latency;
+	unsigned int cfq_hist_divisor;
+	unsigned int cfq_async_penalty;
 
 	struct list_head cic_list;
 
@@ -187,11 +215,15 @@ struct cfq_data {
 	struct cfq_queue oom_cfqq;
 };
 
+static struct cfq_rb_root *service_tree_for(enum wl_prio_t prio, enum wl_type_t type,
+					    struct cfq_data *cfqd) {
+	return prio == IDLE_WL ? &cfqd->service_tree_idle : &cfqd->service_trees[prio][type];
+}
+
 enum cfqq_state_flags {
 	CFQ_CFQQ_FLAG_on_rr = 0,	/* on round-robin busy list */
 	CFQ_CFQQ_FLAG_wait_request,	/* waiting for a request */
 	CFQ_CFQQ_FLAG_must_dispatch,	/* must be allowed a dispatch */
-	CFQ_CFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
 	CFQ_CFQQ_FLAG_must_alloc_slice,	/* per-slice must_alloc flag */
 	CFQ_CFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
 	CFQ_CFQQ_FLAG_idle_window,	/* slice idling enabled */
@@ -218,7 +250,6 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq)	\
 CFQ_CFQQ_FNS(on_rr);
 CFQ_CFQQ_FNS(wait_request);
 CFQ_CFQQ_FNS(must_dispatch);
-CFQ_CFQQ_FNS(must_alloc);
 CFQ_CFQQ_FNS(must_alloc_slice);
 CFQ_CFQQ_FNS(fifo_expire);
 CFQ_CFQQ_FNS(idle_window);
@@ -233,12 +264,28 @@ CFQ_CFQQ_FNS(coop);
 #define cfq_log(cfqd, fmt, args...)	\
 	blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
 
+#define CIC_SEEK_THR	1024
+#define CIC_SEEKY(cic)	((cic)->seek_mean > CIC_SEEK_THR)
+#define CFQQ_SEEKY(cfqq) (!cfqq->cic || CIC_SEEKY(cfqq->cic))
+
+static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd) {
+	return wl==IDLE_WL? cfqd->service_tree_idle.count :
+		cfqd->service_trees[wl][ASYNC_WL].count +
+		cfqd->service_trees[wl][SYNC_NOIDLE_WL].count +
+		cfqd->service_trees[wl][SYNC_WL].count;
+}
+
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
 				       struct io_context *, gfp_t);
 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
 						struct io_context *);
 
+static inline int rq_in_driver(struct cfq_data *cfqd)
+{
+	return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1];
+}
+
 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
 					    int is_sync)
 {
@@ -249,6 +296,7 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
 				struct cfq_queue *cfqq, int is_sync)
 {
 	cic->cfqq[!!is_sync] = cfqq;
+	cfqq->cic = cic;
 }
 
 /*
@@ -257,7 +305,7 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
  */
 static inline int cfq_bio_sync(struct bio *bio)
 {
-	if (bio_data_dir(bio) == READ || bio_sync(bio))
+	if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO))
 		return 1;
 
 	return 0;
@@ -303,10 +351,33 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
 }
 
+static inline unsigned
+cfq_get_interested_queues(struct cfq_data *cfqd, bool rt) {
+	unsigned min_q, max_q;
+	unsigned mult = cfqd->cfq_hist_divisor - 1;
+	unsigned round = cfqd->cfq_hist_divisor / 2;
+	unsigned busy = cfq_busy_queues_wl(rt, cfqd);
+	min_q = min(cfqd->busy_queues_avg[rt], busy);
+	max_q = max(cfqd->busy_queues_avg[rt], busy);
+	cfqd->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
+		cfqd->cfq_hist_divisor;
+	return cfqd->busy_queues_avg[rt];
+}
+
 static inline void
 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+	unsigned process_thr = cfqd->cfq_target_latency / cfqd->cfq_slice[1];
+	unsigned iq = cfq_get_interested_queues(cfqd, cfq_class_rt(cfqq));
+	unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
+
+	if (iq > process_thr) {
+		unsigned low_slice = 2 * slice * cfqd->cfq_slice_idle
+			/ cfqd->cfq_slice[1];
+		slice = max(slice * process_thr / iq, min(slice, low_slice));
+	}
+
+	cfqq->slice_end = jiffies + slice;
 	cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
 }
 
@@ -445,6 +516,7 @@ static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
 	if (root->left == n)
 		root->left = NULL;
 	rb_erase_init(n, &root->rb);
+	--root->count;
 }
 
 /*
@@ -485,46 +557,56 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
 }
 
 /*
- * The cfqd->service_tree holds all pending cfq_queue's that have
+ * The cfqd->service_trees holds all pending cfq_queue's that have
  * requests waiting to be processed. It is sorted in the order that
  * we will service the queues.
  */
-static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-				 int add_front)
+static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	struct rb_node **p, *parent;
 	struct cfq_queue *__cfqq;
 	unsigned long rb_key;
+	struct cfq_rb_root *service_tree;
 	int left;
 
 	if (cfq_class_idle(cfqq)) {
 		rb_key = CFQ_IDLE_DELAY;
-		parent = rb_last(&cfqd->service_tree.rb);
+		service_tree = &cfqd->service_tree_idle;
+		parent = rb_last(&service_tree->rb);
 		if (parent && parent != &cfqq->rb_node) {
 			__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
 			rb_key += __cfqq->rb_key;
 		} else
 			rb_key += jiffies;
-	} else if (!add_front) {
+	} else {
+		enum wl_prio_t prio = cfq_class_rt(cfqq) ? RT_WL : BE_WL;
+		enum wl_type_t type = cfq_cfqq_sync(cfqq) ? SYNC_WL : ASYNC_WL;
+
 		rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
 		rb_key += cfqq->slice_resid;
 		cfqq->slice_resid = 0;
-	} else
-		rb_key = 0;
+
+		if (type == SYNC_WL && (CFQQ_SEEKY(cfqq) || !cfq_cfqq_idle_window(cfqq)))
+			type = SYNC_NOIDLE_WL;
+
+		service_tree = service_tree_for(prio, type, cfqd);
+	}
 
 	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
 		/*
 		 * same position, nothing more to do
 		 */
-		if (rb_key == cfqq->rb_key)
+		if (rb_key == cfqq->rb_key && cfqq->service_tree == service_tree)
 			return;
 
-		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
+		cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
+		cfqq->service_tree = NULL;
 	}
 
 	left = 1;
 	parent = NULL;
-	p = &cfqd->service_tree.rb.rb_node;
+	cfqq->service_tree = service_tree;
+	p = &service_tree->rb.rb_node;
 	while (*p) {
 		struct rb_node **n;
 
@@ -556,11 +638,12 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	}
 
 	if (left)
-		cfqd->service_tree.left = &cfqq->rb_node;
+		service_tree->left = &cfqq->rb_node;
 
 	cfqq->rb_key = rb_key;
 	rb_link_node(&cfqq->rb_node, parent, p);
-	rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
+	rb_insert_color(&cfqq->rb_node, &service_tree->rb);
+	service_tree->count++;
 }
 
 static struct cfq_queue *
@@ -633,7 +716,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 * Resorting requires the cfqq to be on the RR list already.
 	 */
 	if (cfq_cfqq_on_rr(cfqq)) {
-		cfq_service_tree_add(cfqd, cfqq, 0);
+		cfq_service_tree_add(cfqd, cfqq);
 		cfq_prio_tree_add(cfqd, cfqq);
 	}
 }
@@ -648,8 +731,6 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
-	if (cfq_class_rt(cfqq))
-		cfqd->busy_rt_queues++;
 
 	cfq_resort_rr_list(cfqd, cfqq);
 }
@@ -664,8 +745,10 @@
 	BUG_ON(!cfq_cfqq_on_rr(cfqq));
 	cfq_clear_cfqq_on_rr(cfqq);
 
-	if (!RB_EMPTY_NODE(&cfqq->rb_node))
-		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
+	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
+		cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
+		cfqq->service_tree = NULL;
+	}
 	if (cfqq->p_root) {
 		rb_erase(&cfqq->p_node, cfqq->p_root);
 		cfqq->p_root = NULL;
@@ -673,8 +756,6 @@
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
-	if (cfq_class_rt(cfqq))
-		cfqd->busy_rt_queues--;
 }
 
 /*
@@ -760,9 +841,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 
-	cfqd->rq_in_driver++;
+	cfqd->rq_in_driver[rq_is_sync(rq)]++;
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
-						cfqd->rq_in_driver);
+						rq_in_driver(cfqd));
 
 	cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
 }
@@ -770,11 +851,12 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
 static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
+	const int sync = rq_is_sync(rq);
 
-	WARN_ON(!cfqd->rq_in_driver);
-	cfqd->rq_in_driver--;
+	WARN_ON(!cfqd->rq_in_driver[sync]);
+	cfqd->rq_in_driver[sync]--;
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
-						cfqd->rq_in_driver);
+						rq_in_driver(cfqd));
 }
 
 static void cfq_remove_request(struct request *rq)
@@ -928,10 +1010,11 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out)
  */
 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 {
-	if (RB_EMPTY_ROOT(&cfqd->service_tree.rb))
-		return NULL;
+	struct cfq_rb_root *service_tree = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd);
 
-	return cfq_rb_first(&cfqd->service_tree);
+	if (RB_EMPTY_ROOT(&service_tree->rb))
+		return NULL;
+	return cfq_rb_first(service_tree);
 }
 
 /*
@@ -959,9 +1042,6 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
 	return cfqd->last_position - blk_rq_pos(rq);
 }
 
-#define CIC_SEEK_THR	8 * 1024
-#define CIC_SEEKY(cic)	((cic)->seek_mean > CIC_SEEK_THR)
-
 static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
 {
 	struct cfq_io_context *cic = cfqd->active_cic;
@@ -1049,6 +1129,10 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
 	if (cfq_cfqq_coop(cfqq))
 		return NULL;
 
+	/* we don't want to mix processes with different characteristics */
+	if (cfqq->service_tree != cur_cfqq->service_tree)
+		return NULL;
+
 	if (!probe)
 		cfq_mark_cfqq_coop(cfqq);
 	return cfqq;
@@ -1080,7 +1164,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	/*
 	 * still requests with the driver, don't idle
 	 */
-	if (cfqd->rq_in_driver)
+	if (rq_in_driver(cfqd))
 		return;
 
 	/*
@@ -1092,14 +1176,15 @@
 
 	cfq_mark_cfqq_wait_request(cfqq);
 
-	/*
-	 * we don't want to idle for seeks, but we do want to allow
-	 * fair distribution of slice time for a process doing back-to-back
-	 * seeks. so allow a little bit of time for him to submit a new rq
-	 */
-	sl = cfqd->cfq_slice_idle;
-	if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
+	sl = min_t(unsigned, cfqd->cfq_slice_idle, cfqq->slice_end - jiffies);
+
+	/* very small idle if we are serving noidle trees, and there are more trees */
+	if (cfqd->serving_type == SYNC_NOIDLE_WL &&
+	    service_tree_for(cfqd->serving_prio, SYNC_NOIDLE_WL, cfqd)->count > 0) {
+		if (blk_queue_nonrot(cfqd->queue))
+			return;
 		sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
+	}
 
 	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
 	cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
@@ -1115,6 +1200,12 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
 
 	cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
 
+	if (!time_before(jiffies, rq->start_time + cfqd->cfq_target_latency / 2) && rq_data_dir(rq)==READ) {
+		cfqd->reads_delayed = max_t(int, cfqd->reads_delayed,
+				(jiffies - rq->start_time) / (cfqd->cfq_target_latency / 2));
+	}
+
+	cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
 	cfq_remove_request(rq);
 	cfqq->dispatched++;
 	elv_dispatch_sort(q, rq);
@@ -1160,6 +1251,16 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
 }
 
+enum wl_type_t cfq_choose_sync_async(struct cfq_data *cfqd, enum wl_prio_t prio) {
+	struct cfq_queue *id, *ni;
+	ni = cfq_rb_first(service_tree_for(prio, SYNC_NOIDLE_WL, cfqd));
+	id = cfq_rb_first(service_tree_for(prio, SYNC_WL, cfqd));
+	if (id && ni && id->rb_key < ni->rb_key)
+		return SYNC_WL;
+	if (!ni) return SYNC_WL;
+	return SYNC_NOIDLE_WL;
+}
+
 /*
  * Select a queue for service. If we have a current active queue,
  * check whether to continue servicing it, or retrieve and set a new one.
@@ -1179,20 +1280,6 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 		goto expire;
 
 	/*
-	 * If we have a RT cfqq waiting, then we pre-empt the current non-rt
-	 * cfqq.
-	 */
-	if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) {
-		/*
-		 * We simulate this as cfqq timed out so that it gets to bank
-		 * the remaining of its time slice.
-		 */
-		cfq_log_cfqq(cfqd, cfqq, "preempt");
-		cfq_slice_expired(cfqd, 1);
-		goto new_queue;
-	}
-
-	/*
 	 * The active queue has requests and isn't expired, allow it to
 	 * dispatch.
 	 */
@@ -1214,15 +1301,68 @@
 	 * flight or is idling for a new request, allow either of these
 	 * conditions to happen (or time out) before selecting a new queue.
	 */
-	if (timer_pending(&cfqd->idle_slice_timer) ||
+	if (timer_pending(&cfqd->idle_slice_timer) ||
 	    (cfqq->dispatched && cfq_cfqq_idle_window(cfqq))) {
 		cfqq = NULL;
 		goto keep_queue;
 	}
-
 expire:
 	cfq_slice_expired(cfqd, 0);
 new_queue:
+	if (!new_cfqq) {
+		enum wl_prio_t previous_prio = cfqd->serving_prio;
+
+		if (cfq_busy_queues_wl(RT_WL, cfqd))
+			cfqd->serving_prio = RT_WL;
+		else if (cfq_busy_queues_wl(BE_WL, cfqd))
+			cfqd->serving_prio = BE_WL;
+		else {
+			cfqd->serving_prio = IDLE_WL;
+			cfqd->workload_expires = jiffies + 1;
+			cfqd->reads_delayed = 0;
+		}
+
+		if (cfqd->serving_prio != IDLE_WL) {
+			int counts[]={
+				service_tree_for(cfqd->serving_prio, ASYNC_WL, cfqd)->count,
+				service_tree_for(cfqd->serving_prio, SYNC_NOIDLE_WL, cfqd)->count,
+				service_tree_for(cfqd->serving_prio, SYNC_WL, cfqd)->count
+			};
+			int nonzero_counts= !!counts[0] + !!counts[1] + !!counts[2];
+
+			if (previous_prio != cfqd->serving_prio || (nonzero_counts == 1)) {
+				cfqd->serving_type = counts[1] ? SYNC_NOIDLE_WL : counts[2] ? SYNC_WL : ASYNC_WL;
+				cfqd->async_starved = 0;
+				cfqd->reads_delayed = 0;
+			} else {
+				if (!counts[cfqd->serving_type] || time_after(jiffies, cfqd->workload_expires)) {
+					if (cfqd->serving_type != ASYNC_WL && counts[ASYNC_WL] &&
+					    cfqd->async_starved++ > cfqd->cfq_async_penalty * (1 + cfqd->reads_delayed))
+						cfqd->serving_type = ASYNC_WL;
+					else
+						cfqd->serving_type = cfq_choose_sync_async(cfqd, cfqd->serving_prio);
+				} else
+					goto same_wl;
+			}
+
+			{
+				unsigned slice = cfqd->cfq_target_latency;
+				slice = slice * counts[cfqd->serving_type] /
+					max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio],
+					      counts[SYNC_WL] + counts[SYNC_NOIDLE_WL] + counts[ASYNC_WL]);
+
+				if (cfqd->serving_type == ASYNC_WL)
+					slice = max(1U, (slice / (1 + cfqd->reads_delayed))
+						    * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]);
+				else
+					slice = max(slice, 2U * max(1U, cfqd->cfq_slice_idle));
+
+				cfqd->workload_expires = jiffies + slice;
+				cfqd->async_starved *= (cfqd->serving_type != ASYNC_WL);
+			}
+		}
+	}
+same_wl:
 	cfqq = cfq_set_active_queue(cfqd, new_cfqq);
 keep_queue:
 	return cfqq;
@@ -1249,8 +1389,13 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq;
 	int dispatched = 0;
+	int i,j;
+	for (i = 0; i < 2; ++i)
+		for (j = 0; j < 3; ++j)
+			while ((cfqq = cfq_rb_first(&cfqd->service_trees[i][j])) != NULL)
+				dispatched += __cfq_forced_dispatch_cfqq(cfqq);
 
-	while ((cfqq = cfq_rb_first(&cfqd->service_tree)) != NULL)
+	while ((cfqq = cfq_rb_first(&cfqd->service_tree_idle)) != NULL)
 		dispatched += __cfq_forced_dispatch_cfqq(cfqq);
 
 	cfq_slice_expired(cfqd, 0);
@@ -1312,6 +1457,12 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
 		return 0;
 
 	/*
+	 * Drain async requests before we start sync IO
+	 */
+	if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+		return 0;
+
+	/*
 	 * If this is an async queue and we have sync IO in flight, let it wait
 	 */
 	if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
@@ -1362,7 +1513,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
 		cfq_slice_expired(cfqd, 0);
 	}
 
-	cfq_log(cfqd, "dispatched a request");
+	cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
 	return 1;
 }
 
@@ -2004,18 +2155,8 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 	if (cfq_class_idle(cfqq))
 		return 1;
 
-	/*
-	 * if the new request is sync, but the currently running queue is
-	 * not, let the sync request have priority.
-	 */
-	if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
-		return 1;
-
-	/*
-	 * So both queues are sync. Let the new request get disk time if
-	 * it's a metadata request and the current queue is doing regular IO.
-	 */
-	if (rq_is_meta(rq) && !cfqq->meta_pending)
+	if (cfqd->serving_type == SYNC_NOIDLE_WL
+	    && new_cfqq->service_tree == cfqq->service_tree)
 		return 1;
 
 	/*
@@ -2046,13 +2187,9 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_log_cfqq(cfqd, cfqq, "preempt");
 	cfq_slice_expired(cfqd, 1);
 
-	/*
-	 * Put the new queue at the front of the of the current list,
-	 * so we know that it will be selected next.
-	 */
 	BUG_ON(!cfq_cfqq_on_rr(cfqq));
-	cfq_service_tree_add(cfqd, cfqq, 1);
+	cfq_service_tree_add(cfqd, cfqq);
 
 	cfqq->slice_end = 0;
 	cfq_mark_cfqq_slice_new(cfqq);
@@ -2130,11 +2267,11 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
  */
 static void cfq_update_hw_tag(struct cfq_data *cfqd)
 {
-	if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
-		cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
+	if (rq_in_driver(cfqd) > cfqd->rq_in_driver_peak)
+		cfqd->rq_in_driver_peak = rq_in_driver(cfqd);
 
 	if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
-	    cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
+	    rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN)
 		return;
 
 	if (cfqd->hw_tag_samples++ < 50)
@@ -2161,9 +2298,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 
 	cfq_update_hw_tag(cfqd);
 
-	WARN_ON(!cfqd->rq_in_driver);
+	WARN_ON(!cfqd->rq_in_driver[sync]);
 	WARN_ON(!cfqq->dispatched);
-	cfqd->rq_in_driver--;
+	cfqd->rq_in_driver[sync]--;
 	cfqq->dispatched--;
 
 	if (cfq_cfqq_sync(cfqq))
@@ -2197,7 +2334,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 		cfq_arm_slice_timer(cfqd);
 	}
 
-	if (!cfqd->rq_in_driver)
+	if (!rq_in_driver(cfqd))
 		cfq_schedule_dispatch(cfqd);
 }
 
@@ -2229,8 +2366,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 {
-	if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
-	    !cfq_cfqq_must_alloc_slice(cfqq)) {
+	if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
 		cfq_mark_cfqq_must_alloc_slice(cfqq);
 		return ELV_MQUEUE_MUST;
 	}
@@ -2317,7 +2453,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	}
 
 	cfqq->allocated[rw]++;
-	cfq_clear_cfqq_must_alloc(cfqq);
 	atomic_inc(&cfqq->ref);
 
 	spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2451,13 +2586,16 @@ static void cfq_exit_queue(struct elevator_queue *e)
 static void *cfq_init_queue(struct request_queue *q)
 {
 	struct cfq_data *cfqd;
-	int i;
+	int i,j;
 
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (!cfqd)
 		return NULL;
 
-	cfqd->service_tree = CFQ_RB_ROOT;
+	for (i = 0; i < 2; ++i)
+		for (j = 0; j < 3; ++j)
+			cfqd->service_trees[i][j] = CFQ_RB_ROOT;
+	cfqd->service_tree_idle = CFQ_RB_ROOT;
 
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we
@@ -2494,6 +2632,9 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->cfq_slice[1] = cfq_slice_sync;
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
+	cfqd->cfq_target_latency = cfq_target_latency;
+	cfqd->cfq_hist_divisor = cfq_hist_divisor;
+	cfqd->cfq_async_penalty = cfq_async_penalty;
 	cfqd->hw_tag = 1;
 
 	return cfqd;
@@ -2530,6 +2671,7 @@ fail:
 /*
  * sysfs parts below -->
  */
+
 static ssize_t
 cfq_var_show(unsigned int var, char *page)
 {
@@ -2563,6 +2705,9 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
+SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
+SHOW_FUNCTION(cfq_hist_divisor_show, cfqd->cfq_hist_divisor, 0);
+SHOW_FUNCTION(cfq_async_penalty_show, cfqd->cfq_async_penalty, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -2594,6 +2739,11 @@ STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
 		UINT_MAX, 0);
+
+STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, 1000, 1);
+STORE_FUNCTION(cfq_hist_divisor_store, &cfqd->cfq_hist_divisor, 1, 100, 0);
+STORE_FUNCTION(cfq_async_penalty_store, &cfqd->cfq_async_penalty, 1, UINT_MAX, 0);
+
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name)						\
@@ -2609,6 +2759,9 @@ static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(slice_async),
 	CFQ_ATTR(slice_async_rq),
 	CFQ_ATTR(slice_idle),
+	CFQ_ATTR(target_latency),
+	CFQ_ATTR(hist_divisor),
+	CFQ_ATTR(async_penalty),
 	__ATTR_NULL
 };