public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: torvalds@linux-foundation.org, awalls@radix.net,
	linux-kernel@vger.kernel.org, jeff@garzik.org, mingo@elte.hu,
	akpm@linux-foundation.org, jens.axboe@oracle.com,
	rusty@rustcorp.com.au, cl@linux-foundation.org,
	dhowells@redhat.com, arjan@linux.intel.com, avi@redhat.com,
	peterz@infradead.org, johannes@sipsolutions.net,
	andi@firstfloor.org
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 18/27] workqueue: reimplement work flushing using linked works
Date: Fri, 18 Dec 2009 21:57:59 +0900	[thread overview]
Message-ID: <1261141088-2014-19-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1261141088-2014-1-git-send-email-tj@kernel.org>

A work is linked to the next one by having WORK_STRUCT_LINKED bit set
and these links can be chained.  When a linked work is dispatched to a
worker, all linked works are dispatched to the worker's newly added
->scheduled queue and processed back-to-back.

Currently, as there's only single worker per cwq, having linked works
doesn't make any visible behavior difference.  This change is to
prepare for multiple shared workers per cpu.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |    2 +
 kernel/workqueue.c        |  152 ++++++++++++++++++++++++++++++++++++++------
 2 files changed, 133 insertions(+), 21 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 316cc48..a6650f1 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -25,9 +25,11 @@ typedef void (*work_func_t)(struct work_struct *work);
 enum {
 	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
 	WORK_STRUCT_STATIC_BIT	= 1,	/* static initializer (debugobjects) */
+	WORK_STRUCT_LINKED_BIT	= 2,	/* next work is linked to this one */
 
 	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
 	WORK_STRUCT_STATIC	= 1 << WORK_STRUCT_STATIC_BIT,
+	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
 
 	WORK_STRUCT_COLOR_SHIFT	= 3,	/* color for workqueue flushing */
 	WORK_STRUCT_COLOR_BITS	= 4,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 17d270b..2ac1624 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -51,6 +51,7 @@ struct cpu_workqueue_struct;
 
 struct worker {
 	struct work_struct	*current_work;	/* L: work being processed */
+	struct list_head	scheduled;	/* L: scheduled works */
 	struct task_struct	*task;		/* I: worker task */
 	struct cpu_workqueue_struct *cwq;	/* I: the associated cwq */
 	int			id;		/* I: worker id */
@@ -438,6 +439,8 @@ static struct worker *alloc_worker(void)
 	struct worker *worker;
 
 	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
+	if (worker)
+		INIT_LIST_HEAD(&worker->scheduled);
 	return worker;
 }
 
@@ -523,6 +526,7 @@ static void destroy_worker(struct worker *worker)
 
 	/* sanity check frenzy */
 	BUG_ON(worker->current_work);
+	BUG_ON(!list_empty(&worker->scheduled));
 
 	kthread_stop(worker->task);
 	kfree(worker);
@@ -533,6 +537,48 @@ static void destroy_worker(struct worker *worker)
 }
 
 /**
+ * move_linked_works - move linked works to a list
+ * @work: start of series of works to be scheduled
+ * @head: target list to append @work to
+ * @nextp: out paramter for nested worklist walking
+ *
+ * Schedule linked works starting from @work to @head.  Work series to
+ * be scheduled starts at @work and includes any consecutive work with
+ * WORK_STRUCT_LINKED set in its predecessor.
+ *
+ * If @nextp is not NULL, it's updated to point to the next work of
+ * the last scheduled work.  This allows move_linked_works() to be
+ * nested inside outer list_for_each_entry_safe().
+ *
+ * CONTEXT:
+ * spin_lock_irq(cwq->lock).
+ */
+static void move_linked_works(struct work_struct *work, struct list_head *head,
+			      struct work_struct **nextp)
+{
+	struct work_struct *n;
+
+	/*
+	 * Linked worklist will always end before the end of the list,
+	 * use NULL for list head.
+	 */
+	work = list_entry(work->entry.prev, struct work_struct, entry);
+	list_for_each_entry_safe_continue(work, n, NULL, entry) {
+		list_move_tail(&work->entry, head);
+		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
+			break;
+	}
+
+	/*
+	 * If we're already inside safe list traversal and have moved
+	 * multiple works to the scheduled queue, the next position
+	 * needs to be updated.
+	 */
+	if (nextp)
+		*nextp = n;
+}
+
+/**
  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
  * @cwq: cwq of interest
  * @color: color of work which left the queue
@@ -632,17 +678,25 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
 	cwq_dec_nr_in_flight(cwq, work_color);
 }
 
-static void run_workqueue(struct worker *worker)
+/**
+ * process_scheduled_works - process scheduled works
+ * @worker: self
+ *
+ * Process all scheduled works.  Please note that the scheduled list
+ * may change while processing a work, so this function repeatedly
+ * fetches a work from the top and executes it.
+ *
+ * CONTEXT:
+ * spin_lock_irq(cwq->lock) which may be released and regrabbed
+ * multiple times.
+ */
+static void process_scheduled_works(struct worker *worker)
 {
-	struct cpu_workqueue_struct *cwq = worker->cwq;
-
-	spin_lock_irq(&cwq->lock);
-	while (!list_empty(&cwq->worklist)) {
-		struct work_struct *work = list_entry(cwq->worklist.next,
+	while (!list_empty(&worker->scheduled)) {
+		struct work_struct *work = list_first_entry(&worker->scheduled,
 						struct work_struct, entry);
 		process_one_work(worker, work);
 	}
-	spin_unlock_irq(&cwq->lock);
 }
 
 /**
@@ -673,7 +727,27 @@ static int worker_thread(void *__worker)
 		if (kthread_should_stop())
 			break;
 
-		run_workqueue(worker);
+		spin_lock_irq(&cwq->lock);
+
+		while (!list_empty(&cwq->worklist)) {
+			struct work_struct *work =
+				list_first_entry(&cwq->worklist,
+						 struct work_struct, entry);
+
+			if (likely(!(*work_data_bits(work) &
+				     WORK_STRUCT_LINKED))) {
+				/* optimization path, not strictly necessary */
+				process_one_work(worker, work);
+				if (unlikely(!list_empty(&worker->scheduled)))
+					process_scheduled_works(worker);
+			} else {
+				move_linked_works(work, &worker->scheduled,
+						  NULL);
+				process_scheduled_works(worker);
+			}
+		}
+
+		spin_unlock_irq(&cwq->lock);
 	}
 
 	return 0;
@@ -694,16 +768,33 @@ static void wq_barrier_func(struct work_struct *work)
  * insert_wq_barrier - insert a barrier work
  * @cwq: cwq to insert barrier into
  * @barr: wq_barrier to insert
- * @head: insertion point
+ * @target: target work to attach @barr to
+ * @worker: worker currently executing @target, NULL if @target is not executing
  *
- * Insert barrier @barr into @cwq before @head.
+ * @barr is linked to @target such that @barr is completed only after
+ * @target finishes execution.  Please note that the ordering
+ * guarantee is observed only with respect to @target and on the local
+ * cpu.
+ *
+ * Currently, a queued barrier can't be canceled.  This is because
+ * try_to_grab_pending() can't determine whether the work to be
+ * grabbed is at the head of the queue and thus can't clear LINKED
+ * flag of the previous work while there must be a valid next work
+ * after a work with LINKED flag set.
+ *
+ * Note that when @worker is non-NULL, @target may be modified
+ * underneath us, so we can't reliably determine cwq from @target.
  *
  * CONTEXT:
  * spin_lock_irq(cwq->lock).
  */
 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
-			struct wq_barrier *barr, struct list_head *head)
+			      struct wq_barrier *barr,
+			      struct work_struct *target, struct worker *worker)
 {
+	struct list_head *head;
+	unsigned int linked = 0;
+
 	/*
 	 * debugobject calls are safe here even with cwq->lock locked
 	 * as we know for sure that this will not trigger any of the
@@ -714,8 +805,24 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
 	init_completion(&barr->done);
 
+	/*
+	 * If @target is currently being executed, schedule the
+	 * barrier to the worker; otherwise, put it after @target.
+	 */
+	if (worker)
+		head = worker->scheduled.next;
+	else {
+		unsigned long *bits = work_data_bits(target);
+
+		head = target->entry.next;
+		/* there can already be other linked works, inherit and set */
+		linked = *bits & WORK_STRUCT_LINKED;
+		*bits |= WORK_STRUCT_LINKED;
+	}
+
 	debug_work_activate(&barr->work);
-	insert_work(cwq, &barr->work, head, work_color_to_flags(WORK_NO_COLOR));
+	insert_work(cwq, &barr->work, head,
+		    work_color_to_flags(WORK_NO_COLOR) | linked);
 }
 
 /**
@@ -948,8 +1055,8 @@ EXPORT_SYMBOL_GPL(flush_workqueue);
  */
 int flush_work(struct work_struct *work)
 {
+	struct worker *worker = NULL;
 	struct cpu_workqueue_struct *cwq;
-	struct list_head *prev;
 	struct wq_barrier barr;
 
 	might_sleep();
@@ -969,14 +1076,14 @@ int flush_work(struct work_struct *work)
 		smp_rmb();
 		if (unlikely(cwq != get_wq_data(work)))
 			goto already_gone;
-		prev = &work->entry;
 	} else {
-		if (!cwq->worker || cwq->worker->current_work != work)
+		if (cwq->worker && cwq->worker->current_work == work)
+			worker = cwq->worker;
+		if (!worker)
 			goto already_gone;
-		prev = &cwq->worklist;
 	}
-	insert_wq_barrier(cwq, &barr, prev->next);
 
+	insert_wq_barrier(cwq, &barr, work, worker);
 	spin_unlock_irq(&cwq->lock);
 	wait_for_completion(&barr.done);
 	destroy_work_on_stack(&barr.work);
@@ -1033,16 +1140,19 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
 				struct work_struct *work)
 {
 	struct wq_barrier barr;
-	int running = 0;
+	struct worker *worker;
 
 	spin_lock_irq(&cwq->lock);
+
+	worker = NULL;
 	if (unlikely(cwq->worker && cwq->worker->current_work == work)) {
-		insert_wq_barrier(cwq, &barr, cwq->worklist.next);
-		running = 1;
+		worker = cwq->worker;
+		insert_wq_barrier(cwq, &barr, work, worker);
 	}
+
 	spin_unlock_irq(&cwq->lock);
 
-	if (unlikely(running)) {
+	if (unlikely(worker)) {
 		wait_for_completion(&barr.done);
 		destroy_work_on_stack(&barr.work);
 	}
-- 
1.6.4.2


  parent reply	other threads:[~2009-12-18 13:01 UTC|newest]

Thread overview: 104+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-18 12:57 Tejun Heo
2009-12-18 12:57 ` [PATCH 01/27] sched: rename preempt_notifiers to sched_notifiers and refactor implementation Tejun Heo
2009-12-18 12:57 ` [PATCH 02/27] sched: refactor try_to_wake_up() Tejun Heo
2009-12-18 12:57 ` [PATCH 03/27] sched: implement __set_cpus_allowed() Tejun Heo
2009-12-18 12:57 ` [PATCH 04/27] sched: make sched_notifiers unconditional Tejun Heo
2009-12-18 12:57 ` [PATCH 05/27] sched: add wakeup/sleep sched_notifiers and allow NULL notifier ops Tejun Heo
2009-12-18 12:57 ` [PATCH 06/27] sched: implement try_to_wake_up_local() Tejun Heo
2009-12-18 12:57 ` [PATCH 07/27] acpi: use queue_work_on() instead of binding workqueue worker to cpu0 Tejun Heo
2009-12-18 12:57 ` [PATCH 08/27] stop_machine: reimplement without using workqueue Tejun Heo
2009-12-18 12:57 ` [PATCH 09/27] workqueue: misc/cosmetic updates Tejun Heo
2009-12-18 12:57 ` [PATCH 10/27] workqueue: merge feature parameters into flags Tejun Heo
2009-12-18 12:57 ` [PATCH 11/27] workqueue: define both bit position and mask for work flags Tejun Heo
2009-12-18 12:57 ` [PATCH 12/27] workqueue: separate out process_one_work() Tejun Heo
2009-12-18 12:57 ` [PATCH 13/27] workqueue: temporarily disable workqueue tracing Tejun Heo
2009-12-18 12:57 ` [PATCH 14/27] workqueue: kill cpu_populated_map Tejun Heo
2009-12-18 12:57 ` [PATCH 15/27] workqueue: update cwq alignement Tejun Heo
2009-12-18 12:57 ` [PATCH 16/27] workqueue: reimplement workqueue flushing using color coded works Tejun Heo
2009-12-18 12:57 ` [PATCH 17/27] workqueue: introduce worker Tejun Heo
2009-12-18 12:57 ` Tejun Heo [this message]
2009-12-18 12:58 ` [PATCH 19/27] workqueue: implement per-cwq active work limit Tejun Heo
2009-12-18 12:58 ` [PATCH 20/27] workqueue: reimplement workqueue freeze using max_active Tejun Heo
2009-12-18 12:58 ` [PATCH 21/27] workqueue: introduce global cwq and unify cwq locks Tejun Heo
2009-12-18 12:58 ` [PATCH 22/27] workqueue: implement worker states Tejun Heo
2009-12-18 12:58 ` [PATCH 23/27] workqueue: reimplement CPU hotplugging support using trustee Tejun Heo
2009-12-18 12:58 ` [PATCH 24/27] workqueue: make single thread workqueue shared worker pool friendly Tejun Heo
2009-12-18 12:58 ` [PATCH 25/27] workqueue: use shared worklist and pool all workers per cpu Tejun Heo
2009-12-18 12:58 ` [PATCH 26/27] workqueue: implement concurrency managed dynamic worker pool Tejun Heo
2009-12-18 12:58 ` [PATCH 27/27] workqueue: increase max_active of keventd and kill current_is_keventd() Tejun Heo
2009-12-18 13:00 ` SUBJ: [RFC PATCHSET] concurrency managed workqueue, take#2 Tejun Heo
2009-12-18 13:03 ` Tejun Heo
2009-12-18 13:45 ` workqueue thing Peter Zijlstra
2009-12-18 13:50   ` Andi Kleen
2009-12-18 15:01     ` Arjan van de Ven
2009-12-21  3:19       ` Tejun Heo
2009-12-21  9:17       ` Jens Axboe
2009-12-21 10:35         ` Peter Zijlstra
2009-12-21 11:09         ` Andi Kleen
2009-12-21 11:17           ` Arjan van de Ven
2009-12-21 11:33             ` Andi Kleen
2009-12-21 13:18             ` Tejun Heo
2009-12-21 11:11         ` Arjan van de Ven
2009-12-21 13:22           ` Tejun Heo
2009-12-21 13:53             ` Arjan van de Ven
2009-12-21 14:19               ` Tejun Heo
2009-12-21 15:19                 ` Arjan van de Ven
2009-12-22  0:00                   ` Tejun Heo
2009-12-22 11:10                     ` Peter Zijlstra
2009-12-22 17:20                       ` Linus Torvalds
2009-12-22 17:47                         ` Peter Zijlstra
2009-12-22 18:07                           ` Andi Kleen
2009-12-22 18:20                             ` Peter Zijlstra
2009-12-23  8:17                             ` Stijn Devriendt
2009-12-23  8:43                               ` Peter Zijlstra
2009-12-23  9:01                                 ` Stijn Devriendt
2009-12-22 18:28                           ` Linus Torvalds
2009-12-23  8:06                             ` Johannes Berg
2009-12-23  3:37                           ` Tejun Heo
2009-12-23  6:52                             ` Herbert Xu
2009-12-23  8:00                               ` Steffen Klassert
2009-12-23  8:01                                 ` [PATCH 0/2] Parallel crypto/IPsec v7 Steffen Klassert
2009-12-23  8:03                                   ` [PATCH 1/2] padata: generic parallelization/serialization interface Steffen Klassert
2009-12-23  8:04                                   ` [PATCH 2/2] crypto: pcrypt - Add pcrypt crypto parallelization wrapper Steffen Klassert
2010-01-07  5:39                                   ` [PATCH 0/2] Parallel crypto/IPsec v7 Herbert Xu
2010-01-16  9:44                                     ` David Miller
2009-12-18 15:30   ` workqueue thing Linus Torvalds
2009-12-18 15:39     ` Ingo Molnar
2009-12-18 15:39     ` Peter Zijlstra
2009-12-18 15:47       ` Linus Torvalds
2009-12-18 15:53         ` Peter Zijlstra
2009-12-21  3:04   ` Tejun Heo
2009-12-21  9:22     ` Peter Zijlstra
2009-12-21 13:30       ` Tejun Heo
2009-12-21 14:26         ` Peter Zijlstra
2009-12-21 23:50           ` Tejun Heo
2009-12-22 11:00             ` Peter Zijlstra
2009-12-22 11:03             ` Peter Zijlstra
2009-12-23  3:43               ` Tejun Heo
2009-12-22 11:04             ` Peter Zijlstra
2009-12-23  3:48               ` Tejun Heo
2009-12-22 11:06             ` Peter Zijlstra
2009-12-23  4:18               ` Tejun Heo
2009-12-23  4:42                 ` Linus Torvalds
2009-12-23  6:02                   ` Ingo Molnar
2009-12-23  6:13                     ` Jeff Garzik
2009-12-23  7:53                       ` Ingo Molnar
2009-12-23  8:41                       ` Peter Zijlstra
2009-12-23 10:25                         ` Jeff Garzik
2009-12-23 13:33                           ` Stefan Richter
2009-12-23 14:20                           ` Mark Brown
2009-12-23  7:09                     ` Tejun Heo
2009-12-23  8:01                       ` Ingo Molnar
2009-12-23  8:12                         ` Ingo Molnar
2009-12-23  8:32                           ` Tejun Heo
2009-12-23  8:42                             ` Ingo Molnar
2009-12-23  8:27                         ` Tejun Heo
2009-12-23  8:37                           ` Ingo Molnar
2009-12-23  8:49                             ` Tejun Heo
2009-12-23  8:49                               ` Ingo Molnar
2009-12-23  9:03                                 ` Tejun Heo
2009-12-23 13:40                             ` Stefan Richter
2009-12-23 13:43                               ` Stefan Richter
2009-12-23  8:25                       ` Arjan van de Ven
2009-12-23 13:00                     ` Stefan Richter
2009-12-23  8:31             ` Stijn Devriendt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1261141088-2014-19-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=arjan@linux.intel.com \
    --cc=avi@redhat.com \
    --cc=awalls@radix.net \
    --cc=cl@linux-foundation.org \
    --cc=dhowells@redhat.com \
    --cc=jeff@garzik.org \
    --cc=jens.axboe@oracle.com \
    --cc=johannes@sipsolutions.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=rusty@rustcorp.com.au \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox