public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] 2/5: Device-mapper: kcopyd
@ 2004-06-02 15:41 Alasdair G Kergon
  2004-06-02 16:15 ` Andreas Dilger
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Alasdair G Kergon @ 2004-06-02 15:41 UTC (permalink / raw)
  To: Andrew Morton; +Cc: LKML

kcopyd

--- diff/drivers/md/Makefile	2004-06-01 19:08:00.000000000 -0500
+++ source/drivers/md/Makefile	2004-06-01 19:51:31.000000000 -0500
@@ -3,7 +3,7 @@
 #
 
 dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
-		   dm-ioctl.o dm-io.o
+		   dm-ioctl.o dm-io.o kcopyd.o
 raid6-objs	:= raid6main.o raid6algos.o raid6recov.o raid6tables.o \
 		   raid6int1.o raid6int2.o raid6int4.o \
 		   raid6int8.o raid6int16.o raid6int32.o \
--- diff/drivers/md/dm.c	2004-06-01 15:27:33.000000000 -0500
+++ source/drivers/md/dm.c	2004-06-01 19:51:31.000000000 -0500
@@ -153,6 +153,7 @@
 	xx(dm_target)
 	xx(dm_linear)
 	xx(dm_stripe)
+	xx(kcopyd)
 	xx(dm_interface)
 #undef xx
 };
--- diff/drivers/md/dm.h	2004-06-01 15:27:33.000000000 -0500
+++ source/drivers/md/dm.h	2004-06-01 19:51:31.000000000 -0500
@@ -177,6 +177,9 @@
 int dm_stripe_init(void);
 void dm_stripe_exit(void);
 
+int kcopyd_init(void);
+void kcopyd_exit(void);
+
 void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
 
 #endif
--- diff/drivers/md/kcopyd.c	1969-12-31 18:00:00.000000000 -0600
+++ source/drivers/md/kcopyd.c	2004-06-01 19:51:31.000000000 -0500
@@ -0,0 +1,667 @@
+/*
+ * Copyright (C) 2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <asm/atomic.h>
+
+#include <linux/blkdev.h>
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
+
+#include "kcopyd.h"
+
+/* FIXME: this is only needed for the DMERR macros */
+#include "dm.h"
+
+static struct workqueue_struct *_kcopyd_wq;
+static struct work_struct _kcopyd_work;
+
+static inline void wake(void)
+{
+	queue_work(_kcopyd_wq, &_kcopyd_work);
+}
+
+/*-----------------------------------------------------------------
+ * Each kcopyd client has its own little pool of preallocated
+ * pages for kcopyd io.
+ *---------------------------------------------------------------*/
+struct kcopyd_client {
+	struct list_head list;
+
+	spinlock_t lock;
+	struct page_list *pages;
+	unsigned int nr_pages;
+	unsigned int nr_free_pages;
+};
+
+static struct page_list *alloc_pl(void)
+{
+	struct page_list *pl;
+
+	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
+	if (!pl)
+		return NULL;
+
+	pl->page = alloc_page(GFP_KERNEL);
+	if (!pl->page) {
+		kfree(pl);
+		return NULL;
+	}
+
+	SetPageLocked(pl->page);
+	return pl;
+}
+
+static void free_pl(struct page_list *pl)
+{
+	ClearPageLocked(pl->page);
+	__free_page(pl->page);
+	kfree(pl);
+}
+
+static int kcopyd_get_pages(struct kcopyd_client *kc,
+			    unsigned int nr, struct page_list **pages)
+{
+	struct page_list *pl;
+
+	spin_lock(&kc->lock);
+	if (kc->nr_free_pages < nr) {
+		spin_unlock(&kc->lock);
+		return -ENOMEM;
+	}
+
+	kc->nr_free_pages -= nr;
+	for (*pages = pl = kc->pages; --nr; pl = pl->next)
+		;
+
+	kc->pages = pl->next;
+	pl->next = 0;
+
+	spin_unlock(&kc->lock);
+
+	return 0;
+}
+
+static void kcopyd_put_pages(struct kcopyd_client *kc, struct page_list *pl)
+{
+	struct page_list *cursor;
+
+	spin_lock(&kc->lock);
+	for (cursor = pl; cursor->next; cursor = cursor->next)
+		kc->nr_free_pages++;
+
+	kc->nr_free_pages++;
+	cursor->next = kc->pages;
+	kc->pages = pl;
+	spin_unlock(&kc->lock);
+}
+
+/*
+ * These three functions resize the page pool.
+ */
+static void drop_pages(struct page_list *pl)
+{
+	struct page_list *next;
+
+	while (pl) {
+		next = pl->next;
+		free_pl(pl);
+		pl = next;
+	}
+}
+
+static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr)
+{
+	unsigned int i;
+	struct page_list *pl = NULL, *next;
+
+	for (i = 0; i < nr; i++) {
+		next = alloc_pl();
+		if (!next) {
+			if (pl)
+				drop_pages(pl);
+			return -ENOMEM;
+		}
+		next->next = pl;
+		pl = next;
+	}
+
+	kcopyd_put_pages(kc, pl);
+	kc->nr_pages += nr;
+	return 0;
+}
+
+static void client_free_pages(struct kcopyd_client *kc)
+{
+	BUG_ON(kc->nr_free_pages != kc->nr_pages);
+	drop_pages(kc->pages);
+	kc->pages = NULL;
+	kc->nr_free_pages = kc->nr_pages = 0;
+}
+
+/*-----------------------------------------------------------------
+ * kcopyd_jobs need to be allocated by the *clients* of kcopyd,
+ * for this reason we use a mempool to prevent the client from
+ * ever having to do io (which could cause a deadlock).
+ *---------------------------------------------------------------*/
+struct kcopyd_job {
+	struct kcopyd_client *kc;
+	struct list_head list;
+	unsigned long flags;
+
+	/*
+	 * Error state of the job.
+	 */
+	int read_err;
+	unsigned int write_err;
+
+	/*
+	 * Either READ or WRITE
+	 */
+	int rw;
+	struct io_region source;
+
+	/*
+	 * The destinations for the transfer.
+	 */
+	unsigned int num_dests;
+	struct io_region dests[KCOPYD_MAX_REGIONS];
+
+	sector_t offset;
+	unsigned int nr_pages;
+	struct page_list *pages;
+
+	/*
+	 * Set this to ensure you are notified when the job has
+	 * completed.  'context' is for callback to use.
+	 */
+	kcopyd_notify_fn fn;
+	void *context;
+
+	/*
+	 * These fields are only used if the job has been split
+	 * into more manageable parts.
+	 */
+	struct semaphore lock;
+	atomic_t sub_jobs;
+	sector_t progress;
+};
+
+/* FIXME: this should scale with the number of pages */
+#define MIN_JOBS 512
+
+static kmem_cache_t *_job_cache;
+static mempool_t *_job_pool;
+
+/*
+ * We maintain three lists of jobs:
+ *
+ * i)   jobs waiting for pages
+ * ii)  jobs that have pages, and are waiting for the io to be issued.
+ * iii) jobs that have completed.
+ *
+ * All three of these are protected by job_lock.
+ */
+static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED;
+
+static LIST_HEAD(_complete_jobs);
+static LIST_HEAD(_io_jobs);
+static LIST_HEAD(_pages_jobs);
+
+static int __init jobs_init(void)
+{
+	INIT_LIST_HEAD(&_complete_jobs);
+	INIT_LIST_HEAD(&_io_jobs);
+	INIT_LIST_HEAD(&_pages_jobs);
+
+	_job_cache = kmem_cache_create("kcopyd-jobs",
+				       sizeof(struct kcopyd_job),
+				       __alignof__(struct kcopyd_job),
+				       0, NULL, NULL);
+	if (!_job_cache)
+		return -ENOMEM;
+
+	_job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab,
+				   mempool_free_slab, _job_cache);
+	if (!_job_pool) {
+		kmem_cache_destroy(_job_cache);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void jobs_exit(void)
+{
+	BUG_ON(!list_empty(&_complete_jobs));
+	BUG_ON(!list_empty(&_io_jobs));
+	BUG_ON(!list_empty(&_pages_jobs));
+
+	mempool_destroy(_job_pool);
+	kmem_cache_destroy(_job_cache);
+}
+
+/*
+ * Functions to push and pop a job onto the head of a given job
+ * list.
+ */
+static inline struct kcopyd_job *pop(struct list_head *jobs)
+{
+	struct kcopyd_job *job = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&_job_lock, flags);
+
+	if (!list_empty(jobs)) {
+		job = list_entry(jobs->next, struct kcopyd_job, list);
+		list_del(&job->list);
+	}
+	spin_unlock_irqrestore(&_job_lock, flags);
+
+	return job;
+}
+
+static inline void push(struct list_head *jobs, struct kcopyd_job *job)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&_job_lock, flags);
+	list_add_tail(&job->list, jobs);
+	spin_unlock_irqrestore(&_job_lock, flags);
+}
+
+/*
+ * These three functions process 1 item from the corresponding
+ * job list.
+ *
+ * They return:
+ * < 0: error
+ *   0: success
+ * > 0: can't process yet.
+ */
+static int run_complete_job(struct kcopyd_job *job)
+{
+	void *context = job->context;
+	int read_err = job->read_err;
+	unsigned int write_err = job->write_err;
+	kcopyd_notify_fn fn = job->fn;
+
+	kcopyd_put_pages(job->kc, job->pages);
+	mempool_free(job, _job_pool);
+	fn(read_err, write_err, context);
+	return 0;
+}
+
+static void complete_io(unsigned long error, void *context)
+{
+	struct kcopyd_job *job = (struct kcopyd_job *) context;
+
+	if (error) {
+		if (job->rw == WRITE)
+			job->write_err &= error;
+		else
+			job->read_err = 1;
+
+		if (!test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) {
+			push(&_complete_jobs, job);
+			wake();
+			return;
+		}
+	}
+
+	if (job->rw == WRITE)
+		push(&_complete_jobs, job);
+
+	else {
+		job->rw = WRITE;
+		push(&_io_jobs, job);
+	}
+
+	wake();
+}
+
+/*
+ * Request io on as many buffer heads as we can currently get for
+ * a particular job.
+ */
+static int run_io_job(struct kcopyd_job *job)
+{
+	int r;
+
+	if (job->rw == READ)
+		r = dm_io_async(1, &job->source, job->rw,
+				job->pages,
+				job->offset, complete_io, job);
+
+	else
+		r = dm_io_async(job->num_dests, job->dests, job->rw,
+				job->pages,
+				job->offset, complete_io, job);
+
+	return r;
+}
+
+static int run_pages_job(struct kcopyd_job *job)
+{
+	int r;
+
+	job->nr_pages = dm_div_up(job->dests[0].count + job->offset,
+				  PAGE_SIZE >> 9);
+	r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages);
+	if (!r) {
+		/* this job is ready for io */
+		push(&_io_jobs, job);
+		return 0;
+	}
+
+	if (r == -ENOMEM)
+		/* can't complete now */
+		return 1;
+
+	return r;
+}
+
+/*
+ * Run through a list for as long as possible.  Returns the count
+ * of successful jobs.
+ */
+static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *))
+{
+	struct kcopyd_job *job;
+	int r, count = 0;
+
+	while ((job = pop(jobs))) {
+
+		r = fn(job);
+
+		if (r < 0) {
+			/* error this rogue job */
+			if (job->rw == WRITE)
+				job->write_err = (unsigned int) -1;
+			else
+				job->read_err = 1;
+			push(&_complete_jobs, job);
+			break;
+		}
+
+		if (r > 0) {
+			/*
+			 * We couldn't service this job ATM, so
+			 * push this job back onto the list.
+			 */
+			push(jobs, job);
+			break;
+		}
+
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * kcopyd does this every time it's woken up.
+ */
+static void do_work(void *ignored)
+{
+	/*
+	 * The order that these are called is *very* important.
+	 * complete jobs can free some pages for pages jobs.
+	 * Pages jobs when successful will jump onto the io jobs
+	 * list.  io jobs call wake when they complete and it all
+	 * starts again.
+	 */
+	process_jobs(&_complete_jobs, run_complete_job);
+	process_jobs(&_pages_jobs, run_pages_job);
+	process_jobs(&_io_jobs, run_io_job);
+}
+
+/*
+ * If we are copying a small region we just dispatch a single job
+ * to do the copy, otherwise the io has to be split up into many
+ * jobs.
+ */
+static void dispatch_job(struct kcopyd_job *job)
+{
+	push(&_pages_jobs, job);
+	wake();
+}
+
+#define SUB_JOB_SIZE 128
+static void segment_complete(int read_err,
+			     unsigned int write_err, void *context)
+{
+	/* FIXME: tidy this function */
+	sector_t progress = 0;
+	sector_t count = 0;
+	struct kcopyd_job *job = (struct kcopyd_job *) context;
+
+	down(&job->lock);
+
+	/* update the error */
+	if (read_err)
+		job->read_err = 1;
+
+	if (write_err)
+		job->write_err &= write_err;
+
+	/*
+	 * Only dispatch more work if there hasn't been an error.
+	 */
+	if ((!job->read_err && !job->write_err) ||
+	    test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) {
+		/* get the next chunk of work */
+		progress = job->progress;
+		count = job->source.count - progress;
+		if (count) {
+			if (count > SUB_JOB_SIZE)
+				count = SUB_JOB_SIZE;
+
+			job->progress += count;
+		}
+	}
+	up(&job->lock);
+
+	if (count) {
+		int i;
+		struct kcopyd_job *sub_job = mempool_alloc(_job_pool, GFP_NOIO);
+
+		memcpy(sub_job, job, sizeof(*job));
+		sub_job->source.sector += progress;
+		sub_job->source.count = count;
+
+		for (i = 0; i < job->num_dests; i++) {
+			sub_job->dests[i].sector += progress;
+			sub_job->dests[i].count = count;
+		}
+
+		sub_job->fn = segment_complete;
+		sub_job->context = job;
+		dispatch_job(sub_job);
+
+	} else if (atomic_dec_and_test(&job->sub_jobs)) {
+
+		/*
+		 * To avoid a race we must keep the job around
+		 * until after the notify function has completed.
+		 * Otherwise the client may try and stop the job
+		 * after we've completed.
+		 */
+		job->fn(read_err, write_err, job->context);
+		mempool_free(job, _job_pool);
+	}
+}
+
+/*
+ * Create some little jobs that will do the move between
+ * them.
+ */
+#define SPLIT_COUNT 8
+static void split_job(struct kcopyd_job *job)
+{
+	int i;
+
+	atomic_set(&job->sub_jobs, SPLIT_COUNT);
+	for (i = 0; i < SPLIT_COUNT; i++)
+		segment_complete(0, 0u, job);
+}
+
+int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
+		unsigned int num_dests, struct io_region *dests,
+		unsigned int flags, kcopyd_notify_fn fn, void *context)
+{
+	struct kcopyd_job *job;
+
+	/*
+	 * Allocate a new job.
+	 */
+	job = mempool_alloc(_job_pool, GFP_NOIO);
+
+	/*
+	 * set up for the read.
+	 */
+	job->kc = kc;
+	job->flags = flags;
+	job->read_err = 0;
+	job->write_err = 0;
+	job->rw = READ;
+
+	memcpy(&job->source, from, sizeof(*from));
+
+	job->num_dests = num_dests;
+	memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
+
+	job->offset = 0;
+	job->nr_pages = 0;
+	job->pages = NULL;
+
+	job->fn = fn;
+	job->context = context;
+
+	if (job->source.count < SUB_JOB_SIZE)
+		dispatch_job(job);
+
+	else {
+		init_MUTEX(&job->lock);
+		job->progress = 0;
+		split_job(job);
+	}
+
+	return 0;
+}
+
+/*
+ * Cancels a kcopyd job, eg. someone might be deactivating a
+ * mirror.
+ */
+int kcopyd_cancel(struct kcopyd_job *job, int block)
+{
+	/* FIXME: finish */
+	return -1;
+}
+
+/*-----------------------------------------------------------------
+ * Unit setup
+ *---------------------------------------------------------------*/
+static DECLARE_MUTEX(_client_lock);
+static LIST_HEAD(_clients);
+
+static int client_add(struct kcopyd_client *kc)
+{
+	down(&_client_lock);
+	list_add(&kc->list, &_clients);
+	up(&_client_lock);
+	return 0;
+}
+
+static void client_del(struct kcopyd_client *kc)
+{
+	down(&_client_lock);
+	list_del(&kc->list);
+	up(&_client_lock);
+}
+
+int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result)
+{
+	int r = 0;
+	struct kcopyd_client *kc;
+
+	kc = kmalloc(sizeof(*kc), GFP_KERNEL);
+	if (!kc)
+		return -ENOMEM;
+
+	kc->lock = SPIN_LOCK_UNLOCKED;
+	kc->pages = NULL;
+	kc->nr_pages = kc->nr_free_pages = 0;
+	r = client_alloc_pages(kc, nr_pages);
+	if (r) {
+		kfree(kc);
+		return r;
+	}
+
+	r = dm_io_get(nr_pages);
+	if (r) {
+		client_free_pages(kc);
+		kfree(kc);
+		return r;
+	}
+
+	r = client_add(kc);
+	if (r) {
+		dm_io_put(nr_pages);
+		client_free_pages(kc);
+		kfree(kc);
+		return r;
+	}
+
+	*result = kc;
+	return 0;
+}
+
+void kcopyd_client_destroy(struct kcopyd_client *kc)
+{
+	dm_io_put(kc->nr_pages);
+	client_free_pages(kc);
+	client_del(kc);
+	kfree(kc);
+}
+
+
+int __init kcopyd_init(void)
+{
+	int r;
+
+	r = jobs_init();
+	if (r)
+		return r;
+
+	_kcopyd_wq = create_singlethread_workqueue("kcopyd");
+	if (!_kcopyd_wq) {
+		jobs_exit();
+		return -ENOMEM;
+	}
+
+	INIT_WORK(&_kcopyd_work, do_work, NULL);
+	return 0;
+}
+
+void kcopyd_exit(void)
+{
+	jobs_exit();
+	destroy_workqueue(_kcopyd_wq);
+}
+
+EXPORT_SYMBOL(kcopyd_client_create);
+EXPORT_SYMBOL(kcopyd_client_destroy);
+EXPORT_SYMBOL(kcopyd_copy);
+EXPORT_SYMBOL(kcopyd_cancel);
--- diff/drivers/md/kcopyd.h	1969-12-31 18:00:00.000000000 -0600
+++ source/drivers/md/kcopyd.h	2004-06-01 19:51:31.000000000 -0500
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_KCOPYD_H
+#define DM_KCOPYD_H
+
+#include "dm-io.h"
+
+int kcopyd_init(void);
+void kcopyd_exit(void);
+
+/* FIXME: make this configurable */
+#define KCOPYD_MAX_REGIONS 8
+
+#define KCOPYD_IGNORE_ERROR 1
+
+/*
+ * To use kcopyd you must first create a kcopyd client object.
+ */
+struct kcopyd_client;
+int kcopyd_client_create(unsigned int num_pages, struct kcopyd_client **result);
+void kcopyd_client_destroy(struct kcopyd_client *kc);
+
+/*
+ * Submit a copy job to kcopyd.  This is built on top of the
+ * previous three fns.
+ *
+ * read_err is a boolean,
+ * write_err is a bitset, with 1 bit for each destination region
+ */
+typedef void (*kcopyd_notify_fn)(int read_err,
+				 unsigned int write_err, void *context);
+
+int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
+		unsigned int num_dests, struct io_region *dests,
+		unsigned int flags, kcopyd_notify_fn fn, void *context);
+
+#endif


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] 2/5: Device-mapper: kcopyd
  2004-06-02 15:41 [PATCH] 2/5: Device-mapper: kcopyd Alasdair G Kergon
@ 2004-06-02 16:15 ` Andreas Dilger
  2004-06-02 16:50   ` Kevin Corry
  2004-06-02 21:15   ` Alasdair G Kergon
  2004-06-03  3:41 ` Andrew Morton
  2004-06-10  6:18 ` Andrew Morton
  2 siblings, 2 replies; 11+ messages in thread
From: Andreas Dilger @ 2004-06-02 16:15 UTC (permalink / raw)
  To: Alasdair G Kergon; +Cc: Andrew Morton, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 671 bytes --]

On Jun 02, 2004  16:41 +0100, Alasdair G Kergon wrote:
> kcopyd
> 
> --- diff/drivers/md/kcopyd.c	1969-12-31 18:00:00.000000000 -0600
> +++ source/drivers/md/kcopyd.c	2004-06-01 19:51:31.000000000 -0500
> @@ -0,0 +1,667 @@
> +/*
> + * Copyright (C) 2002 Sistina Software (UK) Limited.
> + *
> + * This file is released under the GPL.
> + */

It might be nice to have a brief comment here explaining what this is
and how it is supposed to be used.

Cheers, Andreas

PS - It isn't really nice to exclude yourself from the reply-to list.
--
Andreas Dilger
http://sourceforge.net/projects/ext2resize/
http://www-mddsp.enel.ucalgary.ca/People/adilger/


[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] 2/5: Device-mapper: kcopyd
  2004-06-02 16:15 ` Andreas Dilger
@ 2004-06-02 16:50   ` Kevin Corry
  2004-07-03  5:44     ` Daniel Phillips
  2004-06-02 21:15   ` Alasdair G Kergon
  1 sibling, 1 reply; 11+ messages in thread
From: Kevin Corry @ 2004-06-02 16:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Andreas Dilger, Alasdair G Kergon, Andrew Morton

On Wednesday 02 June 2004 11:15 am, Andreas Dilger wrote:
> On Jun 02, 2004  16:41 +0100, Alasdair G Kergon wrote:
> > kcopyd
> >
> > --- diff/drivers/md/kcopyd.c	1969-12-31 18:00:00.000000000 -0600
> > +++ source/drivers/md/kcopyd.c	2004-06-01 19:51:31.000000000 -0500
> > @@ -0,0 +1,667 @@
> > +/*
> > + * Copyright (C) 2002 Sistina Software (UK) Limited.
> > + *
> > + * This file is released under the GPL.
> > + */
>
> It might be nice to have a brief comment here explaining what this is
> and how it is supposed to be used.

How's this?

We're also working on some general documentation which will go in 
Documentation/device-mapper and will include more detailed information about 
the core driver and the other sub-modules. We'll try to submit those patches 
in the near future.

-- 
Kevin Corry
kevcorry@us.ibm.com
http://evms.sourceforge.net/


--- diff/drivers/md/kcopyd.c	2004-06-02 11:44:53.000000000 -0500
+++ source/drivers/md/kcopyd.c	2004-06-02 11:44:33.000000000 -0500
@@ -2,6 +2,10 @@
  * Copyright (C) 2002 Sistina Software (UK) Limited.
  *
  * This file is released under the GPL.
+ *
+ * Kcopyd provides a simple interface for copying an area of one
+ * block-device to one or more other block-devices, with an asynchronous
+ * completion notification.
  */
 
 #include <asm/atomic.h>
--- diff/drivers/md/kcopyd.h	2004-06-02 11:44:53.000000000 -0500
+++ source/drivers/md/kcopyd.h	2004-06-02 11:44:47.000000000 -0500
@@ -2,6 +2,10 @@
  * Copyright (C) 2001 Sistina Software
  *
  * This file is released under the GPL.
+ *
+ * Kcopyd provides a simple interface for copying an area of one
+ * block-device to one or more other block-devices, with an asynchronous
+ * completion notification.
  */
 
 #ifndef DM_KCOPYD_H

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] 2/5: Device-mapper: kcopyd
  2004-06-02 16:15 ` Andreas Dilger
  2004-06-02 16:50   ` Kevin Corry
@ 2004-06-02 21:15   ` Alasdair G Kergon
  1 sibling, 0 replies; 11+ messages in thread
From: Alasdair G Kergon @ 2004-06-02 21:15 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: linux-kernel

On Wed, Jun 02, 2004 at 10:15:41AM -0600, Andreas Dilger wrote:
> It might be nice to have a brief comment here explaining what this is
> and how it is supposed to be used.

  A daemon for copying regions of block devices around in an efficient
  manner.  Multiple destinations can be specified for a copy.
  Designed to perform well with both many small chunks and few large chunks.

Alasdair
-- 
agk@redhat.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] 2/5: Device-mapper: kcopyd
  2004-06-02 15:41 [PATCH] 2/5: Device-mapper: kcopyd Alasdair G Kergon
  2004-06-02 16:15 ` Andreas Dilger
@ 2004-06-03  3:41 ` Andrew Morton
  2004-06-03 13:28   ` Alasdair G Kergon
  2004-06-10  6:18 ` Andrew Morton
  2 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2004-06-03  3:41 UTC (permalink / raw)
  To: Alasdair G Kergon; +Cc: linux-kernel

Alasdair G Kergon <agk@redhat.com> wrote:
>
> kcopyd

I dunno about the rest of the people around here but I for one do not know
what kcopyd does, nor why it is being added to the kernel.  The changelog
was your chance to enlighten us - it is a shame nobody took the five
minutes to prepare a description.


> +static void drop_pages(struct page_list *pl)
> +{
> +	struct page_list *next;
> +
> +	while (pl) {
> +		next = pl->next;
> +		free_pl(pl);
> +		pl = next;
> +	}
> +}

What is the page pool for?  It is unfortunate that there is no suitable
library code for managing this - it is a fairly common pattern.  Maybe we
could massage mempools in some manner.

Why are the pooled pages locked?

> +static LIST_HEAD(_complete_jobs);
> +static LIST_HEAD(_io_jobs);
> +static LIST_HEAD(_pages_jobs);
> +
> +static int __init jobs_init(void)
> +{
> +	INIT_LIST_HEAD(&_complete_jobs);
> +	INIT_LIST_HEAD(&_io_jobs);
> +	INIT_LIST_HEAD(&_pages_jobs);

Do these lists need to be initialised a second time?

> +		memcpy(sub_job, job, sizeof(*job));
> +	memcpy(&job->source, from, sizeof(*from));

Structure assignments are nice.  If the struct is small the compiler will
do an element-by-element copy.  At some compiler-determined breakpoint it
will do a memcpy.  And it's typesafe.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] 2/5: Device-mapper: kcopyd
  2004-06-03  3:41 ` Andrew Morton
@ 2004-06-03 13:28   ` Alasdair G Kergon
  0 siblings, 0 replies; 11+ messages in thread
From: Alasdair G Kergon @ 2004-06-03 13:28 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Wed, Jun 02, 2004 at 08:41:26PM -0700, Andrew Morton wrote:
> What is the page pool for?

On the I/O path it can't wait for pages to be allocated without 
risking deadlock.  (cf. pvmove in 2.4)


> Why are the pooled pages locked?

I can't see that having any effect (i.e. unnecessary).

 
> > +static int __init jobs_init(void)
> > +	INIT_LIST_HEAD(&_complete_jobs);
> > +	INIT_LIST_HEAD(&_io_jobs);
> > +	INIT_LIST_HEAD(&_pages_jobs);

> Do these lists need to be initialised a second time?

No:-)


Alasdair
-- 
agk@redhat.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] 2/5: Device-mapper: kcopyd
  2004-06-02 15:41 [PATCH] 2/5: Device-mapper: kcopyd Alasdair G Kergon
  2004-06-02 16:15 ` Andreas Dilger
  2004-06-03  3:41 ` Andrew Morton
@ 2004-06-10  6:18 ` Andrew Morton
  2004-06-10  8:16   ` Kevin Corry
  2004-06-10 14:55   ` Alasdair G Kergon
  2 siblings, 2 replies; 11+ messages in thread
From: Andrew Morton @ 2004-06-10  6:18 UTC (permalink / raw)
  To: Alasdair G Kergon; +Cc: linux-kernel

Alasdair G Kergon <agk@redhat.com> wrote:
>
> kcopyd
> 
> ...
> +/* FIXME: this should scale with the number of pages */
> +#define MIN_JOBS 512

This pins at least 2MB of RAM up-front, even if devicemapper is not in use.

Have you any plans to fix that up?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] 2/5: Device-mapper: kcopyd
  2004-06-10  6:18 ` Andrew Morton
@ 2004-06-10  8:16   ` Kevin Corry
  2004-06-10 18:12     ` Andrew Morton
  2004-06-10 14:55   ` Alasdair G Kergon
  1 sibling, 1 reply; 11+ messages in thread
From: Kevin Corry @ 2004-06-10  8:16 UTC (permalink / raw)
  To: linux-kernel; +Cc: Andrew Morton, Alasdair G Kergon

On Thursday 10 June 2004 6:18 am, Andrew Morton wrote:
> Alasdair G Kergon <agk@redhat.com> wrote:
> > kcopyd
> >
> > ...
> > +/* FIXME: this should scale with the number of pages */
> > +#define MIN_JOBS 512
>
> This pins at least 2MB of RAM up-front, even if devicemapper is not in use.

Is that really the case? The MIN_JOBS value is used to initialize the mempool 
of kcopyd_job structures (see kcopyd.c::jobs_init()). A kcopyd_job is 
(according to my calculations on i386) 272 bytes. If you assume they are 
nicely aligned, then we'll round that up to 512 bytes. This means you should 
be able to fit 8 of those structures in a page, and initializing to MIN_JOBS 
should only use 256kB of RAM. Granted, 256kB of RAM isn't all that great 
either, but it's about an order of magnitude less than 2MB.

Or am I not understanding how kmem_caches and mempools work?

The jobs_init() routine is run (and hence the kmem_cache and mempool are 
created) when kcopyd() is loaded, so the min-value has to be set to some 
static number. One possibility to cut down on the up-front memory usage would 
be to reduce the static MIN_JOBS value, and then use mempool_resize() when 
the kcopyd users call kcopyd_client_[create|destroy].

Another thing we could do would be to build kcopyd as its own kernel module. 
Right now it's built in the same module with the core DM driver, so it's 
loaded any time DM is loaded, regardless of whether any DM module is using 
it. It should be fairly easy to split it out in its own module, which means 
that mempool won't be created until some other module is loaded that depends 
on kcopyd.

-- 
Kevin Corry
kevcorry@us.ibm.com
http://evms.sourceforge.net/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] 2/5: Device-mapper: kcopyd
  2004-06-10  6:18 ` Andrew Morton
  2004-06-10  8:16   ` Kevin Corry
@ 2004-06-10 14:55   ` Alasdair G Kergon
  1 sibling, 0 replies; 11+ messages in thread
From: Alasdair G Kergon @ 2004-06-10 14:55 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Wed, Jun 09, 2004 at 11:18:05PM -0700, Andrew Morton wrote:
> This pins at least 2MB of RAM up-front, even if devicemapper is not in use.
> Have you any plans to fix that up?

kcopyd used to create/destroy itself so it was only present while
it was being used.  Nobody can remember why that got changed so we'll
try reimplementing it.

Alasdair
-- 
agk@redhat.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] 2/5: Device-mapper: kcopyd
  2004-06-10  8:16   ` Kevin Corry
@ 2004-06-10 18:12     ` Andrew Morton
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Morton @ 2004-06-10 18:12 UTC (permalink / raw)
  To: Kevin Corry; +Cc: linux-kernel, agk

Kevin Corry <kevcorry@us.ibm.com> wrote:
>
>  On Thursday 10 June 2004 6:18 am, Andrew Morton wrote:
>  > Alasdair G Kergon <agk@redhat.com> wrote:
>  > > kcopyd
>  > >
>  > > ...
>  > > +/* FIXME: this should scale with the number of pages */
>  > > +#define MIN_JOBS 512
>  >
>  > This pins at least 2MB of RAM up-front, even if devicemapper is not in use.
> 
>  Is that really the case?

No, sorry, I had CONFIG_DEBUG_PAGEALLOC turned on...

130k is still quite a lot.  Bear in mind that this memory is never used
unless the page allocation attept fails.  The mempool is there to prevent
OOM deadlocks and it is usually the case that a single mempool item is
sufficient for that.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] 2/5: Device-mapper: kcopyd
  2004-06-02 16:50   ` Kevin Corry
@ 2004-07-03  5:44     ` Daniel Phillips
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Phillips @ 2004-07-03  5:44 UTC (permalink / raw)
  To: Kevin Corry
  Cc: linux-kernel, Andreas Dilger, Alasdair G Kergon, Andrew Morton

On Wednesday 02 June 2004 12:50, Kevin Corry wrote:
> On Wednesday 02 June 2004 11:15 am, Andreas Dilger wrote:
> > It might be nice to have a brief comment here explaining what this is
> > and how it is supposed to be used.
>
> How's this?
>
> We're also working on some general documentation which will go in
> Documentation/device-mapper and will include more detailed information
> about the core driver and the other sub-modules. We'll try to submit those
> patches in the near future.
>
> + * Kcopyd provides a simple interface for copying an area of one
> + * block-device to one or more other block-devices, with an asynchronous
> + * completion notification.

Hi Kevin,

Once again, sorry for the lag.

Since not everybody knows the device-mapper code by heart, I'll add that 
kcopyd is used by the snapshot target to perform copyouts, that is, to 
preserve snapshotted data that would otherwise be overwritten.

Another potential use would be synching in the mirror target.

Regards,

Daniel

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2004-07-03  5:38 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-06-02 15:41 [PATCH] 2/5: Device-mapper: kcopyd Alasdair G Kergon
2004-06-02 16:15 ` Andreas Dilger
2004-06-02 16:50   ` Kevin Corry
2004-07-03  5:44     ` Daniel Phillips
2004-06-02 21:15   ` Alasdair G Kergon
2004-06-03  3:41 ` Andrew Morton
2004-06-03 13:28   ` Alasdair G Kergon
2004-06-10  6:18 ` Andrew Morton
2004-06-10  8:16   ` Kevin Corry
2004-06-10 18:12     ` Andrew Morton
2004-06-10 14:55   ` Alasdair G Kergon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox