* [PATCH 0/4] PPL fixes and improvements
@ 2017-04-04 11:13 Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page Artur Paszkiewicz
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-04 11:13 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, Artur Paszkiewicz

These are mostly fixes for possible deadlocks related to memory allocation,
plus one optimization.

Artur Paszkiewicz (4):
  raid5-ppl: use a single mempool for ppl_io_unit and header_page
  raid5-ppl: move no_mem_stripes to struct ppl_conf
  raid5-ppl: use resize_stripes() when enabling or disabling ppl
  raid5-ppl: partial parity calculation optimization

 drivers/md/raid5-log.h |   5 ++-
 drivers/md/raid5-ppl.c | 112 +++++++++++++++++++++++++++++++------------------
 drivers/md/raid5.c     |  94 ++++++++++++++++++-----------------------
 3 files changed, 115 insertions(+), 96 deletions(-)

-- 
2.11.0



* [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-04 11:13 [PATCH 0/4] PPL fixes and improvements Artur Paszkiewicz
@ 2017-04-04 11:13 ` Artur Paszkiewicz
  2017-04-10 19:09   ` Shaohua Li
  2017-04-04 11:13 ` [PATCH 2/4] raid5-ppl: move no_mem_stripes to struct ppl_conf Artur Paszkiewicz
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-04 11:13 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, Artur Paszkiewicz

Allocate both struct ppl_io_unit and its header_page from a shared
mempool to avoid a possible deadlock. Implement allocate and free
functions for the mempool, remove the second pool for allocating
header_page. The header_pages are now freed with their io_units, not
when the ppl bio completes. Also, use GFP_NOWAIT instead of GFP_ATOMIC
for allocating ppl_io_unit because we can handle failed allocations and
there is no reason to utilize emergency reserves.

Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5-ppl.c | 53 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 86ea9addb51a..42e43467d1e8 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -102,7 +102,6 @@ struct ppl_conf {
 	struct kmem_cache *io_kc;
 	mempool_t *io_pool;
 	struct bio_set *bs;
-	mempool_t *meta_pool;
 
 	/* used only for recovery */
 	int recovered_entries;
@@ -195,6 +194,33 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 	return tx;
 }
 
+static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
+{
+	struct kmem_cache *kc = pool_data;
+	struct ppl_io_unit *io;
+
+	io = kmem_cache_alloc(kc, gfp_mask);
+	if (!io)
+		return NULL;
+
+	io->header_page = alloc_page(gfp_mask);
+	if (!io->header_page) {
+		kmem_cache_free(kc, io);
+		return NULL;
+	}
+
+	return io;
+}
+
+static void ppl_io_pool_free(void *element, void *pool_data)
+{
+	struct kmem_cache *kc = pool_data;
+	struct ppl_io_unit *io = element;
+
+	__free_page(io->header_page);
+	kmem_cache_free(kc, io);
+}
+
 static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
 					  struct stripe_head *sh)
 {
@@ -202,18 +228,19 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
 	struct ppl_io_unit *io;
 	struct ppl_header *pplhdr;
 
-	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
+	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
 	if (!io)
 		return NULL;
 
-	memset(io, 0, sizeof(*io));
 	io->log = log;
+	io->entries_count = 0;
+	io->pp_size = 0;
+	io->submitted = false;
 	INIT_LIST_HEAD(&io->log_sibling);
 	INIT_LIST_HEAD(&io->stripe_list);
 	atomic_set(&io->pending_stripes, 0);
 	bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
 
-	io->header_page = mempool_alloc(ppl_conf->meta_pool, GFP_NOIO);
 	pplhdr = page_address(io->header_page);
 	clear_page(pplhdr);
 	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
@@ -369,8 +396,6 @@ static void ppl_log_endio(struct bio *bio)
 	if (bio->bi_error)
 		md_error(ppl_conf->mddev, log->rdev);
 
-	mempool_free(io->header_page, ppl_conf->meta_pool);
-
 	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
 		list_del_init(&sh->log_list);
 
@@ -998,7 +1023,6 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf)
 
 	kfree(ppl_conf->child_logs);
 
-	mempool_destroy(ppl_conf->meta_pool);
 	if (ppl_conf->bs)
 		bioset_free(ppl_conf->bs);
 	mempool_destroy(ppl_conf->io_pool);
@@ -1104,25 +1128,20 @@ int ppl_init_log(struct r5conf *conf)
 
 	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
 	if (!ppl_conf->io_kc) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
-	ppl_conf->io_pool = mempool_create_slab_pool(conf->raid_disks, ppl_conf->io_kc);
+	ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
+					   ppl_io_pool_free, ppl_conf->io_kc);
 	if (!ppl_conf->io_pool) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
 	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
 	if (!ppl_conf->bs) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	ppl_conf->meta_pool = mempool_create_page_pool(conf->raid_disks, 0);
-	if (!ppl_conf->meta_pool) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
-- 
2.11.0



* [PATCH 2/4] raid5-ppl: move no_mem_stripes to struct ppl_conf
  2017-04-04 11:13 [PATCH 0/4] PPL fixes and improvements Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page Artur Paszkiewicz
@ 2017-04-04 11:13 ` Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 3/4] raid5-ppl: use resize_stripes() when enabling or disabling ppl Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 4/4] raid5-ppl: partial parity calculation optimization Artur Paszkiewicz
  3 siblings, 0 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-04 11:13 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, Artur Paszkiewicz

Use a single no_mem_stripes list instead of per member device lists for
handling stripes that need retrying in case of failed io_unit
allocation. Because io_units are allocated from a memory pool shared
between all member disks, the no_mem_stripes list should be checked when
an io_unit for any member is freed. This fixes a deadlock that could
happen if there are stripes in more than one no_mem_stripes list.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5-ppl.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 42e43467d1e8..0a39a6bbcbde 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -106,6 +106,10 @@ struct ppl_conf {
 	/* used only for recovery */
 	int recovered_entries;
 	int mismatch_count;
+
+	/* stripes to retry if failed to allocate io_unit */
+	struct list_head no_mem_stripes;
+	spinlock_t no_mem_stripes_lock;
 };
 
 struct ppl_log {
@@ -118,8 +122,6 @@ struct ppl_log {
 					 * always at the end of io_list */
 	spinlock_t io_list_lock;
 	struct list_head io_list;	/* all io_units of this log */
-	struct list_head no_mem_stripes;/* stripes to retry if failed to
-					 * allocate io_unit */
 };
 
 #define PPL_IO_INLINE_BVECS 32
@@ -374,9 +376,9 @@ int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh)
 	atomic_inc(&sh->count);
 
 	if (ppl_log_stripe(log, sh)) {
-		spin_lock_irq(&log->io_list_lock);
-		list_add_tail(&sh->log_list, &log->no_mem_stripes);
-		spin_unlock_irq(&log->io_list_lock);
+		spin_lock_irq(&ppl_conf->no_mem_stripes_lock);
+		list_add_tail(&sh->log_list, &ppl_conf->no_mem_stripes);
+		spin_unlock_irq(&ppl_conf->no_mem_stripes_lock);
 	}
 
 	mutex_unlock(&log->io_mutex);
@@ -517,25 +519,32 @@ void ppl_write_stripe_run(struct r5conf *conf)
 static void ppl_io_unit_finished(struct ppl_io_unit *io)
 {
 	struct ppl_log *log = io->log;
+	struct ppl_conf *ppl_conf = log->ppl_conf;
 	unsigned long flags;
 
 	pr_debug("%s: seq: %llu\n", __func__, io->seq);
 
-	spin_lock_irqsave(&log->io_list_lock, flags);
+	local_irq_save(flags);
 
+	spin_lock(&log->io_list_lock);
 	list_del(&io->log_sibling);
-	mempool_free(io, log->ppl_conf->io_pool);
+	spin_unlock(&log->io_list_lock);
+
+	mempool_free(io, ppl_conf->io_pool);
+
+	spin_lock(&ppl_conf->no_mem_stripes_lock);
+	if (!list_empty(&ppl_conf->no_mem_stripes)) {
+		struct stripe_head *sh;
 
-	if (!list_empty(&log->no_mem_stripes)) {
-		struct stripe_head *sh = list_first_entry(&log->no_mem_stripes,
-							  struct stripe_head,
-							  log_list);
+		sh = list_first_entry(&ppl_conf->no_mem_stripes,
+				      struct stripe_head, log_list);
 		list_del_init(&sh->log_list);
 		set_bit(STRIPE_HANDLE, &sh->state);
 		raid5_release_stripe(sh);
 	}
+	spin_unlock(&ppl_conf->no_mem_stripes_lock);
 
-	spin_unlock_irqrestore(&log->io_list_lock, flags);
+	local_irq_restore(flags);
 }
 
 void ppl_stripe_write_finished(struct stripe_head *sh)
@@ -1154,6 +1163,8 @@ int ppl_init_log(struct r5conf *conf)
 	}
 
 	atomic64_set(&ppl_conf->seq, 0);
+	INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
+	spin_lock_init(&ppl_conf->no_mem_stripes_lock);
 
 	if (!mddev->external) {
 		ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
@@ -1169,7 +1180,6 @@ int ppl_init_log(struct r5conf *conf)
 		mutex_init(&log->io_mutex);
 		spin_lock_init(&log->io_list_lock);
 		INIT_LIST_HEAD(&log->io_list);
-		INIT_LIST_HEAD(&log->no_mem_stripes);
 
 		log->ppl_conf = ppl_conf;
 		log->rdev = rdev;
-- 
2.11.0



* [PATCH 3/4] raid5-ppl: use resize_stripes() when enabling or disabling ppl
  2017-04-04 11:13 [PATCH 0/4] PPL fixes and improvements Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 2/4] raid5-ppl: move no_mem_stripes to struct ppl_conf Artur Paszkiewicz
@ 2017-04-04 11:13 ` Artur Paszkiewicz
  2017-04-04 11:13 ` [PATCH 4/4] raid5-ppl: partial parity calculation optimization Artur Paszkiewicz
  3 siblings, 0 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-04 11:13 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, Artur Paszkiewicz

Use resize_stripes() instead of raid5_reset_stripe_cache() to allocate
or free sh->ppl_page at runtime for all stripes in the stripe cache.
raid5_reset_stripe_cache() required suspending the mddev and could
deadlock because of GFP_KERNEL allocations.

Move the 'newsize' check to check_reshape() to allow reallocating the
stripes with the same number of disks. Allocate sh->ppl_page in
alloc_stripe() instead of grow_buffers(). Pass 'struct r5conf *conf' as
a parameter to alloc_stripe() because it is needed to check whether to
allocate ppl_page. Add free_stripe() and use it to free stripes rather
than directly call kmem_cache_free(). Also free sh->ppl_page in
free_stripe().

Set MD_HAS_PPL at the end of ppl_init_log() instead of explicitly
setting it in advance and add another parameter to log_init() to allow
calling ppl_init_log() without the bit set. Don't try to calculate
partial parity or add a stripe to log if it does not have ppl_page set.

Enabling ppl can now be performed without suspending the mddev, because
the log won't be used until new stripes are allocated with ppl_page.
Calling mddev_suspend/resume is still necessary when disabling ppl,
because we want all stripes to finish before stopping the log, but
resize_stripes() can be called after mddev_resume() when ppl is no
longer active.

Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5-log.h |  5 +--
 drivers/md/raid5-ppl.c |  3 +-
 drivers/md/raid5.c     | 88 ++++++++++++++++++++++----------------------------
 3 files changed, 43 insertions(+), 53 deletions(-)

diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index 738930ff5d17..27097101ccca 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -93,11 +93,12 @@ static inline void log_exit(struct r5conf *conf)
 		ppl_exit_log(conf);
 }
 
-static inline int log_init(struct r5conf *conf, struct md_rdev *journal_dev)
+static inline int log_init(struct r5conf *conf, struct md_rdev *journal_dev,
+			   bool ppl)
 {
 	if (journal_dev)
 		return r5l_init_log(conf, journal_dev);
-	else if (raid5_has_ppl(conf))
+	else if (ppl)
 		return ppl_init_log(conf);
 
 	return 0;
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 0a39a6bbcbde..e938669810c4 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -355,7 +355,7 @@ int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh)
 	struct ppl_io_unit *io = sh->ppl_io;
 	struct ppl_log *log;
 
-	if (io || test_bit(STRIPE_SYNCING, &sh->state) ||
+	if (io || test_bit(STRIPE_SYNCING, &sh->state) || !sh->ppl_page ||
 	    !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) ||
 	    !test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) {
 		clear_bit(STRIPE_LOG_TRAPPED, &sh->state);
@@ -1223,6 +1223,7 @@ int ppl_init_log(struct r5conf *conf)
 	}
 
 	conf->log_private = ppl_conf;
+	set_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
 
 	return 0;
 err:
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6036d5e41ddd..9cab2fe078c2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -471,11 +471,6 @@ static void shrink_buffers(struct stripe_head *sh)
 		sh->dev[i].page = NULL;
 		put_page(p);
 	}
-
-	if (sh->ppl_page) {
-		put_page(sh->ppl_page);
-		sh->ppl_page = NULL;
-	}
 }
 
 static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
@@ -493,12 +488,6 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
 		sh->dev[i].orig_page = page;
 	}
 
-	if (raid5_has_ppl(sh->raid_conf)) {
-		sh->ppl_page = alloc_page(gfp);
-		if (!sh->ppl_page)
-			return 1;
-	}
-
 	return 0;
 }
 
@@ -2132,8 +2121,15 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 	put_cpu();
 }
 
+static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
+{
+	if (sh->ppl_page)
+		__free_page(sh->ppl_page);
+	kmem_cache_free(sc, sh);
+}
+
 static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
-	int disks)
+	int disks, struct r5conf *conf)
 {
 	struct stripe_head *sh;
 	int i;
@@ -2147,6 +2143,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 		INIT_LIST_HEAD(&sh->r5c);
 		INIT_LIST_HEAD(&sh->log_list);
 		atomic_set(&sh->count, 1);
+		sh->raid_conf = conf;
 		sh->log_start = MaxSector;
 		for (i = 0; i < disks; i++) {
 			struct r5dev *dev = &sh->dev[i];
@@ -2154,6 +2151,14 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 			bio_init(&dev->req, &dev->vec, 1);
 			bio_init(&dev->rreq, &dev->rvec, 1);
 		}
+
+		if (raid5_has_ppl(conf)) {
+			sh->ppl_page = alloc_page(gfp);
+			if (!sh->ppl_page) {
+				free_stripe(sc, sh);
+				sh = NULL;
+			}
+		}
 	}
 	return sh;
 }
@@ -2161,15 +2166,13 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 {
 	struct stripe_head *sh;
 
-	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
+	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf);
 	if (!sh)
 		return 0;
 
-	sh->raid_conf = conf;
-
 	if (grow_buffers(sh, gfp)) {
 		shrink_buffers(sh);
-		kmem_cache_free(conf->slab_cache, sh);
+		free_stripe(conf->slab_cache, sh);
 		return 0;
 	}
 	sh->hash_lock_index =
@@ -2314,9 +2317,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	int i;
 	int hash, cnt;
 
-	if (newsize <= conf->pool_size)
-		return 0; /* never bother to shrink */
-
 	err = md_allow_write(conf->mddev);
 	if (err)
 		return err;
@@ -2332,11 +2332,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	mutex_lock(&conf->cache_size_mutex);
 
 	for (i = conf->max_nr_stripes; i; i--) {
-		nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
+		nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf);
 		if (!nsh)
 			break;
 
-		nsh->raid_conf = conf;
 		list_add(&nsh->lru, &newstripes);
 	}
 	if (i) {
@@ -2344,7 +2343,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 		while (!list_empty(&newstripes)) {
 			nsh = list_entry(newstripes.next, struct stripe_head, lru);
 			list_del(&nsh->lru);
-			kmem_cache_free(sc, nsh);
+			free_stripe(sc, nsh);
 		}
 		kmem_cache_destroy(sc);
 		mutex_unlock(&conf->cache_size_mutex);
@@ -2370,7 +2369,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 			nsh->dev[i].orig_page = osh->dev[i].page;
 		}
 		nsh->hash_lock_index = hash;
-		kmem_cache_free(conf->slab_cache, osh);
+		free_stripe(conf->slab_cache, osh);
 		cnt++;
 		if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
 		    !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
@@ -2445,7 +2444,7 @@ static int drop_one_stripe(struct r5conf *conf)
 		return 0;
 	BUG_ON(atomic_read(&sh->count));
 	shrink_buffers(sh);
-	kmem_cache_free(conf->slab_cache, sh);
+	free_stripe(conf->slab_cache, sh);
 	atomic_dec(&conf->active_stripes);
 	conf->max_nr_stripes--;
 	return 1;
@@ -3168,7 +3167,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 		s->locked++;
 	}
 
-	if (raid5_has_ppl(sh->raid_conf) &&
+	if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page &&
 	    test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) &&
 	    !test_bit(STRIPE_FULL_WRITE, &sh->state) &&
 	    test_bit(R5_Insync, &sh->dev[pd_idx].flags))
@@ -7414,7 +7413,7 @@ static int raid5_run(struct mddev *mddev)
 		blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
 	}
 
-	if (log_init(conf, journal_dev))
+	if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
 		goto abort;
 
 	return 0;
@@ -7623,7 +7622,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		 * The array is in readonly mode if journal is missing, so no
 		 * write requests running. We should be safe
 		 */
-		log_init(conf, rdev);
+		log_init(conf, rdev, false);
 		return 0;
 	}
 	if (mddev->recovery_disabled == conf->recovery_disabled)
@@ -7773,6 +7772,9 @@ static int check_reshape(struct mddev *mddev)
 				      mddev->chunk_sectors)
 			    ) < 0)
 			return -ENOMEM;
+
+	if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size)
+		return 0; /* never bother to shrink */
 	return resize_stripes(conf, (conf->previous_raid_disks
 				     + mddev->delta_disks));
 }
@@ -8263,20 +8265,6 @@ static void *raid6_takeover(struct mddev *mddev)
 	return setup_conf(mddev);
 }
 
-static void raid5_reset_stripe_cache(struct mddev *mddev)
-{
-	struct r5conf *conf = mddev->private;
-
-	mutex_lock(&conf->cache_size_mutex);
-	while (conf->max_nr_stripes &&
-	       drop_one_stripe(conf))
-		;
-	while (conf->min_nr_stripes > conf->max_nr_stripes &&
-	       grow_one_stripe(conf, GFP_KERNEL))
-		;
-	mutex_unlock(&conf->cache_size_mutex);
-}
-
 static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 {
 	struct r5conf *conf;
@@ -8291,23 +8279,23 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 		return -ENODEV;
 	}
 
-	if (strncmp(buf, "ppl", 3) == 0 && !raid5_has_ppl(conf)) {
+	if (strncmp(buf, "ppl", 3) == 0) {
 		/* ppl only works with RAID 5 */
-		if (conf->level == 5) {
-			mddev_suspend(mddev);
-			set_bit(MD_HAS_PPL, &mddev->flags);
-			err = log_init(conf, NULL);
-			if (!err)
-				raid5_reset_stripe_cache(mddev);
-			mddev_resume(mddev);
+		if (!raid5_has_ppl(conf) && conf->level == 5) {
+			err = log_init(conf, NULL, true);
+			if (!err) {
+				err = resize_stripes(conf, conf->pool_size);
+				if (err)
+					log_exit(conf);
+			}
 		} else
 			err = -EINVAL;
 	} else if (strncmp(buf, "resync", 6) == 0) {
 		if (raid5_has_ppl(conf)) {
 			mddev_suspend(mddev);
 			log_exit(conf);
-			raid5_reset_stripe_cache(mddev);
 			mddev_resume(mddev);
+			err = resize_stripes(conf, conf->pool_size);
 		} else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) &&
 			   r5l_log_disk_error(conf)) {
 			bool journal_dev_exists = false;
-- 
2.11.0



* [PATCH 4/4] raid5-ppl: partial parity calculation optimization
  2017-04-04 11:13 [PATCH 0/4] PPL fixes and improvements Artur Paszkiewicz
                   ` (2 preceding siblings ...)
  2017-04-04 11:13 ` [PATCH 3/4] raid5-ppl: use resize_stripes() when enabling or disabling ppl Artur Paszkiewicz
@ 2017-04-04 11:13 ` Artur Paszkiewicz
  3 siblings, 0 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-04 11:13 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, Artur Paszkiewicz

In the case of read-modify-write, partial parity is the same as the result
of ops_run_prexor5(), so we can just copy sh->dev[pd_idx].page into
sh->ppl_page instead of calculating it again.
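
For clarity, a minimal sketch of why the two values are equal in the rmw case,
assuming the usual PPL definition (partial parity = xor of the data chunks not
modified by this write; P_old = old parity, D_i_old = old data of disk i):

	/*
	 * partial parity    = xor of D_i_old over the disks not being written
	 *                   = P_old ^ (xor of D_i_old over the disks being written)
	 * ops_run_prexor5() : computes P_old ^ (xor of D_i_old over the
	 *                     R5_Wantdrain disks) into sh->dev[pd_idx].page
	 */

R5_Wantdrain is set exactly on the disks being written, so the prexor result
already sitting in sh->dev[pd_idx].page is the partial parity.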

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5-ppl.c | 20 ++++++++++----------
 drivers/md/raid5.c     |  6 +++---
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index e938669810c4..6fef999f2150 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -152,7 +152,7 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 		       struct dma_async_tx_descriptor *tx)
 {
 	int disks = sh->disks;
-	struct page **xor_srcs = flex_array_get(percpu->scribble, 0);
+	struct page **srcs = flex_array_get(percpu->scribble, 0);
 	int count = 0, pd_idx = sh->pd_idx, i;
 	struct async_submit_ctl submit;
 
@@ -165,18 +165,18 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 	 * differently.
 	 */
 	if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
-		/* rmw: xor old data and parity from updated disks */
-		for (i = disks; i--;) {
-			struct r5dev *dev = &sh->dev[i];
-			if (test_bit(R5_Wantdrain, &dev->flags) || i == pd_idx)
-				xor_srcs[count++] = dev->page;
-		}
+		/*
+		 * rmw: xor old data and parity from updated disks
+		 * This is calculated earlier by ops_run_prexor5() so just copy
+		 * the parity dev page.
+		 */
+		srcs[count++] = sh->dev[pd_idx].page;
 	} else if (sh->reconstruct_state == reconstruct_state_drain_run) {
 		/* rcw: xor data from all not updated disks */
 		for (i = disks; i--;) {
 			struct r5dev *dev = &sh->dev[i];
 			if (test_bit(R5_UPTODATE, &dev->flags))
-				xor_srcs[count++] = dev->page;
+				srcs[count++] = dev->page;
 		}
 	} else {
 		return tx;
@@ -187,10 +187,10 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 			  + sizeof(struct page *) * (sh->disks + 2));
 
 	if (count == 1)
-		tx = async_memcpy(sh->ppl_page, xor_srcs[0], 0, 0, PAGE_SIZE,
+		tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE,
 				  &submit);
 	else
-		tx = async_xor(sh->ppl_page, xor_srcs, 0, count, PAGE_SIZE,
+		tx = async_xor(sh->ppl_page, srcs, 0, count, PAGE_SIZE,
 			       &submit);
 
 	return tx;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9cab2fe078c2..f99a12f50f9a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2079,9 +2079,6 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 			async_tx_ack(tx);
 	}
 
-	if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request))
-		tx = ops_run_partial_parity(sh, percpu, tx);
-
 	if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
 		if (level < 6)
 			tx = ops_run_prexor5(sh, percpu, tx);
@@ -2089,6 +2086,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 			tx = ops_run_prexor6(sh, percpu, tx);
 	}
 
+	if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request))
+		tx = ops_run_partial_parity(sh, percpu, tx);
+
 	if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
 		tx = ops_run_biodrain(sh, tx);
 		overlap_clear++;
-- 
2.11.0



* Re: [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-04 11:13 ` [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page Artur Paszkiewicz
@ 2017-04-10 19:09   ` Shaohua Li
  2017-04-11  8:28     ` Artur Paszkiewicz
  0 siblings, 1 reply; 11+ messages in thread
From: Shaohua Li @ 2017-04-10 19:09 UTC (permalink / raw)
  To: Artur Paszkiewicz; +Cc: linux-raid, songliubraving


On Tue, Apr 04, 2017 at 01:13:55PM +0200, Artur Paszkiewicz wrote:

Cc: Song, the raid5-cache needs a similar fix.

> Allocate both struct ppl_io_unit and its header_page from a shared
> mempool to avoid a possible deadlock. Implement allocate and free
> functions for the mempool, remove the second pool for allocating
> header_page. The header_pages are now freed with their io_units, not
> when the ppl bio completes. Also, use GFP_NOWAIT instead of GFP_ATOMIC
> for allocating ppl_io_unit because we can handle failed allocations and
> there is no reason to utilize emergency reserves.

I applied the last 3 patches, but had some nitpicks for this one.

> Suggested-by: NeilBrown <neilb@suse.com>
> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
> ---
>  drivers/md/raid5-ppl.c | 53 ++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 36 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
> index 86ea9addb51a..42e43467d1e8 100644
> --- a/drivers/md/raid5-ppl.c
> +++ b/drivers/md/raid5-ppl.c
> @@ -102,7 +102,6 @@ struct ppl_conf {
>  	struct kmem_cache *io_kc;
>  	mempool_t *io_pool;
>  	struct bio_set *bs;
> -	mempool_t *meta_pool;
>  
>  	/* used only for recovery */
>  	int recovered_entries;
> @@ -195,6 +194,33 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
>  	return tx;
>  }
>  
> +static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
> +{
> +	struct kmem_cache *kc = pool_data;
> +	struct ppl_io_unit *io;
> +
> +	io = kmem_cache_alloc(kc, gfp_mask);
> +	if (!io)
> +		return NULL;
> +
> +	io->header_page = alloc_page(gfp_mask);
> +	if (!io->header_page) {
> +		kmem_cache_free(kc, io);
> +		return NULL;
> +	}
> +
> +	return io;

Maybe directly use GFP_NOWAIT here, we don't use other gfp

> +}
> +
> +static void ppl_io_pool_free(void *element, void *pool_data)
> +{
> +	struct kmem_cache *kc = pool_data;
> +	struct ppl_io_unit *io = element;
> +
> +	__free_page(io->header_page);
> +	kmem_cache_free(kc, io);
> +}
> +
>  static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>  					  struct stripe_head *sh)
>  {
> @@ -202,18 +228,19 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>  	struct ppl_io_unit *io;
>  	struct ppl_header *pplhdr;
>  
> -	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
> +	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
>  	if (!io)
>  		return NULL;
>  
> -	memset(io, 0, sizeof(*io));
>  	io->log = log;
> +	io->entries_count = 0;
> +	io->pp_size = 0;
> +	io->submitted = false;

I'd suggest moving the memset to ppl_io_pool_alloc. Don't think we need to
optimize to avoid setting header_page. And doing memset is less error prone,
for example adding new fields.

Otherwise looks quite good.

Thanks,
Shaohua
>  	INIT_LIST_HEAD(&io->log_sibling);
>  	INIT_LIST_HEAD(&io->stripe_list);
>  	atomic_set(&io->pending_stripes, 0);
>  	bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
>  
> -	io->header_page = mempool_alloc(ppl_conf->meta_pool, GFP_NOIO);
>  	pplhdr = page_address(io->header_page);
>  	clear_page(pplhdr);
>  	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
> @@ -369,8 +396,6 @@ static void ppl_log_endio(struct bio *bio)
>  	if (bio->bi_error)
>  		md_error(ppl_conf->mddev, log->rdev);
>  
> -	mempool_free(io->header_page, ppl_conf->meta_pool);
> -
>  	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
>  		list_del_init(&sh->log_list);
>  
> @@ -998,7 +1023,6 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf)
>  
>  	kfree(ppl_conf->child_logs);
>  
> -	mempool_destroy(ppl_conf->meta_pool);
>  	if (ppl_conf->bs)
>  		bioset_free(ppl_conf->bs);
>  	mempool_destroy(ppl_conf->io_pool);
> @@ -1104,25 +1128,20 @@ int ppl_init_log(struct r5conf *conf)
>  
>  	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
>  	if (!ppl_conf->io_kc) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
> -	ppl_conf->io_pool = mempool_create_slab_pool(conf->raid_disks, ppl_conf->io_kc);
> +	ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
> +					   ppl_io_pool_free, ppl_conf->io_kc);
>  	if (!ppl_conf->io_pool) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
>  	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
>  	if (!ppl_conf->bs) {
> -		ret = -EINVAL;
> -		goto err;
> -	}
> -
> -	ppl_conf->meta_pool = mempool_create_page_pool(conf->raid_disks, 0);
> -	if (!ppl_conf->meta_pool) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
> -- 
> 2.11.0
> 


* Re: [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-10 19:09   ` Shaohua Li
@ 2017-04-11  8:28     ` Artur Paszkiewicz
  2017-04-11  9:20       ` Artur Paszkiewicz
  0 siblings, 1 reply; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-11  8:28 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, songliubraving

On 04/10/2017 09:09 PM, Shaohua Li wrote:
>> +static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
>> +{
>> +	struct kmem_cache *kc = pool_data;
>> +	struct ppl_io_unit *io;
>> +
>> +	io = kmem_cache_alloc(kc, gfp_mask);
>> +	if (!io)
>> +		return NULL;
>> +
>> +	io->header_page = alloc_page(gfp_mask);
>> +	if (!io->header_page) {
>> +		kmem_cache_free(kc, io);
>> +		return NULL;
>> +	}
>> +
>> +	return io;
> 
> Maybe directly use GFP_NOWAIT here, we don't use other gfp

Do you mean ignore the gfp_mask parameter? I don't think we should do
that. It is provided by the mempool implementation. We only use
GFP_NOWAIT for explicit allocations, but GFP_KERNEL is used for
allocating initial items in mempool_create().
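
To make the two paths concrete, a minimal sketch of the mempool contract as I
understand it (simplified, not the exact mm/mempool.c internals):

	/* pool creation: the reserve of conf->raid_disks elements is
	 * pre-filled, so ppl_io_pool_alloc() is called with GFP_KERNEL here */
	ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
					   ppl_io_pool_free, ppl_conf->io_kc);

	/* runtime: mempool_alloc() first calls ppl_io_pool_alloc() with the
	 * caller's flags (GFP_NOWAIT here) and falls back to the pre-filled
	 * reserve only if that allocation fails */
	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);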

>> +}
>> +
>> +static void ppl_io_pool_free(void *element, void *pool_data)
>> +{
>> +	struct kmem_cache *kc = pool_data;
>> +	struct ppl_io_unit *io = element;
>> +
>> +	__free_page(io->header_page);
>> +	kmem_cache_free(kc, io);
>> +}
>> +
>>  static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>>  					  struct stripe_head *sh)
>>  {
>> @@ -202,18 +228,19 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>>  	struct ppl_io_unit *io;
>>  	struct ppl_header *pplhdr;
>>  
>> -	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
>> +	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
>>  	if (!io)
>>  		return NULL;
>>  
>> -	memset(io, 0, sizeof(*io));
>>  	io->log = log;
>> +	io->entries_count = 0;
>> +	io->pp_size = 0;
>> +	io->submitted = false;
> 
> I'd suggest moving the memset to ppl_io_pool_alloc. Don't think we need to
> optimize to avoid setting header_page. And doing memset is less error prone,
> for example adding new fields.

OK, I'll change this and resend.

Thanks,
Artur


* Re: [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-11  8:28     ` Artur Paszkiewicz
@ 2017-04-11  9:20       ` Artur Paszkiewicz
  2017-04-11 15:41         ` Shaohua Li
  0 siblings, 1 reply; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-11  9:20 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, songliubraving

On 04/11/2017 10:28 AM, Artur Paszkiewicz wrote:
> On 04/10/2017 09:09 PM, Shaohua Li wrote:
>>> +static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
>>> +{
>>> +	struct kmem_cache *kc = pool_data;
>>> +	struct ppl_io_unit *io;
>>> +
>>> +	io = kmem_cache_alloc(kc, gfp_mask);
>>> +	if (!io)
>>> +		return NULL;
>>> +
>>> +	io->header_page = alloc_page(gfp_mask);
>>> +	if (!io->header_page) {
>>> +		kmem_cache_free(kc, io);
>>> +		return NULL;
>>> +	}
>>> +
>>> +	return io;
>>
>> Maybe directly use GFP_NOWAIT here, we don't use other gfp
> 
> Do you mean ignore the gfp_mask parameter? I don't think we should do
> that. It is provided by the mempool implementation. We only use
> GFP_NOWAIT for explicit allocations, but GFP_KERNEL is used for
> allocating initial items in mempool_create().
> 
>>> +}
>>> +
>>> +static void ppl_io_pool_free(void *element, void *pool_data)
>>> +{
>>> +	struct kmem_cache *kc = pool_data;
>>> +	struct ppl_io_unit *io = element;
>>> +
>>> +	__free_page(io->header_page);
>>> +	kmem_cache_free(kc, io);
>>> +}
>>> +
>>>  static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>>>  					  struct stripe_head *sh)
>>>  {
>>> @@ -202,18 +228,19 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>>>  	struct ppl_io_unit *io;
>>>  	struct ppl_header *pplhdr;
>>>  
>>> -	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
>>> +	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
>>>  	if (!io)
>>>  		return NULL;
>>>  
>>> -	memset(io, 0, sizeof(*io));
>>>  	io->log = log;
>>> +	io->entries_count = 0;
>>> +	io->pp_size = 0;
>>> +	io->submitted = false;
>>
>> I'd suggest moving the memset to ppl_io_pool_alloc. Don't think we need to
>> optimize to avoid setting header_page. And doing memset is less error prone,
>> for example adding new fields.
> 
> OK, I'll change this and resend.

Well, on second thought, actually I can't move the memset to
ppl_io_pool_alloc. Mempool can use the preallocated elements, which are
then returned back to the pool after calling mempool_free() and can be
reused later. So a ppl_io_unit must be initialized after retrieving it
from mempool_alloc(). I can leave the memset in ppl_new_iounit() but
io->header_page pointer will have to be temporarily copied before
zeroing the iounit and then set back. Is this ok?
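
The reuse path I mean, as a minimal sketch (simplified/assumed mempool
behaviour, not the exact mm/mempool.c code; 'pool' stands for
ppl_conf->io_pool):

	io = mempool_alloc(pool, GFP_NOWAIT);	/* may pop a preallocated element */
	/* ... io is used, pp_size/entries_count become non-zero ... */
	mempool_free(io, pool);			/* reserve not full: element is kept,
						 * ppl_io_pool_free() is not called */
	io = mempool_alloc(pool, GFP_NOWAIT);	/* the same element can come back
						 * without ppl_io_pool_alloc() running */
	/* => per-io fields still hold stale values and must be cleared here */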

Thanks,
Artur


* Re: [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-11  9:20       ` Artur Paszkiewicz
@ 2017-04-11 15:41         ` Shaohua Li
  2017-04-11 18:50           ` [PATCH v2] " Artur Paszkiewicz
  0 siblings, 1 reply; 11+ messages in thread
From: Shaohua Li @ 2017-04-11 15:41 UTC (permalink / raw)
  To: Artur Paszkiewicz; +Cc: linux-raid, songliubraving

On Tue, Apr 11, 2017 at 11:20:13AM +0200, Artur Paszkiewicz wrote:
> On 04/11/2017 10:28 AM, Artur Paszkiewicz wrote:
> > On 04/10/2017 09:09 PM, Shaohua Li wrote:
> >>> +static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
> >>> +{
> >>> +	struct kmem_cache *kc = pool_data;
> >>> +	struct ppl_io_unit *io;
> >>> +
> >>> +	io = kmem_cache_alloc(kc, gfp_mask);
> >>> +	if (!io)
> >>> +		return NULL;
> >>> +
> >>> +	io->header_page = alloc_page(gfp_mask);
> >>> +	if (!io->header_page) {
> >>> +		kmem_cache_free(kc, io);
> >>> +		return NULL;
> >>> +	}
> >>> +
> >>> +	return io;
> >>
> >> Maybe directly use GFP_NOWAIT here, we don't use other gfp
> > 
> > Do you mean ignore the gfp_mask parameter? I don't think we should do
> > that. It is provided by the mempool implementation. We only use
> > GFP_NOWAIT for explicit allocations, but GFP_KERNEL is used for
> > allocating initial items in mempool_create().
> > 
> >>> +}
> >>> +
> >>> +static void ppl_io_pool_free(void *element, void *pool_data)
> >>> +{
> >>> +	struct kmem_cache *kc = pool_data;
> >>> +	struct ppl_io_unit *io = element;
> >>> +
> >>> +	__free_page(io->header_page);
> >>> +	kmem_cache_free(kc, io);
> >>> +}
> >>> +
> >>>  static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
> >>>  					  struct stripe_head *sh)
> >>>  {
> >>> @@ -202,18 +228,19 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
> >>>  	struct ppl_io_unit *io;
> >>>  	struct ppl_header *pplhdr;
> >>>  
> >>> -	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
> >>> +	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
> >>>  	if (!io)
> >>>  		return NULL;
> >>>  
> >>> -	memset(io, 0, sizeof(*io));
> >>>  	io->log = log;
> >>> +	io->entries_count = 0;
> >>> +	io->pp_size = 0;
> >>> +	io->submitted = false;
> >>
> >> I'd suggest moving the memset to ppl_io_pool_alloc. Don't think we need to
> >> optimize to avoid setting header_page. And doing memset is less error prone,
> >> for example adding new fields.
> > 
> > OK, I'll change this and resend.
> 
> Well, on second thought, actually I can't move the memset to
> ppl_io_pool_alloc. Mempool can use the preallocated elements, which are
> then returned back to the pool after calling mempool_free() and can be
> reused later. So a ppl_io_unit must be initialized after retrieving it
> from mempool_alloc(). I can leave the memset in ppl_new_iounit() but
> io->header_page pointer will have to be temporarily copied before
> zeroing the iounit and then set back. Is this ok?

Yes, that's preferred.

Thanks,
Shaohua


* [PATCH v2] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-11 15:41         ` Shaohua Li
@ 2017-04-11 18:50           ` Artur Paszkiewicz
  2017-04-11 21:58             ` Shaohua Li
  0 siblings, 1 reply; 11+ messages in thread
From: Artur Paszkiewicz @ 2017-04-11 18:50 UTC (permalink / raw)
  To: shli; +Cc: linux-raid, songliubraving, Artur Paszkiewicz

Allocate both struct ppl_io_unit and its header_page from a shared
mempool to avoid a possible deadlock. Implement allocate and free
functions for the mempool, remove the second pool for allocating
header_page. The header_pages are now freed with their io_units, not
when the ppl bio completes. Also, use GFP_NOWAIT instead of GFP_ATOMIC
for allocating ppl_io_unit because we can handle failed allocations and
there is no reason to utilize emergency reserves.

Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/raid5-ppl.c | 53 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 4eb0ebcf9c29..5d25bebf3328 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -102,7 +102,6 @@ struct ppl_conf {
 	struct kmem_cache *io_kc;
 	mempool_t *io_pool;
 	struct bio_set *bs;
-	mempool_t *meta_pool;
 
 	/* used only for recovery */
 	int recovered_entries;
@@ -197,25 +196,55 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
 	return tx;
 }
 
+static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
+{
+	struct kmem_cache *kc = pool_data;
+	struct ppl_io_unit *io;
+
+	io = kmem_cache_alloc(kc, gfp_mask);
+	if (!io)
+		return NULL;
+
+	io->header_page = alloc_page(gfp_mask);
+	if (!io->header_page) {
+		kmem_cache_free(kc, io);
+		return NULL;
+	}
+
+	return io;
+}
+
+static void ppl_io_pool_free(void *element, void *pool_data)
+{
+	struct kmem_cache *kc = pool_data;
+	struct ppl_io_unit *io = element;
+
+	__free_page(io->header_page);
+	kmem_cache_free(kc, io);
+}
+
 static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
 					  struct stripe_head *sh)
 {
 	struct ppl_conf *ppl_conf = log->ppl_conf;
 	struct ppl_io_unit *io;
 	struct ppl_header *pplhdr;
+	struct page *header_page;
 
-	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
+	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
 	if (!io)
 		return NULL;
 
+	header_page = io->header_page;
 	memset(io, 0, sizeof(*io));
+	io->header_page = header_page;
+
 	io->log = log;
 	INIT_LIST_HEAD(&io->log_sibling);
 	INIT_LIST_HEAD(&io->stripe_list);
 	atomic_set(&io->pending_stripes, 0);
 	bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
 
-	io->header_page = mempool_alloc(ppl_conf->meta_pool, GFP_NOIO);
 	pplhdr = page_address(io->header_page);
 	clear_page(pplhdr);
 	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
@@ -371,8 +400,6 @@ static void ppl_log_endio(struct bio *bio)
 	if (bio->bi_error)
 		md_error(ppl_conf->mddev, log->rdev);
 
-	mempool_free(io->header_page, ppl_conf->meta_pool);
-
 	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
 		list_del_init(&sh->log_list);
 
@@ -1007,7 +1034,6 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf)
 
 	kfree(ppl_conf->child_logs);
 
-	mempool_destroy(ppl_conf->meta_pool);
 	if (ppl_conf->bs)
 		bioset_free(ppl_conf->bs);
 	mempool_destroy(ppl_conf->io_pool);
@@ -1113,25 +1139,20 @@ int ppl_init_log(struct r5conf *conf)
 
 	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
 	if (!ppl_conf->io_kc) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
-	ppl_conf->io_pool = mempool_create_slab_pool(conf->raid_disks, ppl_conf->io_kc);
+	ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
+					   ppl_io_pool_free, ppl_conf->io_kc);
 	if (!ppl_conf->io_pool) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
 	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
 	if (!ppl_conf->bs) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	ppl_conf->meta_pool = mempool_create_page_pool(conf->raid_disks, 0);
-	if (!ppl_conf->meta_pool) {
-		ret = -EINVAL;
+		ret = -ENOMEM;
 		goto err;
 	}
 
-- 
2.11.0



* Re: [PATCH v2] raid5-ppl: use a single mempool for ppl_io_unit and header_page
  2017-04-11 18:50           ` [PATCH v2] " Artur Paszkiewicz
@ 2017-04-11 21:58             ` Shaohua Li
  0 siblings, 0 replies; 11+ messages in thread
From: Shaohua Li @ 2017-04-11 21:58 UTC (permalink / raw)
  To: Artur Paszkiewicz; +Cc: linux-raid, songliubraving

On Tue, Apr 11, 2017 at 08:50:51PM +0200, Artur Paszkiewicz wrote:
> Allocate both struct ppl_io_unit and its header_page from a shared
> mempool to avoid a possible deadlock. Implement allocate and free
> functions for the mempool, remove the second pool for allocating
> header_page. The header_pages are now freed with their io_units, not
> when the ppl bio completes. Also, use GFP_NOWAIT instead of GFP_ATOMIC
> for allocating ppl_io_unit because we can handle failed allocations and
> there is no reason to utilize emergency reserves.
> 
> Suggested-by: NeilBrown <neilb@suse.com>
> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>

applied, thanks!
> ---
>  drivers/md/raid5-ppl.c | 53 +++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 37 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
> index 4eb0ebcf9c29..5d25bebf3328 100644
> --- a/drivers/md/raid5-ppl.c
> +++ b/drivers/md/raid5-ppl.c
> @@ -102,7 +102,6 @@ struct ppl_conf {
>  	struct kmem_cache *io_kc;
>  	mempool_t *io_pool;
>  	struct bio_set *bs;
> -	mempool_t *meta_pool;
>  
>  	/* used only for recovery */
>  	int recovered_entries;
> @@ -197,25 +196,55 @@ ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
>  	return tx;
>  }
>  
> +static void *ppl_io_pool_alloc(gfp_t gfp_mask, void *pool_data)
> +{
> +	struct kmem_cache *kc = pool_data;
> +	struct ppl_io_unit *io;
> +
> +	io = kmem_cache_alloc(kc, gfp_mask);
> +	if (!io)
> +		return NULL;
> +
> +	io->header_page = alloc_page(gfp_mask);
> +	if (!io->header_page) {
> +		kmem_cache_free(kc, io);
> +		return NULL;
> +	}
> +
> +	return io;
> +}
> +
> +static void ppl_io_pool_free(void *element, void *pool_data)
> +{
> +	struct kmem_cache *kc = pool_data;
> +	struct ppl_io_unit *io = element;
> +
> +	__free_page(io->header_page);
> +	kmem_cache_free(kc, io);
> +}
> +
>  static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
>  					  struct stripe_head *sh)
>  {
>  	struct ppl_conf *ppl_conf = log->ppl_conf;
>  	struct ppl_io_unit *io;
>  	struct ppl_header *pplhdr;
> +	struct page *header_page;
>  
> -	io = mempool_alloc(ppl_conf->io_pool, GFP_ATOMIC);
> +	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
>  	if (!io)
>  		return NULL;
>  
> +	header_page = io->header_page;
>  	memset(io, 0, sizeof(*io));
> +	io->header_page = header_page;
> +
>  	io->log = log;
>  	INIT_LIST_HEAD(&io->log_sibling);
>  	INIT_LIST_HEAD(&io->stripe_list);
>  	atomic_set(&io->pending_stripes, 0);
>  	bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
>  
> -	io->header_page = mempool_alloc(ppl_conf->meta_pool, GFP_NOIO);
>  	pplhdr = page_address(io->header_page);
>  	clear_page(pplhdr);
>  	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
> @@ -371,8 +400,6 @@ static void ppl_log_endio(struct bio *bio)
>  	if (bio->bi_error)
>  		md_error(ppl_conf->mddev, log->rdev);
>  
> -	mempool_free(io->header_page, ppl_conf->meta_pool);
> -
>  	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
>  		list_del_init(&sh->log_list);
>  
> @@ -1007,7 +1034,6 @@ static void __ppl_exit_log(struct ppl_conf *ppl_conf)
>  
>  	kfree(ppl_conf->child_logs);
>  
> -	mempool_destroy(ppl_conf->meta_pool);
>  	if (ppl_conf->bs)
>  		bioset_free(ppl_conf->bs);
>  	mempool_destroy(ppl_conf->io_pool);
> @@ -1113,25 +1139,20 @@ int ppl_init_log(struct r5conf *conf)
>  
>  	ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0);
>  	if (!ppl_conf->io_kc) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
> -	ppl_conf->io_pool = mempool_create_slab_pool(conf->raid_disks, ppl_conf->io_kc);
> +	ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
> +					   ppl_io_pool_free, ppl_conf->io_kc);
>  	if (!ppl_conf->io_pool) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
>  	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
>  	if (!ppl_conf->bs) {
> -		ret = -EINVAL;
> -		goto err;
> -	}
> -
> -	ppl_conf->meta_pool = mempool_create_page_pool(conf->raid_disks, 0);
> -	if (!ppl_conf->meta_pool) {
> -		ret = -EINVAL;
> +		ret = -ENOMEM;
>  		goto err;
>  	}
>  
> -- 
> 2.11.0
> 


Thread overview: 11+ messages
2017-04-04 11:13 [PATCH 0/4] PPL fixes and improvements Artur Paszkiewicz
2017-04-04 11:13 ` [PATCH 1/4] raid5-ppl: use a single mempool for ppl_io_unit and header_page Artur Paszkiewicz
2017-04-10 19:09   ` Shaohua Li
2017-04-11  8:28     ` Artur Paszkiewicz
2017-04-11  9:20       ` Artur Paszkiewicz
2017-04-11 15:41         ` Shaohua Li
2017-04-11 18:50           ` [PATCH v2] " Artur Paszkiewicz
2017-04-11 21:58             ` Shaohua Li
2017-04-04 11:13 ` [PATCH 2/4] raid5-ppl: move no_mem_stripes to struct ppl_conf Artur Paszkiewicz
2017-04-04 11:13 ` [PATCH 3/4] raid5-ppl: use resize_stripes() when enabling or disabling ppl Artur Paszkiewicz
2017-04-04 11:13 ` [PATCH 4/4] raid5-ppl: partial parity calculation optimization Artur Paszkiewicz
