All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: Christoph Hellwig <hch@infradead.org>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH 6/6] xfs: make discard operations asynchronous
Date: Wed, 23 Mar 2011 11:43:23 +1100	[thread overview]
Message-ID: <20110323004323.GH15270@dastard> (raw)
In-Reply-To: <20110322200138.216042448@bombadil.infradead.org>

On Tue, Mar 22, 2011 at 03:55:56PM -0400, Christoph Hellwig wrote:
> Instead of waiting for each discard request keep the CIL context alive
> until all of them are done, at which point we can tear it down completly
> and remove the busy extents from the rbtree.
> 
> At this point I'm doing the I/O completion from IRQ context for simplicity,
> but I'll benchmark it against a version that uses a workqueue.

A workqueue is probably a good idea, because then the processing has
some level of concurrency built into it. It also means we don't need
to convert the locking to irq-safe variants and all the overhead
that this introduces.

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: xfs/fs/xfs/linux-2.6/xfs_discard.c
> ===================================================================
> --- xfs.orig/fs/xfs/linux-2.6/xfs_discard.c	2011-03-22 15:58:10.301855813 +0100
> +++ xfs/fs/xfs/linux-2.6/xfs_discard.c	2011-03-22 18:39:09.000000000 +0100
> @@ -30,6 +30,7 @@
>  #include "xfs_inode.h"
>  #include "xfs_alloc.h"
>  #include "xfs_error.h"
> +#include "xfs_log_priv.h"
>  #include "xfs_discard.h"
>  #include "xfs_trace.h"
>  
> @@ -192,37 +193,119 @@ xfs_ioc_trim(
>  	return 0;
>  }
>  
> +void
> +xfs_cil_discard_done(
> +	struct xfs_cil_ctx	*ctx)
> +{
> +	if (atomic_dec_and_test(&ctx->discards)) {
> +		struct xfs_busy_extent	*busyp, *n;
> +
> +		list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
> +			xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
> +		kmem_free(ctx);
> +	}
> +}
> +
> +STATIC void
> +xfs_discard_end_io(
> +	struct bio		*bio,
> +	int			err)
> +{
> +	struct xfs_cil_ctx	*ctx = bio->bi_private;
> +
> +	if (err && err != -EOPNOTSUPP) {
> +		xfs_info(ctx->cil->xc_log->l_mp,
> +			 "I/O error during discard\n");
> +	}

Same comment about the bno/len in the error message as the previous
patch.

> +
> +	bio_put(bio);
> +	xfs_cil_discard_done(ctx);
> +}
> +
> +static int
> +xfs_issue_discard(
> +	struct block_device	*bdev,
> +	sector_t		sector,
> +	sector_t		nr_sects,
> +	gfp_t			gfp_mask,
> +	struct xfs_cil_ctx	*ctx)
> +{
> +	struct request_queue	*q = bdev_get_queue(bdev);
> +	unsigned int		max_discard_sectors;
> +	struct bio		*bio;
> +	int			ret = 0;
> +
> +	if (!q)
> +		return -ENXIO;
> +
> +	if (!blk_queue_discard(q))
> +		return -EOPNOTSUPP;
> +
> +	/*
> +	 * Ensure that max_discard_sectors is of the proper
> +	 * granularity
> +	 */
> +	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
> +	if (q->limits.discard_granularity) {
> +		unsigned int disc_sects = q->limits.discard_granularity >> 9;
> +
> +		max_discard_sectors &= ~(disc_sects - 1);
> +	}

This is asking for a helper function....

> +
> +
> +	while (nr_sects && !ret) {

no need to check ret here.

> +		bio = bio_alloc(gfp_mask, 1);
> +		if (!bio) {
> +			ret = -ENOMEM;
> +			break;
> +		}
> +
> +		bio->bi_sector = sector;
> +		bio->bi_end_io = xfs_discard_end_io;
> +		bio->bi_bdev = bdev;
> +		bio->bi_private = ctx;
> +
> +		if (nr_sects > max_discard_sectors) {
> +			bio->bi_size = max_discard_sectors << 9;
> +			nr_sects -= max_discard_sectors;
> +			sector += max_discard_sectors;
> +		} else {
> +			bio->bi_size = nr_sects << 9;
> +			nr_sects = 0;
> +		}
> +
> +		atomic_inc(&ctx->discards);
> +		submit_bio(REQ_WRITE | REQ_DISCARD, bio);
> +	}
> +
> +	return ret;
> +}
> +
>  int
>  xfs_discard_extent(
>  	struct xfs_mount	*mp,
> -	struct xfs_busy_extent	*busyp)
> +	struct xfs_busy_extent	*busyp,
> +	struct xfs_cil_ctx	*ctx)
>  {
>  	struct xfs_perag	*pag;
> -	int			error = 0;
>  	xfs_daddr_t		bno;
>  	int64_t			len;
>  	bool			done  = false;
>  
> -	if ((mp->m_flags & XFS_MOUNT_DISCARD) == 0)
> -		return 0;
> -
>  	bno = XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno);
>  	len = XFS_FSB_TO_BB(mp, busyp->length);
>  
>  	pag = xfs_perag_get(mp, busyp->agno);
> -	spin_lock(&pag->pagb_lock);
> +	spin_lock_irq(&pag->pagb_lock);
>  	if (!busyp->length)
>  		done = true;
>  	busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
> -	spin_unlock(&pag->pagb_lock);
> +	spin_unlock_irq(&pag->pagb_lock);

Disabling/enabling interrupts on these locks could hurt quite a bit.
They are travelled quite frequently, and irq operations add quite a
bit of overhead....

>  	xfs_perag_put(pag);
>  
>  	if (done)
>  		return 0;
>  
> -	error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, bno, len,
> -				      GFP_NOFS, 0);
> -	if (error && error != EOPNOTSUPP)
> -		xfs_info(mp, "discard failed, error %d", error);
> -	return error;
> +	return -xfs_issue_discard(mp->m_ddev_targp->bt_bdev,
> +				  bno, len, GFP_NOFS, ctx);
>  }
> Index: xfs/fs/xfs/linux-2.6/xfs_discard.h
> ===================================================================
> --- xfs.orig/fs/xfs/linux-2.6/xfs_discard.h	2011-03-22 15:58:10.313857879 +0100
> +++ xfs/fs/xfs/linux-2.6/xfs_discard.h	2011-03-22 18:39:09.000000000 +0100
> @@ -3,10 +3,13 @@
>  
>  struct fstrim_range;
>  struct xfs_busy_extent;
> +struct xfs_cil_ctx;
>  
>  extern int	xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
>  
>  extern int	xfs_discard_extent(struct xfs_mount *,
> -				   struct xfs_busy_extent *);
> +				   struct xfs_busy_extent *,
> +				   struct xfs_cil_ctx *);
> +extern void	xfs_cil_discard_done(struct xfs_cil_ctx	*ctx);
>  
>  #endif /* XFS_DISCARD_H */
> Index: xfs/fs/xfs/xfs_log_cil.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_log_cil.c	2011-03-22 15:58:10.329855977 +0100
> +++ xfs/fs/xfs/xfs_log_cil.c	2011-03-22 18:39:09.000000000 +0100
> @@ -68,6 +68,7 @@ xlog_cil_init(
>  	INIT_LIST_HEAD(&ctx->busy_extents);
>  	ctx->sequence = 1;
>  	ctx->cil = cil;
> +	atomic_set(&ctx->discards, 1);
>  	cil->xc_ctx = ctx;
>  	cil->xc_current_sequence = ctx->sequence;
>  
> @@ -364,14 +365,18 @@ xlog_cil_committed(
>  	struct xfs_cil_ctx	*ctx = args;
>  	struct xfs_mount	*mp = ctx->cil->xc_log->l_mp;
>  	struct xfs_busy_extent	*busyp, *n;
> +	bool			keep_alive = false;
>  
>  	xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
>  					ctx->start_lsn, abort);
>  
> -	list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) {
> -		if (!abort)
> -			xfs_discard_extent(mp, busyp);
> -		xfs_alloc_busy_clear(mp, busyp);
> +	if (!(mp->m_flags & XFS_MOUNT_DISCARD) || abort) {
> +		list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
> +			xfs_alloc_busy_clear(mp, busyp);
> +	} else if (!list_empty(&ctx->busy_extents)) {
> +		list_for_each_entry(busyp, &ctx->busy_extents, list)
> +			xfs_discard_extent(mp, busyp, ctx);
> +		keep_alive = true;
>  	}

Oh, I see you've moved the XFS_MOUNT_DISCARD into the loop here...

>  
>  	spin_lock(&ctx->cil->xc_cil_lock);
> @@ -379,7 +384,10 @@ xlog_cil_committed(
>  	spin_unlock(&ctx->cil->xc_cil_lock);
>  
>  	xlog_cil_free_logvec(ctx->lv_chain);
> -	kmem_free(ctx);
> +	if (keep_alive)
> +		xfs_cil_discard_done(ctx);
> +	else
> +		kmem_free(ctx);

You could probably just call xfs_cil_discard_done(ctx) here as the
busy extent list will be empty in the !keep_alive case and so it
will simply do the kmem_free(ctx) call there.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  reply	other threads:[~2011-03-23  0:40 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-03-22 19:55 [PATCH 0/6] [PATCH 0/6] more efficient busy extent handling and discard support Christoph Hellwig
2011-03-22 19:55 ` [PATCH 1/6] xfs: optimize AGFL refills Christoph Hellwig
2011-03-22 22:30   ` Alex Elder
2011-03-23 12:16     ` Christoph Hellwig
2011-03-25 21:03       ` Alex Elder
2011-03-28 12:07         ` Christoph Hellwig
2011-03-22 23:30   ` Dave Chinner
2011-03-23 12:16     ` Christoph Hellwig
2011-03-22 19:55 ` [PATCH 2/6] xfs: do not immediately reuse busy extent ranges Christoph Hellwig
2011-03-22 22:30   ` Alex Elder
2011-03-23 12:17     ` Christoph Hellwig
2011-03-25 21:03   ` Alex Elder
2011-03-28 12:07     ` Christoph Hellwig
2011-03-22 19:55 ` [PATCH 3/6] xfs: exact busy extent tracking Christoph Hellwig
2011-03-22 23:47   ` Dave Chinner
2011-03-23 12:24     ` Christoph Hellwig
2011-03-25 21:04   ` Alex Elder
2011-03-28 12:10     ` Christoph Hellwig
2011-03-22 19:55 ` [PATCH 4/6] xfs: allow reusing busy extents where safe Christoph Hellwig
2011-03-23  0:20   ` Dave Chinner
2011-03-23 12:26     ` Christoph Hellwig
2011-03-25 21:04   ` Alex Elder
2011-03-22 19:55 ` [PATCH 5/6] xfs: add online discard support Christoph Hellwig
2011-03-23  0:30   ` Dave Chinner
2011-03-23 12:31     ` Christoph Hellwig
2011-03-25 21:04   ` Alex Elder
2011-03-28 12:35     ` Christoph Hellwig
2011-03-22 19:55 ` [PATCH 6/6] xfs: make discard operations asynchronous Christoph Hellwig
2011-03-23  0:43   ` Dave Chinner [this message]
2011-03-28 12:44     ` Christoph Hellwig
2011-03-25 21:04   ` Alex Elder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110323004323.GH15270@dastard \
    --to=david@fromorbit.com \
    --cc=hch@infradead.org \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.