[Cluster-devel] [GFS2 PATCH] GFS2: Queue final dlm unlock from gfs2_evict_inode

cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed

From: Steven Whitehouse <swhiteho@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [GFS2 PATCH] GFS2: Queue final dlm unlock from gfs2_evict_inode
Date: Tue, 2 Feb 2016 11:53:49 +0000	[thread overview]
Message-ID: <56B098CD.9010201@redhat.com> (raw)
In-Reply-To: <853571928.36038478.1450464138537.JavaMail.zimbra@redhat.com>

Hi,

On 18/12/15 18:42, Bob Peterson wrote:
> Hi,
>
> This patch introduces a new glock workqueue, gfs2_glock_final. The
> workqueue merely does work to call dlm's unlock.
> This prevents gfs2_evict_inode from calling dlm directly which
> might block, waiting for DLM to unlock, which may be waiting for
> something like a fence operation. By moving it to its own work queue,
> the final put happens later, which allows the shrinker to continue,
> and free memory, avoiding a livelock.
I don't think this is going to work... if the DLM lock put is delayed 
then it would be possible for a new glock to be created (or the existing 
one to be reused) if the inode gets recreated, and then there is no 
coordination between the unlock and the new lock request. If those were 
to be sent in the wrong order, then things are going to get very confused.

If you add the coordination then we are back to square one, in terms of 
the deadlock in this loop. The only way we can stop this is to try and 
avoid inode deallocation in the eviction path. By the time we've already 
decided to do the eviction, it is too late,

Steve.

> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index 795c2f3..9d0f3d5 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -63,6 +63,7 @@ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int
>   
>   static struct dentry *gfs2_root;
>   static struct workqueue_struct *glock_workqueue;
> +static struct workqueue_struct *gfs2_final_workqueue;
>   struct workqueue_struct *gfs2_delete_workqueue;
>   static LIST_HEAD(lru_list);
>   static atomic_t lru_count = ATOMIC_INIT(0);
> @@ -152,6 +153,20 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
>   	spin_unlock(&lru_lock);
>   }
>   
> +/* The purpose of this function is to tell dlm when a glock is not needed
> + * We can't do this directly from gfs2_glock_put because dlm may block while
> + * waiting for a fence operation to complete. But the fence may block on
> + * memory allocation, which may block on the shrinker, which may block on
> + * the evict code. So the buck stops here.
> + */
> +static void final_work_func(struct work_struct *work)
> +{
> +	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_final);
> +	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> +
> +	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
> +}
> +
>   /**
>    * gfs2_glock_put() - Decrement reference count on glock
>    * @gl: The glock to put
> @@ -160,7 +175,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
>   
>   void gfs2_glock_put(struct gfs2_glock *gl)
>   {
> -	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
>   	struct address_space *mapping = gfs2_glock2aspace(gl);
>   
>   	if (lockref_put_or_lock(&gl->gl_lockref))
> @@ -174,7 +188,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
>   	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
>   	GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
>   	trace_gfs2_glock_put(gl);
> -	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
> +	BUG_ON(queue_work(gfs2_final_workqueue, &gl->gl_final) == 0);
>   }
>   
>   /**
> @@ -700,6 +714,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
>   	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
>   	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
>   	INIT_WORK(&gl->gl_delete, delete_work_func);
> +	INIT_WORK(&gl->gl_final, final_work_func);
>   
>   	mapping = gfs2_glock2aspace(gl);
>   	if (mapping) {
> @@ -1772,6 +1787,14 @@ int __init gfs2_glock_init(void)
>   		rhashtable_destroy(&gl_hash_table);
>   		return -ENOMEM;
>   	}
> +	gfs2_final_workqueue = alloc_workqueue("final_workqueue",
> +					       WQ_MEM_RECLAIM | WQ_HIGHPRI |
> +					       WQ_FREEZABLE, 0);
> +	if (IS_ERR(gfs2_final_workqueue)) {
> +		destroy_workqueue(glock_workqueue);
> +		destroy_workqueue(gfs2_delete_workqueue);
> +		return PTR_ERR(gfs2_final_workqueue);
> +	}
>   
>   	register_shrinker(&glock_shrinker);
>   
> @@ -1784,6 +1807,7 @@ void gfs2_glock_exit(void)
>   	rhashtable_destroy(&gl_hash_table);
>   	destroy_workqueue(glock_workqueue);
>   	destroy_workqueue(gfs2_delete_workqueue);
> +	destroy_workqueue(gfs2_final_workqueue);
>   }
>   
>   static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index a6a3389..1b63fbc 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -361,6 +361,7 @@ struct gfs2_glock {
>   	atomic_t gl_ail_count;
>   	atomic_t gl_revokes;
>   	struct delayed_work gl_work;
> +	struct work_struct gl_final;
>   	union {
>   		/* For inode and iopen glocks only */
>   		struct work_struct gl_delete;
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index 0357862..4232368 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -126,6 +126,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
>   		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
>   		if (unlikely(error))
>   			goto fail;
> +
> +		flush_work(&ip->i_gl->gl_final);
>   		ip->i_gl->gl_object = ip;
>   
>   		error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
> @@ -189,6 +191,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
>   	if (error)
>   		return ERR_PTR(error);
>   
> +	flush_work(&i_gh.gh_gl->gl_final);
>   	error = gfs2_check_blk_type(sdp, no_addr, blktype);
>   	if (error)
>   		goto fail;
> @@ -681,6 +684,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
>   	if (error)
>   		goto fail_free_inode;
>   
> +	flush_work(&ip->i_gl->gl_final);
>   	ip->i_gl->gl_object = ip;
>   	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
>   	if (error)
>

next prev parent reply	other threads:[~2016-02-02 11:53 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <869029287.36038231.1450464080200.JavaMail.zimbra@redhat.com>
2015-12-18 18:42 ` [Cluster-devel] [GFS2 PATCH] GFS2: Queue final dlm unlock from gfs2_evict_inode Bob Peterson
2016-02-02 11:53   ` Steven Whitehouse [this message]
2016-02-10 19:59     ` [Cluster-devel] [GFS2 PATCH] GFS2: Delay glock disposal by way of a rubbish list Bob Peterson
2016-02-11 15:34       ` Steven Whitehouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56B098CD.9010201@redhat.com \
    --to=swhiteho@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).