From: Steven Whitehouse
Date: Mon, 3 Jul 2017 10:26:31 +0100
Subject: [Cluster-devel] [GFS2 PATCH] GFS2: Rework rgrp glock congestion functions for intra-node
In-Reply-To: <513776377.27803960.1498841020404.JavaMail.zimbra@redhat.com>
References: <513776377.27803960.1498841020404.JavaMail.zimbra@redhat.com>
Message-ID: <155a05d7-f43e-f24a-c67a-b92e7e5974be@redhat.com>
To: cluster-devel.redhat.com

Hi,

On 30/06/17 17:43, Bob Peterson wrote:
> Hi,
>
> This patch reworks the "congestion" algorithms for resource group
> glocks to take into account intra-node demands as well as inter-
> node demands.
>
> Signed-off-by: Bob Peterson
I'm not sure I really understand what this is trying to achieve. What
is better about the resulting performance/block layout?

Steve.

> ---
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index ad62bfb..903f58b 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -1807,6 +1807,21 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
>  }
>  
>  /**
> + * gfs2_rgrp_used_recently
> + * @rgd: The rgrp to test
> + * @msecs: The time limit in milliseconds
> + *
> + * Returns: True if the rgrp glock has been used within the time limit
> + */
> +static inline bool gfs2_rgrp_used_recently(const struct gfs2_rgrpd *rgd,
> +					   u64 msecs)
> +{
> +	return (ktime_before(ktime_get_real(),
> +			     ktime_add(rgd->rd_gl->gl_dstamp,
> +				       ms_to_ktime(msecs))));
> +}
> +
> +/**
>   * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
>   * @rgd: The rgrp in question
>   * @loops: An indication of how picky we can be (0=very, 1=less so)
> @@ -1833,7 +1848,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
>   * Returns: A boolean verdict on the congestion status
>   */
>  
> -static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
> +static bool gfs2_rgrp_congested_dlm(const struct gfs2_rgrpd *rgd, int loops)
>  {
>  	const struct gfs2_glock *gl = rgd->rd_gl;
>  	const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> @@ -1845,6 +1860,11 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
>  	u64 var;
>  	int cpu, nonzero = 0;
>  
> +	/* If it hasn't been used recently we can't judge the statistics, so
> +	   assume it's not congested. */
> +	if (!gfs2_rgrp_used_recently(rgd, HZ))
> +		return false;
> +
>  	preempt_disable();
>  	for_each_present_cpu(cpu) {
>  		st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
> @@ -1880,21 +1900,66 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
>  }
>  
>  /**
> - * gfs2_rgrp_used_recently
> - * @rs: The block reservation with the rgrp to test
> - * @msecs: The time limit in milliseconds
> + * fast_to_acquire - determine if a resource group will be fast to acquire
>   *
> - * Returns: True if the rgrp glock has been used within the time limit
> + * If this is one of our preferred rgrps, it should be quicker to acquire,
> + * because we tried to set ourselves up as dlm lock master.
> + */
> +static inline bool fast_to_acquire(const struct gfs2_rgrpd *rgd)
> +{
> +	struct gfs2_glock *gl = rgd->rd_gl;
> +
> +	if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
> +	    !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
> +	    !test_bit(GLF_DEMOTE, &gl->gl_flags))
> +		return true;
> +	if (rgd->rd_flags & GFS2_RDF_PREFERRED)
> +		return true;
> +	return false;
> +}
> +
> +/**
> + * gfs2_rgrp_congested - decide whether a rgrp glock is congested
> + * @rgd: The rgrp in question
> + * @loops: An indication of how picky we can be (0=very, 1=less so)
> + *
> + * There are two kinds of congestion: inter-node and intra-node.
> + *
> + * Inter-node congestion is where multiple nodes all want to allocate blocks
> + * inside the same rgrp, which means they need to trade the rgrp glock back
> + * and forth, which is a slow process. To mitigate this, we use glock
> + * statistics to predict whether the glock is historically fast to acquire.
> + *
> + * Intra-node congestion is where you have multiple processes on the same
> + * node, all trying to allocate blocks in the same rgrp. There's nothing wrong
> + * with doing so, but each process needs to wait for the other to release the
> + * rgrp glock before it may proceed. We can predict whether a rgrp glock is
> + * congested by how many block reservations are currently attached.
> + *
> + * Both kinds of congestion can hurt performance, but it's faster to check
> + * intra-node, so we do that first. After all, why bother to check if we can
> + * get the glock quickly from DLM if other processes have also used that
> + * same reasoning.
> + *
> + * We know the number of loops we've been around, so we know how desperate we
> + * are to find something. On first loop, call it congested if anyone else has
> + * a block reservation. On second loop, call it congested if it's not fast to
> + * acquire.
> + */
> -static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
> -				    u64 msecs)
> +static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
>  {
> -	u64 tdiff;
> +	/* Check for intra-node congestion */
> +	if (loops == 0 && !RB_EMPTY_ROOT(&rgd->rd_rstree))
> +		return true;
> +
> +	if (loops == 1 && !fast_to_acquire(rgd))
> +		return true;
>  
> -	tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
> -			    rs->rs_rbm.rgd->rd_gl->gl_dstamp));
> +	/* Check for inter-node congestion */
> +	if (rgd->rd_sbd->sd_lockstruct.ls_ops->lm_lock) /* lock_dlm */
> +		return gfs2_rgrp_congested_dlm(rgd, loops);
>  
> -	return tdiff > (msecs * 1000 * 1000);
> +	return false;
>  }
>  
>  static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
> @@ -1921,25 +1986,6 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
>  }
>  
>  /**
> - * fast_to_acquire - determine if a resource group will be fast to acquire
> - *
> - * If this is one of our preferred rgrps, it should be quicker to acquire,
> - * because we tried to set ourselves up as dlm lock master.
> - */
> -static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
> -{
> -	struct gfs2_glock *gl = rgd->rd_gl;
> -
> -	if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
> -	    !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
> -	    !test_bit(GLF_DEMOTE, &gl->gl_flags))
> -		return 1;
> -	if (rgd->rd_flags & GFS2_RDF_PREFERRED)
> -		return 1;
> -	return 0;
> -}
> -
> -/**
>   * gfs2_inplace_reserve - Reserve space in the filesystem
>   * @ip: the inode to reserve space for
>   * @ap: the allocation parameters
> @@ -1995,7 +2041,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
>  			    !fast_to_acquire(rs->rs_rbm.rgd))
>  				goto next_rgrp;
>  			if ((loops < 2) &&
> -			    gfs2_rgrp_used_recently(rs, 1000) &&
>  			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
>  				goto next_rgrp;
>  		}
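
For anyone trying to follow the intent from the hunks above, here is a rough
standalone sketch of how the reworked check seems meant to interact with the
allocation retry loop. This is not the GFS2 code: the "toy_" types and helpers
are made-up stand-ins, and the loop is boiled down to just the escalation the
new comment block describes.

/* Simplified illustration only -- invented stand-ins, not the real
 * GFS2/DLM structures or functions. */
#include <stdbool.h>
#include <stddef.h>

struct toy_rgrp {
	bool has_other_reservations; /* stand-in for !RB_EMPTY_ROOT(&rd_rstree) */
	bool fast_to_acquire;        /* stand-in for fast_to_acquire(rgd) */
	bool dlm_stats_congested;    /* stand-in for gfs2_rgrp_congested_dlm() */
};

/* Mirrors the escalation described in the patch comment:
 * loop 0: call it congested if any other reservation is attached
 * loop 1: call it congested if the glock does not look fast to acquire
 * either way, fall back to the DLM statistics for inter-node congestion */
static bool toy_rgrp_congested(const struct toy_rgrp *rgd, int loops)
{
	if (loops == 0 && rgd->has_other_reservations)
		return true;
	if (loops == 1 && !rgd->fast_to_acquire)
		return true;
	return rgd->dlm_stats_congested;
}

/* Caller side: walk the resource groups up to three times, getting less
 * picky each pass; only the first two passes skip a "congested" rgrp. */
static const struct toy_rgrp *toy_pick_rgrp(const struct toy_rgrp *rgrps,
					    size_t n)
{
	for (int loops = 0; loops < 3; loops++) {
		for (size_t i = 0; i < n; i++) {
			if (loops < 2 && toy_rgrp_congested(&rgrps[i], loops))
				continue; /* try the next rgrp on this pass */
			return &rgrps[i];
		}
	}
	return NULL; /* nothing acceptable: filesystem effectively full */
}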