All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] staging: zcache: fix serialization bug in zv stats
@ 2011-12-30 16:42 Seth Jennings
  2012-02-06 21:18 ` Seth Jennings
  0 siblings, 1 reply; 7+ messages in thread
From: Seth Jennings @ 2011-12-30 16:42 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Seth Jennings, Dan Magenheimer, Brian King, devel, linux-kernel

In a multithreaded workload, the zv_curr_dist_counts
and zv_cumul_dist_counts statistics are being corrupted
because the increments and decrements in zv_create
and zv_free are not atomic.

This patch converts these statistics and their corresponding
increments/decrements/reads to atomic operations.

Based on v3.2-rc7

Signed-off-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
---
 drivers/staging/zcache/zcache-main.c |   14 +++++++-------
 1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
index 56c1f9c..d39bb51 100644
--- a/drivers/staging/zcache/zcache-main.c
+++ b/drivers/staging/zcache/zcache-main.c
@@ -655,8 +655,8 @@ static unsigned int zv_max_zsize = (PAGE_SIZE / 8) * 7;
  */
 static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;
 
-static unsigned long zv_curr_dist_counts[NCHUNKS];
-static unsigned long zv_cumul_dist_counts[NCHUNKS];
+static atomic_t zv_curr_dist_counts[NCHUNKS];
+static atomic_t zv_cumul_dist_counts[NCHUNKS];
 
 static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
 				struct tmem_oid *oid, uint32_t index,
@@ -675,8 +675,8 @@ static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
 			&page, &offset, ZCACHE_GFP_MASK);
 	if (unlikely(ret))
 		goto out;
-	zv_curr_dist_counts[chunks]++;
-	zv_cumul_dist_counts[chunks]++;
+	atomic_inc(&zv_curr_dist_counts[chunks]);
+	atomic_inc(&zv_cumul_dist_counts[chunks]);
 	zv = kmap_atomic(page, KM_USER0) + offset;
 	zv->index = index;
 	zv->oid = *oid;
@@ -698,7 +698,7 @@ static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)
 
 	ASSERT_SENTINEL(zv, ZVH);
 	BUG_ON(chunks >= NCHUNKS);
-	zv_curr_dist_counts[chunks]--;
+	atomic_dec(&zv_curr_dist_counts[chunks]);
 	size -= sizeof(*zv);
 	BUG_ON(size == 0);
 	INVERT_SENTINEL(zv, ZVH);
@@ -738,7 +738,7 @@ static int zv_curr_dist_counts_show(char *buf)
 	char *p = buf;
 
 	for (i = 0; i < NCHUNKS; i++) {
-		n = zv_curr_dist_counts[i];
+		n = atomic_read(&zv_curr_dist_counts[i]);
 		p += sprintf(p, "%lu ", n);
 		chunks += n;
 		sum_total_chunks += i * n;
@@ -754,7 +754,7 @@ static int zv_cumul_dist_counts_show(char *buf)
 	char *p = buf;
 
 	for (i = 0; i < NCHUNKS; i++) {
-		n = zv_cumul_dist_counts[i];
+		n = atomic_read(&zv_cumul_dist_counts[i]);
 		p += sprintf(p, "%lu ", n);
 		chunks += n;
 		sum_total_chunks += i * n;
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* RE: [PATCH] staging: zcache: fix serialization bug in zv stats
       [not found] <<1325263335-16254-1-git-send-email-sjenning@linux.vnet.ibm.com>
@ 2011-12-30 17:02 ` Dan Magenheimer
  2011-12-30 17:27   ` Seth Jennings
  2011-12-30 17:35   ` Seth Jennings
  0 siblings, 2 replies; 7+ messages in thread
From: Dan Magenheimer @ 2011-12-30 17:02 UTC (permalink / raw)
  To: Seth Jennings, Greg Kroah-Hartman
  Cc: Brian King, devel, linux-kernel, Konrad Wilk, Nitin Gupta

> From: Seth Jennings [mailto:sjenning@linux.vnet.ibm.com]
> Sent: Friday, December 30, 2011 9:42 AM
> To: Greg Kroah-Hartman
> Cc: Seth Jennings; Dan Magenheimer; Brian King; devel@driverdev.osuosl.org; linux-
> kernel@vger.kernel.org
> Subject: [PATCH] staging: zcache: fix serialization bug in zv stats
> 
> In a multithreaded workload, the zv_curr_dist_counts
> and zv_cumul_dist_counts statistics are being corrupted
> because the increments and decrements in zv_create
> and zv_free are not atomic.
> 
> This patch converts these statistics and their corresponding
> increments/decrements/reads to atomic operations.
> 
> Based on v3.2-rc7
> 
> Signed-off-by: Seth Jennings <sjenning@linux.vnet.ibm.com>

I'm inclined to nack this change, at least unless inside an #ifdef DEBUG,
as these counts are interesting to a developer but not useful to a normal
end user, whereas the incremental cost for atomic_inc and atomic_dec is
non-trivial.  I don't think any off-by-one in these counters could
result in a bug and, before promotion from staging, they probably
should just go away.  (They are fun to "watch -d" though ;-)

That said, as a developer, I too am annoyed by the occasional 64-bit
"negative unsigned" that show up in the output but IMHO a good fix for
that might be simply for the "show" routine to convert negative values
to zero before printing.

> ---
>  drivers/staging/zcache/zcache-main.c |   14 +++++++-------
>  1 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
> index 56c1f9c..d39bb51 100644
> --- a/drivers/staging/zcache/zcache-main.c
> +++ b/drivers/staging/zcache/zcache-main.c
> @@ -655,8 +655,8 @@ static unsigned int zv_max_zsize = (PAGE_SIZE / 8) * 7;
>   */
>  static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;
> 
> -static unsigned long zv_curr_dist_counts[NCHUNKS];
> -static unsigned long zv_cumul_dist_counts[NCHUNKS];
> +static atomic_t zv_curr_dist_counts[NCHUNKS];
> +static atomic_t zv_cumul_dist_counts[NCHUNKS];
> 
>  static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
>  				struct tmem_oid *oid, uint32_t index,
> @@ -675,8 +675,8 @@ static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
>  			&page, &offset, ZCACHE_GFP_MASK);
>  	if (unlikely(ret))
>  		goto out;
> -	zv_curr_dist_counts[chunks]++;
> -	zv_cumul_dist_counts[chunks]++;
> +	atomic_inc(&zv_curr_dist_counts[chunks]);
> +	atomic_inc(&zv_cumul_dist_counts[chunks]);
>  	zv = kmap_atomic(page, KM_USER0) + offset;
>  	zv->index = index;
>  	zv->oid = *oid;
> @@ -698,7 +698,7 @@ static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)
> 
>  	ASSERT_SENTINEL(zv, ZVH);
>  	BUG_ON(chunks >= NCHUNKS);
> -	zv_curr_dist_counts[chunks]--;
> +	atomic_dec(&zv_curr_dist_counts[chunks]);
>  	size -= sizeof(*zv);
>  	BUG_ON(size == 0);
>  	INVERT_SENTINEL(zv, ZVH);
> @@ -738,7 +738,7 @@ static int zv_curr_dist_counts_show(char *buf)
>  	char *p = buf;
> 
>  	for (i = 0; i < NCHUNKS; i++) {
> -		n = zv_curr_dist_counts[i];
> +		n = atomic_read(&zv_curr_dist_counts[i]);
>  		p += sprintf(p, "%lu ", n);
>  		chunks += n;
>  		sum_total_chunks += i * n;
> @@ -754,7 +754,7 @@ static int zv_cumul_dist_counts_show(char *buf)
>  	char *p = buf;
> 
>  	for (i = 0; i < NCHUNKS; i++) {
> -		n = zv_cumul_dist_counts[i];
> +		n = atomic_read(&zv_cumul_dist_counts[i]);
>  		p += sprintf(p, "%lu ", n);
>  		chunks += n;
>  		sum_total_chunks += i * n;
> --
> 1.7.5.4

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] staging: zcache: fix serialization bug in zv stats
  2011-12-30 17:02 ` [PATCH] staging: zcache: fix serialization bug in zv stats Dan Magenheimer
@ 2011-12-30 17:27   ` Seth Jennings
  2011-12-30 17:31     ` Dan Magenheimer
  2011-12-30 17:35   ` Seth Jennings
  1 sibling, 1 reply; 7+ messages in thread
From: Seth Jennings @ 2011-12-30 17:27 UTC (permalink / raw)
  To: Dan Magenheimer
  Cc: Greg Kroah-Hartman, Brian King, devel, linux-kernel, Konrad Wilk,
	Nitin Gupta

On 12/30/2011 11:02 AM, Dan Magenheimer wrote:
>> From: Seth Jennings [mailto:sjenning@linux.vnet.ibm.com]
>> Sent: Friday, December 30, 2011 9:42 AM
>> To: Greg Kroah-Hartman
>> Cc: Seth Jennings; Dan Magenheimer; Brian King; devel@driverdev.osuosl.org; linux-
>> kernel@vger.kernel.org
>> Subject: [PATCH] staging: zcache: fix serialization bug in zv stats
>>
>> In a multithreaded workload, the zv_curr_dist_counts
>> and zv_cumul_dist_counts statistics are being corrupted
>> because the increments and decrements in zv_create
>> and zv_free are not atomic.
>>
>> This patch converts these statistics and their corresponding
>> increments/decrements/reads to atomic operations.
>>
>> Based on v3.2-rc7
>>
>> Signed-off-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
> 
> I'm inclined to nack this change, at least unless inside an #ifdef DEBUG,
> as these counts are interesting to a developer but not useful to a normal
> end user, whereas the incremental cost for atomic_inc and atomic_dec are
> non-trivial.  I don't think any off-by-one in these counters could
> result in a bug and, before promotion from staging, they probably
> should just go away.  (They are fun to "watch -d" though ;-)

In my test, it hammers on a particular chunk size and the counter is off
by hundreds :-/

I too was worried about performance impact, however, my tests showed
no degradation.  That's probably because there are bigger bottlenecks
elsewhere.

Perhaps we can commit this for now, so that the code is correct, and
revisit this when we try to replace zbud with zsmalloc.  I'm sure
we'll have to rethink the statistics at that time.

The only other option, IMO, is to remove the chunk stats altogether
until we can find a solution that is both fast and correct.

I think that continuing with incorrect stats, regardless of the degree
to which they are incorrect, isn't really a viable option.

--
Seth


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH] staging: zcache: fix serialization bug in zv stats
  2011-12-30 17:27   ` Seth Jennings
@ 2011-12-30 17:31     ` Dan Magenheimer
  0 siblings, 0 replies; 7+ messages in thread
From: Dan Magenheimer @ 2011-12-30 17:31 UTC (permalink / raw)
  To: Seth Jennings
  Cc: Greg Kroah-Hartman, Brian King, devel, linux-kernel, Konrad Wilk,
	Nitin Gupta

> From: Seth Jennings [mailto:sjenning@linux.vnet.ibm.com]
> Subject: Re: [PATCH] staging: zcache: fix serialization bug in zv stats
> 
> On 12/30/2011 11:02 AM, Dan Magenheimer wrote:
> >> From: Seth Jennings [mailto:sjenning@linux.vnet.ibm.com]
> >> Sent: Friday, December 30, 2011 9:42 AM
> >> To: Greg Kroah-Hartman
> >> Cc: Seth Jennings; Dan Magenheimer; Brian King; devel@driverdev.osuosl.org; linux-
> >> kernel@vger.kernel.org
> >> Subject: [PATCH] staging: zcache: fix serialization bug in zv stats
> >>
> >> In a multithreaded workload, the zv_curr_dist_counts
> >> and zv_cumul_dist_counts statistics are being corrupted
> >> because the increments and decrements in zv_create
> >> and zv_free are not atomic.
> >>
> >> This patch converts these statistics and their corresponding
> >> increments/decrements/reads to atomic operations.
> >>
> >> Based on v3.2-rc7
> >>
> >> Signed-off-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
> >
> > I'm inclined to nack this change, at least unless inside an #ifdef DEBUG,
> > as these counts are interesting to a developer but not useful to a normal
> > end user, whereas the incremental cost for atomic_inc and atomic_dec are
> > non-trivial.  I don't think any off-by-one in these counters could
> > result in a bug and, before promotion from staging, they probably
> > should just go away.  (They are fun to "watch -d" though ;-)
> 
> In my test, it hammers on a particular chunk size and the counter is off
> by hundreds :-/
> 
> I too was worried about performance impact, however, my tests showed
> no degradation.  That's probably because there are bigger bottlenecks
> elsewhere.
> 
> Perhaps we can commit this for now, so that the code is correct, and
> revisit this when we try to replace zbud with zsmalloc.  I'm sure
> we'll have to rethink the statistics at that time.
> 
> The only other option, IMO, is to remove the chunk stats altogether
> until we can find a solution that is both fast and correct.
> 
> I think that continuing with incorrect stats, regardless of the degree
> to which they are incorrect, isn't really a viable option.

OK, well I guess as long as this is addressed before promotion
from staging, and until then the heaviest users will be developers
so I agree you are correct that accurate stats are a good thing.
So, consider my nack resolved and:

Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] staging: zcache: fix serialization bug in zv stats
  2011-12-30 17:02 ` [PATCH] staging: zcache: fix serialization bug in zv stats Dan Magenheimer
  2011-12-30 17:27   ` Seth Jennings
@ 2011-12-30 17:35   ` Seth Jennings
  2012-01-03 21:38     ` Konrad Rzeszutek Wilk
  1 sibling, 1 reply; 7+ messages in thread
From: Seth Jennings @ 2011-12-30 17:35 UTC (permalink / raw)
  To: Dan Magenheimer
  Cc: Greg Kroah-Hartman, Brian King, devel, linux-kernel, Konrad Wilk,
	Nitin Gupta

On 12/30/2011 11:02 AM, Dan Magenheimer wrote:
>> From: Seth Jennings [mailto:sjenning@linux.vnet.ibm.com]
>> Sent: Friday, December 30, 2011 9:42 AM
>> To: Greg Kroah-Hartman
>> Cc: Seth Jennings; Dan Magenheimer; Brian King; devel@driverdev.osuosl.org; linux-
>> kernel@vger.kernel.org
>> Subject: [PATCH] staging: zcache: fix serialization bug in zv stats
>>
>> In a multithreaded workload, the zv_curr_dist_counts
>> and zv_cumul_dist_counts statistics are being corrupted
>> because the increments and decrements in zv_create
>> and zv_free are not atomic.
>>
>> This patch converts these statistics and their corresponding
>> increments/decrements/reads to atomic operations.
>>
>> Based on v3.2-rc7
>>
>> Signed-off-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
> 
> I'm inclined to nack this change, at least unless inside an #ifdef DEBUG,

I guess I didn't respond to this suggestion.  We could put #ifdef DEBUG
around the updates in zv_create and zv_free and around the two sysfs
*_show functions.  Kinda messy though.  I guess it might not matter
if we'll be re-evaluating this whole thing soon.

I'd just like to have it correct in mainline until then.

--
Seth


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] staging: zcache: fix serialization bug in zv stats
  2011-12-30 17:35   ` Seth Jennings
@ 2012-01-03 21:38     ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 7+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-01-03 21:38 UTC (permalink / raw)
  To: Seth Jennings
  Cc: Dan Magenheimer, Greg Kroah-Hartman, Brian King, devel,
	linux-kernel, Nitin Gupta

On Fri, Dec 30, 2011 at 11:35:45AM -0600, Seth Jennings wrote:
> On 12/30/2011 11:02 AM, Dan Magenheimer wrote:
> >> From: Seth Jennings [mailto:sjenning@linux.vnet.ibm.com]
> >> Sent: Friday, December 30, 2011 9:42 AM
> >> To: Greg Kroah-Hartman
> >> Cc: Seth Jennings; Dan Magenheimer; Brian King; devel@driverdev.osuosl.org; linux-
> >> kernel@vger.kernel.org
> >> Subject: [PATCH] staging: zcache: fix serialization bug in zv stats
> >>
> >> In a multithreaded workload, the zv_curr_dist_counts
> >> and zv_cumul_dist_counts statistics are being corrupted
> >> because the increments and decrements in zv_create
> >> and zv_free are not atomic.
> >>
> >> This patch converts these statistics and their corresponding
> >> increments/decrements/reads to atomic operations.
> >>
> >> Based on v3.2-rc7
> >>
> >> Signed-off-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
> > 
> > I'm inclined to nack this change, at least unless inside an #ifdef DEBUG,
> 
> I guess I didn't respond to this suggestion.  We could put #ifdef DEBUG
> around the updates in zv_create and zv_free and around the two sysfs
> *_show functions.  Kinda messy though.  I guess it might not matter
> if we'll be re-evaluating this whole thing soon.

Perhaps also add to the TODO file that we want to move the stats collection
out of the code and only enable it if CONFIG_DEBUGFS is set? (Which, granted, means
we need to implement the stats output in debugfs instead of sysfs).

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] staging: zcache: fix serialization bug in zv stats
  2011-12-30 16:42 Seth Jennings
@ 2012-02-06 21:18 ` Seth Jennings
  0 siblings, 0 replies; 7+ messages in thread
From: Seth Jennings @ 2012-02-06 21:18 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Seth Jennings, Dan Magenheimer, Brian King, devel, linux-kernel

Hey Greg,

Just wanted to bump this fix again.  It's small and already Acked by
Dan Magenheimer.  It can go in an -rc release.

https://lkml.org/lkml/2011/12/30/48

Congrats on becoming a Linux Foundation Fellow!

--
Seth

On 12/30/2011 10:42 AM, Seth Jennings wrote:
> In a multithreaded workload, the zv_curr_dist_counts
> and zv_cumul_dist_counts statistics are being corrupted
> because the increments and decrements in zv_create
> and zv_free are not atomic.
> 
> This patch converts these statistics and their corresponding
> increments/decrements/reads to atomic operations.
> 
> Based on v3.2-rc7
> 
> Signed-off-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
> ---
>  drivers/staging/zcache/zcache-main.c |   14 +++++++-------
>  1 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
> index 56c1f9c..d39bb51 100644
> --- a/drivers/staging/zcache/zcache-main.c
> +++ b/drivers/staging/zcache/zcache-main.c
> @@ -655,8 +655,8 @@ static unsigned int zv_max_zsize = (PAGE_SIZE / 8) * 7;
>   */
>  static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;
> 
> -static unsigned long zv_curr_dist_counts[NCHUNKS];
> -static unsigned long zv_cumul_dist_counts[NCHUNKS];
> +static atomic_t zv_curr_dist_counts[NCHUNKS];
> +static atomic_t zv_cumul_dist_counts[NCHUNKS];
> 
>  static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
>  				struct tmem_oid *oid, uint32_t index,
> @@ -675,8 +675,8 @@ static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
>  			&page, &offset, ZCACHE_GFP_MASK);
>  	if (unlikely(ret))
>  		goto out;
> -	zv_curr_dist_counts[chunks]++;
> -	zv_cumul_dist_counts[chunks]++;
> +	atomic_inc(&zv_curr_dist_counts[chunks]);
> +	atomic_inc(&zv_cumul_dist_counts[chunks]);
>  	zv = kmap_atomic(page, KM_USER0) + offset;
>  	zv->index = index;
>  	zv->oid = *oid;
> @@ -698,7 +698,7 @@ static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)
> 
>  	ASSERT_SENTINEL(zv, ZVH);
>  	BUG_ON(chunks >= NCHUNKS);
> -	zv_curr_dist_counts[chunks]--;
> +	atomic_dec(&zv_curr_dist_counts[chunks]);
>  	size -= sizeof(*zv);
>  	BUG_ON(size == 0);
>  	INVERT_SENTINEL(zv, ZVH);
> @@ -738,7 +738,7 @@ static int zv_curr_dist_counts_show(char *buf)
>  	char *p = buf;
> 
>  	for (i = 0; i < NCHUNKS; i++) {
> -		n = zv_curr_dist_counts[i];
> +		n = atomic_read(&zv_curr_dist_counts[i]);
>  		p += sprintf(p, "%lu ", n);
>  		chunks += n;
>  		sum_total_chunks += i * n;
> @@ -754,7 +754,7 @@ static int zv_cumul_dist_counts_show(char *buf)
>  	char *p = buf;
> 
>  	for (i = 0; i < NCHUNKS; i++) {
> -		n = zv_cumul_dist_counts[i];
> +		n = atomic_read(&zv_cumul_dist_counts[i]);
>  		p += sprintf(p, "%lu ", n);
>  		chunks += n;
>  		sum_total_chunks += i * n;


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-02-06 21:20 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <<1325263335-16254-1-git-send-email-sjenning@linux.vnet.ibm.com>
2011-12-30 17:02 ` [PATCH] staging: zcache: fix serialization bug in zv stats Dan Magenheimer
2011-12-30 17:27   ` Seth Jennings
2011-12-30 17:31     ` Dan Magenheimer
2011-12-30 17:35   ` Seth Jennings
2012-01-03 21:38     ` Konrad Rzeszutek Wilk
2011-12-30 16:42 Seth Jennings
2012-02-06 21:18 ` Seth Jennings

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.