kernel-janitors.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame()
@ 2023-02-02 20:23 Christophe JAILLET
  2023-04-12 16:02 ` Jason Gunthorpe
  2023-04-12 23:06 ` Jason Gunthorpe
  0 siblings, 2 replies; 4+ messages in thread
From: Christophe JAILLET @ 2023-02-02 20:23 UTC (permalink / raw)
  To: Mustafa Ismail, Shiraz Saleem, Jason Gunthorpe, Leon Romanovsky
  Cc: linux-kernel, kernel-janitors, Christophe JAILLET, linux-rdma

There is no need to zero 'pktsize' bytes of 'buf', only the header needs
to be cleared, to be safe.
All the other bytes are already written with some memcpy() at the end of
the function.

Doing so also gives the opportunity to the compiler to avoid the memset()
call. It can be inlined now that the length is known as compile time.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
Just in case, here is the diff of what is generated by gcc 11.3.0 before
and after the patch.

 .L736:
-# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, pktsize);
+# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
 	call	__sanitizer_cov_trace_pc	#
-	xorl	%esi, %esi	#
-	movzwl	%r13w, %edx	# _194, __fortify_size
-	movq	%rbp, %rdi	# buf,
-	call	memset	#
-	leaq	104(%r12), %rax	#, _259
+	movl	$0, 16(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
+	leaq	104(%r12), %rax	#, _295
+# drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
+	movzwl	%r13w, %r13d	# _192, _192
+# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
+	movq	$0, 0(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
+# drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
+	movq	%rax, %rdi	# _295,
+# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
+	movq	$0, 8(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
+	movq	%rax, 64(%rsp)	# _295, %sfp
 # drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
---
 drivers/infiniband/hw/irdma/cm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 195aa9ea18b6..48c2a303e9ec 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -337,7 +337,7 @@ static struct irdma_puda_buf *irdma_form_ah_cm_frame(struct irdma_cm_node *cm_no
 
 	pktsize = sizeof(*tcph) + opts_len + hdr_len + pd_len;
 
-	memset(buf, 0, pktsize);
+	memset(buf, 0, sizeof(*tcph));
 
 	sqbuf->totallen = pktsize;
 	sqbuf->tcphlen = sizeof(*tcph) + opts_len;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame()
  2023-02-02 20:23 [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame() Christophe JAILLET
@ 2023-04-12 16:02 ` Jason Gunthorpe
  2023-04-12 17:03   ` Saleem, Shiraz
  2023-04-12 23:06 ` Jason Gunthorpe
  1 sibling, 1 reply; 4+ messages in thread
From: Jason Gunthorpe @ 2023-04-12 16:02 UTC (permalink / raw)
  To: Christophe JAILLET, Shiraz Saleem
  Cc: Mustafa Ismail, Leon Romanovsky, linux-kernel, kernel-janitors,
	linux-rdma

On Thu, Feb 02, 2023 at 09:23:24PM +0100, Christophe JAILLET wrote:
> There is no need to zero 'pktsize' bytes of 'buf', only the header needs
> to be cleared, to be safe.
> All the other bytes are already written with some memcpy() at the end of
> the function.
> 
> Doing so also gives the opportunity to the compiler to avoid the memset()
> call. It can be inlined now that the length is known as compile time.
> 
> Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
> ---
> Just in case, here is the diff of what is generated by gcc 11.3.0 before
> and after the patch.
> 
>  .L736:
> -# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, pktsize);
> +# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
>  	call	__sanitizer_cov_trace_pc	#
> -	xorl	%esi, %esi	#
> -	movzwl	%r13w, %edx	# _194, __fortify_size
> -	movq	%rbp, %rdi	# buf,
> -	call	memset	#
> -	leaq	104(%r12), %rax	#, _259
> +	movl	$0, 16(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
> +	leaq	104(%r12), %rax	#, _295
> +# drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
> +	movzwl	%r13w, %r13d	# _192, _192
> +# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
> +	movq	$0, 0(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
> +# drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
> +	movq	%rax, %rdi	# _295,
> +# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
> +	movq	$0, 8(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
> +	movq	%rax, 64(%rsp)	# _295, %sfp
>  # drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
> ---
>  drivers/infiniband/hw/irdma/cm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Shiraz??

Jason

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame()
  2023-04-12 16:02 ` Jason Gunthorpe
@ 2023-04-12 17:03   ` Saleem, Shiraz
  0 siblings, 0 replies; 4+ messages in thread
From: Saleem, Shiraz @ 2023-04-12 17:03 UTC (permalink / raw)
  To: Jason Gunthorpe, Christophe JAILLET
  Cc: Ismail, Mustafa, Leon Romanovsky, linux-kernel@vger.kernel.org,
	kernel-janitors@vger.kernel.org, linux-rdma@vger.kernel.org

> Subject: Re: [PATCH] RDMA/irdma: Slightly optimize
> irdma_form_ah_cm_frame()
> 
> On Thu, Feb 02, 2023 at 09:23:24PM +0100, Christophe JAILLET wrote:
> > There is no need to zero 'pktsize' bytes of 'buf', only the header
> > needs to be cleared, to be safe.
> > All the other bytes are already written with some memcpy() at the end
> > of the function.
> >
> > Doing so also gives the opportunity to the compiler to avoid the
> > memset() call. It can be inlined now that the length is known as compile time.
> >
> > Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
> > ---
> > Just in case, here is the diff of what is generated by gcc 11.3.0
> > before and after the patch.
> >
> >  .L736:
> > -# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, pktsize);
> > +# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
> >  	call	__sanitizer_cov_trace_pc	#
> > -	xorl	%esi, %esi	#
> > -	movzwl	%r13w, %edx	# _194, __fortify_size
> > -	movq	%rbp, %rdi	# buf,
> > -	call	memset	#
> > -	leaq	104(%r12), %rax	#, _259
> > +	movl	$0, 16(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
> > +	leaq	104(%r12), %rax	#, _295
> > +# drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
> > +	movzwl	%r13w, %r13d	# _192, _192
> > +# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
> > +	movq	$0, 0(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
> > +# drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
> > +	movq	%rax, %rdi	# _295,
> > +# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
> > +	movq	$0, 8(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
> > +	movq	%rax, 64(%rsp)	# _295, %sfp
> >  # drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
> > ---
> >  drivers/infiniband/hw/irdma/cm.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> Shiraz??
> 

Sorry for the delay as I didn't see this earlier.

Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com>



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame()
  2023-02-02 20:23 [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame() Christophe JAILLET
  2023-04-12 16:02 ` Jason Gunthorpe
@ 2023-04-12 23:06 ` Jason Gunthorpe
  1 sibling, 0 replies; 4+ messages in thread
From: Jason Gunthorpe @ 2023-04-12 23:06 UTC (permalink / raw)
  To: Christophe JAILLET
  Cc: Mustafa Ismail, Shiraz Saleem, Leon Romanovsky, linux-kernel,
	kernel-janitors, linux-rdma

On Thu, Feb 02, 2023 at 09:23:24PM +0100, Christophe JAILLET wrote:
> There is no need to zero 'pktsize' bytes of 'buf', only the header needs
> to be cleared, to be safe.
> All the other bytes are already written with some memcpy() at the end of
> the function.
> 
> Doing so also gives the opportunity to the compiler to avoid the memset()
> call. It can be inlined now that the length is known as compile time.
> 
> Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
> Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com>
> ---
> Just in case, here is the diff of what is generated by gcc 11.3.0 before
> and after the patch.
> 
>  .L736:
> -# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, pktsize);
> +# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
>  	call	__sanitizer_cov_trace_pc	#
> -	xorl	%esi, %esi	#
> -	movzwl	%r13w, %edx	# _194, __fortify_size
> -	movq	%rbp, %rdi	# buf,
> -	call	memset	#
> -	leaq	104(%r12), %rax	#, _259
> +	movl	$0, 16(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
> +	leaq	104(%r12), %rax	#, _295
> +# drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
> +	movzwl	%r13w, %r13d	# _192, _192
> +# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
> +	movq	$0, 0(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
> +# drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
> +	movq	%rax, %rdi	# _295,
> +# drivers/infiniband/hw/irdma/cm.c:340: 	memset(buf, 0, sizeof(*tcph));
> +	movq	$0, 8(%rbp)	#, MEM <char[1:20]> [(void *)buf_114]
> +	movq	%rax, 64(%rsp)	# _295, %sfp
>  # drivers/infiniband/hw/irdma/cm.c:342: 	sqbuf->totallen = pktsize;
> ---
>  drivers/infiniband/hw/irdma/cm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Applied to for-next

Thanks,
Jason

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-04-12 23:06 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-02-02 20:23 [PATCH] RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame() Christophe JAILLET
2023-04-12 16:02 ` Jason Gunthorpe
2023-04-12 17:03   ` Saleem, Shiraz
2023-04-12 23:06 ` Jason Gunthorpe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).