Linux CIFS filesystem development
 help / color / mirror / Atom feed
* [PATCH] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev()
@ 2023-10-15 14:45 Namjae Jeon
  2023-10-17 14:06 ` Tom Talpey
  0 siblings, 1 reply; 6+ messages in thread
From: Namjae Jeon @ 2023-10-15 14:45 UTC (permalink / raw)
  To: linux-cifs
  Cc: smfrench, senozhatsky, tom, atteh.mailbox, Kangjing Huang,
	Namjae Jeon

From: Kangjing Huang <huangkangjing@gmail.com>

Physical ib_device does not have an underlying net_device, thus its
association with IPoIB net_device cannot be retrieved via
ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
ib_device port GUID from the lower 16 bytes of the hardware addresses on
IPoIB net_device and match its underlying ib_device using ib_find_gid()

Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/smb/server/transport_rdma.c | 39 +++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 3b269e1f523a..a82131f7dd83 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct ib_device *ib_dev)
 	if (ib_dev->node_type != RDMA_NODE_IB_CA)
 		smb_direct_port = SMB_DIRECT_PORT_IWARP;
 
-	if (!ib_dev->ops.get_netdev ||
-	    !rdma_frwr_is_supported(&ib_dev->attrs))
+	if (!rdma_frwr_is_supported(&ib_dev->attrs))
 		return 0;
 
 	smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
@@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
 		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
 			struct net_device *ndev;
 
-			ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
-							       i + 1);
-			if (!ndev)
-				continue;
+			/* RoCE and iWRAP ib_dev is backed by a netdev */
+			if (smb_dev->ib_dev->ops.get_netdev) {
+				ndev = smb_dev->ib_dev->ops.get_netdev(
+					smb_dev->ib_dev, i + 1);
+				if (!ndev)
+					continue;
 
-			if (ndev == netdev) {
+				if (ndev == netdev) {
+					dev_put(ndev);
+					rdma_capable = true;
+					goto out;
+				}
 				dev_put(ndev);
-				rdma_capable = true;
-				goto out;
+			/* match physical ib_dev with IPoIB netdev by GUID */
+			} else if (netdev->type == ARPHRD_INFINIBAND) {
+				struct netdev_hw_addr *ha;
+				union ib_gid gid;
+				u32 port_num;
+				int ret;
+
+				netdev_hw_addr_list_for_each(
+					ha, &netdev->dev_addrs) {
+					memcpy(&gid, ha->addr + 4, sizeof(gid));
+					ret = ib_find_gid(smb_dev->ib_dev, &gid,
+							  &port_num, NULL);
+					if (!ret) {
+						rdma_capable = true;
+						goto out;
+					}
+				}
 			}
-			dev_put(ndev);
 		}
 	}
 out:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev()
  2023-10-15 14:45 [PATCH] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev() Namjae Jeon
@ 2023-10-17 14:06 ` Tom Talpey
  2023-10-19  6:01   ` Kangjing (Chaser) Huang
  0 siblings, 1 reply; 6+ messages in thread
From: Tom Talpey @ 2023-10-17 14:06 UTC (permalink / raw)
  To: Namjae Jeon, linux-cifs
  Cc: smfrench, senozhatsky, atteh.mailbox, Kangjing Huang

On 10/15/2023 10:45 AM, Namjae Jeon wrote:
> From: Kangjing Huang <huangkangjing@gmail.com>
> 
> Physical ib_device does not have an underlying net_device, thus its
> association with IPoIB net_device cannot be retrieved via
> ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
> ib_device port GUID from the lower 16 bytes of the hardware addresses on
> IPoIB net_device and match its underlying ib_device using ib_find_gid()
> 
> Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
> Acked-by: Namjae Jeon <linkinjeon@kernel.org>
> ---
>   fs/smb/server/transport_rdma.c | 39 +++++++++++++++++++++++++---------
>   1 file changed, 29 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
> index 3b269e1f523a..a82131f7dd83 100644
> --- a/fs/smb/server/transport_rdma.c
> +++ b/fs/smb/server/transport_rdma.c
> @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct ib_device *ib_dev)
>   	if (ib_dev->node_type != RDMA_NODE_IB_CA)
>   		smb_direct_port = SMB_DIRECT_PORT_IWARP;
>   
> -	if (!ib_dev->ops.get_netdev ||
> -	    !rdma_frwr_is_supported(&ib_dev->attrs))
> +	if (!rdma_frwr_is_supported(&ib_dev->attrs))
>   		return 0;
>   
>   	smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
> @@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
>   		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
>   			struct net_device *ndev;
>   
> -			ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
> -							       i + 1);
> -			if (!ndev)
> -				continue;
> +			/* RoCE and iWRAP ib_dev is backed by a netdev */
> +			if (smb_dev->ib_dev->ops.get_netdev) {

The "IWRAP" is a typo, but IMO the comment is misleading. This is simply
looking up the target netdev, it's not limited to these two rdma types.
I suggest deleting the comment.

> +				ndev = smb_dev->ib_dev->ops.get_netdev(
> +					smb_dev->ib_dev, i + 1);
> +				if (!ndev)
> +					continue;
>   
> -			if (ndev == netdev) {
> +				if (ndev == netdev) {
> +					dev_put(ndev);
> +					rdma_capable = true;
> +					goto out;
> +				}
>   				dev_put(ndev);

Why not move this dev_put up above the if (ndev == netdev) test? It's
needed in both cases, so it's confusing to have two copies.

> -				rdma_capable = true;
> -				goto out;
> +			/* match physical ib_dev with IPoIB netdev by GUID */

Add more information to this comment, perhaps:

   /* if no exact netdev match, check for matching Infiniband GUID */

> +			} else if (netdev->type == ARPHRD_INFINIBAND) {
> +				struct netdev_hw_addr *ha;
> +				union ib_gid gid;
> +				u32 port_num;
> +				int ret;
> +
> +				netdev_hw_addr_list_for_each(
> +					ha, &netdev->dev_addrs) {
> +					memcpy(&gid, ha->addr + 4, sizeof(gid));
> +					ret = ib_find_gid(smb_dev->ib_dev, &gid,
> +							  &port_num, NULL);
> +					if (!ret) {
> +						rdma_capable = true;
> +						goto out;

Won't this leak the ndev? It needs a dev_put(ndev) before breaking
the loop, too, right?

> +					}
> +				}
>   			}
> -			dev_put(ndev);
>   		}
>   	}
>   out:

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev()
  2023-10-17 14:06 ` Tom Talpey
@ 2023-10-19  6:01   ` Kangjing (Chaser) Huang
  2023-10-19 22:37     ` Namjae Jeon
  0 siblings, 1 reply; 6+ messages in thread
From: Kangjing (Chaser) Huang @ 2023-10-19  6:01 UTC (permalink / raw)
  To: Tom Talpey; +Cc: Namjae Jeon, linux-cifs, smfrench, senozhatsky, atteh.mailbox

Thank you for the review. Also thanks a lot to Namjae for submitting
this patch on my behalf.


On Tue, Oct 17, 2023 at 10:07 AM Tom Talpey <tom@talpey.com> wrote:
>
> On 10/15/2023 10:45 AM, Namjae Jeon wrote:
> > From: Kangjing Huang <huangkangjing@gmail.com>
> >
> > Physical ib_device does not have an underlying net_device, thus its
> > association with IPoIB net_device cannot be retrieved via
> > ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
> > ib_device port GUID from the lower 16 bytes of the hardware addresses on
> > IPoIB net_device and match its underlying ib_device using ib_find_gid()
> >
> > Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
> > Acked-by: Namjae Jeon <linkinjeon@kernel.org>
> > ---
> >   fs/smb/server/transport_rdma.c | 39 +++++++++++++++++++++++++---------
> >   1 file changed, 29 insertions(+), 10 deletions(-)
> >
> > diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
> > index 3b269e1f523a..a82131f7dd83 100644
> > --- a/fs/smb/server/transport_rdma.c
> > +++ b/fs/smb/server/transport_rdma.c
> > @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct ib_device *ib_dev)
> >       if (ib_dev->node_type != RDMA_NODE_IB_CA)
> >               smb_direct_port = SMB_DIRECT_PORT_IWARP;
> >
> > -     if (!ib_dev->ops.get_netdev ||
> > -         !rdma_frwr_is_supported(&ib_dev->attrs))
> > +     if (!rdma_frwr_is_supported(&ib_dev->attrs))
> >               return 0;
> >
> >       smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
> > @@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
> >               for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
> >                       struct net_device *ndev;
> >
> > -                     ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
> > -                                                            i + 1);
> > -                     if (!ndev)
> > -                             continue;
> > +                     /* RoCE and iWRAP ib_dev is backed by a netdev */
> > +                     if (smb_dev->ib_dev->ops.get_netdev) {
>
> The "IWRAP" is a typo, but IMO the comment is misleading. This is simply
> looking up the target netdev, it's not limited to these two rdma types.
> I suggest deleting the comment.
>

I agree with this point and will update this comment in version 2 of the patch.

> > +                             ndev = smb_dev->ib_dev->ops.get_netdev(
> > +                                     smb_dev->ib_dev, i + 1);
> > +                             if (!ndev)
> > +                                     continue;
> >
> > -                     if (ndev == netdev) {
> > +                             if (ndev == netdev) {
> > +                                     dev_put(ndev);
> > +                                     rdma_capable = true;
> > +                                     goto out;
> > +                             }
> >                               dev_put(ndev);
>
> Why not move this dev_put up above the if (ndev == netdev) test? It's
> needed in both cases, so it's confusing to have two copies.

I do agree that these two puts are very confusing. However, this code
structure comes from the original ksmbd_rdma_capable_netdev() and
this patch only indents it one more level and puts it in the if test for
get_netdev.

Also, while two puts look very weird, IMO putting it before the if statement
is equally unintuitive as well since that would be testing the pointer after its
reference is released. Although since no de-reference is happening here, it
might be fine. Please confirm this is good coding-style-wise and I will include
this change in v2 of the patch.

>
> > -                             rdma_capable = true;
> > -                             goto out;
> > +                     /* match physical ib_dev with IPoIB netdev by GUID */
>
> Add more information to this comment, perhaps:
>
>    /* if no exact netdev match, check for matching Infiniband GUID */
>

Fair point, will update this comment in v2.

> > +                     } else if (netdev->type == ARPHRD_INFINIBAND) {
> > +                             struct netdev_hw_addr *ha;
> > +                             union ib_gid gid;
> > +                             u32 port_num;
> > +                             int ret;
> > +
> > +                             netdev_hw_addr_list_for_each(
> > +                                     ha, &netdev->dev_addrs) {
> > +                                     memcpy(&gid, ha->addr + 4, sizeof(gid));
> > +                                     ret = ib_find_gid(smb_dev->ib_dev, &gid,
> > +                                                       &port_num, NULL);
> > +                                     if (!ret) {
> > +                                             rdma_capable = true;
> > +                                             goto out;
>
> Won't this leak the ndev? It needs a dev_put(ndev) before breaking
> the loop, too, right?
>

This will not leak the ndev. Please look closer, this else branch matches with
the if test on the existence of ops.get_netdev of the current ib_dev. And ndev
is acquired only through that op. In the else branch, ndev is just not acquired.
The original code was indented one more level here so the patch might look
a bit confusing, but one of the if before this block is a deleted(-) line.

> > +                                     }
> > +                             }
> >                       }
> > -                     dev_put(ndev);
> >               }
> >       }
> >   out:



--
Kangjing "Chaser" Huang

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev()
  2023-10-19  6:01   ` Kangjing (Chaser) Huang
@ 2023-10-19 22:37     ` Namjae Jeon
  2023-10-20  1:14       ` Tom Talpey
  0 siblings, 1 reply; 6+ messages in thread
From: Namjae Jeon @ 2023-10-19 22:37 UTC (permalink / raw)
  To: Kangjing (Chaser) Huang
  Cc: Tom Talpey, linux-cifs, smfrench, senozhatsky, atteh.mailbox

2023-10-19 15:01 GMT+09:00, Kangjing (Chaser) Huang <huangkangjing@gmail.com>:
> Thank you for the review. Also thanks a lot to Namjae for submitting
> this patch on my behalf.
>
>
> On Tue, Oct 17, 2023 at 10:07 AM Tom Talpey <tom@talpey.com> wrote:
>>
>> On 10/15/2023 10:45 AM, Namjae Jeon wrote:
>> > From: Kangjing Huang <huangkangjing@gmail.com>
>> >
>> > Physical ib_device does not have an underlying net_device, thus its
>> > association with IPoIB net_device cannot be retrieved via
>> > ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
>> > ib_device port GUID from the lower 16 bytes of the hardware addresses
>> > on
>> > IPoIB net_device and match its underlying ib_device using ib_find_gid()
>> >
>> > Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
>> > Acked-by: Namjae Jeon <linkinjeon@kernel.org>
>> > ---
>> >   fs/smb/server/transport_rdma.c | 39
>> > +++++++++++++++++++++++++---------
>> >   1 file changed, 29 insertions(+), 10 deletions(-)
>> >
>> > diff --git a/fs/smb/server/transport_rdma.c
>> > b/fs/smb/server/transport_rdma.c
>> > index 3b269e1f523a..a82131f7dd83 100644
>> > --- a/fs/smb/server/transport_rdma.c
>> > +++ b/fs/smb/server/transport_rdma.c
>> > @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct
>> > ib_device *ib_dev)
>> >       if (ib_dev->node_type != RDMA_NODE_IB_CA)
>> >               smb_direct_port = SMB_DIRECT_PORT_IWARP;
>> >
>> > -     if (!ib_dev->ops.get_netdev ||
>> > -         !rdma_frwr_is_supported(&ib_dev->attrs))
>> > +     if (!rdma_frwr_is_supported(&ib_dev->attrs))
>> >               return 0;
>> >
>> >       smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
>> > @@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device
>> > *netdev)
>> >               for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
>> >                       struct net_device *ndev;
>> >
>> > -                     ndev =
>> > smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
>> > -                                                            i + 1);
>> > -                     if (!ndev)
>> > -                             continue;
>> > +                     /* RoCE and iWRAP ib_dev is backed by a netdev */
>> > +                     if (smb_dev->ib_dev->ops.get_netdev) {
>>
>> The "IWRAP" is a typo, but IMO the comment is misleading. This is simply
>> looking up the target netdev, it's not limited to these two rdma types.
>> I suggest deleting the comment.
>>
>
> I agree with this point and will update this comment in version 2 of the
> patch.
>
>> > +                             ndev = smb_dev->ib_dev->ops.get_netdev(
>> > +                                     smb_dev->ib_dev, i + 1);
>> > +                             if (!ndev)
>> > +                                     continue;
>> >
>> > -                     if (ndev == netdev) {
>> > +                             if (ndev == netdev) {
>> > +                                     dev_put(ndev);
>> > +                                     rdma_capable = true;
>> > +                                     goto out;
>> > +                             }
>> >                               dev_put(ndev);
>>
>> Why not move this dev_put up above the if (ndev == netdev) test? It's
>> needed in both cases, so it's confusing to have two copies.
>
> I do agree that these two puts are very confusing. However, this code
> structure comes from the original ksmbd_rdma_capable_netdev() and
> this patch only indents it one more level and puts it in the if test for
> get_netdev.
>
> Also, while two puts look very weird, IMO putting it before the if
> statement
> is equally unintuitive as well since that would be testing the pointer after
> its
> reference is released. Although since no de-reference is happening here, it
> might be fine. Please confirm this is good coding-style-wise and I will
> include
> this change in v2 of the patch.
There is no need to update code that does not have problems.
>
>>
>> > -                             rdma_capable = true;
>> > -                             goto out;
>> > +                     /* match physical ib_dev with IPoIB netdev by GUID
>> > */
>>
>> Add more information to this comment, perhaps:
>>
>>    /* if no exact netdev match, check for matching Infiniband GUID */
>>
>
> Fair point, will update this comment in v2.
>
>> > +                     } else if (netdev->type == ARPHRD_INFINIBAND) {
>> > +                             struct netdev_hw_addr *ha;
>> > +                             union ib_gid gid;
>> > +                             u32 port_num;
>> > +                             int ret;
>> > +
>> > +                             netdev_hw_addr_list_for_each(
>> > +                                     ha, &netdev->dev_addrs) {
>> > +                                     memcpy(&gid, ha->addr + 4,
>> > sizeof(gid));
>> > +                                     ret = ib_find_gid(smb_dev->ib_dev,
>> > &gid,
>> > +                                                       &port_num,
>> > NULL);
>> > +                                     if (!ret) {
>> > +                                             rdma_capable = true;
>> > +                                             goto out;
>>
>> Won't this leak the ndev? It needs a dev_put(ndev) before breaking
>> the loop, too, right?
>>
>
> This will not leak the ndev. Please look closer, this else branch matches
> with
> the if test on the existence of ops.get_netdev of the current ib_dev. And
> ndev
> is acquired only through that op. In the else branch, ndev is just not
> acquired.
> The original code was indented one more level here so the patch might look
> a bit confusing, but one of the if before this block is a deleted(-) line.
You're right.

Please send v2 patch after adding comments that Tom pointed out.

Thanks!
>
>> > +                                     }
>> > +                             }
>> >                       }
>> > -                     dev_put(ndev);
>> >               }
>> >       }
>> >   out:
>
>
>
> --
> Kangjing "Chaser" Huang
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev()
  2023-10-19 22:37     ` Namjae Jeon
@ 2023-10-20  1:14       ` Tom Talpey
  2023-10-20 11:38         ` Kangjing (Chaser) Huang
  0 siblings, 1 reply; 6+ messages in thread
From: Tom Talpey @ 2023-10-20  1:14 UTC (permalink / raw)
  To: Namjae Jeon, Kangjing (Chaser) Huang
  Cc: linux-cifs, smfrench, senozhatsky, atteh.mailbox

On 10/19/2023 6:37 PM, Namjae Jeon wrote:
> 2023-10-19 15:01 GMT+09:00, Kangjing (Chaser) Huang <huangkangjing@gmail.com>:
>> Thank you for the review. Also thanks a lot to Namjae for submitting
>> this patch on my behalf.
>>
>>
>> On Tue, Oct 17, 2023 at 10:07 AM Tom Talpey <tom@talpey.com> wrote:
>>>
>>> On 10/15/2023 10:45 AM, Namjae Jeon wrote:
>>>> From: Kangjing Huang <huangkangjing@gmail.com>
>>>>
>>>> Physical ib_device does not have an underlying net_device, thus its
>>>> association with IPoIB net_device cannot be retrieved via
>>>> ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
>>>> ib_device port GUID from the lower 16 bytes of the hardware addresses
>>>> on
>>>> IPoIB net_device and match its underlying ib_device using ib_find_gid()
>>>>
>>>> Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
>>>> Acked-by: Namjae Jeon <linkinjeon@kernel.org>
>>>> ---
>>>>    fs/smb/server/transport_rdma.c | 39
>>>> +++++++++++++++++++++++++---------
>>>>    1 file changed, 29 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/fs/smb/server/transport_rdma.c
>>>> b/fs/smb/server/transport_rdma.c
>>>> index 3b269e1f523a..a82131f7dd83 100644
>>>> --- a/fs/smb/server/transport_rdma.c
>>>> +++ b/fs/smb/server/transport_rdma.c
>>>> @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct
>>>> ib_device *ib_dev)
>>>>        if (ib_dev->node_type != RDMA_NODE_IB_CA)
>>>>                smb_direct_port = SMB_DIRECT_PORT_IWARP;
>>>>
>>>> -     if (!ib_dev->ops.get_netdev ||
>>>> -         !rdma_frwr_is_supported(&ib_dev->attrs))
>>>> +     if (!rdma_frwr_is_supported(&ib_dev->attrs))
>>>>                return 0;
>>>>
>>>>        smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
>>>> @@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device
>>>> *netdev)
>>>>                for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
>>>>                        struct net_device *ndev;
>>>>
>>>> -                     ndev =
>>>> smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
>>>> -                                                            i + 1);
>>>> -                     if (!ndev)
>>>> -                             continue;
>>>> +                     /* RoCE and iWRAP ib_dev is backed by a netdev */
>>>> +                     if (smb_dev->ib_dev->ops.get_netdev) {
>>>
>>> The "IWRAP" is a typo, but IMO the comment is misleading. This is simply
>>> looking up the target netdev, it's not limited to these two rdma types.
>>> I suggest deleting the comment.
>>>
>>
>> I agree with this point and will update this comment in version 2 of the
>> patch.
>>
>>>> +                             ndev = smb_dev->ib_dev->ops.get_netdev(
>>>> +                                     smb_dev->ib_dev, i + 1);
>>>> +                             if (!ndev)
>>>> +                                     continue;
>>>>
>>>> -                     if (ndev == netdev) {
>>>> +                             if (ndev == netdev) {
>>>> +                                     dev_put(ndev);
>>>> +                                     rdma_capable = true;
>>>> +                                     goto out;
>>>> +                             }
>>>>                                dev_put(ndev);
>>>
>>> Why not move this dev_put up above the if (ndev == netdev) test? It's
>>> needed in both cases, so it's confusing to have two copies.
>>
>> I do agree that these two puts are very confusing. However, this code
>> structure comes from the original ksmbd_rdma_capable_netdev() and
>> this patch only indents it one more level and puts it in the if test for
>> get_netdev.
>>
>> Also, while two puts look very weird, IMO putting it before the if
>> statement
>> is equally unintuitive as well since that would be testing the pointer after
>> its
>> reference is released. Although since no de-reference is happening here, it
>> might be fine. Please confirm this is good coding-style-wise and I will
>> include
>> this change in v2 of the patch.
> There is no need to update code that does not have problems.

Well, there are now at least half a dozen stanzas of
	dev_put
	rdma_capable = <T|f>
	goto out

and I don't see why these can't be simplified to

	goto out_capable|out_notcapable

It woud be a lot clearer, at least to this reviewer. And more reliable
to maintain.

>>
>>>
>>>> -                             rdma_capable = true;
>>>> -                             goto out;
>>>> +                     /* match physical ib_dev with IPoIB netdev by GUID
>>>> */
>>>
>>> Add more information to this comment, perhaps:
>>>
>>>     /* if no exact netdev match, check for matching Infiniband GUID */
>>>
>>
>> Fair point, will update this comment in v2.
>>
>>>> +                     } else if (netdev->type == ARPHRD_INFINIBAND) {
>>>> +                             struct netdev_hw_addr *ha;
>>>> +                             union ib_gid gid;
>>>> +                             u32 port_num;
>>>> +                             int ret;
>>>> +
>>>> +                             netdev_hw_addr_list_for_each(
>>>> +                                     ha, &netdev->dev_addrs) {
>>>> +                                     memcpy(&gid, ha->addr + 4,
>>>> sizeof(gid));
>>>> +                                     ret = ib_find_gid(smb_dev->ib_dev,
>>>> &gid,
>>>> +                                                       &port_num,
>>>> NULL);
>>>> +                                     if (!ret) {
>>>> +                                             rdma_capable = true;
>>>> +                                             goto out;
>>>
>>> Won't this leak the ndev? It needs a dev_put(ndev) before breaking
>>> the loop, too, right?
>>>
>>
>> This will not leak the ndev. Please look closer, this else branch matches
>> with
>> the if test on the existence of ops.get_netdev of the current ib_dev. And
>> ndev
>> is acquired only through that op. In the else branch, ndev is just not
>> acquired.
>> The original code was indented one more level here so the patch might look
>> a bit confusing, but one of the if before this block is a deleted(-) line.
> You're right.

Ok, so I repeat my comment above!

Tom.

> 
> Please send v2 patch after adding comments that Tom pointed out.
> 
> Thanks!
>>
>>>> +                                     }
>>>> +                             }
>>>>                        }
>>>> -                     dev_put(ndev);
>>>>                }
>>>>        }
>>>>    out:
>>
>>
>>
>> --
>> Kangjing "Chaser" Huang
>>
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev()
  2023-10-20  1:14       ` Tom Talpey
@ 2023-10-20 11:38         ` Kangjing (Chaser) Huang
  0 siblings, 0 replies; 6+ messages in thread
From: Kangjing (Chaser) Huang @ 2023-10-20 11:38 UTC (permalink / raw)
  To: Tom Talpey; +Cc: Namjae Jeon, linux-cifs, smfrench, senozhatsky, atteh.mailbox

On Thu, Oct 19, 2023 at 9:14 PM Tom Talpey <tom@talpey.com> wrote:
>
> On 10/19/2023 6:37 PM, Namjae Jeon wrote:
> > 2023-10-19 15:01 GMT+09:00, Kangjing (Chaser) Huang <huangkangjing@gmail.com>:
> >> Thank you for the review. Also thanks a lot to Namjae for submitting
> >> this patch on my behalf.
> >>
> >>
> >> On Tue, Oct 17, 2023 at 10:07 AM Tom Talpey <tom@talpey.com> wrote:
> >>>
> >>> On 10/15/2023 10:45 AM, Namjae Jeon wrote:
> >>>> From: Kangjing Huang <huangkangjing@gmail.com>
> >>>>
> >>>> Physical ib_device does not have an underlying net_device, thus its
> >>>> association with IPoIB net_device cannot be retrieved via
> >>>> ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
> >>>> ib_device port GUID from the lower 16 bytes of the hardware addresses
> >>>> on
> >>>> IPoIB net_device and match its underlying ib_device using ib_find_gid()
> >>>>
> >>>> Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
> >>>> Acked-by: Namjae Jeon <linkinjeon@kernel.org>
> >>>> ---
> >>>>    fs/smb/server/transport_rdma.c | 39
> >>>> +++++++++++++++++++++++++---------
> >>>>    1 file changed, 29 insertions(+), 10 deletions(-)
> >>>>
> >>>> diff --git a/fs/smb/server/transport_rdma.c
> >>>> b/fs/smb/server/transport_rdma.c
> >>>> index 3b269e1f523a..a82131f7dd83 100644
> >>>> --- a/fs/smb/server/transport_rdma.c
> >>>> +++ b/fs/smb/server/transport_rdma.c
> >>>> @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct
> >>>> ib_device *ib_dev)
> >>>>        if (ib_dev->node_type != RDMA_NODE_IB_CA)
> >>>>                smb_direct_port = SMB_DIRECT_PORT_IWARP;
> >>>>
> >>>> -     if (!ib_dev->ops.get_netdev ||
> >>>> -         !rdma_frwr_is_supported(&ib_dev->attrs))
> >>>> +     if (!rdma_frwr_is_supported(&ib_dev->attrs))
> >>>>                return 0;
> >>>>
> >>>>        smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
> >>>> @@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device
> >>>> *netdev)
> >>>>                for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
> >>>>                        struct net_device *ndev;
> >>>>
> >>>> -                     ndev =
> >>>> smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
> >>>> -                                                            i + 1);
> >>>> -                     if (!ndev)
> >>>> -                             continue;
> >>>> +                     /* RoCE and iWRAP ib_dev is backed by a netdev */
> >>>> +                     if (smb_dev->ib_dev->ops.get_netdev) {
> >>>
> >>> The "IWRAP" is a typo, but IMO the comment is misleading. This is simply
> >>> looking up the target netdev, it's not limited to these two rdma types.
> >>> I suggest deleting the comment.
> >>>
> >>
> >> I agree with this point and will update this comment in version 2 of the
> >> patch.
> >>
> >>>> +                             ndev = smb_dev->ib_dev->ops.get_netdev(
> >>>> +                                     smb_dev->ib_dev, i + 1);
> >>>> +                             if (!ndev)
> >>>> +                                     continue;
> >>>>
> >>>> -                     if (ndev == netdev) {
> >>>> +                             if (ndev == netdev) {
> >>>> +                                     dev_put(ndev);
> >>>> +                                     rdma_capable = true;
> >>>> +                                     goto out;
> >>>> +                             }
> >>>>                                dev_put(ndev);
> >>>
> >>> Why not move this dev_put up above the if (ndev == netdev) test? It's
> >>> needed in both cases, so it's confusing to have two copies.
> >>
> >> I do agree that these two puts are very confusing. However, this code
> >> structure comes from the original ksmbd_rdma_capable_netdev() and
> >> this patch only indents it one more level and puts it in the if test for
> >> get_netdev.
> >>
> >> Also, while two puts look very weird, IMO putting it before the if
> >> statement
> >> is equally unintuitive as well since that would be testing the pointer after
> >> its
> >> reference is released. Although since no de-reference is happening here, it
> >> might be fine. Please confirm this is good coding-style-wise and I will
> >> include
> >> this change in v2 of the patch.
> > There is no need to update code that does not have problems.
>

> Well, there are now at least half a dozen stanzas of
>         dev_put
>         rdma_capable = <T|f>
>         goto out
>
> and I don't see why these can't be simplified to
>
>         goto out_capable|out_notcapable
>
> It woud be a lot clearer, at least to this reviewer. And more reliable
> to maintain.
>

I agree, but this consolidation would be impossible without a
major overhaul of the code structure in
ksmbd_rdma_capable_netdev(). There are several clean-up
calls that need to be made (read_unlock(&smb_direct_device_lock),
dev_put, ib_device_put) and the biggest problem is brought in
by the block of code that is doing a reverse lookup on ib_dev
structs right after the patched loop.

I don't know why it is necessary for such a reverse lookup, but given
that these code depends on the behaviors of ib device drivers, which
can be very erratic. I feel that removing that block could break
something with some other devices.

In conclusion, I feel addressing these issues in this function is
beyond the scope of this patch and they should be addressed
separately. I will move forward with the comment change in v2.



> >>
> >>>
> >>>> -                             rdma_capable = true;
> >>>> -                             goto out;
> >>>> +                     /* match physical ib_dev with IPoIB netdev by GUID
> >>>> */
> >>>
> >>> Add more information to this comment, perhaps:
> >>>
> >>>     /* if no exact netdev match, check for matching Infiniband GUID */
> >>>
> >>
> >> Fair point, will update this comment in v2.
> >>
> >>>> +                     } else if (netdev->type == ARPHRD_INFINIBAND) {
> >>>> +                             struct netdev_hw_addr *ha;
> >>>> +                             union ib_gid gid;
> >>>> +                             u32 port_num;
> >>>> +                             int ret;
> >>>> +
> >>>> +                             netdev_hw_addr_list_for_each(
> >>>> +                                     ha, &netdev->dev_addrs) {
> >>>> +                                     memcpy(&gid, ha->addr + 4,
> >>>> sizeof(gid));
> >>>> +                                     ret = ib_find_gid(smb_dev->ib_dev,
> >>>> &gid,
> >>>> +                                                       &port_num,
> >>>> NULL);
> >>>> +                                     if (!ret) {
> >>>> +                                             rdma_capable = true;
> >>>> +                                             goto out;
> >>>
> >>> Won't this leak the ndev? It needs a dev_put(ndev) before breaking
> >>> the loop, too, right?
> >>>
> >>
> >> This will not leak the ndev. Please look closer, this else branch matches
> >> with
> >> the if test on the existence of ops.get_netdev of the current ib_dev. And
> >> ndev
> >> is acquired only through that op. In the else branch, ndev is just not
> >> acquired.
> >> The original code was indented one more level here so the patch might look
> >> a bit confusing, but one of the if before this block is a deleted(-) line.
> > You're right.
>
> Ok, so I repeat my comment above!
>
> Tom.
>
> >
> > Please send v2 patch after adding comments that Tom pointed out.
> >
> > Thanks!
> >>
> >>>> +                                     }
> >>>> +                             }
> >>>>                        }
> >>>> -                     dev_put(ndev);
> >>>>                }
> >>>>        }
> >>>>    out:
> >>
> >>
> >>
> >> --
> >> Kangjing "Chaser" Huang
> >>
> >



--
Kangjing "Chaser" Huang

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-10-20 11:40 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-10-15 14:45 [PATCH] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev() Namjae Jeon
2023-10-17 14:06 ` Tom Talpey
2023-10-19  6:01   ` Kangjing (Chaser) Huang
2023-10-19 22:37     ` Namjae Jeon
2023-10-20  1:14       ` Tom Talpey
2023-10-20 11:38         ` Kangjing (Chaser) Huang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox