linux-can.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock
       [not found] <20230704064710.3189-1-astrajoan@yahoo.com>
@ 2023-07-04  6:47 ` syzbot
  2023-07-04  7:37   ` Oleksij Rempel
       [not found] ` <20230721162226.8639-1-astrajoan@yahoo.com>
  1 sibling, 1 reply; 8+ messages in thread
From: syzbot @ 2023-07-04  6:47 UTC (permalink / raw)
  To: astrajoan
  Cc: astrajoan, davem, edumazet, ivan.orlov0322, kernel, kuba,
	linux-can, linux-kernel, linux, mkl, netdev, pabeni, robin, skhan,
	socketcan, syzkaller-bugs

> The following 3 locks would race against each other, causing the
> deadlock situation in the Syzbot bug report:
>
> - j1939_socks_lock
> - active_session_list_lock
> - sk_session_queue_lock
>
> A reasonable fix is to change j1939_socks_lock to an rwlock, since in
> the rare situations where a write lock is required for the linked list
> that j1939_socks_lock is protecting, the code does not attempt to
> acquire any more locks. This would break the circular lock dependency,
> where, for example, the current thread already locks j1939_socks_lock
> and attempts to acquire sk_session_queue_lock, and at the same time,
> another thread attempts to acquire j1939_socks_lock while holding
> sk_session_queue_lock.
>
> NOTE: This patch along does not fix the unregister_netdevice bug
> reported by Syzbot; instead, it solves a deadlock situation to prepare
> for one or more further patches to actually fix the Syzbot bug, which
> appears to be a reference counting problem within the j1939 codebase.
>
> #syz test:

This crash does not have a reproducer. I cannot test it.

> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
>
> Signed-off-by: Ziqi Zhao <astrajoan@yahoo.com>
> ---
>  net/can/j1939/j1939-priv.h |  2 +-
>  net/can/j1939/main.c       |  2 +-
>  net/can/j1939/socket.c     | 25 +++++++++++++------------
>  3 files changed, 15 insertions(+), 14 deletions(-)
>
> diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
> index 16af1a7f80f6..74f15592d170 100644
> --- a/net/can/j1939/j1939-priv.h
> +++ b/net/can/j1939/j1939-priv.h
> @@ -86,7 +86,7 @@ struct j1939_priv {
>  	unsigned int tp_max_packet_size;
>  
>  	/* lock for j1939_socks list */
> -	spinlock_t j1939_socks_lock;
> +	rwlock_t j1939_socks_lock;
>  	struct list_head j1939_socks;
>  
>  	struct kref rx_kref;
> diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
> index ecff1c947d68..a6fb89fa6278 100644
> --- a/net/can/j1939/main.c
> +++ b/net/can/j1939/main.c
> @@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
>  		return ERR_PTR(-ENOMEM);
>  
>  	j1939_tp_init(priv);
> -	spin_lock_init(&priv->j1939_socks_lock);
> +	rwlock_init(&priv->j1939_socks_lock);
>  	INIT_LIST_HEAD(&priv->j1939_socks);
>  
>  	mutex_lock(&j1939_netdev_lock);
> diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
> index feaec4ad6d16..a8b981dc2065 100644
> --- a/net/can/j1939/socket.c
> +++ b/net/can/j1939/socket.c
> @@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
>  	jsk->state |= J1939_SOCK_BOUND;
>  	j1939_priv_get(priv);
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	write_lock_bh(&priv->j1939_socks_lock);
>  	list_add_tail(&jsk->list, &priv->j1939_socks);
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	write_unlock_bh(&priv->j1939_socks_lock);
>  }
>  
>  static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
>  {
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	write_lock_bh(&priv->j1939_socks_lock);
>  	list_del_init(&jsk->list);
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	write_unlock_bh(&priv->j1939_socks_lock);
>  
>  	j1939_priv_put(priv);
>  	jsk->state &= ~J1939_SOCK_BOUND;
> @@ -329,13 +329,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
>  	struct j1939_sock *jsk;
>  	bool match = false;
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		match = j1939_sk_recv_match_one(jsk, skcb);
>  		if (match)
>  			break;
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  
>  	return match;
>  }
> @@ -344,11 +344,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
>  {
>  	struct j1939_sock *jsk;
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		j1939_sk_recv_one(jsk, skb);
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  }
>  
>  static void j1939_sk_sock_destruct(struct sock *sk)
> @@ -484,6 +484,7 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>  
>  		priv = j1939_netdev_start(ndev);
>  		dev_put(ndev);
> +
>  		if (IS_ERR(priv)) {
>  			ret = PTR_ERR(priv);
>  			goto out_release_sock;
> @@ -1078,12 +1079,12 @@ void j1939_sk_errqueue(struct j1939_session *session,
>  	}
>  
>  	/* spread RX notifications to all sockets subscribed to this session */
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		if (j1939_sk_recv_match_one(jsk, &session->skcb))
>  			__j1939_sk_errqueue(session, &jsk->sk, type);
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  };
>  
>  void j1939_sk_send_loop_abort(struct sock *sk, int err)
> @@ -1271,7 +1272,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
>  	struct j1939_sock *jsk;
>  	int error_code = ENETDOWN;
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		jsk->sk.sk_err = error_code;
>  		if (!sock_flag(&jsk->sk, SOCK_DEAD))
> @@ -1279,7 +1280,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
>  
>  		j1939_sk_queue_drop_all(priv, jsk, error_code);
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  }
>  
>  static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
> -- 
> 2.34.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock
  2023-07-04  6:47 ` [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock syzbot
@ 2023-07-04  7:37   ` Oleksij Rempel
  0 siblings, 0 replies; 8+ messages in thread
From: Oleksij Rempel @ 2023-07-04  7:37 UTC (permalink / raw)
  To: syzbot
  Cc: astrajoan, davem, edumazet, ivan.orlov0322, kernel, kuba,
	linux-can, linux-kernel, linux, mkl, netdev, pabeni, robin, skhan,
	socketcan, syzkaller-bugs

On Mon, Jul 03, 2023 at 11:47:26PM -0700, syzbot wrote:
> > The following 3 locks would race against each other, causing the
> > deadlock situation in the Syzbot bug report:
> >
> > - j1939_socks_lock
> > - active_session_list_lock
> > - sk_session_queue_lock
> >
> > A reasonable fix is to change j1939_socks_lock to an rwlock, since in
> > the rare situations where a write lock is required for the linked list
> > that j1939_socks_lock is protecting, the code does not attempt to
> > acquire any more locks. This would break the circular lock dependency,
> > where, for example, the current thread already locks j1939_socks_lock
> > and attempts to acquire sk_session_queue_lock, and at the same time,
> > another thread attempts to acquire j1939_socks_lock while holding
> > sk_session_queue_lock.
> >
> > NOTE: This patch along does not fix the unregister_netdevice bug
> > reported by Syzbot; instead, it solves a deadlock situation to prepare
> > for one or more further patches to actually fix the Syzbot bug, which
> > appears to be a reference counting problem within the j1939 codebase.
> >
> > #syz test:
> 
> This crash does not have a reproducer. I cannot test it.
> 

To stress this code path, the socket should be configured with err queue
enabled. For example like this:

        value = 1;
        setsockopt(priv->sock, SOL_CAN_J1939, SO_J1939_ERRQUEUE, &value,
                         sizeof(value));

        sock_opt = SOF_TIMESTAMPING_SOFTWARE |
                   SOF_TIMESTAMPING_OPT_CMSG |
                   SOF_TIMESTAMPING_TX_ACK |
                   SOF_TIMESTAMPING_TX_SCHED |
                   SOF_TIMESTAMPING_OPT_STATS | SOF_TIMESTAMPING_OPT_TSONLY |
                   SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_RX_SOFTWARE;

        setsockopt(priv->sock, SOL_SOCKET, SO_TIMESTAMPING,
                       (char *) &sock_opt, sizeof(sock_opt));


I hope it will help to create the reproducer

Regards,
Oleksij
-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock
       [not found] ` <F17EC83C-9D70-463A-9C46-FBCC53A1F13C@yahoo.com>
@ 2023-07-05  4:39   ` Oleksij Rempel
  2023-07-05  4:50     ` Dmitry Vyukov
  0 siblings, 1 reply; 8+ messages in thread
From: Oleksij Rempel @ 2023-07-05  4:39 UTC (permalink / raw)
  To: Astra Joan
  Cc: davem, edumazet, ivan.orlov0322, kernel, kuba, linux-can,
	linux-kernel, linux, mkl, netdev, pabeni, robin, skhan, socketcan,
	syzbot+1591462f226d9cbf0564, syzkaller-bugs

On Tue, Jul 04, 2023 at 10:55:47AM -0700, Astra Joan wrote:
> Hi Oleksij,
> 
> Thank you for providing help with the bug fix! The patch was created
> when I was working on another bug:
> 
> https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84
> 
> But the patch was not a direct fix of the problem reported in the
> unregister_netdevice function call. Instead, it suppresses potential
> deadlock information to guarantee the real bug would show up. Since I
> have verified that the patch resolved a deadlock situation involving
> the exact same locks, I'm pretty confident it would be a proper fix for
> the current bug in this thread.
> 
> I'm not sure, though, about how I could instruct Syzbot to create a
> reproducer to properly test this patch. Could you or anyone here help
> me find the next step? Thank you so much!

Sorry, I'm not syzbot expert. I hope someone else can help here.

Regards,
Oleksij
-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock
  2023-07-05  4:39   ` Oleksij Rempel
@ 2023-07-05  4:50     ` Dmitry Vyukov
  0 siblings, 0 replies; 8+ messages in thread
From: Dmitry Vyukov @ 2023-07-05  4:50 UTC (permalink / raw)
  To: Oleksij Rempel
  Cc: Astra Joan, davem, edumazet, ivan.orlov0322, kernel, kuba,
	linux-can, linux-kernel, linux, mkl, netdev, pabeni, robin, skhan,
	socketcan, syzbot+1591462f226d9cbf0564, syzkaller-bugs, syzkaller

On Wed, 5 Jul 2023 at 06:40, Oleksij Rempel <o.rempel@pengutronix.de> wrote:
>
> On Tue, Jul 04, 2023 at 10:55:47AM -0700, Astra Joan wrote:
> > Hi Oleksij,
> >
> > Thank you for providing help with the bug fix! The patch was created
> > when I was working on another bug:
> >
> > https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84
> >
> > But the patch was not a direct fix of the problem reported in the
> > unregister_netdevice function call. Instead, it suppresses potential
> > deadlock information to guarantee the real bug would show up. Since I
> > have verified that the patch resolved a deadlock situation involving
> > the exact same locks, I'm pretty confident it would be a proper fix for
> > the current bug in this thread.
> >
> > I'm not sure, though, about how I could instruct Syzbot to create a
> > reproducer to properly test this patch. Could you or anyone here help
> > me find the next step? Thank you so much!
>
> Sorry, I'm not syzbot expert. I hope someone else can help here.

+syzkaller mailing list

Hi Astra,

You mean you have a reproducer and you want syzbot to run it with your patch?
No such feature exists at the moment.

Presumably you already run it locally, so I am not sure syzbot can add
much value here.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock
       [not found] ` <20230712004750.2476-1-astrajoan@yahoo.com>
@ 2023-07-13 22:23   ` Stephen Hemminger
  0 siblings, 0 replies; 8+ messages in thread
From: Stephen Hemminger @ 2023-07-13 22:23 UTC (permalink / raw)
  To: Ziqi Zhao
  Cc: syzbot+1591462f226d9cbf0564, davem, dvyukov, edumazet,
	ivan.orlov0322, kernel, kuba, linux-can, linux-kernel, linux, mkl,
	netdev, o.rempel, pabeni, robin, skhan, socketcan, syzkaller-bugs,
	syzkaller

On Tue, 11 Jul 2023 17:47:50 -0700
Ziqi Zhao <astrajoan@yahoo.com> wrote:

> The following 3 locks would race against each other, causing the
> deadlock situation in the Syzbot bug report:
> 
> - j1939_socks_lock
> - active_session_list_lock
> - sk_session_queue_lock
> 
> A reasonable fix is to change j1939_socks_lock to an rwlock, since in
> the rare situations where a write lock is required for the linked list
> that j1939_socks_lock is protecting, the code does not attempt to
> acquire any more locks. This would break the circular lock dependency,
> where, for example, the current thread already locks j1939_socks_lock
> and attempts to acquire sk_session_queue_lock, and at the same time,
> another thread attempts to acquire j1939_socks_lock while holding
> sk_session_queue_lock.
> 
> NOTE: This patch along does not fix the unregister_netdevice bug
> reported by Syzbot; instead, it solves a deadlock situation to prepare
> for one or more further patches to actually fix the Syzbot bug, which
> appears to be a reference counting problem within the j1939 codebase.
> 
> #syz test:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
> 
> Signed-off-by: Ziqi Zhao <astrajoan@yahoo.com>
> ---

Reader-writer locks are not the best way to fix a lock hierarchy problem.
Instead either fix the lock ordering, or use RCU.

Other devices don't have this problem, so perhaps the unique locking
in this device is the problem.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock
       [not found] ` <20230721162226.8639-1-astrajoan@yahoo.com>
@ 2023-07-23 15:41   ` Oleksij Rempel
  2023-08-07  4:46   ` Oleksij Rempel
  1 sibling, 0 replies; 8+ messages in thread
From: Oleksij Rempel @ 2023-07-23 15:41 UTC (permalink / raw)
  To: Ziqi Zhao
  Cc: davem, edumazet, ivan.orlov0322, kernel, kuba, linux, linux-can,
	mkl, pabeni, robin, skhan, socketcan, arnd, bridge, linux-kernel,
	mudongliangabcd, netdev, nikolay, roopa,
	syzbot+881d65229ca4f9ae8c84, syzkaller-bugs,
	syzbot+1591462f226d9cbf0564

Hi,

Thank you for you patch. Right now I'm on vacation, I'll to take a look
on it as soon as possible. If i do not response for more then 3 weeks,
please ping me.

On Fri, Jul 21, 2023 at 09:22:26AM -0700, Ziqi Zhao wrote:
> The following 3 locks would race against each other, causing the
> deadlock situation in the Syzbot bug report:
> 
> - j1939_socks_lock
> - active_session_list_lock
> - sk_session_queue_lock
> 
> A reasonable fix is to change j1939_socks_lock to an rwlock, since in
> the rare situations where a write lock is required for the linked list
> that j1939_socks_lock is protecting, the code does not attempt to
> acquire any more locks. This would break the circular lock dependency,
> where, for example, the current thread already locks j1939_socks_lock
> and attempts to acquire sk_session_queue_lock, and at the same time,
> another thread attempts to acquire j1939_socks_lock while holding
> sk_session_queue_lock.
> 
> NOTE: This patch along does not fix the unregister_netdevice bug
> reported by Syzbot; instead, it solves a deadlock situation to prepare
> for one or more further patches to actually fix the Syzbot bug, which
> appears to be a reference counting problem within the j1939 codebase.
> 
> Reported-by: syzbot+1591462f226d9cbf0564@syzkaller.appspotmail.com
> Signed-off-by: Ziqi Zhao <astrajoan@yahoo.com>
> ---
>  net/can/j1939/j1939-priv.h |  2 +-
>  net/can/j1939/main.c       |  2 +-
>  net/can/j1939/socket.c     | 25 +++++++++++++------------
>  3 files changed, 15 insertions(+), 14 deletions(-)
> 
> diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
> index 16af1a7f80f6..74f15592d170 100644
> --- a/net/can/j1939/j1939-priv.h
> +++ b/net/can/j1939/j1939-priv.h
> @@ -86,7 +86,7 @@ struct j1939_priv {
>  	unsigned int tp_max_packet_size;
>  
>  	/* lock for j1939_socks list */
> -	spinlock_t j1939_socks_lock;
> +	rwlock_t j1939_socks_lock;
>  	struct list_head j1939_socks;
>  
>  	struct kref rx_kref;
> diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
> index ecff1c947d68..a6fb89fa6278 100644
> --- a/net/can/j1939/main.c
> +++ b/net/can/j1939/main.c
> @@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
>  		return ERR_PTR(-ENOMEM);
>  
>  	j1939_tp_init(priv);
> -	spin_lock_init(&priv->j1939_socks_lock);
> +	rwlock_init(&priv->j1939_socks_lock);
>  	INIT_LIST_HEAD(&priv->j1939_socks);
>  
>  	mutex_lock(&j1939_netdev_lock);
> diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
> index feaec4ad6d16..a8b981dc2065 100644
> --- a/net/can/j1939/socket.c
> +++ b/net/can/j1939/socket.c
> @@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
>  	jsk->state |= J1939_SOCK_BOUND;
>  	j1939_priv_get(priv);
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	write_lock_bh(&priv->j1939_socks_lock);
>  	list_add_tail(&jsk->list, &priv->j1939_socks);
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	write_unlock_bh(&priv->j1939_socks_lock);
>  }
>  
>  static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
>  {
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	write_lock_bh(&priv->j1939_socks_lock);
>  	list_del_init(&jsk->list);
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	write_unlock_bh(&priv->j1939_socks_lock);
>  
>  	j1939_priv_put(priv);
>  	jsk->state &= ~J1939_SOCK_BOUND;
> @@ -329,13 +329,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
>  	struct j1939_sock *jsk;
>  	bool match = false;
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		match = j1939_sk_recv_match_one(jsk, skcb);
>  		if (match)
>  			break;
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  
>  	return match;
>  }
> @@ -344,11 +344,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
>  {
>  	struct j1939_sock *jsk;
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		j1939_sk_recv_one(jsk, skb);
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  }
>  
>  static void j1939_sk_sock_destruct(struct sock *sk)
> @@ -484,6 +484,7 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>  
>  		priv = j1939_netdev_start(ndev);
>  		dev_put(ndev);
> +
>  		if (IS_ERR(priv)) {
>  			ret = PTR_ERR(priv);
>  			goto out_release_sock;
> @@ -1078,12 +1079,12 @@ void j1939_sk_errqueue(struct j1939_session *session,
>  	}
>  
>  	/* spread RX notifications to all sockets subscribed to this session */
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		if (j1939_sk_recv_match_one(jsk, &session->skcb))
>  			__j1939_sk_errqueue(session, &jsk->sk, type);
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  };
>  
>  void j1939_sk_send_loop_abort(struct sock *sk, int err)
> @@ -1271,7 +1272,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
>  	struct j1939_sock *jsk;
>  	int error_code = ENETDOWN;
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		jsk->sk.sk_err = error_code;
>  		if (!sock_flag(&jsk->sk, SOCK_DEAD))
> @@ -1279,7 +1280,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
>  
>  		j1939_sk_queue_drop_all(priv, jsk, error_code);
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  }
>  
>  static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
> -- 
> 2.34.1
> 
> 
> 

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock
       [not found] ` <20230721162226.8639-1-astrajoan@yahoo.com>
  2023-07-23 15:41   ` Oleksij Rempel
@ 2023-08-07  4:46   ` Oleksij Rempel
  2023-11-17  8:10     ` Oleksij Rempel
  1 sibling, 1 reply; 8+ messages in thread
From: Oleksij Rempel @ 2023-08-07  4:46 UTC (permalink / raw)
  To: Ziqi Zhao
  Cc: davem, edumazet, ivan.orlov0322, kernel, kuba, linux, linux-can,
	mkl, pabeni, robin, skhan, socketcan, arnd, netdev, bridge,
	syzkaller-bugs, linux-kernel, mudongliangabcd, nikolay,
	syzbot+1591462f226d9cbf0564, roopa, syzbot+881d65229ca4f9ae8c84

On Fri, Jul 21, 2023 at 09:22:26AM -0700, Ziqi Zhao wrote:
> The following 3 locks would race against each other, causing the
> deadlock situation in the Syzbot bug report:
> 
> - j1939_socks_lock
> - active_session_list_lock
> - sk_session_queue_lock
> 
> A reasonable fix is to change j1939_socks_lock to an rwlock, since in
> the rare situations where a write lock is required for the linked list
> that j1939_socks_lock is protecting, the code does not attempt to
> acquire any more locks. This would break the circular lock dependency,
> where, for example, the current thread already locks j1939_socks_lock
> and attempts to acquire sk_session_queue_lock, and at the same time,
> another thread attempts to acquire j1939_socks_lock while holding
> sk_session_queue_lock.
> 
> NOTE: This patch along does not fix the unregister_netdevice bug
> reported by Syzbot; instead, it solves a deadlock situation to prepare
> for one or more further patches to actually fix the Syzbot bug, which
> appears to be a reference counting problem within the j1939 codebase.
> 
> Reported-by: syzbot+1591462f226d9cbf0564@syzkaller.appspotmail.com
> Signed-off-by: Ziqi Zhao <astrajoan@yahoo.com>

Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>

Thank you!

> ---
>  net/can/j1939/j1939-priv.h |  2 +-
>  net/can/j1939/main.c       |  2 +-
>  net/can/j1939/socket.c     | 25 +++++++++++++------------
>  3 files changed, 15 insertions(+), 14 deletions(-)
> 
> diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
> index 16af1a7f80f6..74f15592d170 100644
> --- a/net/can/j1939/j1939-priv.h
> +++ b/net/can/j1939/j1939-priv.h
> @@ -86,7 +86,7 @@ struct j1939_priv {
>  	unsigned int tp_max_packet_size;
>  
>  	/* lock for j1939_socks list */
> -	spinlock_t j1939_socks_lock;
> +	rwlock_t j1939_socks_lock;
>  	struct list_head j1939_socks;
>  
>  	struct kref rx_kref;
> diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
> index ecff1c947d68..a6fb89fa6278 100644
> --- a/net/can/j1939/main.c
> +++ b/net/can/j1939/main.c
> @@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
>  		return ERR_PTR(-ENOMEM);
>  
>  	j1939_tp_init(priv);
> -	spin_lock_init(&priv->j1939_socks_lock);
> +	rwlock_init(&priv->j1939_socks_lock);
>  	INIT_LIST_HEAD(&priv->j1939_socks);
>  
>  	mutex_lock(&j1939_netdev_lock);
> diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
> index feaec4ad6d16..a8b981dc2065 100644
> --- a/net/can/j1939/socket.c
> +++ b/net/can/j1939/socket.c
> @@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
>  	jsk->state |= J1939_SOCK_BOUND;
>  	j1939_priv_get(priv);
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	write_lock_bh(&priv->j1939_socks_lock);
>  	list_add_tail(&jsk->list, &priv->j1939_socks);
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	write_unlock_bh(&priv->j1939_socks_lock);
>  }
>  
>  static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
>  {
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	write_lock_bh(&priv->j1939_socks_lock);
>  	list_del_init(&jsk->list);
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	write_unlock_bh(&priv->j1939_socks_lock);
>  
>  	j1939_priv_put(priv);
>  	jsk->state &= ~J1939_SOCK_BOUND;
> @@ -329,13 +329,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
>  	struct j1939_sock *jsk;
>  	bool match = false;
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		match = j1939_sk_recv_match_one(jsk, skcb);
>  		if (match)
>  			break;
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  
>  	return match;
>  }
> @@ -344,11 +344,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
>  {
>  	struct j1939_sock *jsk;
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		j1939_sk_recv_one(jsk, skb);
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  }
>  
>  static void j1939_sk_sock_destruct(struct sock *sk)
> @@ -484,6 +484,7 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>  
>  		priv = j1939_netdev_start(ndev);
>  		dev_put(ndev);
> +
>  		if (IS_ERR(priv)) {
>  			ret = PTR_ERR(priv);
>  			goto out_release_sock;
> @@ -1078,12 +1079,12 @@ void j1939_sk_errqueue(struct j1939_session *session,
>  	}
>  
>  	/* spread RX notifications to all sockets subscribed to this session */
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		if (j1939_sk_recv_match_one(jsk, &session->skcb))
>  			__j1939_sk_errqueue(session, &jsk->sk, type);
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  };
>  
>  void j1939_sk_send_loop_abort(struct sock *sk, int err)
> @@ -1271,7 +1272,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
>  	struct j1939_sock *jsk;
>  	int error_code = ENETDOWN;
>  
> -	spin_lock_bh(&priv->j1939_socks_lock);
> +	read_lock_bh(&priv->j1939_socks_lock);
>  	list_for_each_entry(jsk, &priv->j1939_socks, list) {
>  		jsk->sk.sk_err = error_code;
>  		if (!sock_flag(&jsk->sk, SOCK_DEAD))
> @@ -1279,7 +1280,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
>  
>  		j1939_sk_queue_drop_all(priv, jsk, error_code);
>  	}
> -	spin_unlock_bh(&priv->j1939_socks_lock);
> +	read_unlock_bh(&priv->j1939_socks_lock);
>  }
>  
>  static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
> -- 
> 2.34.1
> 
> 
> 

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock
  2023-08-07  4:46   ` Oleksij Rempel
@ 2023-11-17  8:10     ` Oleksij Rempel
  0 siblings, 0 replies; 8+ messages in thread
From: Oleksij Rempel @ 2023-11-17  8:10 UTC (permalink / raw)
  To: Ziqi Zhao
  Cc: ivan.orlov0322, edumazet, syzbot+881d65229ca4f9ae8c84, socketcan,
	bridge, nikolay, syzbot+1591462f226d9cbf0564, roopa, kuba, pabeni,
	arnd, syzkaller-bugs, mudongliangabcd, linux-can, mkl, skhan,
	robin, linux-kernel, linux, kernel, netdev, davem

On Mon, Aug 07, 2023 at 06:46:34AM +0200, Oleksij Rempel wrote:
> On Fri, Jul 21, 2023 at 09:22:26AM -0700, Ziqi Zhao wrote:
> > The following 3 locks would race against each other, causing the
> > deadlock situation in the Syzbot bug report:
> > 
> > - j1939_socks_lock
> > - active_session_list_lock
> > - sk_session_queue_lock
> > 
> > A reasonable fix is to change j1939_socks_lock to an rwlock, since in
> > the rare situations where a write lock is required for the linked list
> > that j1939_socks_lock is protecting, the code does not attempt to
> > acquire any more locks. This would break the circular lock dependency,
> > where, for example, the current thread already locks j1939_socks_lock
> > and attempts to acquire sk_session_queue_lock, and at the same time,
> > another thread attempts to acquire j1939_socks_lock while holding
> > sk_session_queue_lock.
> > 
> > NOTE: This patch along does not fix the unregister_netdevice bug
> > reported by Syzbot; instead, it solves a deadlock situation to prepare
> > for one or more further patches to actually fix the Syzbot bug, which
> > appears to be a reference counting problem within the j1939 codebase.
> > 
> > Reported-by: syzbot+1591462f226d9cbf0564@syzkaller.appspotmail.com
> > Signed-off-by: Ziqi Zhao <astrajoan@yahoo.com>

Reviewed-by: Oleksij Rempel <o.rempel@pengutronix.de>

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-11-17  8:10 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20230704064710.3189-1-astrajoan@yahoo.com>
2023-07-04  6:47 ` [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock syzbot
2023-07-04  7:37   ` Oleksij Rempel
     [not found] ` <20230721162226.8639-1-astrajoan@yahoo.com>
2023-07-23 15:41   ` Oleksij Rempel
2023-08-07  4:46   ` Oleksij Rempel
2023-11-17  8:10     ` Oleksij Rempel
     [not found] <F17EC83C-9D70-463A-9C46-FBCC53A1F13C.ref@yahoo.com>
     [not found] ` <F17EC83C-9D70-463A-9C46-FBCC53A1F13C@yahoo.com>
2023-07-05  4:39   ` Oleksij Rempel
2023-07-05  4:50     ` Dmitry Vyukov
2023-07-10 17:53 [syzbot] [can?] possible deadlock in j1939_sk_errqueue (2) syzbot
     [not found] ` <20230712004750.2476-1-astrajoan@yahoo.com>
2023-07-13 22:23   ` [PATCH] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).