All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vlad Buslov <vladbu@mellanox.com>
To: "wenxu@ucloud.cn" <wenxu@ucloud.cn>,
	Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: David Miller <davem@davemloft.net>, Jiri Pirko <jiri@resnulli.us>,
	"pablo@netfilter.org" <pablo@netfilter.org>,
	"netfilter-devel@vger.kernel.org"
	<netfilter-devel@vger.kernel.org>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>
Subject: Re: [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
Date: Mon, 12 Aug 2019 14:11:56 +0000	[thread overview]
Message-ID: <vbfimr2o4ly.fsf@mellanox.com> (raw)
In-Reply-To: <1565140434-8109-6-git-send-email-wenxu@ucloud.cn>

On Wed 07 Aug 2019 at 04:13, wenxu@ucloud.cn wrote:
> From: wenxu <wenxu@ucloud.cn>
>
> It provide a callback list to find the blocks of tc
> and nft subsystems
>
> Signed-off-by: wenxu <wenxu@ucloud.cn>
> Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> ---
> v7: add a mutex lock for add/del flow_indr_block_ing_cb
>
>  include/net/flow_offload.h | 10 ++++++++-
>  net/core/flow_offload.c    | 51 ++++++++++++++++++++++++++++++++++------------
>  net/sched/cls_api.c        |  9 +++++++-
>  3 files changed, 55 insertions(+), 15 deletions(-)
>
> diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
> index 46b8777..e8069b6 100644
> --- a/include/net/flow_offload.h
> +++ b/include/net/flow_offload.h
> @@ -379,6 +379,15 @@ typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
>  					void *cb_priv,
>  					enum flow_block_command command);
>  
> +struct flow_indr_block_ing_entry {
> +	flow_indr_block_ing_cmd_t *cb;
> +	struct list_head	list;
> +};
> +
> +void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry);
> +
> +void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry);
> +
>  int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
>  				  flow_indr_block_bind_cb_t *cb,
>  				  void *cb_ident);
> @@ -395,7 +404,6 @@ void flow_indr_block_cb_unregister(struct net_device *dev,
>  				   void *cb_ident);
>  
>  void flow_indr_block_call(struct net_device *dev,
> -			  flow_indr_block_ing_cmd_t *cb,
>  			  struct flow_block_offload *bo,
>  			  enum flow_block_command command);
>  
> diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
> index 4cc18e4..64c3d4d 100644
> --- a/net/core/flow_offload.c
> +++ b/net/core/flow_offload.c
> @@ -3,6 +3,7 @@
>  #include <linux/slab.h>
>  #include <net/flow_offload.h>
>  #include <linux/rtnetlink.h>
> +#include <linux/mutex.h>
>  
>  struct flow_rule *flow_rule_alloc(unsigned int num_actions)
>  {
> @@ -282,6 +283,8 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
>  }
>  EXPORT_SYMBOL(flow_block_cb_setup_simple);
>  
> +static LIST_HEAD(block_ing_cb_list);
> +
>  static struct rhashtable indr_setup_block_ht;
>  
>  struct flow_indr_block_cb {
> @@ -295,7 +298,6 @@ struct flow_indr_block_dev {
>  	struct rhash_head ht_node;
>  	struct net_device *dev;
>  	unsigned int refcnt;
> -	flow_indr_block_ing_cmd_t  *block_ing_cmd_cb;
>  	struct list_head cb_list;
>  };
>  
> @@ -389,6 +391,20 @@ static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
>  	kfree(indr_block_cb);
>  }
>  
> +static void flow_block_ing_cmd(struct net_device *dev,
> +			       flow_indr_block_bind_cb_t *cb,
> +			       void *cb_priv,
> +			       enum flow_block_command command)
> +{
> +	struct flow_indr_block_ing_entry *entry;
> +
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
> +		entry->cb(dev, cb, cb_priv, command);
> +	}
> +	rcu_read_unlock();
> +}

Hi,

I'm getting the following suspicious RCU usage warning with this patch,
caused by the rcu_read_lock() in flow_block_ing_cmd():

[  401.510948] =============================
[  401.510952] WARNING: suspicious RCU usage
[  401.510993] 5.3.0-rc3+ #589 Not tainted
[  401.510996] -----------------------------
[  401.511001] include/linux/rcupdate.h:265 Illegal context switch in RCU read-side critical section!
[  401.511004]
               other info that might help us debug this:

[  401.511008]
               rcu_scheduler_active = 2, debug_locks = 1
[  401.511012] 7 locks held by test-ecmp-add-v/7576:
[  401.511015]  #0: 00000000081d71a5 (sb_writers#4){.+.+}, at: vfs_write+0x166/0x1d0
[  401.511037]  #1: 000000002bd338c3 (&of->mutex){+.+.}, at: kernfs_fop_write+0xef/0x1b0
[  401.511051]  #2: 00000000c921c634 (kn->count#317){.+.+}, at: kernfs_fop_write+0xf7/0x1b0
[  401.511062]  #3: 00000000a19cdd56 (&dev->mutex){....}, at: sriov_numvfs_store+0x6b/0x130
[  401.511079]  #4: 000000005425fa52 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x30/0x140
[  401.511092]  #5: 00000000c5822793 (rtnl_mutex){+.+.}, at: unregister_netdevice_notifier+0x35/0x140
[  401.511101]  #6: 00000000c2f3507e (rcu_read_lock){....}, at: flow_block_ing_cmd+0x5/0x130
[  401.511115]
               stack backtrace:
[  401.511121] CPU: 21 PID: 7576 Comm: test-ecmp-add-v Not tainted 5.3.0-rc3+ #589
[  401.511124] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[  401.511127] Call Trace:
[  401.511138]  dump_stack+0x85/0xc0
[  401.511146]  ___might_sleep+0x100/0x180
[  401.511154]  __mutex_lock+0x5b/0x960
[  401.511162]  ? find_held_lock+0x2b/0x80
[  401.511173]  ? __tcf_get_next_chain+0x1d/0xb0
[  401.511179]  ? mark_held_locks+0x49/0x70
[  401.511194]  ? __tcf_get_next_chain+0x1d/0xb0
[  401.511198]  __tcf_get_next_chain+0x1d/0xb0
[  401.511251]  ? uplink_rep_async_event+0x70/0x70 [mlx5_core]
[  401.511261]  tcf_block_playback_offloads+0x39/0x160
[  401.511276]  tcf_block_setup+0x1b0/0x240
[  401.511312]  ? mlx5e_rep_indr_setup_tc_cb+0xca/0x290 [mlx5_core]
[  401.511347]  ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[  401.511359]  tc_indr_block_get_and_ing_cmd+0x11b/0x1e0
[  401.511404]  ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[  401.511414]  flow_block_ing_cmd+0x7e/0x130
[  401.511453]  ? mlx5e_rep_indr_tc_block_unbind+0x50/0x50 [mlx5_core]
[  401.511462]  __flow_indr_block_cb_unregister+0x7f/0xf0
[  401.511502]  mlx5e_nic_rep_netdevice_event+0x75/0xb0 [mlx5_core]
[  401.511513]  unregister_netdevice_notifier+0xe9/0x140
[  401.511554]  mlx5e_cleanup_rep_tx+0x6f/0xe0 [mlx5_core]
[  401.511597]  mlx5e_detach_netdev+0x4b/0x60 [mlx5_core]
[  401.511637]  mlx5e_vport_rep_unload+0x71/0xc0 [mlx5_core]
[  401.511679]  esw_offloads_disable+0x5b/0x90 [mlx5_core]
[  401.511724]  mlx5_eswitch_disable.cold+0xdf/0x176 [mlx5_core]
[  401.511759]  mlx5_device_disable_sriov+0xab/0xb0 [mlx5_core]
[  401.511794]  mlx5_core_sriov_configure+0xaf/0xd0 [mlx5_core]
[  401.511805]  sriov_numvfs_store+0xf8/0x130
[  401.511817]  kernfs_fop_write+0x122/0x1b0
[  401.511826]  vfs_write+0xdb/0x1d0
[  401.511835]  ksys_write+0x65/0xe0
[  401.511847]  do_syscall_64+0x5c/0xb0
[  401.511857]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  401.511862] RIP: 0033:0x7fad892d30f8
[  401.511868] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 96 0d 00 8b 00 85 c0 75 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 60 c3 0f 1f 80 00 00 00 00 48 83
 ec 28 48 89
[  401.511871] RSP: 002b:00007ffca2a9fad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  401.511875] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fad892d30f8
[  401.511878] RDX: 0000000000000002 RSI: 000055afeb072a90 RDI: 0000000000000001
[  401.511881] RBP: 000055afeb072a90 R08: 00000000ffffffff R09: 000000000000000a
[  401.511884] R10: 000055afeb058710 R11: 0000000000000246 R12: 0000000000000002
[  401.511887] R13: 00007fad893a8780 R14: 0000000000000002 R15: 00007fad893a3740

I don't think it is the correct approach to call these callbacks under
RCU protection, because:

- Cls API uses sleeping locks that cannot be used in rcu read section
  (hence the included trace).

- It assumes that no implementation of the classifier ops reoffload()
  callback sleeps.

- It also assumes that no driver offload callback (for either block or
  classifier setup) sleeps, which is not the case.

I don't see any straightforward way to fix this, besides using some
other locking mechanism to protect block_ing_cb_list.

Regards,
Vlad

  reply	other threads:[~2019-08-12 14:12 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-07  1:13 [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload wenxu
2019-08-07  1:13 ` [PATCH net-next v7 1/6] cls_api: modify the tc_indr_block_ing_cmd parameters wenxu
2019-08-07  1:13 ` [PATCH net-next v7 2/6] cls_api: remove the tcf_block cache wenxu
2019-08-07  1:13 ` [PATCH net-next v7 3/6] cls_api: add flow_indr_block_call function wenxu
2019-08-07  1:13 ` [PATCH net-next v7 4/6] flow_offload: move tc indirect block to flow offload wenxu
2019-08-07  1:13 ` [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block wenxu
2019-08-12 14:11   ` Vlad Buslov [this message]
2019-08-14  2:50     ` wenxu
2019-08-16 15:04       ` Vlad Buslov
2019-08-16 17:56         ` Jakub Kicinski
2019-08-16 18:44           ` Vlad Buslov
2019-08-19  7:26           ` Vlad Buslov
2019-08-19 20:27             ` Jakub Kicinski
2019-08-07  1:13 ` [PATCH net-next v7 6/6] netfilter: nf_tables_offload: support indr block call wenxu
2019-08-09  1:44 ` [PATCH net-next v7 0/6] flow_offload: add indr-block in nf_table_offload David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=vbfimr2o4ly.fsf@mellanox.com \
    --to=vladbu@mellanox.com \
    --cc=davem@davemloft.net \
    --cc=jakub.kicinski@netronome.com \
    --cc=jiri@resnulli.us \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pablo@netfilter.org \
    --cc=wenxu@ucloud.cn \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.