netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Lorenzo Bianconi <lorenzo@kernel.org>
To: Florian Westphal <fw@strlen.de>
Cc: netfilter-devel@vger.kernel.org, netdev@vger.kernel.org
Subject: Re: [PATCH nf-next 7/8] netfilter: nf_tables: add flowtable map for xdp offload
Date: Tue, 21 Nov 2023 15:25:58 +0100	[thread overview]
Message-ID: <ZVy99gga2EnjnTWP@lore-desk> (raw)
In-Reply-To: <20231121122800.13521-8-fw@strlen.de>

[-- Attachment #1: Type: text/plain, Size: 7529 bytes --]

> This adds a small internal mapping table so that a new bpf (xdp) kfunc
> can perform lookups in a flowtable.
> 
> As-is, an xdp program has access to the device pointer, but no way to do a
> lookup in a flowtable -- there is no way to obtain the needed struct
> without questionable stunts.
> 
> This allows obtaining an nf_flowtable pointer given a net_device
> structure.
> 
> A device cannot be added to multiple flowtables; the mapping needs
> to be unique.  This is enforced when a flowtable with the
> NF_FLOWTABLE_XDP_OFFLOAD flag is added.
> 
> Exposure of this NF_FLOWTABLE_XDP_OFFLOAD in UAPI could be avoided,
> iff the 'net_device maps to 0 or 1 flowtable' paradigm is enforced
> regardless of offload-or-not flag.
> 
> HOWEVER, that does break existing behaviour.
> 
> An alternative would be to repurpose the hw offload flag by allowing
> XDP fallback when hw offload cannot be done due to lack of ndo
> callbacks.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>

Tested-by: Lorenzo Bianconi <lorenzo@kernel.org>

> ---
>  include/net/netfilter/nf_flow_table.h |   7 ++
>  net/netfilter/nf_flow_table_offload.c | 131 +++++++++++++++++++++++++-
>  net/netfilter/nf_tables_api.c         |   3 +-
>  3 files changed, 139 insertions(+), 2 deletions(-)
> 
> diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
> index 11985d9b8370..b8b7fcb98732 100644
> --- a/include/net/netfilter/nf_flow_table.h
> +++ b/include/net/netfilter/nf_flow_table.h
> @@ -93,6 +93,11 @@ static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
>  	return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
>  }
>  
> +static inline bool nf_flowtable_xdp_offload(struct nf_flowtable *flowtable)
> +{
> +	return flowtable->flags & NF_FLOWTABLE_XDP_OFFLOAD;
> +}
> +
>  enum flow_offload_tuple_dir {
>  	FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
>  	FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
> @@ -299,6 +304,8 @@ struct flow_ports {
>  	__be16 source, dest;
>  };
>  
> +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev);
> +
>  unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
>  				     const struct nf_hook_state *state);
>  unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
> diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
> index a010b25076ca..9ec7aa4ad2e5 100644
> --- a/net/netfilter/nf_flow_table_offload.c
> +++ b/net/netfilter/nf_flow_table_offload.c
> @@ -17,6 +17,92 @@ static struct workqueue_struct *nf_flow_offload_add_wq;
>  static struct workqueue_struct *nf_flow_offload_del_wq;
>  static struct workqueue_struct *nf_flow_offload_stats_wq;
>  
> +struct flow_offload_xdp {
> +	struct hlist_node hnode;
> +
> +	unsigned long net_device_addr;
> +	struct nf_flowtable *ft;
> +
> +	struct rcu_head	rcuhead;
> +};
> +
> +#define NF_XDP_HT_BITS	4
> +static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS);
> +static DEFINE_MUTEX(nf_xdp_hashtable_lock);
> +
> +/* caller must hold rcu read lock */
> +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev)
> +{
> +	unsigned long key = (unsigned long)dev;
> +	const struct flow_offload_xdp *cur;
> +
> +	hash_for_each_possible_rcu(nf_xdp_hashtable, cur, hnode, key) {
> +		if (key == cur->net_device_addr)
> +			return cur->ft;
> +	}
> +
> +	return NULL;
> +}
> +
> +static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft,
> +				      const struct net_device *dev)
> +{
> +	unsigned long key = (unsigned long)dev;
> +	struct flow_offload_xdp *cur;
> +	int err = 0;
> +
> +	mutex_lock(&nf_xdp_hashtable_lock);
> +	hash_for_each_possible(nf_xdp_hashtable, cur, hnode, key) {
> +		if (key != cur->net_device_addr)
> +			continue;
> +		err = -EEXIST;
> +		break;
> +	}
> +
> +	if (err == 0) {
> +		struct flow_offload_xdp *new;
> +
> +		new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
> +		if (new) {
> +			new->net_device_addr = key;
> +			new->ft = ft;
> +
> +			hash_add_rcu(nf_xdp_hashtable, &new->hnode, key);
> +		} else {
> +			err = -ENOMEM;
> +		}
> +	}
> +
> +	mutex_unlock(&nf_xdp_hashtable_lock);
> +
> +	DEBUG_NET_WARN_ON_ONCE(err == 0 && nf_flowtable_by_dev(dev) != ft);
> +
> +	return err;
> +}
> +
> +static void nf_flowtable_by_dev_remove(const struct net_device *dev)
> +{
> +	unsigned long key = (unsigned long)dev;
> +	struct flow_offload_xdp *cur;
> +	bool found = false;
> +
> +	mutex_lock(&nf_xdp_hashtable_lock);
> +
> +	hash_for_each_possible(nf_xdp_hashtable, cur, hnode, key) {
> +		if (key != cur->net_device_addr)
> +			continue;
> +
> +		hash_del_rcu(&cur->hnode);
> +		kfree_rcu(cur, rcuhead);
> +		found = true;
> +		break;
> +	}
> +
> +	mutex_unlock(&nf_xdp_hashtable_lock);
> +
> +	WARN_ON_ONCE(!found);
> +}
> +
>  struct flow_offload_work {
>  	struct list_head	list;
>  	enum flow_cls_command	cmd;
> @@ -1183,6 +1269,44 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
>  	return 0;
>  }
>  
> +static int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
> +				     struct net_device *dev,
> +				     enum flow_block_command cmd)
> +{
> +	if (!nf_flowtable_xdp_offload(flowtable))
> +		return 0;
> +
> +	switch (cmd) {
> +	case FLOW_BLOCK_BIND:
> +		return nf_flowtable_by_dev_insert(flowtable, dev);
> +	case FLOW_BLOCK_UNBIND:
> +		nf_flowtable_by_dev_remove(dev);
> +		return 0;
> +	}
> +
> +	WARN_ON_ONCE(1);
> +	return 0;
> +}
> +
> +static void nf_flow_offload_xdp_cancel(struct nf_flowtable *flowtable,
> +				       struct net_device *dev,
> +				       enum flow_block_command cmd)
> +{
> +	if (!nf_flowtable_xdp_offload(flowtable))
> +		return;
> +
> +	switch (cmd) {
> +	case FLOW_BLOCK_BIND:
> +		nf_flowtable_by_dev_remove(dev);
> +		return;
> +	case FLOW_BLOCK_UNBIND:
> +		/* We do not re-bind in case hw offload would report error
> +		 * on *unregister*.
> +		 */
> +		break;
> +	}
> +}
> +
>  int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
>  				struct net_device *dev,
>  				enum flow_block_command cmd)
> @@ -1191,6 +1315,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
>  	struct flow_block_offload bo;
>  	int err;
>  
> +	if (nf_flow_offload_xdp_setup(flowtable, dev, cmd))
> +		return -EBUSY;
> +
>  	if (!nf_flowtable_hw_offload(flowtable))
>  		return 0;
>  
> @@ -1200,8 +1327,10 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
>  	else
>  		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
>  						     &extack);
> -	if (err < 0)
> +	if (err < 0) {
> +		nf_flow_offload_xdp_cancel(flowtable, dev, cmd);
>  		return err;
> +	}
>  
>  	return nf_flow_table_block_setup(flowtable, &bo, cmd);
>  }
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index 4e21311ec768..223ca4d0e2a5 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -8198,7 +8198,8 @@ static bool nft_flowtable_offload_clash(struct net *net,
>  	const struct nft_table *table;
>  
>  	/* No offload requested, no need to validate */
> -	if (!nf_flowtable_hw_offload(flowtable->ft))
> +	if (!nf_flowtable_hw_offload(flowtable->ft) &&
> +	    !nf_flowtable_xdp_offload(flowtable->ft))
>  		return false;
>  
>  	nft_net = nft_pernet(net);
> -- 
> 2.41.0
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

  reply	other threads:[~2023-11-21 14:26 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-21 12:27 [PATCH nf-next 0/8] netfilter: make nf_flowtable lifetime differ from container struct Florian Westphal
2023-11-21 12:27 ` [PATCH nf-next 1/8] netfilter: flowtable: move nf_flowtable out of container structures Florian Westphal
2023-11-23 13:52   ` Simon Horman
2023-11-23 14:10     ` Florian Westphal
2023-11-25  8:26       ` Simon Horman
2023-11-25  8:36         ` Simon Horman
2023-11-21 12:27 ` [PATCH nf-next 2/8] netfilter: nf_flowtable: replace init callback with a create one Florian Westphal
2023-11-21 12:27 ` [PATCH nf-next 3/8] netfilter: nf_flowtable: make free a real free function Florian Westphal
2023-11-21 12:27 ` [PATCH nf-next 4/8] netfilter: nf_flowtable: delay flowtable release a second time Florian Westphal
2023-11-21 12:27 ` [PATCH nf-next 5/8] netfilter: nf_tables: reject flowtable hw offload for same device Florian Westphal
2023-11-21 12:27 ` [PATCH nf-next 6/8] netfilter: nf_tables: add xdp offload flag Florian Westphal
2023-11-21 12:27 ` [PATCH nf-next 7/8] netfilter: nf_tables: add flowtable map for xdp offload Florian Westphal
2023-11-21 14:25   ` Lorenzo Bianconi [this message]
2023-11-24 10:59   ` Toke Høiland-Jørgensen
2023-11-30 13:53     ` Florian Westphal
2023-11-30 14:17       ` Toke Høiland-Jørgensen
2023-11-21 12:27 ` [PATCH nf-next 8/8] netfilter: nf_tables: permit duplicate flowtable mappings Florian Westphal
2023-11-24  9:50 ` [PATCH nf-next 0/8] netfilter: make nf_flowtable lifetime differ from container struct Pablo Neira Ayuso
2023-11-24  9:55   ` Florian Westphal
2023-11-24 10:10     ` Pablo Neira Ayuso
2023-11-24 10:16       ` Florian Westphal
2023-11-24 10:48   ` Toke Høiland-Jørgensen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZVy99gga2EnjnTWP@lore-desk \
    --to=lorenzo@kernel.org \
    --cc=fw@strlen.de \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).