Re: [PATCH v5 bpf-next 13/14] bpf: add new frame_length field to the XDP ctx

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
To: Lorenzo Bianconi <lorenzo@kernel.org>
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org, davem@davemloft.net,
	kuba@kernel.org, ast@kernel.org, daniel@iogearbox.net,
	shayagr@amazon.com, sameehj@amazon.com, john.fastabend@gmail.com,
	dsahern@kernel.org, brouer@redhat.com, echaudro@redhat.com,
	lorenzo.bianconi@redhat.com, jasowang@redhat.com
Subject: Re: [PATCH v5 bpf-next 13/14] bpf: add new frame_length field to the XDP ctx
Date: Tue, 8 Dec 2020 23:17:46 +0100	[thread overview]
Message-ID: <20201208221746.GA33399@ranger.igk.intel.com> (raw)
In-Reply-To: <0547d6f752e325f56a8e5f6466b50e81ff29d65f.1607349924.git.lorenzo@kernel.org>

On Mon, Dec 07, 2020 at 05:32:42PM +0100, Lorenzo Bianconi wrote:
> From: Eelco Chaudron <echaudro@redhat.com>
> 
> This patch adds a new field to the XDP context called frame_length,
> which will hold the full length of the packet, including fragments
> if existing.

The approach you took for ctx access conversion is barely described :/

> 
> eBPF programs can determine if fragments are present using something
> like:
> 
>   if (ctx->data_end - ctx->data < ctx->frame_length) {
>     /* Fragements exists. /*
>   }
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  include/net/xdp.h              | 22 +++++++++
>  include/uapi/linux/bpf.h       |  1 +
>  kernel/bpf/verifier.c          |  2 +-
>  net/core/filter.c              | 83 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h |  1 +
>  5 files changed, 108 insertions(+), 1 deletion(-)
> 
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 09078ab6644c..e54d733c90ed 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -73,8 +73,30 @@ struct xdp_buff {
>  	void *data_hard_start;
>  	struct xdp_rxq_info *rxq;
>  	struct xdp_txq_info *txq;
> +	/* If any of the bitfield lengths for frame_sz or mb below change,
> +	 * make sure the defines here are also updated!
> +	 */
> +#ifdef __BIG_ENDIAN_BITFIELD
> +#define MB_SHIFT	  0
> +#define MB_MASK		  0x00000001
> +#define FRAME_SZ_SHIFT	  1
> +#define FRAME_SZ_MASK	  0xfffffffe
> +#else
> +#define MB_SHIFT	  31
> +#define MB_MASK		  0x80000000
> +#define FRAME_SZ_SHIFT	  0
> +#define FRAME_SZ_MASK	  0x7fffffff
> +#endif
> +#define FRAME_SZ_OFFSET() offsetof(struct xdp_buff, __u32_bit_fields_offset)
> +#define MB_OFFSET()	  offsetof(struct xdp_buff, __u32_bit_fields_offset)
> +	/* private: */
> +	u32 __u32_bit_fields_offset[0];

Why? I don't get that. Please explain.
Also, looking at all the need for masking/shifting, I wonder if it would
be better to have u32 frame_sz and u8 mb...

> +	/* public: */
>  	u32 frame_sz:31; /* frame size to deduce data_hard_end/reserved tailroom*/
>  	u32 mb:1; /* xdp non-linear buffer */
> +
> +	/* Temporary registers to make conditional access/stores possible. */
> +	u64 tmp_reg[2];

IMHO this kills the bitfield approach we have for vars above.

>  };
>  
>  /* Reserve memory area at end-of data area.
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 30b477a26482..62c50ab28ea9 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4380,6 +4380,7 @@ struct xdp_md {
>  	__u32 rx_queue_index;  /* rxq->queue_index  */
>  
>  	__u32 egress_ifindex;  /* txq->dev->ifindex */
> +	__u32 frame_length;
>  };
>  
>  /* DEVMAP map-value layout
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 93def76cf32b..c50caea29fa2 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -10526,7 +10526,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
>  	const struct bpf_verifier_ops *ops = env->ops;
>  	int i, cnt, size, ctx_field_size, delta = 0;
>  	const int insn_cnt = env->prog->len;
> -	struct bpf_insn insn_buf[16], *insn;
> +	struct bpf_insn insn_buf[32], *insn;
>  	u32 target_size, size_default, off;
>  	struct bpf_prog *new_prog;
>  	enum bpf_access_type type;
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 4c4882d4d92c..278640db9e0a 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -8908,6 +8908,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
>  				  struct bpf_insn *insn_buf,
>  				  struct bpf_prog *prog, u32 *target_size)
>  {
> +	int ctx_reg, dst_reg, scratch_reg;
>  	struct bpf_insn *insn = insn_buf;
>  
>  	switch (si->off) {
> @@ -8954,6 +8955,88 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
>  		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
>  				      offsetof(struct net_device, ifindex));
>  		break;
> +	case offsetof(struct xdp_md, frame_length):
> +		/* Need tmp storage for src_reg in case src_reg == dst_reg,
> +		 * and a scratch reg */
> +		scratch_reg = BPF_REG_9;
> +		dst_reg = si->dst_reg;
> +
> +		if (dst_reg == scratch_reg)
> +			scratch_reg--;
> +
> +		ctx_reg = (si->src_reg == si->dst_reg) ? scratch_reg - 1 : si->src_reg;
> +		while (dst_reg == ctx_reg || scratch_reg == ctx_reg)
> +			ctx_reg--;
> +
> +		/* Save scratch registers */
> +		if (ctx_reg != si->src_reg) {
> +			*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, ctx_reg,
> +					      offsetof(struct xdp_buff,
> +						       tmp_reg[1]));
> +
> +			*insn++ = BPF_MOV64_REG(ctx_reg, si->src_reg);
> +		}
> +
> +		*insn++ = BPF_STX_MEM(BPF_DW, ctx_reg, scratch_reg,
> +				      offsetof(struct xdp_buff, tmp_reg[0]));

Why don't you push regs to stack, use it and then pop it back? That way I
suppose you could avoid polluting xdp_buff with tmp_reg[2].

> +
> +		/* What does this code do?
> +		 *   dst_reg = 0
> +		 *
> +		 *   if (!ctx_reg->mb)
> +		 *      goto no_mb:
> +		 *
> +		 *   dst_reg = (struct xdp_shared_info *)xdp_data_hard_end(xdp)
> +		 *   dst_reg = dst_reg->data_length
> +		 *
> +		 * NOTE: xdp_data_hard_end() is xdp->hard_start +
> +		 *       xdp->frame_sz - sizeof(shared_info)
> +		 *
> +		 * no_mb:
> +		 *   dst_reg += ctx_reg->data_end - ctx_reg->data
> +		 */
> +		*insn++ = BPF_MOV64_IMM(dst_reg, 0);
> +
> +		*insn++ = BPF_LDX_MEM(BPF_W, scratch_reg, ctx_reg, MB_OFFSET());
> +		*insn++ = BPF_ALU32_IMM(BPF_AND, scratch_reg, MB_MASK);
> +		*insn++ = BPF_JMP_IMM(BPF_JEQ, scratch_reg, 0, 7); /*goto no_mb; */
> +
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff,
> +						       data_hard_start),
> +				      dst_reg, ctx_reg,
> +				      offsetof(struct xdp_buff, data_hard_start));
> +		*insn++ = BPF_LDX_MEM(BPF_W, scratch_reg, ctx_reg,
> +				      FRAME_SZ_OFFSET());
> +		*insn++ = BPF_ALU32_IMM(BPF_AND, scratch_reg, FRAME_SZ_MASK);
> +		*insn++ = BPF_ALU32_IMM(BPF_RSH, scratch_reg, FRAME_SZ_SHIFT);
> +		*insn++ = BPF_ALU64_REG(BPF_ADD, dst_reg, scratch_reg);
> +		*insn++ = BPF_ALU64_IMM(BPF_SUB, dst_reg,
> +					SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_shared_info,
> +						       data_length),
> +				      dst_reg, dst_reg,
> +				      offsetof(struct xdp_shared_info,
> +					       data_length));
> +
> +		/* no_mb: */
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
> +				      scratch_reg, ctx_reg,
> +				      offsetof(struct xdp_buff, data_end));
> +		*insn++ = BPF_ALU64_REG(BPF_ADD, dst_reg, scratch_reg);
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
> +				      scratch_reg, ctx_reg,
> +				      offsetof(struct xdp_buff, data));
> +		*insn++ = BPF_ALU64_REG(BPF_SUB, dst_reg, scratch_reg);
> +
> +		/* Restore scratch registers */
> +		*insn++ = BPF_LDX_MEM(BPF_DW, scratch_reg, ctx_reg,
> +				      offsetof(struct xdp_buff, tmp_reg[0]));
> +
> +		if (ctx_reg != si->src_reg)
> +			*insn++ = BPF_LDX_MEM(BPF_DW, ctx_reg, ctx_reg,
> +					      offsetof(struct xdp_buff,
> +						       tmp_reg[1]));
> +		break;
>  	}
>  
>  	return insn - insn_buf;
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 30b477a26482..62c50ab28ea9 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -4380,6 +4380,7 @@ struct xdp_md {
>  	__u32 rx_queue_index;  /* rxq->queue_index  */
>  
>  	__u32 egress_ifindex;  /* txq->dev->ifindex */
> +	__u32 frame_length;
>  };
>  
>  /* DEVMAP map-value layout
> -- 
> 2.28.0
>

next prev parent reply	other threads:[~2020-12-08 22:27 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-07 16:32 [PATCH v5 bpf-next 00/14] mvneta: introduce XDP multi-buffer support Lorenzo Bianconi
2020-12-07 16:32 ` [PATCH v5 bpf-next 01/14] xdp: introduce mb in xdp_buff/xdp_frame Lorenzo Bianconi
2020-12-07 21:16   ` Alexander Duyck
2020-12-07 23:03     ` Saeed Mahameed
2020-12-08  3:16       ` Alexander Duyck
2020-12-08  6:49         ` Saeed Mahameed
2020-12-08  9:47           ` Jesper Dangaard Brouer
2020-12-07 16:32 ` [PATCH v5 bpf-next 02/14] xdp: initialize xdp_buff mb bit to 0 in all XDP drivers Lorenzo Bianconi
2020-12-07 21:15   ` Alexander Duyck
2020-12-07 21:37     ` Maciej Fijalkowski
2020-12-07 23:20       ` Saeed Mahameed
2020-12-08 10:31         ` Lorenzo Bianconi
2020-12-08 13:29           ` Jesper Dangaard Brouer
2020-12-07 16:32 ` [PATCH v5 bpf-next 03/14] xdp: add xdp_shared_info data structure Lorenzo Bianconi
2020-12-08  0:22   ` Saeed Mahameed
2020-12-08 11:01     ` Lorenzo Bianconi
2020-12-19 14:53       ` Shay Agroskin
2020-12-19 15:30         ` Jamal Hadi Salim
2020-12-21  9:01           ` Jesper Dangaard Brouer
2020-12-21 13:00             ` Jamal Hadi Salim
2020-12-20 17:52         ` Lorenzo Bianconi
2020-12-21 20:55           ` Shay Agroskin
2020-12-07 16:32 ` [PATCH v5 bpf-next 04/14] net: mvneta: update mb bit before passing the xdp buffer to eBPF layer Lorenzo Bianconi
2020-12-07 16:32 ` [PATCH v5 bpf-next 05/14] xdp: add multi-buff support to xdp_return_{buff/frame} Lorenzo Bianconi
2020-12-07 16:32 ` [PATCH v5 bpf-next 06/14] net: mvneta: add multi buffer support to XDP_TX Lorenzo Bianconi
2020-12-19 15:56   ` Shay Agroskin
2020-12-20 18:06     ` Lorenzo Bianconi
2020-12-07 16:32 ` [PATCH v5 bpf-next 07/14] bpf: move user_size out of bpf_test_init Lorenzo Bianconi
2020-12-07 16:32 ` [PATCH v5 bpf-next 08/14] bpf: introduce multibuff support to bpf_prog_test_run_xdp() Lorenzo Bianconi
2020-12-07 16:32 ` [PATCH v5 bpf-next 09/14] bpf: test_run: add xdp_shared_info pointer in bpf_test_finish signature Lorenzo Bianconi
2020-12-07 16:32 ` [PATCH v5 bpf-next 10/14] net: mvneta: enable jumbo frames for XDP Lorenzo Bianconi
2020-12-07 16:32 ` [PATCH v5 bpf-next 11/14] bpf: cpumap: introduce xdp multi-buff support Lorenzo Bianconi
2020-12-19 17:46   ` Shay Agroskin
2020-12-20 17:56     ` Lorenzo Bianconi
2020-12-07 16:32 ` [PATCH v5 bpf-next 12/14] bpf: add multi-buff support to the bpf_xdp_adjust_tail() API Lorenzo Bianconi
2020-12-07 16:32 ` [PATCH v5 bpf-next 13/14] bpf: add new frame_length field to the XDP ctx Lorenzo Bianconi
2020-12-08 22:17   ` Maciej Fijalkowski [this message]
2020-12-09 10:35     ` Eelco Chaudron
2020-12-09 11:10       ` Maciej Fijalkowski
2020-12-09 12:07         ` Eelco Chaudron
2020-12-15 13:28           ` Eelco Chaudron
2020-12-15 18:06             ` Maciej Fijalkowski
2020-12-16 14:08               ` Eelco Chaudron
2021-01-15 16:36                 ` Eelco Chaudron
2021-01-18 16:48                   ` Maciej Fijalkowski
2021-01-20 13:20                     ` Eelco Chaudron
2021-02-01 16:00                       ` Eelco Chaudron
2020-12-07 16:32 ` [PATCH v5 bpf-next 14/14] bpf: update xdp_adjust_tail selftest to include multi-buffer Lorenzo Bianconi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201208221746.GA33399@ranger.igk.intel.com \
    --to=maciej.fijalkowski@intel.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brouer@redhat.com \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=echaudro@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=john.fastabend@gmail.com \
    --cc=kuba@kernel.org \
    --cc=lorenzo.bianconi@redhat.com \
    --cc=lorenzo@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=sameehj@amazon.com \
    --cc=shayagr@amazon.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.