From: Stanislav Fomichev <sdf@google.com>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
martin.lau@linux.dev, song@kernel.org, yhs@fb.com,
john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
haoluo@google.com, jolsa@kernel.org, kuba@kernel.org,
toke@kernel.org, willemb@google.com, dsahern@kernel.org,
magnus.karlsson@intel.com, bjorn@kernel.org,
maciej.fijalkowski@intel.com, hawk@kernel.org,
yoong.siang.song@intel.com, netdev@vger.kernel.org,
xdp-hints@xdp-project.net
Subject: [PATCH bpf-next v5 01/13] xsk: Support tx_metadata_len
Date: Thu, 2 Nov 2023 15:58:25 -0700 [thread overview]
Message-ID: <20231102225837.1141915-2-sdf@google.com> (raw)
In-Reply-To: <20231102225837.1141915-1-sdf@google.com>
For zerocopy mode, tx_desc->addr can point to an arbitrary offset
and carry some TX metadata in the headroom. For copy mode, there
is no way currently to populate skb metadata.
Introduce new tx_metadata_len umem config option that indicates how many
bytes to treat as metadata. Metadata bytes come prior to tx_desc address
(same as in RX case).
The size of the metadata has mostly the same constraints as XDP:
- less than 256 bytes
- 8-byte aligned (compared to 4-byte alignment on xdp, due to 8-byte
timestamp in the completion)
- non-zero
This data is not interpreted in any way right now.
Reviewed-by: Song Yoong Siang <yoong.siang.song@intel.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
include/net/xdp_sock.h | 1 +
include/net/xsk_buff_pool.h | 1 +
include/uapi/linux/if_xdp.h | 1 +
net/xdp/xdp_umem.c | 4 ++++
net/xdp/xsk.c | 12 +++++++++++-
net/xdp/xsk_buff_pool.c | 1 +
net/xdp/xsk_queue.h | 17 ++++++++++-------
tools/include/uapi/linux/if_xdp.h | 1 +
8 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index f83128007fb0..bcf765124f72 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -30,6 +30,7 @@ struct xdp_umem {
struct user_struct *user;
refcount_t users;
u8 flags;
+ u8 tx_metadata_len;
bool zc;
struct page **pgs;
int id;
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index b0bdff26fc88..1985ffaf9b0c 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -77,6 +77,7 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
+ u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup;
bool uses_need_wakeup;
bool dma_need_sync;
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 8d48863472b9..2ecf79282c26 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
+ __u32 tx_metadata_len;
};
struct xdp_statistics {
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 06cead2b8e34..946a687fb8e8 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
+ if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
+ return -EINVAL;
+
umem->size = size;
umem->headroom = headroom;
umem->chunk_size = chunk_size;
@@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
+ umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ae9f8cb611f6..c904356e2800 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1283,6 +1283,14 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
+struct xdp_umem_reg_v2 {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+ __u32 flags;
+};
+
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1326,8 +1334,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
- else if (optlen < sizeof(mr))
+ else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1);
+ else if (optlen < sizeof(mr))
+ mr_size = sizeof(struct xdp_umem_reg_v2);
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 49cb9f9a09be..386eddcdf837 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
+ pool->tx_metadata_len = umem->tx_metadata_len;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 13354a1e4280..c74a1372bcb9 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 offset = desc->addr & (pool->chunk_size - 1);
+ u64 addr = desc->addr - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
+ u64 offset = addr & (pool->chunk_size - 1);
if (!desc->len)
return false;
- if (offset + desc->len > pool->chunk_size)
+ if (offset + len > pool->chunk_size)
return false;
- if (desc->addr >= pool->addrs_cnt)
+ if (addr >= pool->addrs_cnt)
return false;
if (xp_unused_options_set(desc->options))
@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
if (!desc->len)
return false;
- if (desc->len > pool->chunk_size)
+ if (len > pool->chunk_size)
return false;
- if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
- xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+ if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
+ xp_desc_crosses_non_contig_pg(pool, addr, len))
return false;
if (xp_unused_options_set(desc->options))
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 73a47da885dc..34411a2e5b6c 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
+ __u32 tx_metadata_len;
};
struct xdp_statistics {
--
2.42.0.869.gea05f2083d-goog
next prev parent reply other threads:[~2023-11-02 22:58 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-02 22:58 [PATCH bpf-next v5 00/13] xsk: TX metadata Stanislav Fomichev
2023-11-02 22:58 ` Stanislav Fomichev [this message]
2023-11-02 22:58 ` [PATCH bpf-next v5 02/13] xsk: Add TX timestamp and TX checksum offload support Stanislav Fomichev
2023-11-13 13:16 ` Jesper Dangaard Brouer
2023-11-13 14:10 ` Stanislav Fomichev
2023-11-13 16:29 ` Jesper Dangaard Brouer
2023-11-13 17:02 ` Stanislav Fomichev
2023-11-15 10:05 ` Jesper Dangaard Brouer
2023-11-15 13:37 ` Stanislav Fomichev
2023-11-16 14:30 ` Jesper Dangaard Brouer
2023-11-20 20:45 ` Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 03/13] tools: ynl: Print xsk-features from the sample Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 04/13] net/mlx5e: Implement AF_XDP TX timestamp and checksum offload Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 05/13] net: stmmac: Add Tx HWTS support to XDP ZC Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 06/13] xsk: Document tx_metadata_len layout Stanislav Fomichev
2023-11-05 12:45 ` Simon Horman
2023-11-06 16:47 ` Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 07/13] xsk: Validate xsk_tx_metadata flags Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 08/13] xsk: Add option to calculate TX checksum in SW Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 09/13] selftests/xsk: Support tx_metadata_len Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 10/13] selftests/bpf: Add csum helpers Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 11/13] selftests/bpf: Add TX side to xdp_metadata Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 12/13] selftests/bpf: Convert xdp_hw_metadata to XDP_USE_NEED_WAKEUP Stanislav Fomichev
2023-11-02 22:58 ` [PATCH bpf-next v5 13/13] selftests/bpf: Add TX side to xdp_hw_metadata Stanislav Fomichev
2023-11-06 23:31 ` [PATCH bpf-next v5 00/13] xsk: TX metadata Jakub Kicinski
2023-11-09 18:03 ` Alexei Starovoitov
2023-11-09 18:07 ` Stanislav Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231102225837.1141915-2-sdf@google.com \
--to=sdf@google.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=dsahern@kernel.org \
--cc=haoluo@google.com \
--cc=hawk@kernel.org \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=magnus.karlsson@intel.com \
--cc=martin.lau@linux.dev \
--cc=netdev@vger.kernel.org \
--cc=song@kernel.org \
--cc=toke@kernel.org \
--cc=willemb@google.com \
--cc=xdp-hints@xdp-project.net \
--cc=yhs@fb.com \
--cc=yoong.siang.song@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.