From: Stanislav Fomichev <sdf@google.com>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
martin.lau@linux.dev, song@kernel.org, yhs@fb.com,
john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
haoluo@google.com, jolsa@kernel.org, kuba@kernel.org,
toke@kernel.org, willemb@google.com, dsahern@kernel.org,
magnus.karlsson@intel.com, bjorn@kernel.org,
maciej.fijalkowski@intel.com, hawk@kernel.org,
yoong.siang.song@intel.com, netdev@vger.kernel.org,
xdp-hints@xdp-project.net
Subject: [PATCH bpf-next v3 01/10] xsk: Support tx_metadata_len
Date: Tue, 3 Oct 2023 13:05:13 -0700 [thread overview]
Message-ID: <20231003200522.1914523-2-sdf@google.com> (raw)
In-Reply-To: <20231003200522.1914523-1-sdf@google.com>
For zerocopy mode, tx_desc->addr can point to the arbitrary offset
and carry some TX metadata in the headroom. For copy mode, there
is no way currently to populate skb metadata.
Introduce new tx_metadata_len umem config option that indicates how many
bytes to treat as metadata. Metadata bytes come prior to tx_desc address
(same as in RX case).
The size of the metadata has the same constraints as XDP:
- less than 256 bytes
- 4-byte aligned
- non-zero
This data is not interpreted in any way right now.
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
include/net/xdp_sock.h | 1 +
include/net/xsk_buff_pool.h | 1 +
include/uapi/linux/if_xdp.h | 1 +
net/xdp/xdp_umem.c | 4 ++++
net/xdp/xsk.c | 12 +++++++++++-
net/xdp/xsk_buff_pool.c | 1 +
net/xdp/xsk_queue.h | 17 ++++++++++-------
tools/include/uapi/linux/if_xdp.h | 1 +
8 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 69b472604b86..caa1f04106be 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -30,6 +30,7 @@ struct xdp_umem {
struct user_struct *user;
refcount_t users;
u8 flags;
+ u8 tx_metadata_len;
bool zc;
struct page **pgs;
int id;
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index b0bdff26fc88..1985ffaf9b0c 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -77,6 +77,7 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
+ u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup;
bool uses_need_wakeup;
bool dma_need_sync;
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 8d48863472b9..2ecf79282c26 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
+ __u32 tx_metadata_len;
};
struct xdp_statistics {
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 06cead2b8e34..333f3d53aad4 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
+ if (mr->tx_metadata_len > 256 || mr->tx_metadata_len % 4)
+ return -EINVAL;
+
umem->size = size;
umem->headroom = headroom;
umem->chunk_size = chunk_size;
@@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
+ umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 7482d0aca504..c1e12b602213 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1255,6 +1255,14 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
+struct xdp_umem_reg_v2 {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+ __u32 flags;
+};
+
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1298,8 +1306,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
- else if (optlen < sizeof(mr))
+ else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1);
+ else if (optlen < sizeof(mr))
+ mr_size = sizeof(struct xdp_umem_reg_v2);
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 49cb9f9a09be..386eddcdf837 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
+ pool->tx_metadata_len = umem->tx_metadata_len;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 13354a1e4280..c74a1372bcb9 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 offset = desc->addr & (pool->chunk_size - 1);
+ u64 addr = desc->addr - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
+ u64 offset = addr & (pool->chunk_size - 1);
if (!desc->len)
return false;
- if (offset + desc->len > pool->chunk_size)
+ if (offset + len > pool->chunk_size)
return false;
- if (desc->addr >= pool->addrs_cnt)
+ if (addr >= pool->addrs_cnt)
return false;
if (xp_unused_options_set(desc->options))
@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
if (!desc->len)
return false;
- if (desc->len > pool->chunk_size)
+ if (len > pool->chunk_size)
return false;
- if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
- xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+ if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
+ xp_desc_crosses_non_contig_pg(pool, addr, len))
return false;
if (xp_unused_options_set(desc->options))
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 73a47da885dc..34411a2e5b6c 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
+ __u32 tx_metadata_len;
};
struct xdp_statistics {
--
2.42.0.582.g8ccd20d70d-goog
next prev parent reply other threads:[~2023-10-03 20:05 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-03 20:05 [PATCH bpf-next v3 00/10] xsk: TX metadata Stanislav Fomichev
2023-10-03 20:05 ` Stanislav Fomichev [this message]
2023-10-03 20:05 ` [PATCH bpf-next v3 02/10] xsk: add TX timestamp and TX checksum offload support Stanislav Fomichev
2023-10-04 6:18 ` Song, Yoong Siang
2023-10-04 17:48 ` Stanislav Fomichev
2023-10-04 17:56 ` Stanislav Fomichev
2023-10-05 1:16 ` Song, Yoong Siang
2023-10-03 20:05 ` [PATCH bpf-next v3 03/10] tools: ynl: print xsk-features from the sample Stanislav Fomichev
2023-10-03 20:05 ` [PATCH bpf-next v3 04/10] net/mlx5e: Implement AF_XDP TX timestamp and checksum offload Stanislav Fomichev
2023-10-04 23:47 ` kernel test robot
2023-10-03 20:05 ` [PATCH bpf-next v3 05/10] net: stmmac: Add Tx HWTS support to XDP ZC Stanislav Fomichev
2023-10-04 23:05 ` kernel test robot
2023-10-04 23:14 ` Stanislav Fomichev
2023-10-06 4:38 ` kernel test robot
2023-10-03 20:05 ` [PATCH bpf-next v3 06/10] selftests/xsk: Support tx_metadata_len Stanislav Fomichev
2023-10-03 20:05 ` [PATCH bpf-next v3 07/10] selftests/bpf: Add csum helpers Stanislav Fomichev
2023-10-03 20:05 ` [PATCH bpf-next v3 08/10] selftests/bpf: Add TX side to xdp_metadata Stanislav Fomichev
2023-10-03 20:05 ` [PATCH bpf-next v3 09/10] selftests/bpf: Add TX side to xdp_hw_metadata Stanislav Fomichev
2023-10-09 8:12 ` Jesper Dangaard Brouer
2023-10-09 16:37 ` Stanislav Fomichev
2023-10-10 20:40 ` Stanislav Fomichev
2023-10-13 1:13 ` Song, Yoong Siang
2023-10-13 18:47 ` Stanislav Fomichev
2023-10-15 13:28 ` Song, Yoong Siang
2023-10-03 20:05 ` [PATCH bpf-next v3 10/10] xsk: document tx_metadata_len layout Stanislav Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231003200522.1914523-2-sdf@google.com \
--to=sdf@google.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=dsahern@kernel.org \
--cc=haoluo@google.com \
--cc=hawk@kernel.org \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=magnus.karlsson@intel.com \
--cc=martin.lau@linux.dev \
--cc=netdev@vger.kernel.org \
--cc=song@kernel.org \
--cc=toke@kernel.org \
--cc=willemb@google.com \
--cc=xdp-hints@xdp-project.net \
--cc=yhs@fb.com \
--cc=yoong.siang.song@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.