From: Stanislav Fomichev <sdf@google.com>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
martin.lau@linux.dev, song@kernel.org, yhs@fb.com,
john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
haoluo@google.com, jolsa@kernel.org, kuba@kernel.org,
toke@kernel.org, willemb@google.com, dsahern@kernel.org,
magnus.karlsson@intel.com, bjorn@kernel.org,
maciej.fijalkowski@intel.com, hawk@kernel.org,
yoong.siang.song@intel.com, netdev@vger.kernel.org,
xdp-hints@xdp-project.net
Subject: [PATCH bpf-next v2 1/9] xsk: Support tx_metadata_len
Date: Thu, 14 Sep 2023 14:04:44 -0700 [thread overview]
Message-ID: <20230914210452.2588884-2-sdf@google.com> (raw)
In-Reply-To: <20230914210452.2588884-1-sdf@google.com>
For zerocopy mode, tx_desc->addr can point to an arbitrary offset
and carry some TX metadata in the headroom. For copy mode, there
is currently no way to populate skb metadata.
Introduce a new tx_metadata_len umem config option that indicates how many
bytes to treat as metadata. The metadata bytes are placed immediately before
the tx_desc address (same as in the RX case).
The size of the metadata has the same constraints as XDP metadata:
- less than 256 bytes (it is stored in a u8)
- 4-byte aligned
- zero is accepted and means that no bytes are treated as TX metadata
This data is not interpreted in any way right now.
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
include/net/xdp_sock.h | 1 +
include/net/xsk_buff_pool.h | 1 +
include/uapi/linux/if_xdp.h | 1 +
net/xdp/xdp_umem.c | 4 ++++
net/xdp/xsk.c | 12 +++++++++++-
net/xdp/xsk_buff_pool.c | 1 +
net/xdp/xsk_queue.h | 17 ++++++++++-------
tools/include/uapi/linux/if_xdp.h | 1 +
8 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 1617af380162..10993a05d220 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -28,6 +28,7 @@ struct xdp_umem {
struct user_struct *user;
refcount_t users;
u8 flags;
+ u8 tx_metadata_len;
bool zc;
struct page **pgs;
int id;
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index b0bdff26fc88..1985ffaf9b0c 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -77,6 +77,7 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
+ u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup;
bool uses_need_wakeup;
bool dma_need_sync;
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 8d48863472b9..2ecf79282c26 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
+ __u32 tx_metadata_len;
};
struct xdp_statistics {
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 06cead2b8e34..333f3d53aad4 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
+ if (mr->tx_metadata_len > 256 || mr->tx_metadata_len % 4)
+ return -EINVAL;
+
umem->size = size;
umem->headroom = headroom;
umem->chunk_size = chunk_size;
@@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
+ umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 55f8b9b0e06d..5e479869ede1 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1255,6 +1255,14 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
+struct xdp_umem_reg_v2 { /* xdp_umem_reg without the new tx_metadata_len; sizes the setsockopt copy for shorter optlen */
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+ __u32 flags;
+};
+
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1298,8 +1306,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
- else if (optlen < sizeof(mr))
+ else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1);
+ else if (optlen < sizeof(mr))
+ mr_size = sizeof(struct xdp_umem_reg_v2);
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index b3f7b310811e..57c8d7100de8 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
+ pool->tx_metadata_len = umem->tx_metadata_len;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 13354a1e4280..c74a1372bcb9 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 offset = desc->addr & (pool->chunk_size - 1);
+ u64 addr = desc->addr - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
+ u64 offset = addr & (pool->chunk_size - 1);
if (!desc->len)
return false;
- if (offset + desc->len > pool->chunk_size)
+ if (offset + len > pool->chunk_size)
return false;
- if (desc->addr >= pool->addrs_cnt)
+ if (addr >= pool->addrs_cnt)
return false;
if (xp_unused_options_set(desc->options))
@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
if (!desc->len)
return false;
- if (desc->len > pool->chunk_size)
+ if (len > pool->chunk_size)
return false;
- if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
- xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+ if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
+ xp_desc_crosses_non_contig_pg(pool, addr, len))
return false;
if (xp_unused_options_set(desc->options))
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 73a47da885dc..34411a2e5b6c 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
+ __u32 tx_metadata_len;
};
struct xdp_statistics {
--
2.42.0.459.ge4e396fd5e-goog
next prev parent reply other threads:[~2023-09-14 21:04 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-14 21:04 [PATCH bpf-next v2 0/9] xsk: TX metadata Stanislav Fomichev
2023-09-14 21:04 ` Stanislav Fomichev [this message]
2023-09-14 21:04 ` [PATCH bpf-next v2 2/9] xsk: add TX timestamp and TX checksum offload support Stanislav Fomichev
2023-09-15 1:29 ` kernel test robot
2023-09-15 1:30 ` Vinicius Costa Gomes
2023-09-15 16:25 ` Stanislav Fomichev
2023-09-15 9:44 ` kernel test robot
2023-09-14 21:04 ` [PATCH bpf-next v2 3/9] tools: ynl: print xsk-features from the sample Stanislav Fomichev
2023-09-14 21:04 ` [PATCH bpf-next v2 4/9] net/mlx5e: Implement AF_XDP TX timestamp and checksum offload Stanislav Fomichev
2023-09-15 2:44 ` kernel test robot
2023-09-14 21:04 ` [PATCH bpf-next v2 5/9] selftests/xsk: Support tx_metadata_len Stanislav Fomichev
2023-09-14 21:04 ` [PATCH bpf-next v2 6/9] selftests/bpf: Add csum helpers Stanislav Fomichev
2023-09-14 21:04 ` [PATCH bpf-next v2 7/9] selftests/bpf: Add TX side to xdp_metadata Stanislav Fomichev
2023-09-14 21:04 ` [PATCH bpf-next v2 8/9] selftests/bpf: Add TX side to xdp_hw_metadata Stanislav Fomichev
2023-09-14 21:04 ` [PATCH bpf-next v2 9/9] xsk: document tx_metadata_len layout Stanislav Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230914210452.2588884-2-sdf@google.com \
--to=sdf@google.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=dsahern@kernel.org \
--cc=haoluo@google.com \
--cc=hawk@kernel.org \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=magnus.karlsson@intel.com \
--cc=martin.lau@linux.dev \
--cc=netdev@vger.kernel.org \
--cc=song@kernel.org \
--cc=toke@kernel.org \
--cc=willemb@google.com \
--cc=xdp-hints@xdp-project.net \
--cc=yhs@fb.com \
--cc=yoong.siang.song@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.