All of lore.kernel.org
 help / color / mirror / Atom feed
From: Martin KaFai Lau <martin.lau@linux.dev>
To: Jason Xing <kerneljasonxing@gmail.com>
Cc: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, dsahern@kernel.org,
	willemdebruijn.kernel@gmail.com, willemb@google.com,
	ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	eddyz87@gmail.com, song@kernel.org, yonghong.song@linux.dev,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@fomichev.me,
	haoluo@google.com, jolsa@kernel.org, shuah@kernel.org,
	ykolal@fb.com, bpf@vger.kernel.org, netdev@vger.kernel.org
Subject: Re: [PATCH bpf-next v10 12/12] selftests/bpf: add simple bpf tests in the tx path for timestamping feature
Date: Wed, 12 Feb 2025 17:08:18 -0800	[thread overview]
Message-ID: <4e51fae2-0b43-426f-8fae-ea267fe2839f@linux.dev> (raw)
In-Reply-To: <20250212061855.71154-13-kerneljasonxing@gmail.com>

On 2/11/25 10:18 PM, Jason Xing wrote:
> BPF program calculates a couple of latency deltas between each tx
> timestamping callbacks. It can be used in the real world to diagnose
> the kernel behaviour in the tx path.
> 
> Check the safety issues by accessing a few bpf calls in
> bpf_test_access_bpf_calls() which are implemented in the patch 3 and 4.
> 
> Check if the bpf timestamping can co-exist with socket timestamping.
> 
> There remains a few realistic things[1][2] to highlight:
> 1. in general a packet may pass through multiple qdiscs. For instance
> with bonding or tunnel virtual devices in the egress path.
> 2. packets may be resent, in which case an ACK might precede a repeat
> SCHED and SND.
> 3. erroneous or malicious peers may also just never send an ACK.
> 
> [1]: https://lore.kernel.org/all/67a389af981b0_14e0832949d@willemb.c.googlers.com.notmuch/
> [2]: https://lore.kernel.org/all/c329a0c1-239b-4ca1-91f2-cb30b8dd2f6a@linux.dev/
> 
> Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
> ---
>   .../bpf/prog_tests/net_timestamping.c         | 231 +++++++++++++++++
>   .../selftests/bpf/progs/net_timestamping.c    | 244 ++++++++++++++++++
>   2 files changed, 475 insertions(+)
>   create mode 100644 tools/testing/selftests/bpf/prog_tests/net_timestamping.c
>   create mode 100644 tools/testing/selftests/bpf/progs/net_timestamping.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/net_timestamping.c b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c
> new file mode 100644
> index 000000000000..dcdc40473a7d
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c
> @@ -0,0 +1,231 @@
> +#include <linux/net_tstamp.h>
> +#include <sys/time.h>
> +#include <linux/errqueue.h>
> +#include "test_progs.h"
> +#include "network_helpers.h"
> +#include "net_timestamping.skel.h"
> +
> +#define CG_NAME "/net-timestamping-test"
> +#define NSEC_PER_SEC    1000000000LL
> +
> +static const char addr4_str[] = "127.0.0.1";
> +static const char addr6_str[] = "::1";
> +static struct net_timestamping *skel;
> +static int cfg_payload_len = 30;

const ?

> +static struct timespec usr_ts;
> +static u64 delay_tolerance_nsec = 10000000000; /* 10 seconds */
> +int SK_TS_SCHED;
> +int SK_TS_TXSW;
> +int SK_TS_ACK;
> +
> +static int64_t timespec_to_ns64(struct timespec *ts)
> +{
> +	return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
> +}
> +
> +static void validate_key(int tskey, int tstype)
> +{
> +	static int expected_tskey = -1;
> +
> +	if (tstype == SCM_TSTAMP_SCHED)
> +		expected_tskey = cfg_payload_len - 1;
> +
> +	ASSERT_EQ(expected_tskey, tskey, "tskey mismatch");
> +
> +	expected_tskey = tskey;
> +}
> +
> +static void validate_timestamp(struct timespec *cur, struct timespec *prev)
> +{
> +	int64_t cur_ns, prev_ns;
> +
> +	cur_ns = timespec_to_ns64(cur);
> +	prev_ns = timespec_to_ns64(prev);
> +
> +	ASSERT_TRUE((cur_ns - prev_ns) < delay_tolerance_nsec, "latency");

ASSERT_LT()

> +}
> +
> +static void test_socket_timestamp(struct scm_timestamping *tss, int tstype,
> +				  int tskey)
> +{
> +	static struct timespec *prev_ts = &usr_ts;
> +
> +	validate_key(tskey, tstype);
> +
> +	switch (tstype) {
> +	case SCM_TSTAMP_SCHED:
> +		validate_timestamp(&tss->ts[0], prev_ts);
> +		SK_TS_SCHED = 1;
> +		SK_TS_TXSW = SK_TS_ACK = 0;
> +		break;
> +	case SCM_TSTAMP_SND:
> +		validate_timestamp(&tss->ts[0], prev_ts);
> +		SK_TS_TXSW = 1;
> +		break;
> +	case SCM_TSTAMP_ACK:
> +		validate_timestamp(&tss->ts[0], prev_ts);
> +		SK_TS_ACK = 1;
> +		break;
> +	}
> +
> +	prev_ts = &tss->ts[0];
> +}
> +
> +static void test_recv_errmsg_cmsg(struct msghdr *msg)
> +{
> +	struct sock_extended_err *serr = NULL;
> +	struct scm_timestamping *tss = NULL;
> +	struct cmsghdr *cm;
> +
> +	for (cm = CMSG_FIRSTHDR(msg);
> +	     cm && cm->cmsg_len;
> +	     cm = CMSG_NXTHDR(msg, cm)) {
> +		if (cm->cmsg_level == SOL_SOCKET &&
> +		    cm->cmsg_type == SCM_TIMESTAMPING) {
> +			tss = (void *) CMSG_DATA(cm);
> +		} else if ((cm->cmsg_level == SOL_IP &&
> +			    cm->cmsg_type == IP_RECVERR) ||
> +			   (cm->cmsg_level == SOL_IPV6 &&
> +			    cm->cmsg_type == IPV6_RECVERR) ||
> +			   (cm->cmsg_level == SOL_PACKET &&
> +			    cm->cmsg_type == PACKET_TX_TIMESTAMP)) {
> +			serr = (void *) CMSG_DATA(cm);
> +			ASSERT_EQ(serr->ee_origin, SO_EE_ORIGIN_TIMESTAMPING,
> +				    "cmsg type");
> +		}
> +
> +		if (serr && tss)
> +			test_socket_timestamp(tss, serr->ee_info,
> +					      serr->ee_data);
> +	}
> +}
> +
> +static bool socket_recv_errmsg(int fd)
> +{
> +	static char ctrl[1024 /* overprovision*/];
> +	char data[cfg_payload_len];
> +	static struct msghdr msg;
> +	struct iovec entry;
> +	int n = 0;
> +
> +	memset(&msg, 0, sizeof(msg));
> +	memset(&entry, 0, sizeof(entry));
> +	memset(ctrl, 0, sizeof(ctrl));
> +
> +	entry.iov_base = data;
> +	entry.iov_len = cfg_payload_len;
> +	msg.msg_iov = &entry;
> +	msg.msg_iovlen = 1;
> +	msg.msg_name = NULL;
> +	msg.msg_namelen = 0;
> +	msg.msg_control = ctrl;
> +	msg.msg_controllen = sizeof(ctrl);
> +
> +	n = recvmsg(fd, &msg, MSG_ERRQUEUE);
> +	if (n == -1)
> +		ASSERT_EQ(errno, EAGAIN, "recvmsg MSG_ERRQUEUE");
> +
> +	if (n >= 0)
> +		test_recv_errmsg_cmsg(&msg);
> +
> +	return n == -1;
> +
> +}
> +
> +static void test_socket_timestamping(int fd)
> +{
> +	while (!socket_recv_errmsg(fd));
> +
> +	ASSERT_EQ(SK_TS_SCHED, 1, "SCM_TSTAMP_SCHED");
> +	ASSERT_EQ(SK_TS_TXSW, 1, "SCM_TSTAMP_SND");
> +	ASSERT_EQ(SK_TS_ACK, 1, "SCM_TSTAMP_ACK");
> +}
> +
> +static void test_tcp(int family)
> +{
> +	struct net_timestamping__bss *bss = skel->bss;
> +	char buf[cfg_payload_len];
> +	int sfd = -1, cfd = -1;
> +	unsigned int sock_opt;
> +	int ret;
> +
> +	memset(bss, 0, sizeof(*bss));

No need to reset some of the new global variables, e.g. SK_TS_SCHED?

> +
> +	sfd = start_server(family, SOCK_STREAM,
> +			   family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
> +	if (!ASSERT_OK_FD(sfd, "start_server"))
> +		goto out;
> +
> +	cfd = connect_to_fd(sfd, 0);
> +	if (!ASSERT_OK_FD(cfd, "connect_to_fd_server"))
> +		goto out;
> +
> +	sock_opt = SOF_TIMESTAMPING_SOFTWARE |
> +		   SOF_TIMESTAMPING_OPT_ID |
> +		   SOF_TIMESTAMPING_TX_SCHED |
> +		   SOF_TIMESTAMPING_TX_SOFTWARE |
> +		   SOF_TIMESTAMPING_TX_ACK;
> +	ret = setsockopt(cfd, SOL_SOCKET, SO_TIMESTAMPING,
> +			 (char *) &sock_opt, sizeof(sock_opt));

It also needs the original test in v9 to check the bpf timestamping works 
without the user space's SO_TIMESTAMPING, which is the major use case of this 
series.

It should be easy to do by conditionally enabling the SO_TIMESTAMPING here.

> +	if (!ASSERT_OK(ret, "setsockopt SO_TIMESTAMPING"))
> +		goto out;
> +
> +	ret = clock_gettime(CLOCK_REALTIME, &usr_ts);
> +	if (!ASSERT_OK(ret, "get user time"))
> +		goto out;
> +
> +	ret = write(cfd, buf, sizeof(buf));
> +	if (!ASSERT_EQ(ret, sizeof(buf), "send to server"))
> +		goto out;
> +
> +	/* Test if socket timestamping works correctly even with bpf
> +	 * extension enabled.
> +	 */
> +	test_socket_timestamping(cfd);
> +
> +	ASSERT_EQ(bss->nr_active, 1, "nr_active");
> +	ASSERT_EQ(bss->nr_snd, 2, "nr_snd");
> +	ASSERT_EQ(bss->nr_sched, 1, "nr_sched");
> +	ASSERT_EQ(bss->nr_txsw, 1, "nr_txsw");
> +	ASSERT_EQ(bss->nr_ack, 1, "nr_ack");
> +
> +out:
> +	if (sfd >= 0)
> +		close(sfd);
> +	if (cfd >= 0)
> +		close(cfd);
> +}
> +
> +void test_net_timestamping(void)
> +{
> +	struct netns_obj *ns;
> +	int cg_fd;
> +
> +	cg_fd = test__join_cgroup(CG_NAME);
> +	if (!ASSERT_OK_FD(cg_fd, "join cgroup"))
> +		return;
> +
> +	ns = netns_new("net_timestamping_ns", true);
> +	if (!ASSERT_OK_PTR(ns, "create ns"))
> +		goto done;
> +
> +	skel = net_timestamping__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "open and load skel"))
> +		goto done;
> +
> +	if (!ASSERT_OK(net_timestamping__attach(skel), "attach skel"))
> +		goto done;
> +
> +	skel->links.skops_sockopt =
> +		bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd);
> +	if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup"))
> +		goto done;
> +
> +	test_tcp(AF_INET6);
> +	test_tcp(AF_INET);

Considering the w and w/o SO_TIMESTAMPING combinations (i.e. x2), it is worth to 
have proper subtests. It is easy also. Take a look at the test__start_subtest() 
usage `under the prog_tests/.

> +
> +done:
> +	net_timestamping__destroy(skel);
> +	netns_free(ns);
> +	close(cg_fd);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/net_timestamping.c b/tools/testing/selftests/bpf/progs/net_timestamping.c
> new file mode 100644
> index 000000000000..d3e1da599626
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/net_timestamping.c
> @@ -0,0 +1,244 @@
> +#include "vmlinux.h"
> +#include "bpf_tracing_net.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +#include "bpf_misc.h"
> +#include "bpf_kfuncs.h"
> +#include <errno.h>
> +
> +#define SK_BPF_CB_FLAGS 1009
> +#define SK_BPF_CB_TX_TIMESTAMPING 1

Remove these two defines. The vmlinux.h has it.

[ ... ]

> +SEC("fentry/tcp_sendmsg_locked")
> +int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg, size_t size)
> +{
> +	u64 timestamp = bpf_ktime_get_ns();
> +	u32 flag = sk->sk_bpf_cb_flags;
> +	struct sk_stg *stg;
> +
> +	if (!flag)

I just noticed this one.

Lets replace the "flag" check with a better check (e.g. pid check used in other 
tests). Then it won't affect sk of other tests running in parallel.

It is pretty easy. Take a look at how bpf_get_current_pid_tgid() is used in 
progs/local_storage.c.


> +		return 0;
> +
> +	stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
> +				 BPF_SK_STORAGE_GET_F_CREATE);
> +	if (!stg)
> +		return 0;
> +
> +	stg->sendmsg_ns = timestamp;
> +	nr_snd += 1;
> +	return 0;
> +}
> +

  reply	other threads:[~2025-02-13  1:08 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-12  6:18 [PATCH bpf-next v10 00/12] net-timestamp: bpf extension to equip applications transparently Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 01/12] bpf: add networking timestamping support to bpf_get/setsockopt() Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 02/12] bpf: prepare the sock_ops ctx and call bpf prog for TX timestamping Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 03/12] bpf: prevent unsafe access to the sock fields in the BPF timestamping callback Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 04/12] bpf: disable unsafe helpers in TX timestamping callbacks Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 05/12] net-timestamp: prepare for isolating two modes of SO_TIMESTAMPING Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 06/12] bpf: add BPF_SOCK_OPS_TS_SCHED_OPT_CB callback Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 07/12] bpf: add BPF_SOCK_OPS_TS_SW_OPT_CB callback Jason Xing
2025-02-12 23:18   ` Martin KaFai Lau
2025-02-13  7:24     ` Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 08/12] bpf: add BPF_SOCK_OPS_TS_HW_OPT_CB callback Jason Xing
2025-02-12 23:20   ` Martin KaFai Lau
2025-02-13  7:24     ` Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 09/12] bpf: add BPF_SOCK_OPS_TS_ACK_OPT_CB callback Jason Xing
2025-02-12 15:26   ` Willem de Bruijn
2025-02-13  0:07     ` Jason Xing
2025-02-13  7:23       ` Jason Xing
2025-02-13 15:09         ` Willem de Bruijn
2025-02-12  6:18 ` [PATCH bpf-next v10 10/12] bpf: add BPF_SOCK_OPS_TS_SND_CB callback Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 11/12] bpf: support selective sampling for bpf timestamping Jason Xing
2025-02-12 23:49   ` Martin KaFai Lau
2025-02-13  7:26     ` Jason Xing
2025-02-12  6:18 ` [PATCH bpf-next v10 12/12] selftests/bpf: add simple bpf tests in the tx path for timestamping feature Jason Xing
2025-02-13  1:08   ` Martin KaFai Lau [this message]
2025-02-13 11:31     ` Jason Xing

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4e51fae2-0b43-426f-8fae-ea267fe2839f@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=eddyz87@gmail.com \
    --cc=edumazet@google.com \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kerneljasonxing@gmail.com \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    --cc=shuah@kernel.org \
    --cc=song@kernel.org \
    --cc=willemb@google.com \
    --cc=willemdebruijn.kernel@gmail.com \
    --cc=ykolal@fb.com \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.