netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Martin KaFai Lau <martin.lau@linux.dev>
To: Jason Xing <kerneljasonxing@gmail.com>
Cc: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, dsahern@kernel.org,
	willemdebruijn.kernel@gmail.com, willemb@google.com,
	ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	eddyz87@gmail.com, song@kernel.org, yonghong.song@linux.dev,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@fomichev.me,
	haoluo@google.com, jolsa@kernel.org, shuah@kernel.org,
	ykolal@fb.com, bpf@vger.kernel.org, netdev@vger.kernel.org,
	Jason Xing <kernelxing@tencent.com>
Subject: Re: [PATCH net-next v3 03/14] net-timestamp: open gate for bpf_setsockopt/_getsockopt
Date: Tue, 29 Oct 2024 17:32:41 -0700	[thread overview]
Message-ID: <9a821495-cac7-48d8-a2bc-1bd7ebeef23c@linux.dev> (raw)
In-Reply-To: <20241028110535.82999-4-kerneljasonxing@gmail.com>

On 10/28/24 4:05 AM, Jason Xing wrote:
> From: Jason Xing <kernelxing@tencent.com>
> 
> For now, we support bpf_setsockopt to set or clear timestamps flags.
> 
> Users can use something like this in bpf program to turn on the feature:
> flags = SOF_TIMESTAMPING_TX_SCHED;
> bpf_setsockopt(skops, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags));
> The specific use cases can be seen in the bpf selftest in this series.
> 
> Later, I will support each flags one by one based on this.
> 
> Signed-off-by: Jason Xing <kernelxing@tencent.com>
> ---
>   include/net/sock.h              |  4 ++--
>   include/uapi/linux/net_tstamp.h |  7 +++++++
>   net/core/filter.c               |  7 +++++--
>   net/core/sock.c                 | 34 ++++++++++++++++++++++++++-------
>   net/ipv4/udp.c                  |  2 +-
>   net/mptcp/sockopt.c             |  2 +-
>   net/socket.c                    |  2 +-
>   7 files changed, 44 insertions(+), 14 deletions(-)
> 
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 5384f1e49f5c..062f405c744e 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1775,7 +1775,7 @@ static inline void skb_set_owner_edemux(struct sk_buff *skb, struct sock *sk)
>   #endif
>   
>   int sk_setsockopt(struct sock *sk, int level, int optname,
> -		  sockptr_t optval, unsigned int optlen);
> +		  sockptr_t optval, unsigned int optlen, bool bpf_timetamping);
>   int sock_setsockopt(struct socket *sock, int level, int op,
>   		    sockptr_t optval, unsigned int optlen);
>   int do_sock_setsockopt(struct socket *sock, bool compat, int level,
> @@ -1784,7 +1784,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
>   		       int optname, sockptr_t optval, sockptr_t optlen);
>   
>   int sk_getsockopt(struct sock *sk, int level, int optname,
> -		  sockptr_t optval, sockptr_t optlen);
> +		  sockptr_t optval, sockptr_t optlen, bool bpf_timetamping);
>   int sock_gettstamp(struct socket *sock, void __user *userstamp,
>   		   bool timeval, bool time32);
>   struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
> index 858339d1c1c4..0696699cf964 100644
> --- a/include/uapi/linux/net_tstamp.h
> +++ b/include/uapi/linux/net_tstamp.h
> @@ -49,6 +49,13 @@ enum {
>   					 SOF_TIMESTAMPING_TX_SCHED | \
>   					 SOF_TIMESTAMPING_TX_ACK)
>   
> +#define SOF_TIMESTAMPING_BPF_SUPPPORTED_MASK (SOF_TIMESTAMPING_SOFTWARE | \

Hmm... so we are allowing SOF_TIMESTAMPING_SOFTWARE here, but it won't do anything
(meaning that setting it and not setting it are both no-ops)?

> +					      SOF_TIMESTAMPING_TX_SCHED | \
> +					      SOF_TIMESTAMPING_TX_SOFTWARE | \
> +					      SOF_TIMESTAMPING_TX_ACK | \
> +					      SOF_TIMESTAMPING_OPT_ID | \
> +					      SOF_TIMESTAMPING_OPT_ID_TCP)
> +
>   /**
>    * struct so_timestamping - SO_TIMESTAMPING parameter
>    *
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 58761263176c..dc8ecf899ced 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5238,6 +5238,9 @@ static int sol_socket_sockopt(struct sock *sk, int optname,
>   		break;
>   	case SO_BINDTODEVICE:
>   		break;
> +	case SO_TIMESTAMPING_NEW:

How about only allowing bpf_setsockopt(SO_TIMESTAMPING_NEW) instead of
bpf_setsockopt(SO_TIMESTAMPING)? Would that solve the issue reported in v2?

> +	case SO_TIMESTAMPING_OLD:
> +		break;
>   	default:
>   		return -EINVAL;
>   	}
> @@ -5247,11 +5250,11 @@ static int sol_socket_sockopt(struct sock *sk, int optname,
>   			return -EINVAL;
>   		return sk_getsockopt(sk, SOL_SOCKET, optname,
>   				     KERNEL_SOCKPTR(optval),
> -				     KERNEL_SOCKPTR(optlen));
> +				     KERNEL_SOCKPTR(optlen), true);
>   	}
>   
>   	return sk_setsockopt(sk, SOL_SOCKET, optname,
> -			     KERNEL_SOCKPTR(optval), *optlen);
> +			     KERNEL_SOCKPTR(optval), *optlen, true);
>   }
>   
>   static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 7f398bd07fb7..7e05748b1a06 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -941,6 +941,19 @@ int sock_set_timestamping(struct sock *sk, int optname,
>   	return 0;
>   }
>   
> +static int sock_set_timestamping_bpf(struct sock *sk,
> +				     struct so_timestamping timestamping)
> +{
> +	u32 flags = timestamping.flags;
> +
> +	if (flags & ~SOF_TIMESTAMPING_BPF_SUPPPORTED_MASK)
> +		return -EINVAL;
> +
> +	WRITE_ONCE(sk->sk_tsflags_bpf, flags);

I think it is cleaner to do "WRITE_ONCE(sk->sk_tsflags_bpf, flags);" directly in
sol_socket_sockopt() instead of adding "bool bpf_timestamping" to sk_setsockopt().
sk_tsflags_bpf is a separate u32 anyway, so there is not a lot of code to share.
The same applies to getsockopt.

[ I will continue with the remaining patches a little later ]

> +
> +	return 0;
> +}
> +
>   void sock_set_keepalive(struct sock *sk)
>   {
>   	lock_sock(sk);
> @@ -1159,7 +1172,7 @@ static int sockopt_validate_clockid(__kernel_clockid_t value)
>    */
>   
>   int sk_setsockopt(struct sock *sk, int level, int optname,
> -		  sockptr_t optval, unsigned int optlen)
> +		  sockptr_t optval, unsigned int optlen, bool bpf_timetamping)
>   {
>   	struct so_timestamping timestamping;
>   	struct socket *sock = sk->sk_socket;
> @@ -1409,7 +1422,10 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
>   			memset(&timestamping, 0, sizeof(timestamping));
>   			timestamping.flags = val;
>   		}
> -		ret = sock_set_timestamping(sk, optname, timestamping);
> +		if (!bpf_timetamping)
> +			ret = sock_set_timestamping(sk, optname, timestamping);
> +		else
> +			ret = sock_set_timestamping_bpf(sk, timestamping);
>   		break;
>   
>   	case SO_RCVLOWAT:
> @@ -1626,7 +1642,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
>   		    sockptr_t optval, unsigned int optlen)
>   {
>   	return sk_setsockopt(sock->sk, level, optname,
> -			     optval, optlen);
> +			     optval, optlen, false);
>   }
>   EXPORT_SYMBOL(sock_setsockopt);
>   
> @@ -1670,7 +1686,7 @@ static int groups_to_user(sockptr_t dst, const struct group_info *src)
>   }
>   
>   int sk_getsockopt(struct sock *sk, int level, int optname,
> -		  sockptr_t optval, sockptr_t optlen)
> +		  sockptr_t optval, sockptr_t optlen, bool bpf_timetamping)
>   {
>   	struct socket *sock = sk->sk_socket;
>   
> @@ -1793,9 +1809,13 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
>   		 * returning the flags when they were set through the same option.
>   		 * Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
>   		 */
> -		if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
> -			v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
> -			v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
> +		if (!bpf_timetamping) {
> +			if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
> +				v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
> +				v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
> +			}
> +		} else {
> +			v.timestamping.flags = READ_ONCE(sk->sk_tsflags_bpf);
>   		}
>   		break;
>   
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 0e24916b39d4..9a20af41e272 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2679,7 +2679,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
>   	int is_udplite = IS_UDPLITE(sk);
>   
>   	if (level == SOL_SOCKET) {
> -		err = sk_setsockopt(sk, level, optname, optval, optlen);
> +		err = sk_setsockopt(sk, level, optname, optval, optlen, false);
>   
>   		if (optname == SO_RCVBUF || optname == SO_RCVBUFFORCE) {
>   			sockopt_lock_sock(sk);
> diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
> index 505445a9598f..7b12cc2db136 100644
> --- a/net/mptcp/sockopt.c
> +++ b/net/mptcp/sockopt.c
> @@ -306,7 +306,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
>   			return PTR_ERR(ssk);
>   		}
>   
> -		ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen);
> +		ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen, false);
>   		if (ret == 0) {
>   			if (optname == SO_REUSEPORT)
>   				sk->sk_reuseport = ssk->sk_reuseport;
> diff --git a/net/socket.c b/net/socket.c
> index 9a8e4452b9b2..4bdca39685a6 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -2385,7 +2385,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
>   
>   	ops = READ_ONCE(sock->ops);
>   	if (level == SOL_SOCKET) {
> -		err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
> +		err = sk_getsockopt(sock->sk, level, optname, optval, optlen, false);
>   	} else if (unlikely(!ops->getsockopt)) {
>   		err = -EOPNOTSUPP;
>   	} else {


  parent reply	other threads:[~2024-10-30  0:32 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-28 11:05 [PATCH net-next v3 00/14] net-timestamp: bpf extension to equip applications transparently Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 01/14] net-timestamp: reorganize in skb_tstamp_tx_output() Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 02/14] net-timestamp: allow two features to work parallelly Jason Xing
2024-10-29 23:00   ` Martin KaFai Lau
2024-10-30  1:23     ` Jason Xing
2024-10-30  1:45       ` Willem de Bruijn
2024-10-30  2:32         ` Jason Xing
2024-10-30  2:47           ` Willem de Bruijn
2024-10-30  3:04             ` Jason Xing
2024-10-30  5:37               ` Martin KaFai Lau
2024-10-30  6:42                 ` Jason Xing
2024-10-30 17:15                   ` Willem de Bruijn
2024-10-30 23:54                     ` Jason Xing
2024-10-31  0:13                       ` Jason Xing
2024-10-31  6:27                         ` Martin KaFai Lau
2024-10-31  7:04                           ` Jason Xing
2024-10-31 12:30                             ` Willem de Bruijn
2024-10-31 13:50                               ` Jason Xing
2024-10-31 23:26                                 ` Martin KaFai Lau
2024-11-01  7:47                                   ` Jason Xing
2024-11-05  1:50                                     ` Martin KaFai Lau
2024-11-05  3:13                                       ` Jason Xing
2024-11-01 13:32                                   ` Willem de Bruijn
2024-11-01 16:08                                     ` Jason Xing
2024-11-01 16:39                                       ` Willem de Bruijn
2024-11-05  2:09                                     ` Martin KaFai Lau
2024-11-05  6:22                                       ` Jason Xing
2024-11-05 19:22                                         ` Martin KaFai Lau
2024-11-06  0:17                                           ` Jason Xing
2024-11-06  1:09                                             ` Martin KaFai Lau
2024-11-06  2:51                                               ` Jason Xing
2024-11-07  1:19                                                 ` Martin KaFai Lau
2024-11-07  3:31                                                   ` Jason Xing
2024-11-07 19:05                                                     ` Martin KaFai Lau
2024-11-06  1:11                                             ` Willem de Bruijn
2024-11-06  2:37                                               ` Jason Xing
2024-11-05 14:29                                       ` Willem de Bruijn
2024-11-02 13:43   ` Simon Horman
2024-11-03  0:42     ` Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 03/14] net-timestamp: open gate for bpf_setsockopt/_getsockopt Jason Xing
2024-10-29  0:59   ` Willem de Bruijn
2024-10-29  1:18     ` Jason Xing
2024-10-30  0:32   ` Martin KaFai Lau [this message]
2024-10-30  1:15     ` Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 04/14] net-timestamp: introduce TS_SCHED_OPT_CB to generate dev xmit timestamp Jason Xing
2024-10-29  0:23   ` kernel test robot
2024-10-29  1:02   ` Willem de Bruijn
2024-10-29  1:30     ` Jason Xing
2024-10-29  1:04   ` kernel test robot
2024-10-28 11:05 ` [PATCH net-next v3 05/14] net-timestamp: introduce TS_SW_OPT_CB to generate driver timestamp Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 06/14] net-timestamp: introduce TS_ACK_OPT_CB to generate tcp acked timestamp Jason Xing
2024-10-29  1:03   ` Willem de Bruijn
2024-10-29  1:19     ` Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 07/14] net-timestamp: add a new triggered point to set sk_tsflags_bpf in UDP layer Jason Xing
2024-10-29  1:07   ` Willem de Bruijn
2024-10-29  1:23     ` Jason Xing
2024-10-29  1:33       ` Willem de Bruijn
2024-10-29  3:12         ` Jason Xing
2024-10-29 15:04           ` Willem de Bruijn
2024-10-29 15:44             ` Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 08/14] net-timestamp: make bpf for tx timestamp work Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 09/14] net-timestamp: add a common helper to set tskey Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 10/14] net-timestamp: add basic support with tskey offset Jason Xing
2024-10-29  1:24   ` Willem de Bruijn
2024-10-29  2:41     ` Jason Xing
2024-10-29 15:03       ` Willem de Bruijn
2024-10-29 15:50         ` Jason Xing
2024-10-29 19:45           ` Willem de Bruijn
2024-10-30  3:27             ` Jason Xing
2024-10-30  5:42   ` Martin KaFai Lau
2024-10-30  6:50     ` Jason Xing
2024-10-31  1:17       ` Martin KaFai Lau
2024-10-31  2:41         ` Jason Xing
2024-10-31  3:27           ` Jason Xing
2024-10-31  5:52           ` Martin KaFai Lau
2024-10-31  6:16             ` Jason Xing
2024-10-31 23:50           ` Martin KaFai Lau
2024-11-01  6:33             ` Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 11/14] net-timestamp: support OPT_ID for TCP proto Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 12/14] net-timestamp: add OPT_ID for UDP proto Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 13/14] net-timestamp: use static key to control bpf extension Jason Xing
2024-10-28 11:05 ` [PATCH net-next v3 14/14] bpf: add simple bpf tests in the tx path for so_timstamping feature Jason Xing
2024-10-29  1:26   ` Willem de Bruijn
2024-10-29  1:33     ` Jason Xing
2024-10-29  1:40       ` Willem de Bruijn
2024-10-29  3:13         ` Jason Xing
2024-10-30  5:57   ` Martin KaFai Lau
2024-10-30  6:54     ` Jason Xing

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9a821495-cac7-48d8-a2bc-1bd7ebeef23c@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=eddyz87@gmail.com \
    --cc=edumazet@google.com \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kerneljasonxing@gmail.com \
    --cc=kernelxing@tencent.com \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    --cc=shuah@kernel.org \
    --cc=song@kernel.org \
    --cc=willemb@google.com \
    --cc=willemdebruijn.kernel@gmail.com \
    --cc=ykolal@fb.com \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).