netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH bpf-next 02/17] bpf: net: Change sk_getsockopt() to take the sockptr_t argument
       [not found] ` <20220824222614.1918332-1-kafai@fb.com>
@ 2022-08-25 18:07   ` Stanislav Fomichev
  2022-08-26 19:15     ` Martin KaFai Lau
  0 siblings, 1 reply; 5+ messages in thread
From: Stanislav Fomichev @ 2022-08-25 18:07 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: bpf, netdev, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	David Miller, Eric Dumazet, Jakub Kicinski, kernel-team,
	Paolo Abeni

On Wed, Aug 24, 2022 at 3:29 PM Martin KaFai Lau <kafai@fb.com> wrote:
>
> This patch changes sk_getsockopt() to take the sockptr_t argument
> such that it can be used by bpf_getsockopt(SOL_SOCKET) in a
> latter patch.
>
> security_socket_getpeersec_stream() is not changed.  It stays
> with the __user ptr (optval.user and optlen.user) to avoid changes
> to other security hooks.  bpf_getsockopt(SOL_SOCKET) also does not
> support SO_PEERSEC.
>
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> ---
>  include/linux/filter.h  |  3 +--
>  include/linux/sockptr.h |  5 +++++
>  net/core/filter.c       |  5 ++---
>  net/core/sock.c         | 43 +++++++++++++++++++++++------------------
>  4 files changed, 32 insertions(+), 24 deletions(-)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index a5f21dc3c432..527ae1d64e27 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -900,8 +900,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
>  int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
>  void sk_reuseport_prog_free(struct bpf_prog *prog);
>  int sk_detach_filter(struct sock *sk);
> -int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
> -                 unsigned int len);
> +int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len);
>
>  bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
>  void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
> diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
> index d45902fb4cad..bae5e2369b4f 100644
> --- a/include/linux/sockptr.h
> +++ b/include/linux/sockptr.h
> @@ -64,6 +64,11 @@ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
>         return 0;
>  }
>
> +static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size)
> +{
> +       return copy_to_sockptr_offset(dst, 0, src, size);
> +}
> +
>  static inline void *memdup_sockptr(sockptr_t src, size_t len)
>  {
>         void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 63e25d8ce501..0f6f86b9e487 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -10712,8 +10712,7 @@ int sk_detach_filter(struct sock *sk)
>  }
>  EXPORT_SYMBOL_GPL(sk_detach_filter);
>
> -int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
> -                 unsigned int len)
> +int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len)
>  {
>         struct sock_fprog_kern *fprog;
>         struct sk_filter *filter;
> @@ -10744,7 +10743,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
>                 goto out;
>
>         ret = -EFAULT;
> -       if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
> +       if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog)))
>                 goto out;
>
>         /* Instead of bytes, the API requests to return the number
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 21bc4bf6b485..7fa30fd4b37f 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -712,8 +712,8 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
>         return ret;
>  }
>
> -static int sock_getbindtodevice(struct sock *sk, char __user *optval,
> -                               int __user *optlen, int len)
> +static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
> +                               sockptr_t optlen, int len)
>  {
>         int ret = -ENOPROTOOPT;
>  #ifdef CONFIG_NETDEVICES
> @@ -737,12 +737,12 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
>         len = strlen(devname) + 1;
>
>         ret = -EFAULT;
> -       if (copy_to_user(optval, devname, len))
> +       if (copy_to_sockptr(optval, devname, len))
>                 goto out;
>
>  zero:
>         ret = -EFAULT;
> -       if (put_user(len, optlen))
> +       if (copy_to_sockptr(optlen, &len, sizeof(int)))
>                 goto out;
>
>         ret = 0;
> @@ -1568,20 +1568,23 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred,
>         }
>  }
>
> -static int groups_to_user(gid_t __user *dst, const struct group_info *src)
> +static int groups_to_user(sockptr_t dst, const struct group_info *src)
>  {
>         struct user_namespace *user_ns = current_user_ns();
>         int i;
>
> -       for (i = 0; i < src->ngroups; i++)
> -               if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
> +       for (i = 0; i < src->ngroups; i++) {
> +               gid_t gid = from_kgid_munged(user_ns, src->gid[i]);
> +
> +               if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid)))
>                         return -EFAULT;
> +       }
>
>         return 0;
>  }
>
>  static int sk_getsockopt(struct sock *sk, int level, int optname,
> -                        char __user *optval, int __user *optlen)
> +                        sockptr_t optval, sockptr_t optlen)
>  {
>         struct socket *sock = sk->sk_socket;
>
> @@ -1600,7 +1603,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
>         int lv = sizeof(int);
>         int len;
>
> -       if (get_user(len, optlen))
> +       if (copy_from_sockptr(&len, optlen, sizeof(int)))

Do we want to be consistent wrt to sizeof?

copy_from_sockptr(&len, optlen, sizeof(int))
vs
copy_from_sockptr(&len, optlen, sizeof(optlen))

Alternatively, should we have put_sockptr/get_sockopt with a semantics
similar to put_user/get_user to remove all this ambiguity?

>                 return -EFAULT;
>         if (len < 0)
>                 return -EINVAL;
> @@ -1735,7 +1738,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
>                 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
>                 spin_unlock(&sk->sk_peer_lock);
>
> -               if (copy_to_user(optval, &peercred, len))
> +               if (copy_to_sockptr(optval, &peercred, len))
>                         return -EFAULT;
>                 goto lenout;
>         }
> @@ -1753,11 +1756,11 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
>                 if (len < n * sizeof(gid_t)) {
>                         len = n * sizeof(gid_t);
>                         put_cred(cred);
> -                       return put_user(len, optlen) ? -EFAULT : -ERANGE;
> +                       return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
>                 }
>                 len = n * sizeof(gid_t);
>
> -               ret = groups_to_user((gid_t __user *)optval, cred->group_info);
> +               ret = groups_to_user(optval, cred->group_info);
>                 put_cred(cred);
>                 if (ret)
>                         return ret;
> @@ -1773,7 +1776,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
>                         return -ENOTCONN;
>                 if (lv < len)
>                         return -EINVAL;
> -               if (copy_to_user(optval, address, len))
> +               if (copy_to_sockptr(optval, address, len))
>                         return -EFAULT;
>                 goto lenout;
>         }
> @@ -1790,7 +1793,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
>                 break;
>
>         case SO_PEERSEC:
> -               return security_socket_getpeersec_stream(sock, optval, optlen, len);
> +               return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len);

I'm assuming there should be something to prevent this being called
from BPF? (haven't read all the patches yet)
Do we want to be a bit more defensive with 'if (!optval.user) return
-EFAULT' or something similar?


>         case SO_MARK:
>                 v.val = sk->sk_mark;
> @@ -1822,7 +1825,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
>                 return sock_getbindtodevice(sk, optval, optlen, len);
>
>         case SO_GET_FILTER:
> -               len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
> +               len = sk_get_filter(sk, optval, len);
>                 if (len < 0)
>                         return len;
>
> @@ -1870,7 +1873,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
>                 sk_get_meminfo(sk, meminfo);
>
>                 len = min_t(unsigned int, len, sizeof(meminfo));
> -               if (copy_to_user(optval, &meminfo, len))
> +               if (copy_to_sockptr(optval, &meminfo, len))
>                         return -EFAULT;
>
>                 goto lenout;
> @@ -1939,10 +1942,10 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
>
>         if (len > lv)
>                 len = lv;
> -       if (copy_to_user(optval, &v, len))
> +       if (copy_to_sockptr(optval, &v, len))
>                 return -EFAULT;
>  lenout:
> -       if (put_user(len, optlen))
> +       if (copy_to_sockptr(optlen, &len, sizeof(int)))
>                 return -EFAULT;
>         return 0;
>  }
> @@ -1950,7 +1953,9 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
>  int sock_getsockopt(struct socket *sock, int level, int optname,
>                     char __user *optval, int __user *optlen)
>  {
> -       return sk_getsockopt(sock->sk, level, optname, optval, optlen);
> +       return sk_getsockopt(sock->sk, level, optname,
> +                            USER_SOCKPTR(optval),
> +                            USER_SOCKPTR(optlen));
>  }
>
>  /*
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next 14/17] bpf: Change bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt()
       [not found] ` <20220824222730.1923992-1-kafai@fb.com>
@ 2022-08-25 18:36   ` sdf
  2022-08-26 19:24     ` Martin KaFai Lau
  0 siblings, 1 reply; 5+ messages in thread
From: sdf @ 2022-08-25 18:36 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: bpf, netdev, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	David Miller, Eric Dumazet, Jakub Kicinski, kernel-team,
	Paolo Abeni

On 08/24, Martin KaFai Lau wrote:
> This patch changes bpf_getsockopt(SOL_TCP) to reuse
> do_tcp_getsockopt().  It removes the duplicated code from
> bpf_getsockopt(SOL_TCP).

> Before this patch, there were some optnames available to
> bpf_setsockopt(SOL_TCP) but missing in bpf_getsockopt(SOL_TCP).
> For example, TCP_NODELAY, TCP_MAXSEG, TCP_KEEPIDLE, TCP_KEEPINTVL,
> and a few more.  It surprises users from time to time.  This patch
> automatically closes this gap without duplicating more code.

> bpf_getsockopt(TCP_SAVED_SYN) does not free the saved_syn,
> so it stays in sol_tcp_sockopt().

> For string name value like TCP_CONGESTION, bpf expects it
> is always null terminated, so sol_tcp_sockopt() decrements
> optlen by one before calling do_tcp_getsockopt() and
> the 'if (optlen < saved_optlen) memset(..,0,..);'
> in __bpf_getsockopt() will always do a null termination.

> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> ---
>   include/net/tcp.h |  2 ++
>   net/core/filter.c | 70 ++++++++++++++++++++++++++---------------------
>   net/ipv4/tcp.c    |  4 +--
>   3 files changed, 43 insertions(+), 33 deletions(-)

> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index c03a50c72f40..735e957f7f4b 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -402,6 +402,8 @@ void tcp_init_sock(struct sock *sk);
>   void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
>   __poll_t tcp_poll(struct file *file, struct socket *sock,
>   		      struct poll_table_struct *wait);
> +int do_tcp_getsockopt(struct sock *sk, int level,
> +		      int optname, sockptr_t optval, sockptr_t optlen);
>   int tcp_getsockopt(struct sock *sk, int level, int optname,
>   		   char __user *optval, int __user *optlen);
>   bool tcp_bpf_bypass_getsockopt(int level, int optname);
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 68b52243b306..cdbbcec46e8b 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5096,8 +5096,9 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk,  
> int optname,
>   	return 0;
>   }

> -static int sol_tcp_setsockopt(struct sock *sk, int optname,
> -			      char *optval, int optlen)
> +static int sol_tcp_sockopt(struct sock *sk, int optname,
> +			   char *optval, int *optlen,
> +			   bool getopt)
>   {
>   	if (sk->sk_prot->setsockopt != tcp_setsockopt)
>   		return -EINVAL;
> @@ -5114,17 +5115,47 @@ static int sol_tcp_setsockopt(struct sock *sk,  
> int optname,
>   	case TCP_USER_TIMEOUT:
>   	case TCP_NOTSENT_LOWAT:
>   	case TCP_SAVE_SYN:
> -		if (optlen != sizeof(int))
> +		if (*optlen != sizeof(int))
>   			return -EINVAL;
>   		break;

[..]

>   	case TCP_CONGESTION:
> +		if (*optlen < 2)
> +			return -EINVAL;
> +		break;
> +	case TCP_SAVED_SYN:
> +		if (*optlen < 1)
> +			return -EINVAL;
>   		break;

This looks a bit inconsistent vs '*optlen != sizeof(int)' above. Maybe

if (*optlen < sizeof(u16))
if (*optlen < sizeof(u8))

?

>   	default:
> -		return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen);
> +		if (getopt)
> +			return -EINVAL;
> +		return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
> +	}
> +
> +	if (getopt) {
> +		if (optname == TCP_SAVED_SYN) {
> +			struct tcp_sock *tp = tcp_sk(sk);
> +
> +			if (!tp->saved_syn ||
> +			    *optlen > tcp_saved_syn_len(tp->saved_syn))
> +				return -EINVAL;

You mention in the description that bpf doesn't doesn't free saved_syn,
maybe worth putting a comment with the rationale here as well?
I'm assuming we don't free from bpf because we want userspace to
have an opportunity to read it as well?

> +			memcpy(optval, tp->saved_syn->data, *optlen);
> +			return 0;
> +		}
> +
> +		if (optname == TCP_CONGESTION) {
> +			if (!inet_csk(sk)->icsk_ca_ops)
> +				return -EINVAL;

Is it worth it doing null termination more explicitly here?
For readability sake:
			/* BPF always expects NULL-terminated strings. */
			optval[*optlen-1] = '\0';
> +			(*optlen)--;
> +		}
> +
> +		return do_tcp_getsockopt(sk, SOL_TCP, optname,
> +					 KERNEL_SOCKPTR(optval),
> +					 KERNEL_SOCKPTR(optlen));
>   	}

>   	return do_tcp_setsockopt(sk, SOL_TCP, optname,
> -				 KERNEL_SOCKPTR(optval), optlen);
> +				 KERNEL_SOCKPTR(optval), *optlen);
>   }

>   static int sol_ip_setsockopt(struct sock *sk, int optname,
> @@ -5179,7 +5210,7 @@ static int __bpf_setsockopt(struct sock *sk, int  
> level, int optname,
>   	else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
>   		return sol_ipv6_setsockopt(sk, optname, optval, optlen);
>   	else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
> -		return sol_tcp_setsockopt(sk, optname, optval, optlen);
> +		return sol_tcp_sockopt(sk, optname, optval, &optlen, false);

>   	return -EINVAL;
>   }
> @@ -5202,31 +5233,8 @@ static int __bpf_getsockopt(struct sock *sk, int  
> level, int optname,

>   	if (level == SOL_SOCKET) {
>   		err = sol_socket_sockopt(sk, optname, optval, &optlen, true);
> -	} else if (IS_ENABLED(CONFIG_INET) &&
> -		   level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
> -		struct inet_connection_sock *icsk;
> -		struct tcp_sock *tp;
> -
> -		switch (optname) {
> -		case TCP_CONGESTION:
> -			icsk = inet_csk(sk);
> -
> -			if (!icsk->icsk_ca_ops || optlen <= 1)
> -				goto err_clear;
> -			strncpy(optval, icsk->icsk_ca_ops->name, optlen);
> -			optval[optlen - 1] = 0;
> -			break;
> -		case TCP_SAVED_SYN:
> -			tp = tcp_sk(sk);
> -
> -			if (optlen <= 0 || !tp->saved_syn ||
> -			    optlen > tcp_saved_syn_len(tp->saved_syn))
> -				goto err_clear;
> -			memcpy(optval, tp->saved_syn->data, optlen);
> -			break;
> -		default:
> -			goto err_clear;
> -		}
> +	} else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) {
> +		err = sol_tcp_sockopt(sk, optname, optval, &optlen, true);
>   	} else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) {
>   		struct inet_sock *inet = inet_sk(sk);

> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index ab8118225797..a47cb5662be6 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -4043,8 +4043,8 @@ struct sk_buff  
> *tcp_get_timestamping_opt_stats(const struct sock *sk,
>   	return stats;
>   }

> -static int do_tcp_getsockopt(struct sock *sk, int level,
> -			     int optname, sockptr_t optval, sockptr_t optlen)
> +int do_tcp_getsockopt(struct sock *sk, int level,
> +		      int optname, sockptr_t optval, sockptr_t optlen)
>   {
>   	struct inet_connection_sock *icsk = inet_csk(sk);
>   	struct tcp_sock *tp = tcp_sk(sk);
> --
> 2.30.2


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next 02/17] bpf: net: Change sk_getsockopt() to take the sockptr_t argument
  2022-08-25 18:07   ` [PATCH bpf-next 02/17] bpf: net: Change sk_getsockopt() to take the sockptr_t argument Stanislav Fomichev
@ 2022-08-26 19:15     ` Martin KaFai Lau
  2022-08-26 20:40       ` Stanislav Fomichev
  0 siblings, 1 reply; 5+ messages in thread
From: Martin KaFai Lau @ 2022-08-26 19:15 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: bpf, netdev, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	David Miller, Eric Dumazet, Jakub Kicinski, kernel-team,
	Paolo Abeni

On Thu, Aug 25, 2022 at 11:07:36AM -0700, Stanislav Fomichev wrote:
> On Wed, Aug 24, 2022 at 3:29 PM Martin KaFai Lau <kafai@fb.com> wrote:
> >
> > This patch changes sk_getsockopt() to take the sockptr_t argument
> > such that it can be used by bpf_getsockopt(SOL_SOCKET) in a
> > latter patch.
> >
> > security_socket_getpeersec_stream() is not changed.  It stays
> > with the __user ptr (optval.user and optlen.user) to avoid changes
> > to other security hooks.  bpf_getsockopt(SOL_SOCKET) also does not
> > support SO_PEERSEC.
> >
> > Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> > ---
> >  include/linux/filter.h  |  3 +--
> >  include/linux/sockptr.h |  5 +++++
> >  net/core/filter.c       |  5 ++---
> >  net/core/sock.c         | 43 +++++++++++++++++++++++------------------
> >  4 files changed, 32 insertions(+), 24 deletions(-)
> >
> > diff --git a/include/linux/filter.h b/include/linux/filter.h
> > index a5f21dc3c432..527ae1d64e27 100644
> > --- a/include/linux/filter.h
> > +++ b/include/linux/filter.h
> > @@ -900,8 +900,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
> >  int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
> >  void sk_reuseport_prog_free(struct bpf_prog *prog);
> >  int sk_detach_filter(struct sock *sk);
> > -int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
> > -                 unsigned int len);
> > +int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len);
> >
> >  bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
> >  void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
> > diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
> > index d45902fb4cad..bae5e2369b4f 100644
> > --- a/include/linux/sockptr.h
> > +++ b/include/linux/sockptr.h
> > @@ -64,6 +64,11 @@ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
> >         return 0;
> >  }
> >
> > +static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size)
> > +{
> > +       return copy_to_sockptr_offset(dst, 0, src, size);
> > +}
> > +
> >  static inline void *memdup_sockptr(sockptr_t src, size_t len)
> >  {
> >         void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 63e25d8ce501..0f6f86b9e487 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -10712,8 +10712,7 @@ int sk_detach_filter(struct sock *sk)
> >  }
> >  EXPORT_SYMBOL_GPL(sk_detach_filter);
> >
> > -int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
> > -                 unsigned int len)
> > +int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len)
> >  {
> >         struct sock_fprog_kern *fprog;
> >         struct sk_filter *filter;
> > @@ -10744,7 +10743,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
> >                 goto out;
> >
> >         ret = -EFAULT;
> > -       if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
> > +       if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog)))
> >                 goto out;
> >
> >         /* Instead of bytes, the API requests to return the number
> > diff --git a/net/core/sock.c b/net/core/sock.c
> > index 21bc4bf6b485..7fa30fd4b37f 100644
> > --- a/net/core/sock.c
> > +++ b/net/core/sock.c
> > @@ -712,8 +712,8 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
> >         return ret;
> >  }
> >
> > -static int sock_getbindtodevice(struct sock *sk, char __user *optval,
> > -                               int __user *optlen, int len)
> > +static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
> > +                               sockptr_t optlen, int len)
> >  {
> >         int ret = -ENOPROTOOPT;
> >  #ifdef CONFIG_NETDEVICES
> > @@ -737,12 +737,12 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
> >         len = strlen(devname) + 1;
> >
> >         ret = -EFAULT;
> > -       if (copy_to_user(optval, devname, len))
> > +       if (copy_to_sockptr(optval, devname, len))
> >                 goto out;
> >
> >  zero:
> >         ret = -EFAULT;
> > -       if (put_user(len, optlen))
> > +       if (copy_to_sockptr(optlen, &len, sizeof(int)))
> >                 goto out;
> >
> >         ret = 0;
> > @@ -1568,20 +1568,23 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred,
> >         }
> >  }
> >
> > -static int groups_to_user(gid_t __user *dst, const struct group_info *src)
> > +static int groups_to_user(sockptr_t dst, const struct group_info *src)
> >  {
> >         struct user_namespace *user_ns = current_user_ns();
> >         int i;
> >
> > -       for (i = 0; i < src->ngroups; i++)
> > -               if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
> > +       for (i = 0; i < src->ngroups; i++) {
> > +               gid_t gid = from_kgid_munged(user_ns, src->gid[i]);
> > +
> > +               if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid)))
> >                         return -EFAULT;
> > +       }
> >
> >         return 0;
> >  }
> >
> >  static int sk_getsockopt(struct sock *sk, int level, int optname,
> > -                        char __user *optval, int __user *optlen)
> > +                        sockptr_t optval, sockptr_t optlen)
> >  {
> >         struct socket *sock = sk->sk_socket;
> >
> > @@ -1600,7 +1603,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> >         int lv = sizeof(int);
> >         int len;
> >
> > -       if (get_user(len, optlen))
> > +       if (copy_from_sockptr(&len, optlen, sizeof(int)))
> 
> Do we want to be consistent wrt to sizeof?
> 
> copy_from_sockptr(&len, optlen, sizeof(int))
> vs
> copy_from_sockptr(&len, optlen, sizeof(optlen))
optlen type is sockptr_t now. sizeof(optlen) won't work.

so either

copy_from_sockptr(&len, optlen, sizeof(len))
or
copy_from_sockptr(&len, optlen, sizeof(int))

I went with the latter 'sizeof(int)' for consistency because the
name is not always 'len' but optlen is always in 'int'.

> 
> Alternatively, should we have put_sockptr/get_sockopt with a semantics
> similar to put_user/get_user to remove all this ambiguity?
The type is lost in sockptr.{kernel,user} which is 'void *'.  {get,put}_user()
depends on it.  The very early sockptr_t introduction also changes
get_user() to copy_from_sockptr() for integer value.

One option could be to make {get,put}_sockopt(x, sockptr) to use x to decide the
type.  Not sure how that may look like.  I can give it a try.

> 
> >                 return -EFAULT;
> >         if (len < 0)
> >                 return -EINVAL;
> > @@ -1735,7 +1738,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> >                 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
> >                 spin_unlock(&sk->sk_peer_lock);
> >
> > -               if (copy_to_user(optval, &peercred, len))
> > +               if (copy_to_sockptr(optval, &peercred, len))
> >                         return -EFAULT;
> >                 goto lenout;
> >         }
> > @@ -1753,11 +1756,11 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> >                 if (len < n * sizeof(gid_t)) {
> >                         len = n * sizeof(gid_t);
> >                         put_cred(cred);
> > -                       return put_user(len, optlen) ? -EFAULT : -ERANGE;
> > +                       return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
> >                 }
> >                 len = n * sizeof(gid_t);
> >
> > -               ret = groups_to_user((gid_t __user *)optval, cred->group_info);
> > +               ret = groups_to_user(optval, cred->group_info);
> >                 put_cred(cred);
> >                 if (ret)
> >                         return ret;
> > @@ -1773,7 +1776,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> >                         return -ENOTCONN;
> >                 if (lv < len)
> >                         return -EINVAL;
> > -               if (copy_to_user(optval, address, len))
> > +               if (copy_to_sockptr(optval, address, len))
> >                         return -EFAULT;
> >                 goto lenout;
> >         }
> > @@ -1790,7 +1793,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> >                 break;
> >
> >         case SO_PEERSEC:
> > -               return security_socket_getpeersec_stream(sock, optval, optlen, len);
> > +               return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len);
> 
> I'm assuming there should be something to prevent this being called
> from BPF? (haven't read all the patches yet)
Not sure if any of the hooks may block.

> Do we want to be a bit more defensive with 'if (!optval.user) return
> -EFAULT' or something similar?
Checking 'optval.is_kernel || optlen.is_kernel'?
Yep.  Make sense.  It may be better to do the check inside
security_socket_getpeersec_stream(sock, optval, optlen, ...).

> 
> 
> >         case SO_MARK:
> >                 v.val = sk->sk_mark;
> > @@ -1822,7 +1825,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> >                 return sock_getbindtodevice(sk, optval, optlen, len);
> >
> >         case SO_GET_FILTER:
> > -               len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
> > +               len = sk_get_filter(sk, optval, len);
> >                 if (len < 0)
> >                         return len;
> >
> > @@ -1870,7 +1873,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> >                 sk_get_meminfo(sk, meminfo);
> >
> >                 len = min_t(unsigned int, len, sizeof(meminfo));
> > -               if (copy_to_user(optval, &meminfo, len))
> > +               if (copy_to_sockptr(optval, &meminfo, len))
> >                         return -EFAULT;
> >
> >                 goto lenout;
> > @@ -1939,10 +1942,10 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> >
> >         if (len > lv)
> >                 len = lv;
> > -       if (copy_to_user(optval, &v, len))
> > +       if (copy_to_sockptr(optval, &v, len))
> >                 return -EFAULT;
> >  lenout:
> > -       if (put_user(len, optlen))
> > +       if (copy_to_sockptr(optlen, &len, sizeof(int)))
> >                 return -EFAULT;
> >         return 0;
> >  }
> > @@ -1950,7 +1953,9 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> >  int sock_getsockopt(struct socket *sock, int level, int optname,
> >                     char __user *optval, int __user *optlen)
> >  {
> > -       return sk_getsockopt(sock->sk, level, optname, optval, optlen);
> > +       return sk_getsockopt(sock->sk, level, optname,
> > +                            USER_SOCKPTR(optval),
> > +                            USER_SOCKPTR(optlen));
> >  }
> >
> >  /*
> > --
> > 2.30.2
> >

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next 14/17] bpf: Change bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt()
  2022-08-25 18:36   ` [PATCH bpf-next 14/17] bpf: Change bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt() sdf
@ 2022-08-26 19:24     ` Martin KaFai Lau
  0 siblings, 0 replies; 5+ messages in thread
From: Martin KaFai Lau @ 2022-08-26 19:24 UTC (permalink / raw)
  To: sdf
  Cc: bpf, netdev, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	David Miller, Eric Dumazet, Jakub Kicinski, kernel-team,
	Paolo Abeni

On Thu, Aug 25, 2022 at 11:36:36AM -0700, sdf@google.com wrote:
> On 08/24, Martin KaFai Lau wrote:
> > This patch changes bpf_getsockopt(SOL_TCP) to reuse
> > do_tcp_getsockopt().  It removes the duplicated code from
> > bpf_getsockopt(SOL_TCP).
> 
> > Before this patch, there were some optnames available to
> > bpf_setsockopt(SOL_TCP) but missing in bpf_getsockopt(SOL_TCP).
> > For example, TCP_NODELAY, TCP_MAXSEG, TCP_KEEPIDLE, TCP_KEEPINTVL,
> > and a few more.  It surprises users from time to time.  This patch
> > automatically closes this gap without duplicating more code.
> 
> > bpf_getsockopt(TCP_SAVED_SYN) does not free the saved_syn,
> > so it stays in sol_tcp_sockopt().
> 
> > For string name value like TCP_CONGESTION, bpf expects it
> > is always null terminated, so sol_tcp_sockopt() decrements
> > optlen by one before calling do_tcp_getsockopt() and
> > the 'if (optlen < saved_optlen) memset(..,0,..);'
> > in __bpf_getsockopt() will always do a null termination.
> 
> > Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> > ---
> >   include/net/tcp.h |  2 ++
> >   net/core/filter.c | 70 ++++++++++++++++++++++++++---------------------
> >   net/ipv4/tcp.c    |  4 +--
> >   3 files changed, 43 insertions(+), 33 deletions(-)
> 
> > diff --git a/include/net/tcp.h b/include/net/tcp.h
> > index c03a50c72f40..735e957f7f4b 100644
> > --- a/include/net/tcp.h
> > +++ b/include/net/tcp.h
> > @@ -402,6 +402,8 @@ void tcp_init_sock(struct sock *sk);
> >   void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
> >   __poll_t tcp_poll(struct file *file, struct socket *sock,
> >   		      struct poll_table_struct *wait);
> > +int do_tcp_getsockopt(struct sock *sk, int level,
> > +		      int optname, sockptr_t optval, sockptr_t optlen);
> >   int tcp_getsockopt(struct sock *sk, int level, int optname,
> >   		   char __user *optval, int __user *optlen);
> >   bool tcp_bpf_bypass_getsockopt(int level, int optname);
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 68b52243b306..cdbbcec46e8b 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -5096,8 +5096,9 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk,
> > int optname,
> >   	return 0;
> >   }
> 
> > -static int sol_tcp_setsockopt(struct sock *sk, int optname,
> > -			      char *optval, int optlen)
> > +static int sol_tcp_sockopt(struct sock *sk, int optname,
> > +			   char *optval, int *optlen,
> > +			   bool getopt)
> >   {
> >   	if (sk->sk_prot->setsockopt != tcp_setsockopt)
> >   		return -EINVAL;
> > @@ -5114,17 +5115,47 @@ static int sol_tcp_setsockopt(struct sock *sk,
> > int optname,
> >   	case TCP_USER_TIMEOUT:
> >   	case TCP_NOTSENT_LOWAT:
> >   	case TCP_SAVE_SYN:
> > -		if (optlen != sizeof(int))
> > +		if (*optlen != sizeof(int))
> >   			return -EINVAL;
> >   		break;
> 
> [..]
> 
> >   	case TCP_CONGESTION:
> > +		if (*optlen < 2)
> > +			return -EINVAL;
> > +		break;
> > +	case TCP_SAVED_SYN:
> > +		if (*optlen < 1)
> > +			return -EINVAL;
> >   		break;
> 
> This looks a bit inconsistent vs '*optlen != sizeof(int)' above. Maybe
> 
> if (*optlen < sizeof(u16))
> if (*optlen < sizeof(u8))
TCP_CONGESTION (name string) and TCP_SAVED_SYN (raw binary)
are not expecting integer optval, so I think it is
better to stay away from using integer u16 or u8.

> 
> ?
> 
> >   	default:
> > -		return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen);
> > +		if (getopt)
> > +			return -EINVAL;
> > +		return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
> > +	}
> > +
> > +	if (getopt) {
> > +		if (optname == TCP_SAVED_SYN) {
> > +			struct tcp_sock *tp = tcp_sk(sk);
> > +
> > +			if (!tp->saved_syn ||
> > +			    *optlen > tcp_saved_syn_len(tp->saved_syn))
> > +				return -EINVAL;
> 
> You mention in the description that bpf doesn't doesn't free saved_syn,
> maybe worth putting a comment with the rationale here as well?
> I'm assuming we don't free from bpf because we want userspace to
> have an opportunity to read it as well?
Yes, it is the reason.  I will add a comment.

> 
> > +			memcpy(optval, tp->saved_syn->data, *optlen);
> > +			return 0;
> > +		}
> > +
> > +		if (optname == TCP_CONGESTION) {
> > +			if (!inet_csk(sk)->icsk_ca_ops)
> > +				return -EINVAL;
> 
> Is it worth it doing null termination more explicitly here?
> For readability sake:
> 			/* BPF always expects NULL-terminated strings. */
> 			optval[*optlen-1] = '\0';
Yep.  will do in v2.

> > +			(*optlen)--;
> > +		}
> > +
> > +		return do_tcp_getsockopt(sk, SOL_TCP, optname,
> > +					 KERNEL_SOCKPTR(optval),
> > +					 KERNEL_SOCKPTR(optlen));
> >   	}
> 
> >   	return do_tcp_setsockopt(sk, SOL_TCP, optname,
> > -				 KERNEL_SOCKPTR(optval), optlen);
> > +				 KERNEL_SOCKPTR(optval), *optlen);
> >   }

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next 02/17] bpf: net: Change sk_getsockopt() to take the sockptr_t argument
  2022-08-26 19:15     ` Martin KaFai Lau
@ 2022-08-26 20:40       ` Stanislav Fomichev
  0 siblings, 0 replies; 5+ messages in thread
From: Stanislav Fomichev @ 2022-08-26 20:40 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: bpf, netdev, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	David Miller, Eric Dumazet, Jakub Kicinski, kernel-team,
	Paolo Abeni

On Fri, Aug 26, 2022 at 12:15 PM Martin KaFai Lau <kafai@fb.com> wrote:
>
> On Thu, Aug 25, 2022 at 11:07:36AM -0700, Stanislav Fomichev wrote:
> > On Wed, Aug 24, 2022 at 3:29 PM Martin KaFai Lau <kafai@fb.com> wrote:
> > >
> > > This patch changes sk_getsockopt() to take the sockptr_t argument
> > > such that it can be used by bpf_getsockopt(SOL_SOCKET) in a
> > > latter patch.
> > >
> > > security_socket_getpeersec_stream() is not changed.  It stays
> > > with the __user ptr (optval.user and optlen.user) to avoid changes
> > > to other security hooks.  bpf_getsockopt(SOL_SOCKET) also does not
> > > support SO_PEERSEC.
> > >
> > > Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> > > ---
> > >  include/linux/filter.h  |  3 +--
> > >  include/linux/sockptr.h |  5 +++++
> > >  net/core/filter.c       |  5 ++---
> > >  net/core/sock.c         | 43 +++++++++++++++++++++++------------------
> > >  4 files changed, 32 insertions(+), 24 deletions(-)
> > >
> > > diff --git a/include/linux/filter.h b/include/linux/filter.h
> > > index a5f21dc3c432..527ae1d64e27 100644
> > > --- a/include/linux/filter.h
> > > +++ b/include/linux/filter.h
> > > @@ -900,8 +900,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
> > >  int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
> > >  void sk_reuseport_prog_free(struct bpf_prog *prog);
> > >  int sk_detach_filter(struct sock *sk);
> > > -int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
> > > -                 unsigned int len);
> > > +int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len);
> > >
> > >  bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
> > >  void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
> > > diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
> > > index d45902fb4cad..bae5e2369b4f 100644
> > > --- a/include/linux/sockptr.h
> > > +++ b/include/linux/sockptr.h
> > > @@ -64,6 +64,11 @@ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
> > >         return 0;
> > >  }
> > >
> > > +static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size)
> > > +{
> > > +       return copy_to_sockptr_offset(dst, 0, src, size);
> > > +}
> > > +
> > >  static inline void *memdup_sockptr(sockptr_t src, size_t len)
> > >  {
> > >         void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
> > > diff --git a/net/core/filter.c b/net/core/filter.c
> > > index 63e25d8ce501..0f6f86b9e487 100644
> > > --- a/net/core/filter.c
> > > +++ b/net/core/filter.c
> > > @@ -10712,8 +10712,7 @@ int sk_detach_filter(struct sock *sk)
> > >  }
> > >  EXPORT_SYMBOL_GPL(sk_detach_filter);
> > >
> > > -int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
> > > -                 unsigned int len)
> > > +int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len)
> > >  {
> > >         struct sock_fprog_kern *fprog;
> > >         struct sk_filter *filter;
> > > @@ -10744,7 +10743,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
> > >                 goto out;
> > >
> > >         ret = -EFAULT;
> > > -       if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
> > > +       if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog)))
> > >                 goto out;
> > >
> > >         /* Instead of bytes, the API requests to return the number
> > > diff --git a/net/core/sock.c b/net/core/sock.c
> > > index 21bc4bf6b485..7fa30fd4b37f 100644
> > > --- a/net/core/sock.c
> > > +++ b/net/core/sock.c
> > > @@ -712,8 +712,8 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
> > >         return ret;
> > >  }
> > >
> > > -static int sock_getbindtodevice(struct sock *sk, char __user *optval,
> > > -                               int __user *optlen, int len)
> > > +static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
> > > +                               sockptr_t optlen, int len)
> > >  {
> > >         int ret = -ENOPROTOOPT;
> > >  #ifdef CONFIG_NETDEVICES
> > > @@ -737,12 +737,12 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
> > >         len = strlen(devname) + 1;
> > >
> > >         ret = -EFAULT;
> > > -       if (copy_to_user(optval, devname, len))
> > > +       if (copy_to_sockptr(optval, devname, len))
> > >                 goto out;
> > >
> > >  zero:
> > >         ret = -EFAULT;
> > > -       if (put_user(len, optlen))
> > > +       if (copy_to_sockptr(optlen, &len, sizeof(int)))
> > >                 goto out;
> > >
> > >         ret = 0;
> > > @@ -1568,20 +1568,23 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred,
> > >         }
> > >  }
> > >
> > > -static int groups_to_user(gid_t __user *dst, const struct group_info *src)
> > > +static int groups_to_user(sockptr_t dst, const struct group_info *src)
> > >  {
> > >         struct user_namespace *user_ns = current_user_ns();
> > >         int i;
> > >
> > > -       for (i = 0; i < src->ngroups; i++)
> > > -               if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
> > > +       for (i = 0; i < src->ngroups; i++) {
> > > +               gid_t gid = from_kgid_munged(user_ns, src->gid[i]);
> > > +
> > > +               if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid)))
> > >                         return -EFAULT;
> > > +       }
> > >
> > >         return 0;
> > >  }
> > >
> > >  static int sk_getsockopt(struct sock *sk, int level, int optname,
> > > -                        char __user *optval, int __user *optlen)
> > > +                        sockptr_t optval, sockptr_t optlen)
> > >  {
> > >         struct socket *sock = sk->sk_socket;
> > >
> > > @@ -1600,7 +1603,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> > >         int lv = sizeof(int);
> > >         int len;
> > >
> > > -       if (get_user(len, optlen))
> > > +       if (copy_from_sockptr(&len, optlen, sizeof(int)))
> >
> > Do we want to be consistent wrt to sizeof?
> >
> > copy_from_sockptr(&len, optlen, sizeof(int))
> > vs
> > copy_from_sockptr(&len, optlen, sizeof(optlen))
> optlen type is sockptr_t now. sizeof(optlen) won't work.
>
> so either
>
> copy_from_sockptr(&len, optlen, sizeof(len))
> or
> copy_from_sockptr(&len, optlen, sizeof(int))
>
> I went with the latter 'sizeof(int)' for consistency because the
> name is not always 'len' but optlen is always in 'int'.
>
> >
> > Alternatively, should we have put_sockptr/get_sockopt with a semantics
> > similar to put_user/get_user to remove all this ambiguity?
> The type is lost in sockptr.{kernel,user} which is 'void *'.  {get,put}_user()
> depends on it.  The very early sockptr_t introduction also changes
> get_user() to copy_from_sockptr() for integer value.
>
> One option could be to make {get,put}_sockopt(x, sockptr) to use x to decide the
> type.  Not sure how that may look like.  I can give it a try.

I was thinking:
#define get_sockopt(x, ptr) copy_from_sockptr(&x, ptr, sizeof(x))

I was also slightly preferring the following:
copy_from_sockptr(&len, optlen, sizeof(len))

But now I'm not sure. I guess with that call, we really want
sizeof(len) == sizeof(actual pointer of optlen data).
So maybe the way you do it, with explicit sizeof(int), communicates it better.

Let's keep your version for now, because I'm not sure whatever I'm
suggesting actually makes it better..

> >
> > >                 return -EFAULT;
> > >         if (len < 0)
> > >                 return -EINVAL;
> > > @@ -1735,7 +1738,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> > >                 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
> > >                 spin_unlock(&sk->sk_peer_lock);
> > >
> > > -               if (copy_to_user(optval, &peercred, len))
> > > +               if (copy_to_sockptr(optval, &peercred, len))
> > >                         return -EFAULT;
> > >                 goto lenout;
> > >         }
> > > @@ -1753,11 +1756,11 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> > >                 if (len < n * sizeof(gid_t)) {
> > >                         len = n * sizeof(gid_t);
> > >                         put_cred(cred);
> > > -                       return put_user(len, optlen) ? -EFAULT : -ERANGE;
> > > +                       return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
> > >                 }
> > >                 len = n * sizeof(gid_t);
> > >
> > > -               ret = groups_to_user((gid_t __user *)optval, cred->group_info);
> > > +               ret = groups_to_user(optval, cred->group_info);
> > >                 put_cred(cred);
> > >                 if (ret)
> > >                         return ret;
> > > @@ -1773,7 +1776,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> > >                         return -ENOTCONN;
> > >                 if (lv < len)
> > >                         return -EINVAL;
> > > -               if (copy_to_user(optval, address, len))
> > > +               if (copy_to_sockptr(optval, address, len))
> > >                         return -EFAULT;
> > >                 goto lenout;
> > >         }
> > > @@ -1790,7 +1793,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> > >                 break;
> > >
> > >         case SO_PEERSEC:
> > > -               return security_socket_getpeersec_stream(sock, optval, optlen, len);
> > > +               return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len);
> >
> > I'm assuming there should be something to prevent this being called
> > from BPF? (haven't read all the patches yet)
> Not sure if any of the hooks may block.

I wasn't clear with my question. I found the answer later on, need to
develop a habit of not pressing 'send' before I read the whole series
:-)
You do the checks in sol_socket_sockopt to ignore that SO_PEERSEC from bpf.

> > Do we want to be a bit more defensive with 'if (!optval.user) return
> > -EFAULT' or something similar?
> Checking 'optval.is_kernel || optlen.is_kernel'?
> Yep.  Make sense.  It may be better to do the check inside
> security_socket_getpeersec_stream(sock, optval, optlen, ...).
>
> >
> >
> > >         case SO_MARK:
> > >                 v.val = sk->sk_mark;
> > > @@ -1822,7 +1825,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> > >                 return sock_getbindtodevice(sk, optval, optlen, len);
> > >
> > >         case SO_GET_FILTER:
> > > -               len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
> > > +               len = sk_get_filter(sk, optval, len);
> > >                 if (len < 0)
> > >                         return len;
> > >
> > > @@ -1870,7 +1873,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> > >                 sk_get_meminfo(sk, meminfo);
> > >
> > >                 len = min_t(unsigned int, len, sizeof(meminfo));
> > > -               if (copy_to_user(optval, &meminfo, len))
> > > +               if (copy_to_sockptr(optval, &meminfo, len))
> > >                         return -EFAULT;
> > >
> > >                 goto lenout;
> > > @@ -1939,10 +1942,10 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> > >
> > >         if (len > lv)
> > >                 len = lv;
> > > -       if (copy_to_user(optval, &v, len))
> > > +       if (copy_to_sockptr(optval, &v, len))
> > >                 return -EFAULT;
> > >  lenout:
> > > -       if (put_user(len, optlen))
> > > +       if (copy_to_sockptr(optlen, &len, sizeof(int)))
> > >                 return -EFAULT;
> > >         return 0;
> > >  }
> > > @@ -1950,7 +1953,9 @@ static int sk_getsockopt(struct sock *sk, int level, int optname,
> > >  int sock_getsockopt(struct socket *sock, int level, int optname,
> > >                     char __user *optval, int __user *optlen)
> > >  {
> > > -       return sk_getsockopt(sock->sk, level, optname, optval, optlen);
> > > +       return sk_getsockopt(sock->sk, level, optname,
> > > +                            USER_SOCKPTR(optval),
> > > +                            USER_SOCKPTR(optlen));
> > >  }
> > >
> > >  /*
> > > --
> > > 2.30.2
> > >

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-08-26 20:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20220824222601.1916776-1-kafai@fb.com>
     [not found] ` <20220824222614.1918332-1-kafai@fb.com>
2022-08-25 18:07   ` [PATCH bpf-next 02/17] bpf: net: Change sk_getsockopt() to take the sockptr_t argument Stanislav Fomichev
2022-08-26 19:15     ` Martin KaFai Lau
2022-08-26 20:40       ` Stanislav Fomichev
     [not found] ` <20220824222730.1923992-1-kafai@fb.com>
2022-08-25 18:36   ` [PATCH bpf-next 14/17] bpf: Change bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt() sdf
2022-08-26 19:24     ` Martin KaFai Lau

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).