netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Martin KaFai Lau <martin.lau@linux.dev>
To: Daan De Meyer <daan.j.demeyer@gmail.com>
Cc: kernel-team@meta.com, netdev@vger.kernel.org, bpf@vger.kernel.org
Subject: Re: [PATCH bpf-next v6 4/9] bpf: Implement cgroup sockaddr hooks for unix sockets
Date: Wed, 27 Sep 2023 15:32:35 -0700	[thread overview]
Message-ID: <f78069e0-9885-b3ab-4ad7-8abe94b9ad0a@linux.dev> (raw)
In-Reply-To: <20230926202753.1482200-5-daan.j.demeyer@gmail.com>

On 9/26/23 1:27 PM, Daan De Meyer wrote:
> These hooks allows intercepting connect(), getsockname(),
> getpeername(), sendmsg() and recvmsg() for unix sockets. The unix
> socket hooks get write access to the address length because the
> address length is not fixed when dealing with unix sockets and
> needs to be modified when a unix socket address is modified by
> the hook. Because abstract socket unix addresses start with a
> NUL byte, we cannot recalculate the socket address in kernelspace
> after running the hook by calculating the length of the unix socket
> path using strlen().
> 
> These hooks can be used when users want to multiplex syscall to a
> single unix socket to multiple different processes behind the scenes
> by redirecting the connect() and other syscalls to process specific
> sockets.
> 
> We do not implement support for intercepting bind() because when
> using bind() with unix sockets with a pathname address, this creates
> an inode in the filesystem which must be cleaned up. If we rewrite
> the address, the user might try to clean up the wrong file, leaking
> the socket in the filesystem where it is never cleaned up. Until we
> figure out a solution for this (and a use case for intercepting bind()),
> we opt to not allow rewriting the sockaddr in bind() calls.

> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index 31561e789715..c069f3510365 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
>   	CGROUP_ATYPE(CGROUP_INET6_BIND);
>   	CGROUP_ATYPE(CGROUP_INET4_CONNECT);
>   	CGROUP_ATYPE(CGROUP_INET6_CONNECT);
> +	CGROUP_ATYPE(CGROUP_UNIX_CONNECT);
>   	CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
>   	CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
>   	CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
>   	CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
> +	CGROUP_ATYPE(CGROUP_UNIX_SENDMSG);
>   	CGROUP_ATYPE(CGROUP_SYSCTL);
>   	CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
>   	CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
> +	CGROUP_ATYPE(CGROUP_UNIX_RECVMSG);
>   	CGROUP_ATYPE(CGROUP_GETSOCKOPT);
>   	CGROUP_ATYPE(CGROUP_SETSOCKOPT);
>   	CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
>   	CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
> +	CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME);
>   	CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
>   	CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
> +	CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME);
>   	CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
>   	default:
>   		return CGROUP_BPF_ATTACH_TYPE_INVALID;
> @@ -283,24 +288,36 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
>   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen)			\
>   	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT)
>   
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT(sk, uaddr, uaddrlen)			\
> +	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT)
> +

Remove the no _LOCK version because it is not used.

>   #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen)		\
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL)
>   
>   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen)		\
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL)
>   
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen)		\
> +	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL)
> +
>   #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)	\
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx)
>   
>   #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)	\
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx)
>   
> +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)	\
> +	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx)
> +
>   #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL)
>   
>   #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
>   	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL)
>   
> +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
> +	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL)
> +
>   /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
>    * fullsock and its parent fullsock cannot be traced by
>    * sk_to_full_sk().
> @@ -492,10 +509,14 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
>   #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT(sk, uaddr, uaddrlen) ({ 0; })

Same here. Not needed.

> +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
>   #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })

[ ... ]

> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index ba2c57cf4046..4a4e3b1f00b1 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -1455,7 +1455,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
>    * @flags: Pointer to u32 which contains higher bits of BPF program
>    *         return value (OR'ed together).
>    *
> - * socket is expected to be of type INET or INET6.
> + * socket is expected to be of type INET, INET6 or UNIX.
>    *
>    * This function will return %-EPERM if an attached program is found and
>    * returned value != 1 during execution. In all other cases, 0 is returned.
> @@ -1479,7 +1479,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
>   	/* Check socket family since not all sockets represent network
>   	 * endpoint (e.g. AF_UNIX).
>   	 */
> -	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
> +	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
> +		sk->sk_family != AF_UNIX)

./scripts/checkpatch.pl --strict:

CHECK: Alignment should match open parenthesis
#211: FILE: kernel/bpf/cgroup.c:1483:
+	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
+		sk->sk_family != AF_UNIX)

[ ... ]

> diff --git a/net/core/filter.c b/net/core/filter.c
> index bd1c42b28483..956f413e98a3 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -7829,6 +7829,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>   		switch (prog->expected_attach_type) {
>   		case BPF_CGROUP_INET4_CONNECT:
>   		case BPF_CGROUP_INET6_CONNECT:
> +		case BPF_CGROUP_UNIX_CONNECT:
>   			return &bpf_bind_proto;

bpf_bind support is still not removed...

[ ... ]

> @@ -2744,6 +2773,11 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
>   			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
>   					 state->msg->msg_name);
>   			unix_copy_addr(state->msg, skb->sk);
> +
> +			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
> +							      state->msg->msg_name,
> +							      &state->msg->msg_namelen);
> +

Re-pasting the comment from v5:

it will be useful to mention the reason in the commit message in case we need to 
look back a few months later why only recvmsg is supported but not sendmsg for 
connected stream.


  reply	other threads:[~2023-09-27 22:32 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-26 20:27 [PATCH bpf-next v6 0/9] Add cgroup sockaddr hooks for unix sockets Daan De Meyer
2023-09-26 20:27 ` [PATCH bpf-next v6 1/9] selftests/bpf: Add missing section name tests for getpeername/getsockname Daan De Meyer
2023-09-26 20:27 ` [PATCH bpf-next v6 2/9] bpf: Propagate modified uaddrlen from cgroup sockaddr programs Daan De Meyer
2023-09-27 22:19   ` Martin KaFai Lau
2023-09-26 20:27 ` [PATCH bpf-next v6 3/9] bpf: Add bpf_sock_addr_set_unix_addr() to allow writing unix sockaddr from bpf Daan De Meyer
2023-09-26 20:27 ` [PATCH bpf-next v6 4/9] bpf: Implement cgroup sockaddr hooks for unix sockets Daan De Meyer
2023-09-27 22:32   ` Martin KaFai Lau [this message]
2023-09-26 20:27 ` [PATCH bpf-next v6 5/9] libbpf: Add support for cgroup unix socket address hooks Daan De Meyer
2023-09-26 20:27 ` [PATCH bpf-next v6 6/9] bpftool: " Daan De Meyer
2023-09-27  8:26   ` Quentin Monnet
2023-09-26 20:27 ` [PATCH bpf-next v6 7/9] documentation/bpf: Document " Daan De Meyer
2023-09-26 20:27 ` [PATCH bpf-next v6 8/9] selftests/bpf: Make sure mount directory exists Daan De Meyer
2023-09-26 20:27 ` [PATCH bpf-next v6 9/9] selftests/bpf: Add tests for cgroup unix socket address hooks Daan De Meyer
2023-09-27 22:36   ` Martin KaFai Lau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f78069e0-9885-b3ab-4ad7-8abe94b9ad0a@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=bpf@vger.kernel.org \
    --cc=daan.j.demeyer@gmail.com \
    --cc=kernel-team@meta.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).