From: Jakub Sitnicki <jakub@cloudflare.com>
To: Jiang Wang <jiang.wang@bytedance.com>
Cc: netdev@vger.kernel.org, cong.wang@bytedance.com,
duanxiongchun@bytedance.com, xieyongji@bytedance.com,
chaiwen.cc@bytedance.com, "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>,
John Fastabend <john.fastabend@gmail.com>,
Daniel Borkmann <daniel@iogearbox.net>,
Lorenz Bauer <lmb@cloudflare.com>,
Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
Yonghong Song <yhs@fb.com>, KP Singh <kpsingh@kernel.org>,
Shuah Khan <shuah@kernel.org>, Al Viro <viro@zeniv.linux.org.uk>,
Christian Brauner <christian.brauner@ubuntu.com>,
Johan Almbladh <johan.almbladh@anyfinetworks.com>,
linux-kernel@vger.kernel.org, bpf@vger.kernel.org,
linux-kselftest@vger.kernel.org
Subject: Re: [PATCH bpf-next v4 2/5] af_unix: add unix_stream_proto for sockmap
Date: Thu, 05 Aug 2021 20:44:37 +0200 [thread overview]
Message-ID: <87y29fd94a.fsf@cloudflare.com> (raw)
In-Reply-To: <20210805051340.3798543-3-jiang.wang@bytedance.com>
On Thu, Aug 05, 2021 at 07:13 AM CEST, Jiang Wang wrote:
[...]
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -791,17 +791,35 @@ static void unix_close(struct sock *sk, long timeout)
> */
> }
>
> -struct proto unix_proto = {
> - .name = "UNIX",
> +static void unix_unhash(struct sock *sk)
> +{
> + /* Nothing to do here, unix socket does not need a ->unhash().
> + * This is merely for sockmap.
> + */
> +}
> +
> +struct proto unix_dgram_proto = {
> + .name = "UNIX-DGRAM",
> + .owner = THIS_MODULE,
> + .obj_size = sizeof(struct unix_sock),
> + .close = unix_close,
> +#ifdef CONFIG_BPF_SYSCALL
> + .psock_update_sk_prot = unix_dgram_bpf_update_proto,
> +#endif
> +};
> +
> +struct proto unix_stream_proto = {
> + .name = "UNIX-STREAM",
> .owner = THIS_MODULE,
> .obj_size = sizeof(struct unix_sock),
> .close = unix_close,
> + .unhash = unix_unhash,
> #ifdef CONFIG_BPF_SYSCALL
> - .psock_update_sk_prot = unix_bpf_update_proto,
> + .psock_update_sk_prot = unix_stream_bpf_update_proto,
> #endif
> };
>
> -static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
> +static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
> {
> struct sock *sk = NULL;
> struct unix_sock *u;
> @@ -810,7 +828,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
> if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
> goto out;
>
> - sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
> + if (type == SOCK_STREAM)
> + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
> + else /*dgram and seqpacket */
> + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
> +
> if (!sk)
> goto out;
>
> @@ -872,7 +894,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
> return -ESOCKTNOSUPPORT;
> }
>
> - return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
> + return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
> }
>
> static int unix_release(struct socket *sock)
> @@ -1286,7 +1308,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> err = -ENOMEM;
>
> /* create new sock for complete connection */
> - newsk = unix_create1(sock_net(sk), NULL, 0);
> + newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
> if (newsk == NULL)
> goto out;
>
> @@ -2261,7 +2283,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si
> struct sock *sk = sock->sk;
>
> #ifdef CONFIG_BPF_SYSCALL
> - if (sk->sk_prot != &unix_proto)
> + if (READ_ONCE(sk->sk_prot) != &unix_dgram_proto)
> return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
> flags & ~MSG_DONTWAIT, NULL);
Notice we have two reads from sk->sk_prot here. And the value
sk->sk_prot holds might change between reads (that is when we remove the
socket from sockmap). So we want to load it just once.
Otherwise, it seems possible that sk->sk_prot->recvmsg will be called,
when sk->sk_prot == unix_proto. Which means sk->sk_prot->recvmsg is NULL.
> #endif
> @@ -2580,6 +2602,20 @@ static int unix_stream_read_actor(struct sk_buff *skb,
> return ret ?: chunk;
> }
>
> +int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
> + size_t size, int flags)
> +{
> + struct unix_stream_read_state state = {
> + .recv_actor = unix_stream_read_actor,
> + .socket = sk->sk_socket,
> + .msg = msg,
> + .size = size,
> + .flags = flags
> + };
> +
> + return unix_stream_read_generic(&state, true);
> +}
> +
> static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
> size_t size, int flags)
> {
> @@ -2591,6 +2627,12 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
> .flags = flags
> };
>
> +#ifdef CONFIG_BPF_SYSCALL
> + struct sock *sk = sock->sk;
> + if (sk->sk_prot != &unix_stream_proto)
> + return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
> + flags & ~MSG_DONTWAIT, NULL);
Also needs READ_ONCE annotations.
> +#endif
> return unix_stream_read_generic(&state, true);
> }
>
> @@ -2652,6 +2694,7 @@ static int unix_shutdown(struct socket *sock, int mode)
>
> int peer_mode = 0;
>
> + other->sk_prot->unhash(other);
Here as well.
> if (mode&RCV_SHUTDOWN)
> peer_mode |= SEND_SHUTDOWN;
> if (mode&SEND_SHUTDOWN)
[...]
next prev parent reply other threads:[~2021-08-05 18:44 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-05 5:13 [PATCH bpf-next v4 0/5] sockmap: add sockmap support for unix stream socket Jiang Wang
2021-08-05 5:13 ` [PATCH bpf-next v4 1/5] af_unix: add read_sock for stream socket types Jiang Wang
2021-08-05 5:13 ` [PATCH bpf-next v4 2/5] af_unix: add unix_stream_proto for sockmap Jiang Wang
2021-08-05 18:44 ` Jakub Sitnicki [this message]
2021-08-05 5:13 ` [PATCH bpf-next v4 3/5] selftest/bpf: add tests for sockmap with unix stream type Jiang Wang
2021-08-05 5:13 ` [PATCH bpf-next v4 4/5] selftest/bpf: change udp to inet in some function names Jiang Wang
2021-08-05 5:13 ` [PATCH bpf-next v4 5/5] selftest/bpf: add new tests in sockmap for unix stream to tcp Jiang Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87y29fd94a.fsf@cloudflare.com \
--to=jakub@cloudflare.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=chaiwen.cc@bytedance.com \
--cc=christian.brauner@ubuntu.com \
--cc=cong.wang@bytedance.com \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=duanxiongchun@bytedance.com \
--cc=jiang.wang@bytedance.com \
--cc=johan.almbladh@anyfinetworks.com \
--cc=john.fastabend@gmail.com \
--cc=kafai@fb.com \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=lmb@cloudflare.com \
--cc=netdev@vger.kernel.org \
--cc=shuah@kernel.org \
--cc=songliubraving@fb.com \
--cc=viro@zeniv.linux.org.uk \
--cc=xieyongji@bytedance.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.