* [PATCH net-next v2 1/4] net: add getsockopt_iter callback to proto_ops
2026-04-01 15:44 [PATCH net-next v2 0/4] net: move .getsockopt away from __user buffers Breno Leitao
@ 2026-04-01 15:44 ` Breno Leitao
2026-04-01 15:44 ` [PATCH net-next v2 2/4] net: call getsockopt_iter if available Breno Leitao
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Breno Leitao @ 2026-04-01 15:44 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, Willem de Bruijn, metze, axboe,
Stanislav Fomichev
Cc: io-uring, bpf, netdev, Linus Torvalds, linux-kernel, kernel-team,
Breno Leitao
Add a new getsockopt_iter callback to struct proto_ops that uses
sockopt_t, a type-safe wrapper around iov_iter. This provides a clean
interface for socket option operations that works with both user and
kernel buffers.
The sockopt_t type encapsulates an iov_iter and an optlen field.
The optlen field, although not suggested by Linus, serves as both input
(buffer size) and output (returned data size), allowing callbacks to
return random values independent of the bytes written via
copy_to_iter(), so, keep it separated from iov_iter.count.
This is preparatory work for removing the SOL_SOCKET level restriction
from io_uring getsockopt operations.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Breno Leitao <leitao@debian.org>
---
include/linux/net.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/include/linux/net.h b/include/linux/net.h
index a8e818de95b3..9dde75b4bf77 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -23,9 +23,26 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sockptr.h>
+#include <linux/uio.h>
#include <uapi/linux/net.h>
+/**
+ * struct sockopt - socket option value container
+ * @iter: iov_iter for reading/writing option data
+ * @optlen: serves as both input (buffer size) and output (returned data size).
+ *
+ * Type-safe wrapper for socket option data that works with both
+ * user and kernel buffers.
+ *
+ * The optlen field allows callbacks to return a specific length value
+ * independent of the bytes written via copy_to_iter().
+ */
+typedef struct sockopt {
+ struct iov_iter iter;
+ int optlen;
+} sockopt_t;
+
struct poll_table_struct;
struct pipe_inode_info;
struct inode;
@@ -192,6 +209,8 @@ struct proto_ops {
unsigned int optlen);
int (*getsockopt)(struct socket *sock, int level,
int optname, char __user *optval, int __user *optlen);
+ int (*getsockopt_iter)(struct socket *sock, int level,
+ int optname, sockopt_t *opt);
void (*show_fdinfo)(struct seq_file *m, struct socket *sock);
int (*sendmsg) (struct socket *sock, struct msghdr *m,
size_t total_len);
--
2.52.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH net-next v2 2/4] net: call getsockopt_iter if available
2026-04-01 15:44 [PATCH net-next v2 0/4] net: move .getsockopt away from __user buffers Breno Leitao
2026-04-01 15:44 ` [PATCH net-next v2 1/4] net: add getsockopt_iter callback to proto_ops Breno Leitao
@ 2026-04-01 15:44 ` Breno Leitao
2026-04-01 16:34 ` Stanislav Fomichev
2026-04-01 15:44 ` [PATCH net-next v2 3/4] af_packet: convert to getsockopt_iter Breno Leitao
2026-04-01 15:44 ` [PATCH net-next v2 4/4] can: raw: " Breno Leitao
3 siblings, 1 reply; 10+ messages in thread
From: Breno Leitao @ 2026-04-01 15:44 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, Willem de Bruijn, metze, axboe,
Stanislav Fomichev
Cc: io-uring, bpf, netdev, Linus Torvalds, linux-kernel, kernel-team,
Breno Leitao
Update do_sock_getsockopt() to use the new getsockopt_iter callback
when available. Add do_sock_getsockopt_iter() helper that:
1. Reads optlen from user/kernel space
2. Initializes a sockopt_t with the appropriate iov_iter (kvec for
kernel, ubuf for user buffers) and sets opt.optlen
3. Calls the protocol's getsockopt_iter callback
4. Writes opt.optlen back to user/kernel space
The optlen is always written back, even on failure. Some protocols
(e.g. CAN raw) return -ERANGE and set optlen to the required buffer
size so userspace knows how much to allocate.
The callback is responsible for setting opt.optlen to indicate the
returned data size.
Signed-off-by: Breno Leitao <leitao@debian.org>
---
net/socket.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 45 insertions(+), 3 deletions(-)
diff --git a/net/socket.c b/net/socket.c
index ade2ff5845a0..4a74a4aa1bb4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -77,6 +77,7 @@
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/security.h>
+#include <linux/uio.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/kmod.h>
@@ -2349,6 +2350,44 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
int optname));
+static int do_sock_getsockopt_iter(struct socket *sock,
+ const struct proto_ops *ops, int level,
+ int optname, sockptr_t optval,
+ sockptr_t optlen)
+{
+ struct kvec kvec;
+ sockopt_t opt;
+ int koptlen;
+ int err;
+
+ if (copy_from_sockptr(&koptlen, optlen, sizeof(int)))
+ return -EFAULT;
+
+ if (optval.is_kernel) {
+ kvec.iov_base = optval.kernel;
+ kvec.iov_len = koptlen;
+ iov_iter_kvec(&opt.iter, ITER_DEST, &kvec, 1, koptlen);
+ } else {
+ iov_iter_ubuf(&opt.iter, ITER_DEST, optval.user, koptlen);
+ }
+ opt.optlen = koptlen;
+
+ /* iter is initialized as ITER_DEST. Callbacks that need to read
+ * from optval (e.g. PACKET_HDRLEN) must flip data_source to
+ * ITER_SOURCE, then restore ITER_DEST before writing back.
+ */
+ err = ops->getsockopt_iter(sock, level, optname, &opt);
+
+ /* Always write back optlen, even on failure. Some protocols
+ * (e.g. CAN raw) return -ERANGE and set optlen to the required
+ * buffer size so userspace knows how much to allocate.
+ */
+ if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int)))
+ return -EFAULT;
+
+ return err;
+}
+
int do_sock_getsockopt(struct socket *sock, bool compat, int level,
int optname, sockptr_t optval, sockptr_t optlen)
{
@@ -2366,15 +2405,18 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET) {
err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
- } else if (unlikely(!ops->getsockopt)) {
- err = -EOPNOTSUPP;
- } else {
+ } else if (ops->getsockopt_iter) {
+ err = do_sock_getsockopt_iter(sock, ops, level, optname,
+ optval, optlen);
+ } else if (ops->getsockopt) {
if (WARN_ONCE(optval.is_kernel || optlen.is_kernel,
"Invalid argument type"))
return -EOPNOTSUPP;
err = ops->getsockopt(sock, level, optname, optval.user,
optlen.user);
+ } else {
+ err = -EOPNOTSUPP;
}
if (!compat)
--
2.52.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH net-next v2 2/4] net: call getsockopt_iter if available
2026-04-01 15:44 ` [PATCH net-next v2 2/4] net: call getsockopt_iter if available Breno Leitao
@ 2026-04-01 16:34 ` Stanislav Fomichev
2026-04-01 17:43 ` Breno Leitao
0 siblings, 1 reply; 10+ messages in thread
From: Stanislav Fomichev @ 2026-04-01 16:34 UTC (permalink / raw)
To: Breno Leitao
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, Willem de Bruijn, metze, axboe,
Stanislav Fomichev, io-uring, bpf, netdev, Linus Torvalds,
linux-kernel, kernel-team
On 04/01, Breno Leitao wrote:
> Update do_sock_getsockopt() to use the new getsockopt_iter callback
> when available. Add do_sock_getsockopt_iter() helper that:
>
> 1. Reads optlen from user/kernel space
> 2. Initializes a sockopt_t with the appropriate iov_iter (kvec for
> kernel, ubuf for user buffers) and sets opt.optlen
> 3. Calls the protocol's getsockopt_iter callback
> 4. Writes opt.optlen back to user/kernel space
>
> The optlen is always written back, even on failure. Some protocols
> (e.g. CAN raw) return -ERANGE and set optlen to the required buffer
> size so userspace knows how much to allocate.
>
> The callback is responsible for setting opt.optlen to indicate the
> returned data size.
>
> Signed-off-by: Breno Leitao <leitao@debian.org>
> ---
> net/socket.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 45 insertions(+), 3 deletions(-)
>
> diff --git a/net/socket.c b/net/socket.c
> index ade2ff5845a0..4a74a4aa1bb4 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -77,6 +77,7 @@
> #include <linux/mount.h>
> #include <linux/pseudo_fs.h>
> #include <linux/security.h>
> +#include <linux/uio.h>
> #include <linux/syscalls.h>
> #include <linux/compat.h>
> #include <linux/kmod.h>
> @@ -2349,6 +2350,44 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
> INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
> int optname));
>
> +static int do_sock_getsockopt_iter(struct socket *sock,
> + const struct proto_ops *ops, int level,
> + int optname, sockptr_t optval,
> + sockptr_t optlen)
If we want to eventually remove sockptr_t, why not make this new handler
work with iov_iters from the beginning? The callers can have some new temporary
sockptr_to_iter() or something?
> +{
> + struct kvec kvec;
> + sockopt_t opt;
> + int koptlen;
> + int err;
> +
> + if (copy_from_sockptr(&koptlen, optlen, sizeof(int)))
> + return -EFAULT;
> +
> + if (optval.is_kernel) {
> + kvec.iov_base = optval.kernel;
> + kvec.iov_len = koptlen;
> + iov_iter_kvec(&opt.iter, ITER_DEST, &kvec, 1, koptlen);
> + } else {
> + iov_iter_ubuf(&opt.iter, ITER_DEST, optval.user, koptlen);
> + }
> + opt.optlen = koptlen;
> +
> + /* iter is initialized as ITER_DEST. Callbacks that need to read
> + * from optval (e.g. PACKET_HDRLEN) must flip data_source to
> + * ITER_SOURCE, then restore ITER_DEST before writing back.
> + */
Have you considered creating two iters? opt.iter_in and opt.iter_out.
That way you don't have to flip the source back and forth in the
handlers.
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH net-next v2 2/4] net: call getsockopt_iter if available
2026-04-01 16:34 ` Stanislav Fomichev
@ 2026-04-01 17:43 ` Breno Leitao
2026-04-01 18:10 ` Stanislav Fomichev
0 siblings, 1 reply; 10+ messages in thread
From: Breno Leitao @ 2026-04-01 17:43 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, Willem de Bruijn, metze, axboe,
Stanislav Fomichev, io-uring, bpf, netdev, Linus Torvalds,
linux-kernel, kernel-team
On Wed, Apr 01, 2026 at 09:34:04AM -0700, Stanislav Fomichev wrote:
> > +static int do_sock_getsockopt_iter(struct socket *sock,
> > + const struct proto_ops *ops, int level,
> > + int optname, sockptr_t optval,
> > + sockptr_t optlen)
>
> If we want to eventually remove sockptr_t, why not make this new handler
> work with iov_iters from the beginning? The callers can have some new temporary
> sockptr_to_iter() or something?
The goal is to eliminate __user memory from the callbacks entirely, which
would make sockptr_t unnecessary. This series removes the callbacks that
originally necessitated sockptr_t's existence.
Therefore, working from the callbacks back to userspace seem to be a more
logical approach than replacing the middle layers of the implementation,
and then touching the callbacks.
So, yes, the sockptr_t() is used here as temporary glue to be able to
get rid of the elephant in the room.
> > + /* iter is initialized as ITER_DEST. Callbacks that need to read
> > + * from optval (e.g. PACKET_HDRLEN) must flip data_source to
> > + * ITER_SOURCE, then restore ITER_DEST before writing back.
> > + */
>
> Have you considered creating two iters? opt.iter_in and opt.iter_out.
> That way you don't have to flip the source back and forth in the
> handlers.
That's a good suggestion I hadn't considered. My initial thought was to
create a helper like sockopt_read_val() to handle the flip-read-flip
dance.
Would opt.iter_in and opt.iter_out be clearer than the helper approach?
Thanks for the review,
--breno
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH net-next v2 2/4] net: call getsockopt_iter if available
2026-04-01 17:43 ` Breno Leitao
@ 2026-04-01 18:10 ` Stanislav Fomichev
2026-04-02 15:39 ` Breno Leitao
0 siblings, 1 reply; 10+ messages in thread
From: Stanislav Fomichev @ 2026-04-01 18:10 UTC (permalink / raw)
To: Breno Leitao
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, Willem de Bruijn, metze, axboe,
Stanislav Fomichev, io-uring, bpf, netdev, Linus Torvalds,
linux-kernel, kernel-team
On 04/01, Breno Leitao wrote:
> On Wed, Apr 01, 2026 at 09:34:04AM -0700, Stanislav Fomichev wrote:
> > > +static int do_sock_getsockopt_iter(struct socket *sock,
> > > + const struct proto_ops *ops, int level,
> > > + int optname, sockptr_t optval,
> > > + sockptr_t optlen)
> >
> > If we want to eventually remove sockptr_t, why not make this new handler
> > work with iov_iters from the beginning? The callers can have some new temporary
> > sockptr_to_iter() or something?
>
> The goal is to eliminate __user memory from the callbacks entirely, which
> would make sockptr_t unnecessary. This series removes the callbacks that
> originally necessitated sockptr_t's existence.
>
> Therefore, working from the callbacks back to userspace seem to be a more
> logical approach than replacing the middle layers of the implementation,
> and then touching the callbacks.
>
> So, yes, the sockptr_t() is used here as temporary glue to be able to
> get rid of the elephant in the room.
So maybe something like this is better to communicate your long term intent?
} else if (ops->getsockopt_iter) {
optval = sockptr_to_iter(optval)
optlen = sockptr_to_iter(optlen)
do_sock_getsockopt_iter(...) /* does not know what sockpt_t is */
}
?
Then your new do_sock_getsockopt_iter is sockptr-free from the beginning
and at some point we'll just drop/move those sockptr_to_iter calls?
> > > + /* iter is initialized as ITER_DEST. Callbacks that need to read
> > > + * from optval (e.g. PACKET_HDRLEN) must flip data_source to
> > > + * ITER_SOURCE, then restore ITER_DEST before writing back.
> > > + */
> >
> > Have you considered creating two iters? opt.iter_in and opt.iter_out.
> > That way you don't have to flip the source back and forth in the
> > handlers.
>
> That's a good suggestion I hadn't considered. My initial thought was to
> create a helper like sockopt_read_val() to handle the flip-read-flip
> dance.
>
> Would opt.iter_in and opt.iter_out be clearer than the helper approach?
>
> Thanks for the review,
> --breno
I hope this way it will be easier to review protocol handler changes.
For example, looking at your AF_PACKET patch, you won't have to care
about flipping the source and doing the revert. Most/all of the changes will
be simple:
- s/get_user(len, optlen)/len = opt->optlen/
- s/put_user(len, optlen)/opt->optlen = len/
- s/copy_from_user(xxx, optval, len)/copy_from_iter(xxx, len, &opt->iter_in)/
- s/copy_to_user(optval, xxx, len)/copy_to_iter(xxx, len, &opt->iter_out)/
Might be even possible to express these with coccinelle?
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH net-next v2 2/4] net: call getsockopt_iter if available
2026-04-01 18:10 ` Stanislav Fomichev
@ 2026-04-02 15:39 ` Breno Leitao
2026-04-02 23:00 ` Stanislav Fomichev
0 siblings, 1 reply; 10+ messages in thread
From: Breno Leitao @ 2026-04-02 15:39 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, Willem de Bruijn, metze, axboe,
Stanislav Fomichev, io-uring, bpf, netdev, Linus Torvalds,
linux-kernel, kernel-team
Hello Stanislav,
On Wed, Apr 01, 2026 at 11:10:22AM -0700, Stanislav Fomichev wrote:
> So maybe something like this is better to communicate your long term intent?
>
> } else if (ops->getsockopt_iter) {
> optval = sockptr_to_iter(optval)
> optlen = sockptr_to_iter(optlen)
> do_sock_getsockopt_iter(...) /* does not know what sockpt_t is */
> }
>
> ?
>
> Then your new do_sock_getsockopt_iter is sockptr-free from the beginning
> and at some point we'll just drop/move those sockptr_to_iter calls?
Sure, that would work as well. It would look like the following, from my
current implemention:
+static int sockptr_to_sockopt(sockopt_t *opt, sockptr_t optval,
+ sockptr_t optlen, struct kvec *kvec)
+{
+ int koptlen;
+
+ if (copy_from_sockptr(&koptlen, optlen, sizeof(int)))
+ return -EFAULT;
+
+ if (optval.is_kernel) {
+ kvec->iov_base = optval.kernel;
+ kvec->iov_len = koptlen;
+ iov_iter_kvec(&opt->iter_out, ITER_DEST, kvec, 1, koptlen);
+ iov_iter_kvec(&opt->iter_in, ITER_SOURCE, kvec, 1, koptlen);
+ } else {
+ iov_iter_ubuf(&opt->iter_out, ITER_DEST, optval.user, koptlen);
+ iov_iter_ubuf(&opt->iter_in, ITER_SOURCE, optval.user,
+ koptlen);
+ }
+ opt->optlen = koptlen;
+
+ return 0;
+}
+
int do_sock_getsockopt(struct socket *sock, bool compat, int level,
int optname, sockptr_t optval, sockptr_t optlen)
{
@@ -2366,15 +2390,31 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
+ } else if (ops->getsockopt_iter) {
+ struct kvec kvec;
+ sockopt_t opt;
+
+ err = sockptr_to_sockopt(&opt, optval, optlen, &kvec);
+ if (err)
+ return err;
+
+ err = ops->getsockopt_iter(sock, level, optname, &opt);
+
+ /* Always write back optlen, even on failure. Some protocols
+ * (e.g. CAN raw) return -ERANGE and set optlen to the
+ * required buffer size so userspace can discover it.
+ */
+ if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int)))
+ return -EFAULT;
+ } else if (ops->getsockopt) {
....
> I hope this way it will be easier to review protocol handler changes.
>
> For example, looking at your AF_PACKET patch, you won't have to care
> about flipping the source and doing the revert. Most/all of the changes will
> be simple:
> - s/get_user(len, optlen)/len = opt->optlen/
> - s/put_user(len, optlen)/opt->optlen = len/
> - s/copy_from_user(xxx, optval, len)/copy_from_iter(xxx, len, &opt->iter_in)/
> - s/copy_to_user(optval, xxx, len)/copy_to_iter(xxx, len, &opt->iter_out)/
That is, in fact, a great proposal. It will make the protocol changes review
way easier.
This is what I have right now.
typedef struct sockopt {
struct iov_iter iter_out;
struct iov_iter iter_in;
int optlen;
} sockopt_t;
And then, the drivers change would be as simple as:
static int packet_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
+ sockopt_t *opt)
{
int len;
int val, lv = sizeof(val);
@@ -4065,8 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
if (level != SOL_PACKET)
return -ENOPROTOOPT;
- if (get_user(len, optlen))
- return -EFAULT;
+ len = opt->optlen;
if (len < 0)
return -EINVAL;
@@ -4115,7 +4115,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
len = sizeof(int);
if (len < sizeof(int))
return -EINVAL;
- if (copy_from_user(&val, optval, len))
+ if (copy_from_iter(&val, len, &opt->iter_in) != len)
return -EFAULT;
switch (val) {
case TPACKET_V1:
@@ -4171,9 +4171,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
if (len > lv)
len = lv;
- if (put_user(len, optlen))
- return -EFAULT;
- if (copy_to_user(optval, data, len))
+ opt->optlen = len;
+ if (copy_to_iter(data, len, &opt->iter_out) != len)
return -EFAULT;
return 0;
This is not fully tested yet, but, in case you want to see how this looks like
so far, I have it in https://github.com/leitao/linux/tree/b4/getsockopt_v3.
I will submit a newer version after I am done with the testing.
Thanks for the insights,
--breno
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH net-next v2 2/4] net: call getsockopt_iter if available
2026-04-02 15:39 ` Breno Leitao
@ 2026-04-02 23:00 ` Stanislav Fomichev
0 siblings, 0 replies; 10+ messages in thread
From: Stanislav Fomichev @ 2026-04-02 23:00 UTC (permalink / raw)
To: Breno Leitao
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, Willem de Bruijn, metze, axboe,
Stanislav Fomichev, io-uring, bpf, netdev, Linus Torvalds,
linux-kernel, kernel-team
On 04/02, Breno Leitao wrote:
> Hello Stanislav,
>
> On Wed, Apr 01, 2026 at 11:10:22AM -0700, Stanislav Fomichev wrote:
> > So maybe something like this is better to communicate your long term intent?
> >
> > } else if (ops->getsockopt_iter) {
> > optval = sockptr_to_iter(optval)
> > optlen = sockptr_to_iter(optlen)
> > do_sock_getsockopt_iter(...) /* does not know what sockpt_t is */
> > }
> >
> > ?
> >
> > Then your new do_sock_getsockopt_iter is sockptr-free from the beginning
> > and at some point we'll just drop/move those sockptr_to_iter calls?
>
> Sure, that would work as well. It would look like the following, from my
> current implemention:
>
> +static int sockptr_to_sockopt(sockopt_t *opt, sockptr_t optval,
> + sockptr_t optlen, struct kvec *kvec)
> +{
> + int koptlen;
> +
> + if (copy_from_sockptr(&koptlen, optlen, sizeof(int)))
> + return -EFAULT;
> +
> + if (optval.is_kernel) {
> + kvec->iov_base = optval.kernel;
> + kvec->iov_len = koptlen;
> + iov_iter_kvec(&opt->iter_out, ITER_DEST, kvec, 1, koptlen);
> + iov_iter_kvec(&opt->iter_in, ITER_SOURCE, kvec, 1, koptlen);
> + } else {
> + iov_iter_ubuf(&opt->iter_out, ITER_DEST, optval.user, koptlen);
> + iov_iter_ubuf(&opt->iter_in, ITER_SOURCE, optval.user,
> + koptlen);
> + }
> + opt->optlen = koptlen;
> +
> + return 0;
> +}
> +
> int do_sock_getsockopt(struct socket *sock, bool compat, int level,
> int optname, sockptr_t optval, sockptr_t optlen)
> {
> @@ -2366,15 +2390,31 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
>
> + } else if (ops->getsockopt_iter) {
> + struct kvec kvec;
> + sockopt_t opt;
> +
> + err = sockptr_to_sockopt(&opt, optval, optlen, &kvec);
> + if (err)
> + return err;
> +
> + err = ops->getsockopt_iter(sock, level, optname, &opt);
> +
> + /* Always write back optlen, even on failure. Some protocols
> + * (e.g. CAN raw) return -ERANGE and set optlen to the
> + * required buffer size so userspace can discover it.
> + */
> + if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int)))
> + return -EFAULT;
> + } else if (ops->getsockopt) {
> ....
>
> > I hope this way it will be easier to review protocol handler changes.
> >
> > For example, looking at your AF_PACKET patch, you won't have to care
> > about flipping the source and doing the revert. Most/all of the changes will
> > be simple:
> > - s/get_user(len, optlen)/len = opt->optlen/
> > - s/put_user(len, optlen)/opt->optlen = len/
> > - s/copy_from_user(xxx, optval, len)/copy_from_iter(xxx, len, &opt->iter_in)/
> > - s/copy_to_user(optval, xxx, len)/copy_to_iter(xxx, len, &opt->iter_out)/
>
> That is, in fact, a great proposal. It will make the protocol changes review
> way easier.
>
> This is what I have right now.
>
> typedef struct sockopt {
> struct iov_iter iter_out;
> struct iov_iter iter_in;
> int optlen;
> } sockopt_t;
>
>
> And then, the drivers change would be as simple as:
>
> static int packet_getsockopt(struct socket *sock, int level, int optname,
> - char __user *optval, int __user *optlen)
> + sockopt_t *opt)
> {
> int len;
> int val, lv = sizeof(val);
> @@ -4065,8 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
> if (level != SOL_PACKET)
> return -ENOPROTOOPT;
>
> - if (get_user(len, optlen))
> - return -EFAULT;
> + len = opt->optlen;
>
> if (len < 0)
> return -EINVAL;
> @@ -4115,7 +4115,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
> len = sizeof(int);
> if (len < sizeof(int))
> return -EINVAL;
> - if (copy_from_user(&val, optval, len))
> + if (copy_from_iter(&val, len, &opt->iter_in) != len)
> return -EFAULT;
> switch (val) {
> case TPACKET_V1:
> @@ -4171,9 +4171,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
>
> if (len > lv)
> len = lv;
> - if (put_user(len, optlen))
> - return -EFAULT;
> - if (copy_to_user(optval, data, len))
> + opt->optlen = len;
> + if (copy_to_iter(data, len, &opt->iter_out) != len)
> return -EFAULT;
> return 0;
>
> This is not fully tested yet, but, in case you want to see how this looks like
> so far, I have it in https://github.com/leitao/linux/tree/b4/getsockopt_v3.
>
> I will submit a newer version after I am done with the testing.
>
> Thanks for the insights,
> --breno
LGTM, thanks!
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH net-next v2 3/4] af_packet: convert to getsockopt_iter
2026-04-01 15:44 [PATCH net-next v2 0/4] net: move .getsockopt away from __user buffers Breno Leitao
2026-04-01 15:44 ` [PATCH net-next v2 1/4] net: add getsockopt_iter callback to proto_ops Breno Leitao
2026-04-01 15:44 ` [PATCH net-next v2 2/4] net: call getsockopt_iter if available Breno Leitao
@ 2026-04-01 15:44 ` Breno Leitao
2026-04-01 15:44 ` [PATCH net-next v2 4/4] can: raw: " Breno Leitao
3 siblings, 0 replies; 10+ messages in thread
From: Breno Leitao @ 2026-04-01 15:44 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, Willem de Bruijn, metze, axboe,
Stanislav Fomichev
Cc: io-uring, bpf, netdev, Linus Torvalds, linux-kernel, kernel-team,
Breno Leitao
Convert AF_PACKET's getsockopt implementation to use the new
getsockopt_iter callback with sockopt_t.
Key changes:
- Replace (char __user *optval, int __user *optlen) with sockopt_t *opt
- Use opt->optlen for buffer length (input) and returned size (output)
- Use copy_to_iter() instead of put_user()/copy_to_user()
- For PACKET_HDRLEN which reads from optval: flip data_source to
ITER_SOURCE, copy_from_iter(), iov_iter_revert(), then restore
ITER_DEST before the common copy_to_iter() epilogue
Signed-off-by: Breno Leitao <leitao@debian.org>
---
net/packet/af_packet.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bb2d88205e5a..531bcee02899 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -49,6 +49,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/ethtool.h>
+#include <linux/uio.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -4051,7 +4052,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
}
static int packet_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
+ sockopt_t *opt)
{
int len;
int val, lv = sizeof(val);
@@ -4065,8 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
if (level != SOL_PACKET)
return -ENOPROTOOPT;
- if (get_user(len, optlen))
- return -EFAULT;
+ len = opt->optlen;
if (len < 0)
return -EINVAL;
@@ -4115,8 +4115,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
len = sizeof(int);
if (len < sizeof(int))
return -EINVAL;
- if (copy_from_user(&val, optval, len))
+ opt->iter.data_source = ITER_SOURCE;
+ if (copy_from_iter(&val, len, &opt->iter) != len)
return -EFAULT;
+ iov_iter_revert(&opt->iter, len);
+ opt->iter.data_source = ITER_DEST;
switch (val) {
case TPACKET_V1:
val = sizeof(struct tpacket_hdr);
@@ -4171,9 +4174,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
if (len > lv)
len = lv;
- if (put_user(len, optlen))
- return -EFAULT;
- if (copy_to_user(optval, data, len))
+ opt->optlen = len;
+ if (copy_to_iter(data, len, &opt->iter) != len)
return -EFAULT;
return 0;
}
@@ -4672,7 +4674,7 @@ static const struct proto_ops packet_ops = {
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = packet_setsockopt,
- .getsockopt = packet_getsockopt,
+ .getsockopt_iter = packet_getsockopt,
.sendmsg = packet_sendmsg,
.recvmsg = packet_recvmsg,
.mmap = packet_mmap,
--
2.52.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH net-next v2 4/4] can: raw: convert to getsockopt_iter
2026-04-01 15:44 [PATCH net-next v2 0/4] net: move .getsockopt away from __user buffers Breno Leitao
` (2 preceding siblings ...)
2026-04-01 15:44 ` [PATCH net-next v2 3/4] af_packet: convert to getsockopt_iter Breno Leitao
@ 2026-04-01 15:44 ` Breno Leitao
3 siblings, 0 replies; 10+ messages in thread
From: Breno Leitao @ 2026-04-01 15:44 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, Willem de Bruijn, metze, axboe,
Stanislav Fomichev
Cc: io-uring, bpf, netdev, Linus Torvalds, linux-kernel, kernel-team,
Breno Leitao
Convert CAN raw socket's getsockopt implementation to use the new
getsockopt_iter callback with sockopt_t.
Key changes:
- Replace (char __user *optval, int __user *optlen) with sockopt_t *opt
- Use opt->optlen for buffer length (input) and returned size (output)
- Use copy_to_iter() instead of copy_to_user()
- For CAN_RAW_FILTER and CAN_RAW_XL_VCID_OPTS: on -ERANGE, set
opt->optlen to the required buffer size. The wrapper writes this
back to userspace even on error, preserving the existing API that
lets userspace discover the needed allocation size.
Signed-off-by: Breno Leitao <leitao@debian.org>
---
net/can/raw.c | 28 +++++++++++++---------------
1 file changed, 13 insertions(+), 15 deletions(-)
diff --git a/net/can/raw.c b/net/can/raw.c
index eee244ffc31e..4b3408528637 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -760,7 +760,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
}
static int raw_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
+ sockopt_t *opt)
{
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
@@ -770,8 +770,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
if (level != SOL_CAN_RAW)
return -EINVAL;
- if (get_user(len, optlen))
- return -EFAULT;
+ len = opt->optlen;
if (len < 0)
return -EINVAL;
@@ -787,12 +786,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
if (len < fsize) {
/* return -ERANGE and needed space in optlen */
err = -ERANGE;
- if (put_user(fsize, optlen))
- err = -EFAULT;
+ opt->optlen = fsize;
} else {
if (len > fsize)
len = fsize;
- if (copy_to_user(optval, ro->filter, len))
+ if (copy_to_iter(ro->filter, len,
+ &opt->iter) != len)
err = -EFAULT;
}
} else {
@@ -801,7 +800,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
release_sock(sk);
if (!err)
- err = put_user(len, optlen);
+ opt->optlen = len;
return err;
}
case CAN_RAW_ERR_FILTER:
@@ -845,16 +844,16 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(ro->raw_vcid_opts)) {
/* return -ERANGE and needed space in optlen */
err = -ERANGE;
- if (put_user(sizeof(ro->raw_vcid_opts), optlen))
- err = -EFAULT;
+ opt->optlen = sizeof(ro->raw_vcid_opts);
} else {
if (len > sizeof(ro->raw_vcid_opts))
len = sizeof(ro->raw_vcid_opts);
- if (copy_to_user(optval, &ro->raw_vcid_opts, len))
+ if (copy_to_iter(&ro->raw_vcid_opts, len,
+ &opt->iter) != len)
err = -EFAULT;
}
if (!err)
- err = put_user(len, optlen);
+ opt->optlen = len;
return err;
}
case CAN_RAW_JOIN_FILTERS:
@@ -868,9 +867,8 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return -ENOPROTOOPT;
}
- if (put_user(len, optlen))
- return -EFAULT;
- if (copy_to_user(optval, val, len))
+ opt->optlen = len;
+ if (copy_to_iter(val, len, &opt->iter) != len)
return -EFAULT;
return 0;
}
@@ -1077,7 +1075,7 @@ static const struct proto_ops raw_ops = {
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = raw_setsockopt,
- .getsockopt = raw_getsockopt,
+ .getsockopt_iter = raw_getsockopt,
.sendmsg = raw_sendmsg,
.recvmsg = raw_recvmsg,
.mmap = sock_no_mmap,
--
2.52.0
^ permalink raw reply related [flat|nested] 10+ messages in thread