From: Eric Dumazet <edumazet@google.com>
To: "David S . Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>,
Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <simon.horman@corigine.com>,
Soheil Hassas Yeganeh <soheil@google.com>,
netdev@vger.kernel.org, eric.dumazet@gmail.com,
Eric Dumazet <edumazet@google.com>
Subject: [PATCH net-next 02/15] inet: set/get simple options locklessly
Date: Thu, 10 Aug 2023 10:39:14 +0000 [thread overview]
Message-ID: <20230810103927.1705940-3-edumazet@google.com> (raw)
In-Reply-To: <20230810103927.1705940-1-edumazet@google.com>
Now that we have inet->inet_flags, we can set the following options
without having to hold the socket lock:
IP_PKTINFO, IP_RECVTTL, IP_RECVTOS, IP_RECVOPTS, IP_RETOPTS,
IP_PASSSEC, IP_RECVORIGDSTADDR, IP_RECVFRAGSIZE.
ip_sock_set_pktinfo() no longer holds the socket lock.
Similarly we can get the following options without holding
the socket lock:
IP_PKTINFO, IP_RECVTTL, IP_RECVTOS, IP_RECVOPTS, IP_RETOPTS,
IP_PASSSEC, IP_RECVORIGDSTADDR, IP_CHECKSUM, IP_RECVFRAGSIZE.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/ip_sockglue.c | 118 ++++++++++++++++++++++-------------------
1 file changed, 62 insertions(+), 56 deletions(-)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 66f55f3db5ec88e1c771847444eba1d554aca8dc..69b87518348aa5697edc6d88679384f00681f539 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -636,9 +636,7 @@ EXPORT_SYMBOL(ip_sock_set_mtu_discover);
void ip_sock_set_pktinfo(struct sock *sk)
{
- lock_sock(sk);
inet_set_bit(PKTINFO, sk);
- release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_pktinfo);
@@ -952,6 +950,36 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case IP_PKTINFO:
+ inet_assign_bit(PKTINFO, sk, val);
+ return 0;
+ case IP_RECVTTL:
+ inet_assign_bit(TTL, sk, val);
+ return 0;
+ case IP_RECVTOS:
+ inet_assign_bit(TOS, sk, val);
+ return 0;
+ case IP_RECVOPTS:
+ inet_assign_bit(RECVOPTS, sk, val);
+ return 0;
+ case IP_RETOPTS:
+ inet_assign_bit(RETOPTS, sk, val);
+ return 0;
+ case IP_PASSSEC:
+ inet_assign_bit(PASSSEC, sk, val);
+ return 0;
+ case IP_RECVORIGDSTADDR:
+ inet_assign_bit(ORIGDSTADDR, sk, val);
+ return 0;
+ case IP_RECVFRAGSIZE:
+ if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
+ return -EINVAL;
+ inet_assign_bit(RECVFRAGSIZE, sk, val);
+ return 0;
+ }
+
err = 0;
if (needs_rtnl)
rtnl_lock();
@@ -991,27 +1019,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
kfree_rcu(old, rcu);
break;
}
- case IP_PKTINFO:
- inet_assign_bit(PKTINFO, sk, val);
- break;
- case IP_RECVTTL:
- inet_assign_bit(TTL, sk, val);
- break;
- case IP_RECVTOS:
- inet_assign_bit(TOS, sk, val);
- break;
- case IP_RECVOPTS:
- inet_assign_bit(RECVOPTS, sk, val);
- break;
- case IP_RETOPTS:
- inet_assign_bit(RETOPTS, sk, val);
- break;
- case IP_PASSSEC:
- inet_assign_bit(PASSSEC, sk, val);
- break;
- case IP_RECVORIGDSTADDR:
- inet_assign_bit(ORIGDSTADDR, sk, val);
- break;
case IP_CHECKSUM:
if (val) {
if (!(inet_test_bit(CHECKSUM, sk))) {
@@ -1025,11 +1032,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
}
}
break;
- case IP_RECVFRAGSIZE:
- if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
- goto e_inval;
- inet_assign_bit(RECVFRAGSIZE, sk, val);
- break;
case IP_TOS: /* This sets both TOS and Precedence */
__ip_sock_set_tos(sk, val);
break;
@@ -1544,6 +1546,37 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (len < 0)
return -EINVAL;
+ /* Handle options that can be read without locking the socket. */
+ switch (optname) {
+ case IP_PKTINFO:
+ val = inet_test_bit(PKTINFO, sk);
+ goto copyval;
+ case IP_RECVTTL:
+ val = inet_test_bit(TTL, sk);
+ goto copyval;
+ case IP_RECVTOS:
+ val = inet_test_bit(TOS, sk);
+ goto copyval;
+ case IP_RECVOPTS:
+ val = inet_test_bit(RECVOPTS, sk);
+ goto copyval;
+ case IP_RETOPTS:
+ val = inet_test_bit(RETOPTS, sk);
+ goto copyval;
+ case IP_PASSSEC:
+ val = inet_test_bit(PASSSEC, sk);
+ goto copyval;
+ case IP_RECVORIGDSTADDR:
+ val = inet_test_bit(ORIGDSTADDR, sk);
+ goto copyval;
+ case IP_CHECKSUM:
+ val = inet_test_bit(CHECKSUM, sk);
+ goto copyval;
+ case IP_RECVFRAGSIZE:
+ val = inet_test_bit(RECVFRAGSIZE, sk);
+ goto copyval;
+ }
+
if (needs_rtnl)
rtnl_lock();
sockopt_lock_sock(sk);
@@ -1578,33 +1611,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
}
- case IP_PKTINFO:
- val = inet_test_bit(PKTINFO, sk);
- break;
- case IP_RECVTTL:
- val = inet_test_bit(TTL, sk);
- break;
- case IP_RECVTOS:
- val = inet_test_bit(TOS, sk);
- break;
- case IP_RECVOPTS:
- val = inet_test_bit(RECVOPTS, sk);
- break;
- case IP_RETOPTS:
- val = inet_test_bit(RETOPTS, sk);
- break;
- case IP_PASSSEC:
- val = inet_test_bit(PASSSEC, sk);
- break;
- case IP_RECVORIGDSTADDR:
- val = inet_test_bit(ORIGDSTADDR, sk);
- break;
- case IP_CHECKSUM:
- val = inet_test_bit(CHECKSUM, sk);
- break;
- case IP_RECVFRAGSIZE:
- val = inet_test_bit(RECVFRAGSIZE, sk);
- break;
case IP_TOS:
val = inet->tos;
break;
@@ -1754,7 +1760,7 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -ENOPROTOOPT;
}
sockopt_release_sock(sk);
-
+copyval:
if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
unsigned char ucval = (unsigned char)val;
len = 1;
--
2.41.0.640.ga95def55d0-goog
next prev parent reply other threads:[~2023-08-10 10:39 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-10 10:39 [PATCH net-next 00/15] inet: socket lock and data-races avoidance Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 01/15] inet: introduce inet->inet_flags Eric Dumazet
2023-08-10 10:39 ` Eric Dumazet [this message]
2023-08-10 10:39 ` [PATCH net-next 03/15] inet: move inet->recverr to inet->inet_flags Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 04/15] inet: move inet->recverr_rfc4884 " Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 05/15] inet: move inet->freebind " Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 06/15] inet: move inet->hdrincl " Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 07/15] inet: move inet->mc_loop to inet->inet_frags Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 08/15] inet: move inet->mc_all " Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 09/15] inet: move inet->transparent to inet->inet_flags Eric Dumazet
2023-08-10 13:45 ` kernel test robot
2023-08-10 10:39 ` [PATCH net-next 10/15] inet: move inet->is_icsk " Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 11/15] inet: move inet->nodefrag " Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 12/15] inet: move inet->bind_address_no_port " Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 13/15] inet: move inet->defer_connect " Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 14/15] inet: implement lockless IP_TTL Eric Dumazet
2023-08-10 10:39 ` [PATCH net-next 15/15] inet: implement lockless IP_MINTTL Eric Dumazet
2023-08-11 3:43 ` [PATCH net-next 00/15] inet: socket lock and data-races avoidance Soheil Hassas Yeganeh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230810103927.1705940-3-edumazet@google.com \
--to=edumazet@google.com \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=simon.horman@corigine.com \
--cc=soheil@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).