From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev, Eric Dumazet <edumazet@google.com>,
Chao Wu <wwchao@google.com>, Wei Wang <weiwan@google.com>,
Coco Li <lixiaoyan@google.com>, YiFei Zhu <zhuyifei@google.com>,
Simon Horman <simon.horman@corigine.com>,
"David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>
Subject: [PATCH 6.5 6/8] ipv6: remove hard coded limitation on ipv6_pinfo
Date: Thu, 31 Aug 2023 13:10:33 +0200 [thread overview]
Message-ID: <20230831110831.051111512@linuxfoundation.org> (raw)
In-Reply-To: <20230831110830.817738361@linuxfoundation.org>
6.5-stable review patch. If anyone has any objections, please let me know.
------------------
From: Eric Dumazet <edumazet@google.com>
commit f5f80e32de12fad2813d37270e8364a03e6d3ef0 upstream.
IPv6 inet sockets are supposed to have a "struct ipv6_pinfo"
field at the end of their definition, so that inet6_sk_generic()
can derive from socket size the offset of the "struct ipv6_pinfo".
This is very fragile, and prevents adding bigger alignment
in sockets, because inet6_sk_generic() does not work
if the compiler adds padding after the ipv6_pinfo component.
We are currently working on a patch series to reorganize
TCP structures for better data locality and found issues
similar to the one fixed in commit f5d547676ca0
("tcp: fix tcp_inet6_sk() for 32bit kernels")
Alternative would be to force an alignment on "struct ipv6_pinfo",
greater or equal to __alignof__(any ipv6 sock) to ensure there is
no padding. This does not look great.
v2: fix typo in mptcp_proto_v6_init() (Paolo)
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Chao Wu <wwchao@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Coco Li <lixiaoyan@google.com>
Cc: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/linux/ipv6.h | 15 ++++-----------
include/net/sock.h | 1 +
net/dccp/ipv6.c | 1 +
net/dccp/ipv6.h | 4 ----
net/ipv6/af_inet6.c | 4 ++--
net/ipv6/ping.c | 1 +
net/ipv6/raw.c | 1 +
net/ipv6/tcp_ipv6.c | 1 +
net/ipv6/udp.c | 1 +
net/ipv6/udplite.c | 1 +
net/l2tp/l2tp_ip6.c | 4 +---
net/mptcp/protocol.c | 1 +
net/sctp/socket.c | 1 +
13 files changed, 16 insertions(+), 20 deletions(-)
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -199,14 +199,7 @@ struct inet6_cork {
u8 tclass;
};
-/**
- * struct ipv6_pinfo - ipv6 private area
- *
- * In the struct sock hierarchy (tcp6_sock, upd6_sock, etc)
- * this _must_ be the last member, so that inet6_sk_generic
- * is able to calculate its offset from the base struct sock
- * by using the struct proto->slab_obj_size member. -acme
- */
+/* struct ipv6_pinfo - ipv6 private area */
struct ipv6_pinfo {
struct in6_addr saddr;
struct in6_pktinfo sticky_pktinfo;
@@ -306,19 +299,19 @@ struct raw6_sock {
__u32 offset; /* checksum offset */
struct icmp6_filter filter;
__u32 ip6mr_table;
- /* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
struct udp6_sock {
struct udp_sock udp;
- /* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
struct tcp6_sock {
struct tcp_sock tcp;
- /* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1340,6 +1340,7 @@ struct proto {
struct kmem_cache *slab;
unsigned int obj_size;
+ unsigned int ipv6_pinfo_offset;
slab_flags_t slab_flags;
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1056,6 +1056,7 @@ static struct proto dccp_v6_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -13,10 +13,6 @@
struct dccp6_sock {
struct dccp_sock dccp;
- /*
- * ipv6_pinfo has to be the last member of dccp6_sock,
- * see inet6_sk_generic.
- */
struct ipv6_pinfo inet6;
};
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -102,9 +102,9 @@ bool ipv6_mod_enabled(void)
}
EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
-static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
- const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
+ const int offset = sk->sk_prot->ipv6_pinfo_offset;
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -215,6 +215,7 @@ struct proto pingv6_prot = {
.get_port = ping_get_port,
.put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
};
EXPORT_SYMBOL_GPL(pingv6_prot);
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1216,6 +1216,7 @@ struct proto rawv6_prot = {
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
.useroffset = offsetof(struct raw6_sock, filter),
.usersize = sizeof_field(struct raw6_sock, filter),
.h.raw_hash = &raw_v6_hashinfo,
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2176,6 +2176,7 @@ struct proto tcpv6_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1802,6 +1802,7 @@ struct proto udpv6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = NULL,
.diag_destroy = udp_abort,
};
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -67,6 +67,7 @@ struct proto udplitev6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = &udplite_table,
};
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -36,9 +36,6 @@ struct l2tp_ip6_sock {
u32 conn_id;
u32 peer_conn_id;
- /* ipv6_pinfo has to be the last member of l2tp_ip6_sock, see
- * inet6_sk_generic
- */
struct ipv6_pinfo inet6;
};
@@ -730,6 +727,7 @@ static struct proto l2tp_ip6_prot = {
.hash = l2tp_ip6_hash,
.unhash = l2tp_ip6_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
+ .ipv6_pinfo_offset = offsetof(struct l2tp_ip6_sock, inet6),
};
static const struct proto_ops l2tp_ip6_ops = {
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3987,6 +3987,7 @@ int __init mptcp_proto_v6_init(void)
strcpy(mptcp_v6_prot.name, "MPTCPv6");
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
+ mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np);
err = proto_register(&mptcp_v6_prot, 1);
if (err)
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9732,6 +9732,7 @@ struct proto sctpv6_prot = {
.unhash = sctp_unhash,
.no_autobind = true,
.obj_size = sizeof(struct sctp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct sctp6_sock, inet6),
.useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
.usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
offsetof(struct sctp6_sock, sctp.subscribe) +
next prev parent reply other threads:[~2023-08-31 11:11 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-31 11:10 [PATCH 6.5 0/8] 6.5.1-rc1 review Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 1/8] ACPI: thermal: Drop nocrt parameter Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 2/8] module: Expose module_init_layout_section() Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 3/8] arm64: module: Use module_init_layout_section() to spot init sections Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 4/8] ARM: " Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 5/8] module/decompress: use vmalloc() for zstd decompression workspace Greg Kroah-Hartman
2023-08-31 11:10 ` Greg Kroah-Hartman [this message]
2023-08-31 11:10 ` [PATCH 6.5 7/8] lockdep: fix static memory detection even more Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 8/8] kallsyms: Fix kallsyms_selftest failure Greg Kroah-Hartman
2023-09-01 6:10 ` [PATCH 6.5 0/8] 6.5.1-rc1 review Ron Economos
2023-09-01 9:21 ` Bagas Sanjaya
2023-09-01 13:42 ` Justin Forbes
2023-09-01 13:48 ` Naresh Kamboju
2023-09-01 15:20 ` Shuah Khan
2023-09-01 18:45 ` Jon Hunter
2023-09-02 4:20 ` Guenter Roeck
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230831110831.051111512@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=lixiaoyan@google.com \
--cc=patches@lists.linux.dev \
--cc=simon.horman@corigine.com \
--cc=stable@vger.kernel.org \
--cc=weiwan@google.com \
--cc=wwchao@google.com \
--cc=zhuyifei@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).