From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev, Eric Dumazet <edumazet@google.com>,
Chao Wu <wwchao@google.com>, Wei Wang <weiwan@google.com>,
Coco Li <lixiaoyan@google.com>, YiFei Zhu <zhuyifei@google.com>,
Simon Horman <simon.horman@corigine.com>,
"David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>
Subject: [PATCH 6.5 6/8] ipv6: remove hard coded limitation on ipv6_pinfo
Date: Thu, 31 Aug 2023 13:10:33 +0200 [thread overview]
Message-ID: <20230831110831.051111512@linuxfoundation.org> (raw)
In-Reply-To: <20230831110830.817738361@linuxfoundation.org>
6.5-stable review patch. If anyone has any objections, please let me know.
------------------
From: Eric Dumazet <edumazet@google.com>
commit f5f80e32de12fad2813d37270e8364a03e6d3ef0 upstream.
IPv6 inet sockets are supposed to have a "struct ipv6_pinfo"
field at the end of their definition, so that inet6_sk_generic()
can derive from socket size the offset of the "struct ipv6_pinfo".
This is very fragile, and prevents adding bigger alignment
in sockets, because inet6_sk_generic() does not work
if the compiler adds padding after the ipv6_pinfo component.
We are currently working on a patch series to reorganize
TCP structures for better data locality and found issues
similar to the one fixed in commit f5d547676ca0
("tcp: fix tcp_inet6_sk() for 32bit kernels")
Alternative would be to force an alignment on "struct ipv6_pinfo",
greater or equal to __alignof__(any ipv6 sock) to ensure there is
no padding. This does not look great.
v2: fix typo in mptcp_proto_v6_init() (Paolo)
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Chao Wu <wwchao@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Coco Li <lixiaoyan@google.com>
Cc: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/linux/ipv6.h | 15 ++++-----------
include/net/sock.h | 1 +
net/dccp/ipv6.c | 1 +
net/dccp/ipv6.h | 4 ----
net/ipv6/af_inet6.c | 4 ++--
net/ipv6/ping.c | 1 +
net/ipv6/raw.c | 1 +
net/ipv6/tcp_ipv6.c | 1 +
net/ipv6/udp.c | 1 +
net/ipv6/udplite.c | 1 +
net/l2tp/l2tp_ip6.c | 4 +---
net/mptcp/protocol.c | 1 +
net/sctp/socket.c | 1 +
13 files changed, 16 insertions(+), 20 deletions(-)
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -199,14 +199,7 @@ struct inet6_cork {
u8 tclass;
};
-/**
- * struct ipv6_pinfo - ipv6 private area
- *
- * In the struct sock hierarchy (tcp6_sock, upd6_sock, etc)
- * this _must_ be the last member, so that inet6_sk_generic
- * is able to calculate its offset from the base struct sock
- * by using the struct proto->slab_obj_size member. -acme
- */
+/* struct ipv6_pinfo - ipv6 private area */
struct ipv6_pinfo {
struct in6_addr saddr;
struct in6_pktinfo sticky_pktinfo;
@@ -306,19 +299,19 @@ struct raw6_sock {
__u32 offset; /* checksum offset */
struct icmp6_filter filter;
__u32 ip6mr_table;
- /* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
struct udp6_sock {
struct udp_sock udp;
- /* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
struct tcp6_sock {
struct tcp_sock tcp;
- /* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1340,6 +1340,7 @@ struct proto {
struct kmem_cache *slab;
unsigned int obj_size;
+ unsigned int ipv6_pinfo_offset;
slab_flags_t slab_flags;
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1056,6 +1056,7 @@ static struct proto dccp_v6_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -13,10 +13,6 @@
struct dccp6_sock {
struct dccp_sock dccp;
- /*
- * ipv6_pinfo has to be the last member of dccp6_sock,
- * see inet6_sk_generic.
- */
struct ipv6_pinfo inet6;
};
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -102,9 +102,9 @@ bool ipv6_mod_enabled(void)
}
EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
-static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
- const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
+ const int offset = sk->sk_prot->ipv6_pinfo_offset;
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -215,6 +215,7 @@ struct proto pingv6_prot = {
.get_port = ping_get_port,
.put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
};
EXPORT_SYMBOL_GPL(pingv6_prot);
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1216,6 +1216,7 @@ struct proto rawv6_prot = {
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
.useroffset = offsetof(struct raw6_sock, filter),
.usersize = sizeof_field(struct raw6_sock, filter),
.h.raw_hash = &raw_v6_hashinfo,
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2176,6 +2176,7 @@ struct proto tcpv6_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1802,6 +1802,7 @@ struct proto udpv6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = NULL,
.diag_destroy = udp_abort,
};
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -67,6 +67,7 @@ struct proto udplitev6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = &udplite_table,
};
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -36,9 +36,6 @@ struct l2tp_ip6_sock {
u32 conn_id;
u32 peer_conn_id;
- /* ipv6_pinfo has to be the last member of l2tp_ip6_sock, see
- * inet6_sk_generic
- */
struct ipv6_pinfo inet6;
};
@@ -730,6 +727,7 @@ static struct proto l2tp_ip6_prot = {
.hash = l2tp_ip6_hash,
.unhash = l2tp_ip6_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
+ .ipv6_pinfo_offset = offsetof(struct l2tp_ip6_sock, inet6),
};
static const struct proto_ops l2tp_ip6_ops = {
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3987,6 +3987,7 @@ int __init mptcp_proto_v6_init(void)
strcpy(mptcp_v6_prot.name, "MPTCPv6");
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
+ mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np);
err = proto_register(&mptcp_v6_prot, 1);
if (err)
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9732,6 +9732,7 @@ struct proto sctpv6_prot = {
.unhash = sctp_unhash,
.no_autobind = true,
.obj_size = sizeof(struct sctp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct sctp6_sock, inet6),
.useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
.usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
offsetof(struct sctp6_sock, sctp.subscribe) +
next prev parent reply other threads:[~2023-08-31 11:11 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-31 11:10 [PATCH 6.5 0/8] 6.5.1-rc1 review Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 1/8] ACPI: thermal: Drop nocrt parameter Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 2/8] module: Expose module_init_layout_section() Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 3/8] arm64: module: Use module_init_layout_section() to spot init sections Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 4/8] ARM: " Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 5/8] module/decompress: use vmalloc() for zstd decompression workspace Greg Kroah-Hartman
2023-08-31 11:10 ` Greg Kroah-Hartman [this message]
2023-08-31 11:10 ` [PATCH 6.5 7/8] lockdep: fix static memory detection even more Greg Kroah-Hartman
2023-08-31 11:10 ` [PATCH 6.5 8/8] kallsyms: Fix kallsyms_selftest failure Greg Kroah-Hartman
2023-09-01 6:10 ` [PATCH 6.5 0/8] 6.5.1-rc1 review Ron Economos
2023-09-01 9:21 ` Bagas Sanjaya
2023-09-01 13:42 ` Justin Forbes
2023-09-01 13:48 ` Naresh Kamboju
2023-09-01 15:20 ` Shuah Khan
2023-09-01 18:45 ` Jon Hunter
2023-09-02 4:20 ` Guenter Roeck
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230831110831.051111512@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=lixiaoyan@google.com \
--cc=patches@lists.linux.dev \
--cc=simon.horman@corigine.com \
--cc=stable@vger.kernel.org \
--cc=weiwan@google.com \
--cc=wwchao@google.com \
--cc=zhuyifei@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.