* [RFC 01/14] tcp: Write options after the header has been fully done
From: Christoph Paasch @ 2017-12-18 21:50 UTC
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
The generic TCP-option framework will need to have access to the full
TCP-header (e.g., to compute the MD5 hash for TCP-MD5).
Thus, we move the call to tcp_options_write() to after all the fields in
the header have been filled out.
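Schematically, the transmit path then does the following (a minimal
sketch with a made-up function name, not verbatim kernel code):

  /* Sketch: all header fields first, options last, so an option writer
   * that covers the header (e.g. a future TCP-MD5 handler) sees the
   * final field values when computing its hash.
   */
  static void example_transmit_order(struct sock *sk, struct tcphdr *th,
                                     struct tcp_sock *tp,
                                     struct tcp_out_options *opts)
  {
          th->window = htons(tcp_select_window(sk)); /* last header field */
          tcp_options_write((__be32 *)(th + 1), tp, opts); /* then options */
  }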
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv4/tcp_output.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 04be9f833927..0f66d101d0ca 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1126,7 +1126,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
}
}
- tcp_options_write((__be32 *)(th + 1), tp, &opts);
skb_shinfo(skb)->gso_type = sk->sk_gso_type;
if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
th->window = htons(tcp_select_window(sk));
@@ -1137,6 +1136,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
*/
th->window = htons(min(tp->rcv_wnd, 65535U));
}
+ tcp_options_write((__be32 *)(th + 1), tp, &opts);
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
if (md5) {
@@ -3242,8 +3242,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
- tcp_options_write((__be32 *)(th + 1), NULL, &opts);
th->doff = (tcp_header_size >> 2);
+ tcp_options_write((__be32 *)(th + 1), NULL, &opts);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
#ifdef CONFIG_TCP_MD5SIG
--
2.15.0
* [RFC 02/14] tcp: Pass sock and skb to tcp_options_write
From: Christoph Paasch @ 2017-12-18 21:50 UTC
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
An upcoming patch adds a configurable, per-socket list of TCP options to
populate in the TCP header. This requires tcp_options_write() to know the
socket (to use the options list) and the skb (to provide visibility to the
packet data for options like TCP_MD5SIG).
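To illustrate what the new arguments enable, a hypothetical option
writer could look like this (a sketch only; this patch adds no such
handler, and example_opt_write() is a made-up name):

  static __be32 *example_opt_write(__be32 *ptr, struct sk_buff *skb,
                                   struct tcp_out_options *opts,
                                   struct sock *sk)
  {
          /* sk may be a request socket here (tcp_make_synack() now
           * passes req_to_sk(req)), so guard before using tcp_sk():
           */
          if (sk_fullsock(sk)) {
                  /* full-socket state (tcp_sk(sk)) is valid here */
          }

          /* skb exposes the packet, so a signature option such as
           * TCP-MD5 can hash tcp_hdr(skb) and the payload from here.
           */
          return ptr;
  }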
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv4/tcp_output.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0f66d101d0ca..efe599a41e36 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -444,10 +444,14 @@ struct tcp_out_options {
* At least SACK_PERM as the first option is known to lead to a disaster
* (but it may well be that other scenarios fail similarly).
*/
-static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
+static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
struct tcp_out_options *opts)
{
u16 options = opts->options; /* mungable copy */
+ struct tcp_sock *tp = NULL;
+
+ if (sk_fullsock(sk))
+ tp = tcp_sk(sk);
if (unlikely(OPTION_MD5 & options)) {
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
@@ -1136,7 +1140,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
*/
th->window = htons(min(tp->rcv_wnd, 65535U));
}
- tcp_options_write((__be32 *)(th + 1), tp, &opts);
+ tcp_options_write((__be32 *)(th + 1), skb, sk, &opts);
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
if (md5) {
@@ -3243,7 +3247,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
th->doff = (tcp_header_size >> 2);
- tcp_options_write((__be32 *)(th + 1), NULL, &opts);
+ tcp_options_write((__be32 *)(th + 1), skb, req_to_sk(req), &opts);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
#ifdef CONFIG_TCP_MD5SIG
--
2.15.0
* [RFC 03/14] tcp: Allow tcp_fast_parse_options to drop segments
From: Christoph Paasch @ 2017-12-18 21:50 UTC
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
After parsing the TCP-options, some option-kinds might trigger a drop of
the segment (e.g., as is the case for TCP_MD5). As we are moving to
consolidate the TCP_MD5-code in follow-up patches, we need to add the
capability to drop a segment right after parsing the options in
tcp_fast_parse_options().
Previously, tcp_fast_parse_options() returned false when there was no
timestamp option, except when it fell back to slow-path processing
through tcp_parse_options(), in which case it always returned true.
Its return value was thus inconsistent. With this patch, it returns
true when the segment should get dropped based on the parsed options,
and false otherwise.
In tcp_validate_incoming, we will then just check for
tp->rx_opt.saw_tstamp to see if we should verify PAWS.
The goto to the new extra_opt_check label will be used in a follow-up
patch to check whether one of the options triggers a drop of the segment.
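The resulting call site in tcp_validate_incoming() is, schematically:

  /* Parse first; a 'true' return now means the options demand a drop: */
  if (tcp_fast_parse_options(sock_net(sk), skb, th, tp))
          goto discard;

  /* The PAWS check is then gated purely on having seen a timestamp: */
  if (tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) {
          /* PAWS-failure handling, unchanged */
  }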
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv4/tcp_input.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4d55c4b338ee..eb97ee24c601 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3827,6 +3827,8 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
/* Fast parse options. This hopes to only see timestamps.
* If it is wrong it falls back on tcp_parse_options().
+ *
+ * Returns true if we should drop this packet based on present TCP-options.
*/
static bool tcp_fast_parse_options(const struct net *net,
const struct sk_buff *skb,
@@ -3837,18 +3839,19 @@ static bool tcp_fast_parse_options(const struct net *net,
*/
if (th->doff == (sizeof(*th) / 4)) {
tp->rx_opt.saw_tstamp = 0;
- return false;
+ goto extra_opt_check;
} else if (tp->rx_opt.tstamp_ok &&
th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
if (tcp_parse_aligned_timestamp(tp, th))
- return true;
+ goto extra_opt_check;
}
tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
- return true;
+extra_opt_check:
+ return false;
}
#ifdef CONFIG_TCP_MD5SIG
@@ -5168,9 +5171,11 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
bool rst_seq_match = false;
+ if (tcp_fast_parse_options(sock_net(sk), skb, th, tp))
+ goto discard;
+
/* RFC1323: H1. Apply PAWS check first. */
- if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
- tp->rx_opt.saw_tstamp &&
+ if (tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
--
2.15.0
* [RFC 04/14] tcp_smc: Make smc_parse_options return 1 on success
From: Christoph Paasch @ 2017-12-18 21:50 UTC
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov, Ursula Braun
Since the generic TCP-option parser will also parse experimental
TCP options, smc_parse_options() needs a return value that indicates
whether the option actually matched.
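A later patch in this series uses the new return value to fall through
to the generic experimental-option parser, roughly:

  else if (smc_parse_options(th, opt_rx, ptr, opsize))
          break;          /* option consumed by SMC */
  else if (opsize >= TCPOLEN_EXP_BASE)
          /* not SMC: offer the option to registered handlers,
           * keyed by its 32-bit experimental magic
           */
          tcp_extopt_parse(get_unaligned_be32(ptr), opsize, ptr,
                           skb, opt_rx, sk);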
Cc: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv4/tcp_input.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb97ee24c601..5c35fd568b13 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3671,19 +3671,22 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
foc->exp = exp_opt;
}
-static void smc_parse_options(const struct tcphdr *th,
- struct tcp_options_received *opt_rx,
- const unsigned char *ptr,
- int opsize)
+static int smc_parse_options(const struct tcphdr *th,
+ struct tcp_options_received *opt_rx,
+ const unsigned char *ptr,
+ int opsize)
{
#if IS_ENABLED(CONFIG_SMC)
if (static_branch_unlikely(&tcp_have_smc)) {
if (th->syn && !(opsize & 1) &&
opsize >= TCPOLEN_EXP_SMC_BASE &&
- get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+ get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
opt_rx->smc_ok = 1;
+ return 1;
+ }
}
#endif
+ return 0;
}
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
--
2.15.0
* [RFC 05/14] tcp: Register handlers for extra TCP options
From: Christoph Paasch @ 2017-12-18 21:51 UTC
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Allow additional TCP options to be handled by registered hook
functions.
Registered options have a priority that determines the order in which
options are prepared and written. Lower priority numbers are handled
first.
Option parsing will call the provided 'parse' function when a TCP option
number is not recognized by the normal option parsing code.
After parsing, the options are post-processed in two places. First, a
'check' callback allows a handler to drop the packet based on the
parsed options (useful, e.g., for TCP MD5SIG). Then, a 'post_process'
callback runs after the other validity checks (in-window, PAWS, ...),
at which point the handler can update any further state for this
particular extra-option.
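Schematically, the receive-path hooks run in this order (simplified):

  /* Per incoming segment:
   *
   *   tcp_parse_options()       -> ops->parse()         unknown options
   *   tcp_extopt_check()        -> ops->check()         may drop the skb
   *   in-window / PAWS / ... validation
   *   tcp_extopt_post_process() -> ops->post_process()  update state
   */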
In the output path, the 'prepare' function determines the required
space for registered options and stores associated data; 'write' then
adds the options to the TCP header.
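Concretely, the output path budgets the option space like this (a
sketch mirroring the tcp_syn_options() hunk below):

  if (unlikely(!hlist_empty(&tp->tcp_option_list)))
          remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN, remaining,
                                          opts, tcp_to_sk(tp));
  /* tcp_extopt_prepare() sums each handler's 'prepare' result and
   * rounds the total up to a multiple of 4, keeping the header
   * word-aligned; tcp_extopt_write() later emits the bytes.
   */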
These additional TCP-options are stored in an hlist on the TCP-socket.
To pass the state and options around during the 3-way handshake and in
time-wait state, the hlist also exists on the tcp_request_sock and
tcp_timewait_sock.
The list is copied from the listener to the request-socket (calling
into the 'copy' callback). It is then moved from the request-socket to
the TCP-socket and finally to the time-wait socket.
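For illustration, a minimal consumer of the framework could look
roughly like this (a hypothetical sketch: the option kind 0xdeadbeef
and all example_* names are made up, a real user would allocate one
store per socket, and a listening socket would also need the 'copy'
callback):

  static void example_parse(int opsize, const unsigned char *opptr,
                            const struct sk_buff *skb,
                            struct tcp_options_received *opt_rx,
                            struct sock *sk, struct tcp_extopt_store *store)
  {
          /* record that the peer sent our experimental option */
  }

  static void example_destroy(struct tcp_extopt_store *store)
  {
          /* nothing to free in this static example */
  }

  static const struct tcp_extopt_ops example_ops = {
          .option_kind = 0xdeadbeef, /* 32-bit experimental magic */
          .priority    = 32,         /* lower numbers are handled first */
          .parse       = example_parse,
          .destroy     = example_destroy,
          .owner       = THIS_MODULE,
  };

  static struct tcp_extopt_store example_store = {
          .ops = &example_ops,
  };

  static int example_attach(struct sock *sk)
  {
          int err;

          lock_sock(sk);  /* tcp_register_extopt() needs the socket lock */
          err = tcp_register_extopt(&example_store, sk);
          release_sock(sk);

          return err;
  }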
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
---
drivers/infiniband/hw/cxgb4/cm.c | 2 +-
include/linux/tcp.h | 28 ++++
include/net/tcp.h | 110 ++++++++++++-
net/ipv4/syncookies.c | 6 +-
net/ipv4/tcp.c | 327 ++++++++++++++++++++++++++++++++++++++-
net/ipv4/tcp_input.c | 49 +++++-
net/ipv4/tcp_ipv4.c | 98 +++++++++---
net/ipv4/tcp_minisocks.c | 32 +++-
net/ipv4/tcp_output.c | 40 ++---
net/ipv6/syncookies.c | 6 +-
net/ipv6/tcp_ipv6.c | 32 ++++
11 files changed, 676 insertions(+), 54 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 21db3b48a617..a1ea5583f07b 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3746,7 +3746,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
*/
memset(&tmp_opt, 0, sizeof(tmp_opt));
tcp_clear_options(&tmp_opt);
- tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL, NULL);
req = __skb_push(skb, sizeof(*req));
memset(req, 0, sizeof(*req));
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4f93f0953c41..4756bd2c4b54 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -115,6 +115,24 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
#endif
}
+#define OPTION_SACK_ADVERTISE (1 << 0)
+#define OPTION_TS (1 << 1)
+#define OPTION_MD5 (1 << 2)
+#define OPTION_WSCALE (1 << 3)
+#define OPTION_FAST_OPEN_COOKIE (1 << 8)
+#define OPTION_SMC (1 << 9)
+
+struct tcp_out_options {
+ u16 options; /* bit field of OPTION_* */
+ u16 mss; /* 0 to disable */
+ u8 ws; /* window scale, 0 to disable */
+ u8 num_sack_blocks; /* number of SACK blocks to include */
+ u8 hash_size; /* bytes in hash_location */
+ __u8 *hash_location; /* temporary pointer, overloaded */
+ __u32 tsval, tsecr; /* need to include OPTION_TS */
+ struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
+};
+
/* This is the max number of SACKS that we'll generate and process. It's safe
* to increase this, although since:
* size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
@@ -137,6 +155,7 @@ struct tcp_request_sock {
* FastOpen it's the seq#
* after data-in-SYN.
*/
+ struct hlist_head tcp_option_list;
};
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -373,6 +392,8 @@ struct tcp_sock {
*/
struct request_sock *fastopen_rsk;
u32 *saved_syn;
+
+ struct hlist_head tcp_option_list;
};
enum tsq_enum {
@@ -400,6 +421,11 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk)
return (struct tcp_sock *)sk;
}
+static inline struct sock *tcp_to_sk(const struct tcp_sock *tp)
+{
+ return (struct sock *)tp;
+}
+
struct tcp_timewait_sock {
struct inet_timewait_sock tw_sk;
#define tw_rcv_nxt tw_sk.__tw_common.skc_tw_rcv_nxt
@@ -412,6 +438,8 @@ struct tcp_timewait_sock {
u32 tw_last_oow_ack_time;
long tw_ts_recent_stamp;
+
+ struct hlist_head tcp_option_list;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *tw_md5_key;
#endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6939e69d3c37..ac62ceff9815 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -202,6 +202,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_FASTOPEN_BASE 2
#define TCPOLEN_EXP_FASTOPEN_BASE 4
#define TCPOLEN_EXP_SMC_BASE 6
+#define TCPOLEN_EXP_BASE 6
/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED 12
@@ -403,7 +404,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
- int estab, struct tcp_fastopen_cookie *foc);
+ int estab, struct tcp_fastopen_cookie *foc,
+ struct sock *sk);
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
/*
@@ -2063,4 +2065,110 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
#if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc;
#endif
+
+struct tcp_extopt_store;
+
+struct tcp_extopt_ops {
+ u32 option_kind;
+ unsigned char priority;
+ void (*parse)(int opsize, const unsigned char *opptr,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct sock *sk,
+ struct tcp_extopt_store *store);
+ bool (*check)(struct sock *sk,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct tcp_extopt_store *store);
+ void (*post_process)(struct sock *sk,
+ struct tcp_options_received *opt_rx,
+ struct tcp_extopt_store *store);
+ /* Return the number of bytes consumed */
+ unsigned int (*prepare)(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+ __be32 *(*write)(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts, struct sock *sk,
+ struct tcp_extopt_store *store);
+ int (*response_prepare)(struct sk_buff *orig, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+ __be32 *(*response_write)(__be32 *ptr, struct sk_buff *orig,
+ struct tcphdr *th,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+ int (*add_header_len)(const struct sock *orig,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+ struct tcp_extopt_store *(*copy)(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *from);
+ struct tcp_extopt_store *(*move)(struct sock *from, struct sock *to,
+ struct tcp_extopt_store *store);
+ void (*destroy)(struct tcp_extopt_store *store);
+ struct module *owner;
+};
+
+/* The tcp_extopt_store is the generic structure that will be added to the
+ * list of TCP extra-options.
+ *
+ * Protocols using the framework can create a wrapper structure around it that
+ * stores protocol-specific state. The tcp_extopt-functions will provide
+ * tcp_extopt_store though, so the protocol can use container_of to get
+ * access to the wrapper structure containing the state.
+ */
+struct tcp_extopt_store {
+ struct hlist_node list;
+ const struct tcp_extopt_ops *ops;
+};
+
+struct hlist_head *tcp_extopt_get_list(const struct sock *sk);
+
+struct tcp_extopt_store *tcp_extopt_find_kind(u32 kind, const struct sock *sk);
+
+void tcp_extopt_parse(u32 opcode, int opsize, const unsigned char *opptr,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx, struct sock *sk);
+
+bool tcp_extopt_check(struct sock *sk, const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx);
+
+void tcp_extopt_post_process(struct sock *sk,
+ struct tcp_options_received *opt_rx);
+
+unsigned int tcp_extopt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+void tcp_extopt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts, struct sock *sk);
+
+int tcp_extopt_response_prepare(struct sk_buff *orig, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+void tcp_extopt_response_write(__be32 *ptr, struct sk_buff *orig,
+ struct tcphdr *th, struct tcp_out_options *opts,
+ const struct sock *sk);
+
+int tcp_extopt_add_header(const struct sock *orig, const struct sock *sk);
+
+/* Socket lock must be held when calling this function */
+int tcp_register_extopt(struct tcp_extopt_store *store, struct sock *sk);
+
+void tcp_extopt_copy(struct sock *listener, struct request_sock *req,
+ struct tcp_options_received *opt);
+
+void tcp_extopt_move(struct sock *from, struct sock *to);
+
+void tcp_extopt_destroy(struct sock *sk);
+
#endif /* _TCP_H */
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index fda37f2862c9..8373abf19440 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -313,7 +313,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, sk);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
tsoff = secure_tcp_ts_off(sock_net(sk),
@@ -325,6 +325,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
goto out;
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+ tcp_extopt_check(sk, skb, &tcp_opt))
+ goto out;
+
ret = NULL;
req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
if (!req)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c470fec9062f..17f38afb4212 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -416,6 +416,7 @@ void tcp_init_sock(struct sock *sk)
tcp_init_xmit_timers(sk);
INIT_LIST_HEAD(&tp->tsq_node);
INIT_LIST_HEAD(&tp->tsorted_sent_queue);
+ INIT_HLIST_HEAD(&tp->tcp_option_list);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -3473,6 +3474,331 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
#endif
+struct hlist_head *tcp_extopt_get_list(const struct sock *sk)
+{
+ if (sk_fullsock(sk))
+ return &tcp_sk(sk)->tcp_option_list;
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
+ return &tcp_rsk(inet_reqsk(sk))->tcp_option_list;
+ else if (sk->sk_state == TCP_TIME_WAIT)
+ return &tcp_twsk(sk)->tcp_option_list;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_get_list);
+
+/* Caller must ensure that rcu is locked */
+struct tcp_extopt_store *tcp_extopt_find_kind(u32 kind, const struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (entry->ops->option_kind == kind)
+ return entry;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_find_kind);
+
+void tcp_extopt_parse(u32 opcode, int opsize, const unsigned char *opptr,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx, struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+
+ rcu_read_lock();
+ entry = tcp_extopt_find_kind(opcode, sk);
+
+ if (entry && entry->ops->parse)
+ entry->ops->parse(opsize, opptr, skb, opt_rx, sk, entry);
+ rcu_read_unlock();
+}
+
+bool tcp_extopt_check(struct sock *sk, const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ bool drop = false;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ bool ret = false;
+
+ if (entry->ops->check)
+ ret = entry->ops->check(sk, skb, opt_rx, entry);
+
+ if (ret)
+ drop = true;
+ }
+ rcu_read_unlock();
+
+ return drop;
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_check);
+
+void tcp_extopt_post_process(struct sock *sk,
+ struct tcp_options_received *opt_rx)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (entry->ops->post_process)
+ entry->ops->post_process(sk, opt_rx, entry);
+ }
+ rcu_read_unlock();
+}
+
+unsigned int tcp_extopt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ unsigned int used = 0;
+
+ if (!sk)
+ return 0;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (unlikely(!entry->ops->prepare))
+ continue;
+
+ used += entry->ops->prepare(skb, flags, remaining - used, opts,
+ sk, entry);
+ }
+ rcu_read_unlock();
+
+ return roundup(used, 4);
+}
+
+void tcp_extopt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts, struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+
+ if (!sk)
+ return;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (unlikely(!entry->ops->write))
+ continue;
+
+ ptr = entry->ops->write(ptr, skb, opts, sk, entry);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_write);
+
+int tcp_extopt_response_prepare(struct sk_buff *orig, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ unsigned int used = 0;
+
+ if (!sk)
+ return 0;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ int ret;
+
+ if (unlikely(!entry->ops->response_prepare))
+ continue;
+
+ ret = entry->ops->response_prepare(orig, flags,
+ remaining - used, opts,
+ sk, entry);
+
+ used += ret;
+ }
+ rcu_read_unlock();
+
+ return roundup(used, 4);
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_response_prepare);
+
+void tcp_extopt_response_write(__be32 *ptr, struct sk_buff *orig,
+ struct tcphdr *th, struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+
+ if (!sk)
+ return;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (unlikely(!entry->ops->response_write))
+ continue;
+
+ ptr = entry->ops->response_write(ptr, orig, th, opts, sk, entry);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_response_write);
+
+int tcp_extopt_add_header(const struct sock *orig, const struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ int tcp_header_len = 0;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (unlikely(!entry->ops->add_header_len))
+ continue;
+
+ tcp_header_len += entry->ops->add_header_len(orig, sk, entry);
+ }
+ rcu_read_unlock();
+
+ return tcp_header_len;
+}
+
+/* Socket lock must be held when calling this function */
+int tcp_register_extopt(struct tcp_extopt_store *store, struct sock *sk)
+{
+ struct hlist_node *add_before = NULL;
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ int ret = 0;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ if (!store->ops->option_kind)
+ return -EINVAL;
+
+ if (!try_module_get(store->ops->owner))
+ return -ENOENT;
+
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (entry->ops->option_kind == store->ops->option_kind) {
+ pr_notice("Option kind %u already registered\n",
+ store->ops->option_kind);
+ module_put(store->ops->owner);
+ return -EEXIST;
+ }
+
+ if (entry->ops->priority <= store->ops->priority)
+ add_before = &entry->list;
+ }
+
+ if (add_before)
+ hlist_add_behind_rcu(&store->list, add_before);
+ else
+ hlist_add_head_rcu(&store->list, lhead);
+
+ pr_debug("Option kind %u registered\n", store->ops->option_kind);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tcp_register_extopt);
+
+void tcp_extopt_copy(struct sock *listener, struct request_sock *req,
+ struct tcp_options_received *opt)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *from, *to;
+
+ from = tcp_extopt_get_list(listener);
+ to = tcp_extopt_get_list(req_to_sk(req));
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, from, list) {
+ struct tcp_extopt_store *new;
+
+ if (!try_module_get(entry->ops->owner)) {
+ pr_err("%s Module get failed while copying\n", __func__);
+ continue;
+ }
+
+ new = entry->ops->copy(listener, req, opt, entry);
+ if (!new) {
+ module_put(entry->ops->owner);
+ continue;
+ }
+
+ hlist_add_tail_rcu(&new->list, to);
+ }
+ rcu_read_unlock();
+}
+
+void tcp_extopt_move(struct sock *from, struct sock *to)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lfrom, *lto;
+ struct hlist_node *tmp;
+
+ lfrom = tcp_extopt_get_list(from);
+ lto = tcp_extopt_get_list(to);
+
+ rcu_read_lock();
+ hlist_for_each_entry_safe(entry, tmp, lfrom, list) {
+ hlist_del_rcu(&entry->list);
+
+ if (entry->ops->move) {
+ entry = entry->ops->move(from, to, entry);
+ if (!entry)
+ continue;
+ }
+
+ hlist_add_tail_rcu(&entry->list, lto);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_move);
+
+void tcp_extopt_destroy(struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ struct hlist_node *tmp;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_safe(entry, tmp, lhead, list) {
+ struct module *owner = entry->ops->owner;
+
+ hlist_del_rcu(&entry->list);
+
+ entry->ops->destroy(entry);
+
+ module_put(owner);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_destroy);
+
void tcp_done(struct sock *sk)
{
struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
@@ -3622,7 +3948,6 @@ void __init tcp_init(void)
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
}
-
cnt = tcp_hashinfo.ehash_mask + 1;
sysctl_tcp_max_orphans = cnt / 2;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5c35fd568b13..1950ff80fb3f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3696,7 +3696,7 @@ static int smc_parse_options(const struct tcphdr *th,
void tcp_parse_options(const struct net *net,
const struct sk_buff *skb,
struct tcp_options_received *opt_rx, int estab,
- struct tcp_fastopen_cookie *foc)
+ struct tcp_fastopen_cookie *foc, struct sock *sk)
{
const unsigned char *ptr;
const struct tcphdr *th = tcp_hdr(skb);
@@ -3796,9 +3796,18 @@ void tcp_parse_options(const struct net *net,
tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true);
- else
- smc_parse_options(th, opt_rx, ptr,
- opsize);
+ else if (smc_parse_options(th, opt_rx, ptr,
+ opsize))
+ break;
+ else if (opsize >= TCPOLEN_EXP_BASE)
+ tcp_extopt_parse(get_unaligned_be32(ptr),
+ opsize, ptr, skb,
+ opt_rx, sk);
+ break;
+
+ default:
+ tcp_extopt_parse(opcode, opsize, ptr, skb,
+ opt_rx, sk);
break;
}
@@ -3849,11 +3858,13 @@ static bool tcp_fast_parse_options(const struct net *net,
goto extra_opt_check;
}
- tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
+ tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL, tcp_to_sk(tp));
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
extra_opt_check:
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ return tcp_extopt_check(tcp_to_sk(tp), skb, &tp->rx_opt);
return false;
}
@@ -5327,6 +5338,9 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tp->rx_opt.saw_tstamp = 0;
+ if (!hlist_empty(&tp->tcp_option_list))
+ goto slow_path;
+
/* pred_flags is 0xS?10 << 16 + snd_wnd
* if header_prediction is to be made
* 'S' will always be tp->tcp_header_len >> 2
@@ -5514,7 +5528,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
/* Get original SYNACK MSS value if user MSS sets mss_clamp */
tcp_clear_options(&opt);
opt.user_mss = opt.mss_clamp = 0;
- tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL, sk);
mss = opt.mss_clamp;
}
@@ -5577,10 +5591,14 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
int saved_clamp = tp->rx_opt.mss_clamp;
bool fastopen_fail;
- tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
+ tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc, sk);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+ tcp_extopt_check(sk, skb, &tp->rx_opt))
+ goto discard;
+
if (th->ack) {
/* rfc793:
* "If the state is SYN-SENT then
@@ -5663,6 +5681,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->tcp_header_len = sizeof(struct tcphdr);
}
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ tcp_extopt_post_process(sk, &tp->rx_opt);
+
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
@@ -5756,6 +5777,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_ecn_rcv_syn(tp, th);
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ tcp_extopt_post_process(sk, &tp->rx_opt);
+
tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
@@ -6239,12 +6263,17 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->af_specific = af_ops;
tcp_rsk(req)->ts_off = 0;
+ INIT_HLIST_HEAD(&tcp_rsk(req)->tcp_option_list);
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
tmp_opt.user_mss = tp->rx_opt.user_mss;
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
- want_cookie ? NULL : &foc);
+ want_cookie ? NULL : &foc, sk);
+
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+ tcp_extopt_check(sk, skb, &tmp_opt))
+ goto drop_and_free;
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -6305,6 +6334,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_reqsk_record_syn(sk, req, skb);
fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
}
+
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ tcp_extopt_copy(sk, req, &tmp_opt);
+
if (fastopen_sk) {
af_ops->send_synack(fastopen_sk, dst, &fl, req,
&foc, TCP_SYNACK_FASTOPEN);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 94e28350f420..dee296097b8f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -600,10 +600,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
-#ifdef CONFIG_TCP_MD5SIG
- __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
-#endif
+ __be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
+ struct hlist_head *extopt_list = NULL;
struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key = NULL;
@@ -613,6 +612,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
struct sock *sk1 = NULL;
#endif
struct net *net;
+ int offset = 0;
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -624,6 +624,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
return;
+ if (sk)
+ extopt_list = tcp_extopt_get_list(sk);
+
/* Swap the send and the receive. */
memset(&rep, 0, sizeof(rep));
rep.th.dest = th->source;
@@ -678,19 +681,44 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
goto out;
}
+#endif
+
+ if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
+ unsigned int remaining;
+ struct tcp_out_options opts;
+ int used;
+ remaining = sizeof(rep.opt);
+#ifdef CONFIG_TCP_MD5SIG
+ if (key)
+ remaining -= TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
+ memset(&opts, 0, sizeof(opts));
+
+ used = tcp_extopt_response_prepare(skb, TCPHDR_RST, remaining,
+ &opts, sk);
+
+ arg.iov[0].iov_len += used;
+ rep.th.doff = arg.iov[0].iov_len / 4;
+
+ tcp_extopt_response_write(&rep.opt[0], skb, &rep.th, &opts, sk);
+ offset += used / 4;
+ }
+
+#ifdef CONFIG_TCP_MD5SIG
if (key) {
- rep.opt[0] = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
+ rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
/* Update length and the length the header thinks exists */
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
rep.th.doff = arg.iov[0].iov_len / 4;
- tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
- key, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &rep.th);
+ tcp_v4_md5_hash_hdr((__u8 *)&rep.opt[offset],
+ key, ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, &rep.th);
}
#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
@@ -742,14 +770,14 @@ static void tcp_v4_send_ack(const struct sock *sk,
const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
- __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
-#ifdef CONFIG_TCP_MD5SIG
- + (TCPOLEN_MD5SIG_ALIGNED >> 2)
-#endif
- ];
+ __be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
+ struct hlist_head *extopt_list = NULL;
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
+ int offset = 0;
+
+ extopt_list = tcp_extopt_get_list(sk);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -763,6 +791,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.opt[1] = htonl(tsval);
rep.opt[2] = htonl(tsecr);
arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
+ offset += 3;
}
/* Swap the send and the receive. */
@@ -774,22 +803,45 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.th.ack = 1;
rep.th.window = htons(win);
+ if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
+ unsigned int remaining;
+ struct tcp_out_options opts;
+ int used;
+
+ remaining = sizeof(rep.th) + sizeof(rep.opt) - arg.iov[0].iov_len;
+
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
- int offset = (tsecr) ? 3 : 0;
+ if (key)
+ remaining -= TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
+ memset(&opts, 0, sizeof(opts));
+ used = tcp_extopt_response_prepare(skb, TCPHDR_ACK, remaining,
+ &opts, sk);
+
+ arg.iov[0].iov_len += used;
+ rep.th.doff = arg.iov[0].iov_len / 4;
+ tcp_extopt_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
+
+ offset += used / 4;
+ }
+
+#ifdef CONFIG_TCP_MD5SIG
+ if (key) {
rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) |
TCPOLEN_MD5SIG);
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
- rep.th.doff = arg.iov[0].iov_len/4;
+ rep.th.doff = arg.iov[0].iov_len / 4;
tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
key, ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr, &rep.th);
}
#endif
+
arg.flags = reply_flags;
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
@@ -893,6 +945,9 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
+ tcp_extopt_destroy(req_to_sk(req));
+
kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
@@ -1410,6 +1465,11 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (likely(*own_req)) {
tcp_move_syn(newtp, req);
ireq->ireq_opt = NULL;
+
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list))) {
+ tcp_extopt_move(req_to_sk(req), newsk);
+ INIT_HLIST_HEAD(&tcp_rsk(req)->tcp_option_list);
+ }
} else {
newinet->inet_opt = NULL;
}
@@ -1907,6 +1967,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* Cleans up our, hopefully empty, out_of_order_queue. */
skb_rbtree_purge(&tp->out_of_order_queue);
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ tcp_extopt_destroy(sk);
#ifdef CONFIG_TCP_MD5SIG
/* Clean up the MD5 key list, if any */
if (tp->md5sig_info) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a8384b0c11f8..676ad7ca13ad 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -95,9 +95,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
bool paws_reject = false;
- tmp_opt.saw_tstamp = 0;
+ tcp_clear_options(&tmp_opt);
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
- tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL,
+ (struct sock *)tw);
if (tmp_opt.saw_tstamp) {
if (tmp_opt.rcv_tsecr)
@@ -108,6 +109,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
}
}
+ if (unlikely(!hlist_empty(&tcptw->tcp_option_list)) &&
+ tcp_extopt_check((struct sock *)tw, skb, &tmp_opt))
+ return TCP_TW_SUCCESS;
+
if (tw->tw_substate == TCP_FIN_WAIT2) {
/* Just repeat all the checks of tcp_rcv_state_process() */
@@ -251,7 +256,7 @@ EXPORT_SYMBOL(tcp_timewait_state_process);
void tcp_time_wait(struct sock *sk, int state, int timeo)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
struct inet_timewait_sock *tw;
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
@@ -271,6 +276,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
tcptw->tw_ts_offset = tp->tsoffset;
tcptw->tw_last_oow_ack_time = 0;
+ INIT_HLIST_HEAD(&tcptw->tcp_option_list);
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
@@ -284,6 +290,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
}
#endif
+ if (unlikely(!hlist_empty(&tp->tcp_option_list))) {
+ tcp_extopt_move(sk, (struct sock *)tw);
+ INIT_HLIST_HEAD(&tp->tcp_option_list);
+ }
#ifdef CONFIG_TCP_MD5SIG
/*
* The timewait bucket does not have the key DB from the
@@ -341,6 +351,9 @@ void tcp_twsk_destructor(struct sock *sk)
if (twsk->tw_md5_key)
kfree_rcu(twsk->tw_md5_key, rcu);
#endif
+
+ if (unlikely(!hlist_empty(&twsk->tcp_option_list)))
+ tcp_extopt_destroy(sk);
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
@@ -470,6 +483,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
INIT_LIST_HEAD(&newtp->tsq_node);
INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
+ INIT_HLIST_HEAD(&newtp->tcp_option_list);
tcp_init_wl(newtp, treq->rcv_isn);
@@ -545,6 +559,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
if (newtp->af_specific->md5_lookup(sk, newsk))
newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
+ if (unlikely(!hlist_empty(&treq->tcp_option_list)))
+ newtp->tcp_header_len += tcp_extopt_add_header(req_to_sk(req), newsk);
+
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
@@ -587,9 +604,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
bool paws_reject = false;
bool own_req;
- tmp_opt.saw_tstamp = 0;
+ tcp_clear_options(&tmp_opt);
if (th->doff > (sizeof(struct tcphdr)>>2)) {
- tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL,
+ req_to_sk(req));
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;
@@ -604,6 +622,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
}
}
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)) &&
+ tcp_extopt_check(req_to_sk(req), skb, &tmp_opt))
+ return NULL;
+
/* Check for pure retransmitted SYN. */
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
flg == TCP_FLAG_SYN &&
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index efe599a41e36..6804a9325107 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -398,13 +398,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
return tp->snd_una != tp->snd_up;
}
-#define OPTION_SACK_ADVERTISE (1 << 0)
-#define OPTION_TS (1 << 1)
-#define OPTION_MD5 (1 << 2)
-#define OPTION_WSCALE (1 << 3)
-#define OPTION_FAST_OPEN_COOKIE (1 << 8)
-#define OPTION_SMC (1 << 9)
-
static void smc_options_write(__be32 *ptr, u16 *options)
{
#if IS_ENABLED(CONFIG_SMC)
@@ -420,17 +413,6 @@ static void smc_options_write(__be32 *ptr, u16 *options)
#endif
}
-struct tcp_out_options {
- u16 options; /* bit field of OPTION_* */
- u16 mss; /* 0 to disable */
- u8 ws; /* window scale, 0 to disable */
- u8 num_sack_blocks; /* number of SACK blocks to include */
- u8 hash_size; /* bytes in hash_location */
- __u8 *hash_location; /* temporary pointer, overloaded */
- __u32 tsval, tsecr; /* need to include OPTION_TS */
- struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
-};
-
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
@@ -447,12 +429,15 @@ struct tcp_out_options {
static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
struct tcp_out_options *opts)
{
+ struct hlist_head *extopt_list;
u16 options = opts->options; /* mungable copy */
struct tcp_sock *tp = NULL;
if (sk_fullsock(sk))
tp = tcp_sk(sk);
+ extopt_list = tcp_extopt_get_list(sk);
+
if (unlikely(OPTION_MD5 & options)) {
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
@@ -543,6 +528,9 @@ static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
}
smc_options_write(ptr, &options);
+
+ if (unlikely(!hlist_empty(extopt_list)))
+ tcp_extopt_write(ptr, skb, opts, sk);
}
static void smc_set_option(const struct tcp_sock *tp,
@@ -645,6 +633,10 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
smc_set_option(tp, opts, &remaining);
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN, remaining,
+ opts, tcp_to_sk(tp));
+
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -708,6 +700,11 @@ static unsigned int tcp_synack_options(const struct sock *sk,
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
+ remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN | TCPHDR_ACK,
+ remaining, opts,
+ req_to_sk(req));
+
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -741,6 +738,10 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
size += TCPOLEN_TSTAMP_ALIGNED;
}
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ size += tcp_extopt_prepare(skb, 0, MAX_TCP_OPTION_SPACE - size,
+ opts, tcp_to_sk(tp));
+
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
if (unlikely(eff_sacks)) {
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
@@ -3303,6 +3304,9 @@ static void tcp_connect_init(struct sock *sk)
tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ tp->tcp_header_len += tcp_extopt_add_header(sk, sk);
+
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index e7a3a6b6cf56..d0716c7e9390 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -162,7 +162,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, sk);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
tsoff = secure_tcpv6_ts_off(sock_net(sk),
@@ -174,6 +174,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
goto out;
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+ tcp_extopt_check(sk, skb, &tcp_opt))
+ goto out;
+
ret = NULL;
req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
if (!req)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7178476b3d2f..5af5dcc1ac83 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -500,6 +500,9 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
+ tcp_extopt_destroy(req_to_sk(req));
+
kfree(inet_rsk(req)->ipv6_opt);
kfree_skb(inet_rsk(req)->pktopts);
}
@@ -789,6 +792,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
unsigned int tot_len = sizeof(struct tcphdr);
struct dst_entry *dst;
__be32 *topt;
+ struct hlist_head *extopt_list = NULL;
+ struct tcp_out_options extraopts;
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
@@ -797,6 +802,25 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
+ if (sk)
+ extopt_list = tcp_extopt_get_list(sk);
+
+ if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
+ unsigned int remaining = MAX_TCP_OPTION_SPACE - tot_len;
+ u8 extraflags = rst ? TCPHDR_RST : 0;
+ int used;
+
+ if (!rst || !th->ack)
+ extraflags |= TCPHDR_ACK;
+
+ memset(&extraopts, 0, sizeof(extraopts));
+
+ used = tcp_extopt_response_prepare(skb, extraflags, remaining,
+ &extraopts, sk);
+
+ tot_len += used;
+ }
+
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
GFP_ATOMIC);
if (!buff)
@@ -837,6 +861,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
#endif
+ if (unlikely(extopt_list && !hlist_empty(extopt_list)))
+ tcp_extopt_response_write(topt, skb, t1, &extraopts, sk);
+
memset(&fl6, 0, sizeof(fl6));
fl6.daddr = ipv6_hdr(skb)->saddr;
fl6.saddr = ipv6_hdr(skb)->daddr;
@@ -1231,6 +1258,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
skb_set_owner_r(newnp->pktoptions, newsk);
}
}
+
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list))) {
+ tcp_extopt_move(req_to_sk(req), newsk);
+ INIT_HLIST_HEAD(&tcp_rsk(req)->tcp_option_list);
+ }
}
return newsk;
--
2.15.0
* [RFC 06/14] tcp_smc: Make SMC use TCP extra-option framework
From: Christoph Paasch @ 2017-12-18 21:51 UTC
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov, Ursula Braun
Adopt the extra-option framework for SMC.
It allows us to entirely remove the SMC-code from the TCP-stack.
The static key is gone, as this is now covered by the static key of the
extra-option framework.
We allocate state (struct tcp_smc_opt) that indicates whether SMC was
successfully negotiated or not and check this state in the relevant
functions.
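Checking the negotiation state then reduces to a lookup in the
per-socket store, roughly (a sketch with a made-up function name,
built on the helpers in the diff below):

  static bool example_smc_negotiated(struct sock *sk)
  {
          struct tcp_extopt_store *store;
          bool ret;

          rcu_read_lock();  /* tcp_extopt_find_kind() walks an RCU list */
          store = tcp_extopt_find_kind(TCPOPT_SMC_MAGIC, sk);
          ret = store && tcp_extopt_to_smc(store)->smc_ok;
          rcu_read_unlock();

          return ret;
  }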
Cc: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/tcp.h | 3 +-
include/net/inet_sock.h | 3 +-
include/net/tcp.h | 4 -
net/ipv4/tcp.c | 5 --
net/ipv4/tcp_input.c | 36 ---------
net/ipv4/tcp_minisocks.c | 18 -----
net/ipv4/tcp_output.c | 54 --------------
net/smc/af_smc.c | 190 +++++++++++++++++++++++++++++++++++++++++++++--
8 files changed, 186 insertions(+), 127 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4756bd2c4b54..231b352f587f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -257,8 +257,7 @@ struct tcp_sock {
syn_fastopen_ch:1, /* Active TFO re-enabling probe */
syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
save_syn:1, /* Save headers of SYN packet */
- is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
- syn_smc:1; /* SYN includes SMC */
+ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
/* RTT measurement */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 39efb968b7a4..8e51b4a69088 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -90,8 +90,7 @@ struct inet_request_sock {
wscale_ok : 1,
ecn_ok : 1,
acked : 1,
- no_srccheck: 1,
- smc_ok : 1;
+ no_srccheck: 1;
u32 ir_mark;
union {
struct ip_options_rcu __rcu *ireq_opt;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ac62ceff9815..a5c4856e25c7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2062,10 +2062,6 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
}
-#if IS_ENABLED(CONFIG_SMC)
-extern struct static_key_false tcp_have_smc;
-#endif
-
struct tcp_extopt_store;
struct tcp_extopt_ops {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 17f38afb4212..0a1cabee6d5e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -294,11 +294,6 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
-#if IS_ENABLED(CONFIG_SMC)
-DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
-EXPORT_SYMBOL(tcp_have_smc);
-#endif
-
/*
* Current number of TCP sockets.
*/
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1950ff80fb3f..af8f4f9fd098 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3671,24 +3671,6 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
foc->exp = exp_opt;
}
-static int smc_parse_options(const struct tcphdr *th,
- struct tcp_options_received *opt_rx,
- const unsigned char *ptr,
- int opsize)
-{
-#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (th->syn && !(opsize & 1) &&
- opsize >= TCPOLEN_EXP_SMC_BASE &&
- get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
- opt_rx->smc_ok = 1;
- return 1;
- }
- }
-#endif
- return 0;
-}
-
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
@@ -3796,9 +3778,6 @@ void tcp_parse_options(const struct net *net,
tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true);
- else if (smc_parse_options(th, opt_rx, ptr,
- opsize))
- break;
else if (opsize >= TCPOLEN_EXP_BASE)
tcp_extopt_parse(get_unaligned_be32(ptr),
opsize, ptr, skb,
@@ -5572,16 +5551,6 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
return false;
}
-static void smc_check_reset_syn(struct tcp_sock *tp)
-{
-#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (tp->syn_smc && !tp->rx_opt.smc_ok)
- tp->syn_smc = 0;
- }
-#endif
-}
-
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th)
{
@@ -5692,8 +5661,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
- smc_check_reset_syn(tp);
-
smp_mb();
tcp_finish_connect(sk, skb);
@@ -6150,9 +6117,6 @@ static void tcp_openreq_init(struct request_sock *req,
ireq->ir_rmt_port = tcp_hdr(skb)->source;
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
ireq->ir_mark = inet_request_mark(sk, skb);
-#if IS_ENABLED(CONFIG_SMC)
- ireq->smc_ok = rx_opt->smc_ok;
-#endif
}
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 676ad7ca13ad..aa2ff9aadad0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -435,21 +435,6 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
}
EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
-static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
- struct request_sock *req,
- struct tcp_sock *newtp)
-{
-#if IS_ENABLED(CONFIG_SMC)
- struct inet_request_sock *ireq;
-
- if (static_branch_unlikely(&tcp_have_smc)) {
- ireq = inet_rsk(req);
- if (oldtp->syn_smc && !ireq->smc_ok)
- newtp->syn_smc = 0;
- }
-#endif
-}
-
/* This is not only more efficient than what we used to do, it eliminates
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
*
@@ -467,9 +452,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
struct tcp_request_sock *treq = tcp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(newsk);
struct tcp_sock *newtp = tcp_sk(newsk);
- struct tcp_sock *oldtp = tcp_sk(sk);
-
- smc_check_reset_syn_req(oldtp, req, newtp);
/* Now setup tcp_sock */
newtp->pred_flags = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6804a9325107..baf1c913ca7f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -398,21 +398,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
return tp->snd_una != tp->snd_up;
}
-static void smc_options_write(__be32 *ptr, u16 *options)
-{
-#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (unlikely(OPTION_SMC & *options)) {
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_EXP << 8) |
- (TCPOLEN_EXP_SMC_BASE));
- *ptr++ = htonl(TCPOPT_SMC_MAGIC);
- }
- }
-#endif
-}
-
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
@@ -527,45 +512,10 @@ static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
ptr += (len + 3) >> 2;
}
- smc_options_write(ptr, &options);
-
if (unlikely(!hlist_empty(extopt_list)))
tcp_extopt_write(ptr, skb, opts, sk);
}
-static void smc_set_option(const struct tcp_sock *tp,
- struct tcp_out_options *opts,
- unsigned int *remaining)
-{
-#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (tp->syn_smc) {
- if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
- opts->options |= OPTION_SMC;
- *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
- }
- }
- }
-#endif
-}
-
-static void smc_set_option_cond(const struct tcp_sock *tp,
- const struct inet_request_sock *ireq,
- struct tcp_out_options *opts,
- unsigned int *remaining)
-{
-#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (tp->syn_smc && ireq->smc_ok) {
- if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
- opts->options |= OPTION_SMC;
- *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
- }
- }
- }
-#endif
-}
-
/* Compute TCP options for SYN packets. This is not the final
* network wire format yet.
*/
@@ -631,8 +581,6 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
}
}
- smc_set_option(tp, opts, &remaining);
-
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN, remaining,
opts, tcp_to_sk(tp));
@@ -698,8 +646,6 @@ static unsigned int tcp_synack_options(const struct sock *sk,
}
}
- smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
-
if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN | TCPHDR_ACK,
remaining, opts,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index daf8075f5a4c..14bb84f81a50 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -44,6 +44,149 @@
#include "smc_rx.h"
#include "smc_close.h"
+static unsigned int tcp_smc_opt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+static __be32 *tcp_smc_opt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct sock *sk,
+ struct tcp_extopt_store *store);
+static void tcp_smc_opt_parse(int opsize, const unsigned char *opptr,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct sock *sk,
+ struct tcp_extopt_store *store);
+static void tcp_smc_opt_post_process(struct sock *sk,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store);
+static struct tcp_extopt_store *tcp_smc_opt_copy(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store);
+static void tcp_smc_opt_destroy(struct tcp_extopt_store *store);
+
+struct tcp_smc_opt {
+ struct tcp_extopt_store store;
+ unsigned int smc_ok:1; /* SMC supported on this connection */
+ struct rcu_head rcu;
+};
+
+static const struct tcp_extopt_ops tcp_smc_extra_ops = {
+ .option_kind = TCPOPT_SMC_MAGIC,
+ .parse = tcp_smc_opt_parse,
+ .post_process = tcp_smc_opt_post_process,
+ .prepare = tcp_smc_opt_prepare,
+ .write = tcp_smc_opt_write,
+ .copy = tcp_smc_opt_copy,
+ .destroy = tcp_smc_opt_destroy,
+ .owner = THIS_MODULE,
+};
+
+static struct tcp_smc_opt *tcp_extopt_to_smc(struct tcp_extopt_store *store)
+{
+ return container_of(store, struct tcp_smc_opt, store);
+}
+
+static struct tcp_smc_opt *tcp_smc_opt_find(struct sock *sk)
+{
+ struct tcp_extopt_store *ext_opt;
+
+ ext_opt = tcp_extopt_find_kind(TCPOPT_SMC_MAGIC, sk);
+
+ return tcp_extopt_to_smc(ext_opt);
+}
+
+static unsigned int tcp_smc_opt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ if (!(flags & TCPHDR_SYN))
+ return 0;
+
+ if (remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+ opts->options |= OPTION_SMC;
+ return TCPOLEN_EXP_SMC_BASE_ALIGNED;
+ }
+
+ return 0;
+}
+
+static __be32 *tcp_smc_opt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ if (unlikely(OPTION_SMC & opts->options)) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_EXP << 8) |
+ (TCPOLEN_EXP_SMC_BASE));
+ *ptr++ = htonl(TCPOPT_SMC_MAGIC);
+ }
+
+ return ptr;
+}
+
+static void tcp_smc_opt_parse(int opsize, const unsigned char *opptr,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ struct tcphdr *th = tcp_hdr(skb);
+
+ if (th->syn && !(opsize & 1) && opsize >= TCPOLEN_EXP_SMC_BASE)
+ opt_rx->smc_ok = 1;
+}
+
+static void tcp_smc_opt_post_process(struct sock *sk,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store)
+{
+ struct tcp_smc_opt *smc_opt = tcp_extopt_to_smc(store);
+
+ if (sk->sk_state != TCP_SYN_SENT)
+ return;
+
+ if (opt->smc_ok)
+ smc_opt->smc_ok = 1;
+ else
+ smc_opt->smc_ok = 0;
+}
+
+static struct tcp_extopt_store *tcp_smc_opt_copy(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store)
+{
+ struct tcp_smc_opt *smc_opt;
+
+ /* First, check if the peer sent us the smc-opt */
+ if (!opt->smc_ok)
+ return NULL;
+
+ smc_opt = kzalloc(sizeof(*smc_opt), GFP_ATOMIC);
+ if (!smc_opt)
+ return NULL;
+
+ smc_opt->store.ops = &tcp_smc_extra_ops;
+
+ smc_opt->smc_ok = 1;
+
+ return (struct tcp_extopt_store *)smc_opt;
+}
+
+static void tcp_smc_opt_destroy(struct tcp_extopt_store *store)
+{
+ struct tcp_smc_opt *smc_opt = tcp_extopt_to_smc(store);
+
+ kfree_rcu(smc_opt, rcu);
+}
+
static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
* creation
*/
@@ -384,13 +527,15 @@ static int smc_connect_rdma(struct smc_sock *smc)
struct smc_clc_msg_accept_confirm aclc;
int local_contact = SMC_FIRST_CONTACT;
struct smc_ib_device *smcibdev;
+ struct tcp_smc_opt *smc_opt;
struct smc_link *link;
u8 srv_first_contact;
int reason_code = 0;
int rc = 0;
u8 ibport;
- if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
+ smc_opt = tcp_smc_opt_find(smc->clcsock->sk);
+ if (!smc_opt || !smc_opt->smc_ok) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
goto out_connected;
@@ -535,6 +680,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
static int smc_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
+ struct tcp_smc_opt *smc_opt;
struct sock *sk = sock->sk;
struct smc_sock *smc;
int rc = -EINVAL;
@@ -548,9 +694,17 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out_err;
smc->addr = addr; /* needed for nonblocking connect */
+ smc_opt = kzalloc(sizeof(*smc_opt), GFP_KERNEL);
+ if (!smc_opt) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ smc_opt->store.ops = &tcp_smc_extra_ops;
+
lock_sock(sk);
switch (sk->sk_state) {
default:
+ rc = -EINVAL;
goto out;
case SMC_ACTIVE:
rc = -EISCONN;
@@ -560,8 +714,15 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
break;
}
+ /* We are the only owner of smc->clcsock->sk, so we can be lockless */
+ rc = tcp_register_extopt(&smc_opt->store, smc->clcsock->sk);
+ if (rc) {
+ kfree(smc_opt);
+ goto out;
+ }
+
smc_copy_sock_settings_to_clc(smc);
- tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_connect(smc->clcsock, addr, alen, flags);
if (rc)
goto out;
@@ -760,6 +921,7 @@ static void smc_listen_work(struct work_struct *work)
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
struct sockaddr_in peeraddr;
+ struct tcp_smc_opt *smc_opt;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
@@ -769,7 +931,8 @@ static void smc_listen_work(struct work_struct *work)
u8 ibport;
/* check if peer is smc capable */
- if (!tcp_sk(newclcsock->sk)->syn_smc) {
+ smc_opt = tcp_smc_opt_find(newclcsock->sk);
+ if (!smc_opt || !smc_opt->smc_ok) {
new_smc->use_fallback = true;
goto out_connected;
}
@@ -962,10 +1125,18 @@ static void smc_tcp_listen_work(struct work_struct *work)
static int smc_listen(struct socket *sock, int backlog)
{
+ struct tcp_smc_opt *smc_opt;
struct sock *sk = sock->sk;
struct smc_sock *smc;
int rc;
+ smc_opt = kzalloc(sizeof(*smc_opt), GFP_KERNEL);
+ if (!smc_opt) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ smc_opt->store.ops = &tcp_smc_extra_ops;
+
smc = smc_sk(sk);
lock_sock(sk);
@@ -978,11 +1149,19 @@ static int smc_listen(struct socket *sock, int backlog)
sk->sk_max_ack_backlog = backlog;
goto out;
}
+
+ /* We are the only owner of smc->clcsock->sk, so we can be lockless */
+ rc = tcp_register_extopt(&smc_opt->store, smc->clcsock->sk);
+ if (rc) {
+ kfree(smc_opt);
+ goto out;
+ }
+
/* some socket options are handled in core, so we could not apply
* them to the clc socket -- copy smc socket options to clc socket
*/
smc_copy_sock_settings_to_clc(smc);
- tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_listen(smc->clcsock, backlog);
if (rc)
@@ -995,6 +1174,7 @@ static int smc_listen(struct socket *sock, int backlog)
out:
release_sock(sk);
+out_err:
return rc;
}
@@ -1425,7 +1605,6 @@ static int __init smc_init(void)
goto out_sock;
}
- static_branch_enable(&tcp_have_smc);
return 0;
out_sock:
@@ -1450,7 +1629,6 @@ static void __exit smc_exit(void)
list_del_init(&lgr->list);
smc_lgr_free(lgr); /* free link group */
}
- static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client();
sock_unregister(PF_SMC);
proto_unregister(&smc_proto);
--
2.15.0
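The conversion above is the template for any option that uses the framework: the
module embeds a struct tcp_extopt_store in its private state, points it at a
struct tcp_extopt_ops vtable, and recovers the private state from the store
pointer inside each callback. Below is a minimal, compile-standalone sketch of
that embed-and-recover pattern; ext_store, ext_ops and smc_parse are hypothetical
stand-ins for the RFC's tcp_extopt_* names, and only the magic value mirrors
TCPOPT_SMC_MAGIC:

#include <stddef.h>
#include <stdio.h>

struct ext_store;

struct ext_ops {
	unsigned int option_kind;
	void (*parse)(struct ext_store *store);
};

struct ext_store {
	const struct ext_ops *ops;
};

struct smc_opt {
	struct ext_store store;		/* embedded, not a pointer */
	unsigned int smc_ok:1;
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void smc_parse(struct ext_store *store)
{
	/* Recover the module-private struct from the embedded member. */
	struct smc_opt *opt = container_of(store, struct smc_opt, store);

	opt->smc_ok = 1;
}

static const struct ext_ops smc_ops = {
	.option_kind = 0xe2d4c3d9,	/* TCPOPT_SMC_MAGIC */
	.parse = smc_parse,
};

int main(void)
{
	struct smc_opt opt = { .store = { .ops = &smc_ops } };

	opt.store.ops->parse(&opt.store);
	printf("smc_ok=%u\n", (unsigned int)opt.smc_ok);
	return 0;
}

Embedding the store (rather than holding a pointer to one) is what makes the
container_of() arithmetic valid, and it also lets tcp_smc_opt_find() return NULL
cleanly when no store is registered, since store is the first member.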
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [RFC 07/14] tcp_md5: Don't pass along md5-key
2017-12-18 21:50 [RFC 00/14] Generic TCP-option framework and adoption for TCP-SMC and TCP-MD5 Christoph Paasch
` (5 preceding siblings ...)
2017-12-18 21:51 ` [RFC 06/14] tcp_smc: Make SMC use TCP extra-option framework Christoph Paasch
@ 2017-12-18 21:51 ` Christoph Paasch
2017-12-18 21:51 ` [RFC 08/14] tcp_md5: Detect key inside tcp_v4_send_ack instead of passing it as an argument Christoph Paasch
` (6 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
It is much cleaner to store the key pointer in tcp_out_options. This
allows us to remove the MD5-specific function arguments and paves the
way for adopting the TCP-option framework for TCP-MD5.
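As a minimal userspace sketch of the idea (hypothetical names: out_options and
md5_lookup stand in for tcp_out_options and the af_specific lookup, and the
constants are the aligned option sizes), the lookup result now rides in the
options context instead of being threaded through an extra out-parameter:

#include <stdio.h>

struct md5_key { int id; };

struct out_options {
	const struct md5_key *md5;	/* carried in the context */
};

static const struct md5_key *md5_lookup(void)
{
	static const struct md5_key key = { .id = 1 };

	return &key;	/* pretend the socket has a key configured */
}

/* Before: unsigned int syn_options(struct out_options *opts,
 *				    const struct md5_key **md5);
 * After: the extra out-parameter is gone.
 */
static unsigned int syn_options(struct out_options *opts)
{
	unsigned int remaining = 40;	/* MAX_TCP_OPTION_SPACE */

	opts->md5 = md5_lookup();
	if (opts->md5)
		remaining -= 20;	/* TCPOLEN_MD5SIG_ALIGNED */
	return remaining;
}

int main(void)
{
	struct out_options opts = { 0 };

	printf("remaining=%u md5=%s\n", syn_options(&opts),
	       opts.md5 ? "yes" : "no");
	return 0;
}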
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/tcp.h | 1 +
net/ipv4/tcp_output.c | 46 +++++++++++++++++++---------------------------
2 files changed, 20 insertions(+), 27 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 231b352f587f..b0b38f7100a4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -131,6 +131,7 @@ struct tcp_out_options {
__u8 *hash_location; /* temporary pointer, overloaded */
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
+ struct tcp_md5sig_key *md5; /* TCP_MD5 signature key */
};
/* This is the max number of SACKS that we'll generate and process. It's safe
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index baf1c913ca7f..43849ed73b03 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -520,21 +520,18 @@ static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
* network wire format yet.
*/
static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
- struct tcp_out_options *opts,
- struct tcp_md5sig_key **md5)
+ struct tcp_out_options *opts)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
#ifdef CONFIG_TCP_MD5SIG
- *md5 = tp->af_specific->md5_lookup(sk, sk);
- if (*md5) {
+ opts->md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (opts->md5) {
opts->options |= OPTION_MD5;
remaining -= TCPOLEN_MD5SIG_ALIGNED;
}
-#else
- *md5 = NULL;
#endif
/* We always get an MSS option. The option bytes which will be seen in
@@ -549,7 +546,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
+ if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !opts->md5)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
@@ -593,14 +590,13 @@ static unsigned int tcp_synack_options(const struct sock *sk,
struct request_sock *req,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
- const struct tcp_md5sig_key *md5,
struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
#ifdef CONFIG_TCP_MD5SIG
- if (md5) {
+ if (opts->md5) {
opts->options |= OPTION_MD5;
remaining -= TCPOLEN_MD5SIG_ALIGNED;
@@ -658,8 +654,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
* final wire format yet.
*/
static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
- struct tcp_out_options *opts,
- struct tcp_md5sig_key **md5)
+ struct tcp_out_options *opts)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned int size = 0;
@@ -668,13 +663,13 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->options = 0;
#ifdef CONFIG_TCP_MD5SIG
- *md5 = tp->af_specific->md5_lookup(sk, sk);
- if (unlikely(*md5)) {
+ opts->md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (unlikely(opts->md5)) {
opts->options |= OPTION_MD5;
size += TCPOLEN_MD5SIG_ALIGNED;
}
#else
- *md5 = NULL;
+ opts->md5 = NULL;
#endif
if (likely(tp->rx_opt.tstamp_ok)) {
@@ -992,7 +987,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
struct tcp_out_options opts;
unsigned int tcp_options_size, tcp_header_size;
struct sk_buff *oskb = NULL;
- struct tcp_md5sig_key *md5;
struct tcphdr *th;
int err;
@@ -1021,10 +1015,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
memset(&opts, 0, sizeof(opts));
if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
- tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
+ tcp_options_size = tcp_syn_options(sk, skb, &opts);
else
- tcp_options_size = tcp_established_options(sk, skb, &opts,
- &md5);
+ tcp_options_size = tcp_established_options(sk, skb, &opts);
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
/* if no packet is in qdisc/device queue, then allow XPS to select
@@ -1090,10 +1083,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcp_options_write((__be32 *)(th + 1), skb, sk, &opts);
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
- if (md5) {
+ if (opts.md5) {
sk_nocaps_add(sk, NETIF_F_GSO_MASK);
tp->af_specific->calc_md5_hash(opts.hash_location,
- md5, sk, skb);
+ opts.md5, sk, skb);
}
#endif
@@ -1537,7 +1530,6 @@ unsigned int tcp_current_mss(struct sock *sk)
u32 mss_now;
unsigned int header_len;
struct tcp_out_options opts;
- struct tcp_md5sig_key *md5;
mss_now = tp->mss_cache;
@@ -1547,7 +1539,7 @@ unsigned int tcp_current_mss(struct sock *sk)
mss_now = tcp_sync_mss(sk, mtu);
}
- header_len = tcp_established_options(sk, NULL, &opts, &md5) +
+ header_len = tcp_established_options(sk, NULL, &opts) +
sizeof(struct tcphdr);
/* The mss_cache is sized based on tp->tcp_header_len, which assumes
* some common options. If this is an odd packet (because we have SACK
@@ -3123,7 +3115,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
{
struct inet_request_sock *ireq = inet_rsk(req);
const struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *md5 = NULL;
struct tcp_out_options opts;
struct sk_buff *skb;
int tcp_header_size;
@@ -3169,10 +3160,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
#ifdef CONFIG_TCP_MD5SIG
rcu_read_lock();
- md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
+ opts.md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
- tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
+ tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts,
foc) + sizeof(*th);
skb_push(skb, tcp_header_size);
@@ -3199,9 +3190,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
- if (md5)
+ if (opts.md5)
tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
- md5, req_to_sk(req), skb);
+ opts.md5,
+ req_to_sk(req), skb);
rcu_read_unlock();
#endif
--
2.15.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [RFC 08/14] tcp_md5: Detect key inside tcp_v4_send_ack instead of passing it as an argument
2017-12-18 21:50 [RFC 00/14] Generic TCP-option framework and adoption for TCP-SMC and TCP-MD5 Christoph Paasch
` (6 preceding siblings ...)
2017-12-18 21:51 ` [RFC 07/14] tcp_md5: Don't pass along md5-key Christoph Paasch
@ 2017-12-18 21:51 ` Christoph Paasch
2017-12-18 21:51 ` [RFC 09/14] tcp_md5: Detect key inside tcp_v6_send_response " Christoph Paasch
` (5 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
This will make it simpler to consolidate the TCP-MD5 code into a single place.
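A sketch of the dispatch that moves into tcp_v4_send_ack(); the types and
helpers here are simplified stand-ins, while the real code keys off
sk->sk_state and the saddr lookup shown in the hunk below:

#include <stdio.h>

enum sk_state { S_TIME_WAIT, S_NEW_SYN_RECV, S_OTHER };

struct key { int id; };

static const struct key tw_key = { 1 };

static const struct key *twsk_md5_key(void)	{ return &tw_key; }
static const struct key *lookup_by_saddr(void)	{ return NULL; }

static const struct key *ack_md5_key(enum sk_state state)
{
	switch (state) {
	case S_TIME_WAIT:
		return twsk_md5_key();	  /* was an argument from tcp_v4_timewait_ack() */
	case S_NEW_SYN_RECV:
		return lookup_by_saddr(); /* was an argument from tcp_v4_reqsk_send_ack() */
	default:
		return NULL;		  /* should not happen */
	}
}

int main(void)
{
	printf("tw key id=%d\n", ack_md5_key(S_TIME_WAIT)->id);
	return 0;
}

The callers shrink accordingly, as the hunks for tcp_v4_timewait_ack() and
tcp_v4_reqsk_send_ack() below show.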
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv4/tcp_ipv4.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dee296097b8f..397975203e14 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -764,7 +764,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
static void tcp_v4_send_ack(const struct sock *sk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
- struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
{
const struct tcphdr *th = tcp_hdr(skb);
@@ -773,6 +772,9 @@ static void tcp_v4_send_ack(const struct sock *sk,
__be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
struct hlist_head *extopt_list = NULL;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key;
+#endif
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
int offset = 0;
@@ -803,6 +805,17 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.th.ack = 1;
rep.th.window = htons(win);
+#ifdef CONFIG_TCP_MD5SIG
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ key = tcp_twsk_md5_key(tcp_twsk(sk));
+ } else if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
+ AF_INET);
+ } else {
+ key = NULL; /* Should not happen */
+ }
+#endif
+
if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
unsigned int remaining;
struct tcp_out_options opts;
@@ -872,7 +885,6 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
tw->tw_bound_dev_if,
- tcp_twsk_md5_key(tcptw),
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
tw->tw_tos
);
@@ -900,8 +912,6 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
- AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
}
--
2.15.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [RFC 09/14] tcp_md5: Detect key inside tcp_v6_send_response instead of passing it as an argument
2017-12-18 21:50 [RFC 00/14] Generic TCP-option framework and adoption for TCP-SMC and TCP-MD5 Christoph Paasch
` (7 preceding siblings ...)
2017-12-18 21:51 ` [RFC 08/14] tcp_md5: Detect key inside tcp_v4_send_ack instead of passing it as an argument Christoph Paasch
@ 2017-12-18 21:51 ` Christoph Paasch
2017-12-18 21:51 ` [RFC 10/14] tcp_md5: Check for TCP_MD5 after TCP Timestamps in tcp_established_options Christoph Paasch
` (4 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
We want to move all the TCP-MD5 code to a single place, which enables us
to factor it out of the TCP stack and into the extra-option framework.
Detection of whether or not to drop the segment (as done in
tcp_v6_send_reset()) has now been moved to tcp_v6_send_response().
The latter is therefore adapted so that it can handle the case where we
want to exit without sending anything.
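The shape of that exit path is worth spelling out: buff starts out NULL, every
bail-out jumps to a single label, and the success path hands the buffer off and
re-NULLs the pointer, so the final kfree_skb() only frees on error (kfree_skb(),
like free(), is a no-op on NULL). A small userspace sketch of the same shape,
with hypothetical names:

#include <stdlib.h>

static int send_response(int key_check_fails, int alloc_fails)
{
	char *buff = NULL;
	int sent = 0;

	if (key_check_fails)
		goto out;	/* exit without sending anything */

	buff = alloc_fails ? NULL : malloc(64);
	if (!buff)
		goto out;

	/* ... build the packet; pretend the transmit path consumes it ... */
	free(buff);	/* stands in for ip6_xmit() taking ownership */
	buff = NULL;	/* so the common exit frees nothing on success */
	sent = 1;
out:
	free(buff);	/* no-op unless we bailed out after the alloc */
	return sent;
}

int main(void)
{
	return !(send_response(0, 0) == 1 &&
		 send_response(1, 0) == 0 &&
		 send_response(0, 1) == 0);
}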
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv6/tcp_ipv6.c | 119 +++++++++++++++++++++++++---------------------------
1 file changed, 57 insertions(+), 62 deletions(-)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5af5dcc1ac83..202a59511950 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -82,12 +82,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
-#else
-static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
- const struct in6_addr *addr)
-{
- return NULL;
-}
#endif
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
@@ -780,12 +774,11 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
- int oif, struct tcp_md5sig_key *key, int rst,
- u8 tclass, __be32 label)
+ int oif, int rst, u8 tclass, __be32 label)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
- struct sk_buff *buff;
+ struct sk_buff *buff = NULL;
struct flowi6 fl6;
struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
struct sock *ctl_sk = net->ipv6.tcp_sk;
@@ -794,10 +787,54 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
__be32 *topt;
struct hlist_head *extopt_list = NULL;
struct tcp_out_options extraopts;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key = NULL;
+ const __u8 *hash_location = NULL;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+#endif
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
+ rcu_read_lock();
+ hash_location = tcp_parse_md5sig_option(th);
+ if (sk && sk_fullsock(sk)) {
+ key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+
+ key = tcp_twsk_md5_key(tcptw);
+ } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
+ key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ } else if (hash_location) {
+ unsigned char newhash[16];
+ struct sock *sk1 = NULL;
+ int genhash;
+
+ /* active side is lost. Try to find listening socket through
+ * source port, and then find md5 key through listening socket.
+ * we are not losing security here:
+ * Incoming packet is checked with md5 hash with finding key,
+ * no RST generated if md5 hash doesn't match.
+ */
+ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+ &tcp_hashinfo, NULL, 0,
+ &ipv6h->saddr,
+ th->source, &ipv6h->daddr,
+ ntohs(th->source), tcp_v6_iif(skb),
+ tcp_v6_sdif(skb));
+ if (!sk1)
+ goto out;
+
+ key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+ if (!key)
+ goto out;
+
+ genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
+ if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ goto out;
+ }
+
if (key)
tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
@@ -824,7 +861,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
GFP_ATOMIC);
if (!buff)
- return;
+ goto out;
skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
@@ -901,24 +938,21 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
- return;
+ buff = NULL;
}
+out:
kfree_skb(buff);
+
+#ifdef CONFIG_TCP_MD5SIG
+ rcu_read_unlock();
+#endif
}
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
u32 seq = 0, ack_seq = 0;
- struct tcp_md5sig_key *key = NULL;
-#ifdef CONFIG_TCP_MD5SIG
- const __u8 *hash_location = NULL;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- unsigned char newhash[16];
- int genhash;
- struct sock *sk1 = NULL;
-#endif
int oif = 0;
if (th->rst)
@@ -930,38 +964,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (!sk && !ipv6_unicast_destination(skb))
return;
-#ifdef CONFIG_TCP_MD5SIG
- rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
- if (sk && sk_fullsock(sk)) {
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
- } else if (hash_location) {
- /*
- * active side is lost. Try to find listening socket through
- * source port, and then find md5 key through listening socket.
- * we are not loose security here:
- * Incoming packet is checked with md5 hash with finding key,
- * no RST generated if md5 hash doesn't match.
- */
- sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, NULL, 0,
- &ipv6h->saddr,
- th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb),
- tcp_v6_sdif(skb));
- if (!sk1)
- goto out;
-
- key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
- if (!key)
- goto out;
-
- genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto out;
- }
-#endif
-
if (th->ack)
seq = ntohl(th->ack_seq);
else
@@ -973,20 +975,14 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
trace_tcp_send_reset(sk, skb);
}
- tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
-
-#ifdef CONFIG_TCP_MD5SIG
-out:
- rcu_read_unlock();
-#endif
+ tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, 0, 0);
}
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
- struct tcp_md5sig_key *key, u8 tclass,
- __be32 label)
+ u8 tclass, __be32 label)
{
- tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
+ tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
tclass, label);
}
@@ -998,7 +994,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
- tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
+ tcptw->tw_ts_recent, tw->tw_bound_dev_if,
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
inet_twsk_put(tw);
@@ -1021,7 +1017,6 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0);
}
--
2.15.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [RFC 10/14] tcp_md5: Check for TCP_MD5 after TCP Timestamps in tcp_established_options
2017-12-18 21:50 [RFC 00/14] Generic TCP-option framework and adoption for TCP-SMC and TCP-MD5 Christoph Paasch
` (8 preceding siblings ...)
2017-12-18 21:51 ` [RFC 09/14] tcp_md5: Detect key inside tcp_v6_send_response " Christoph Paasch
@ 2017-12-18 21:51 ` Christoph Paasch
2017-12-18 21:51 ` [RFC 11/14] tcp_md5: Move TCP-MD5 code out of TCP itself Christoph Paasch
` (3 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
The ordering really does not matter, because we never use TCP timestamps
when TCP-MD5 is enabled (see tcp_syn_options()).
Moving the TCP-MD5 check a bit lower allows for easier adoption of the
tcp_extra_option framework.
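A tiny sketch of why the swap is safe (illustrative names; the constants are
the aligned option lengths): a configured MD5 key suppresses timestamp
negotiation at SYN time, so the two branches are mutually exclusive on
established sockets, and each remaining case is a plain sum, which is
order-independent:

#include <assert.h>

#define TS_LEN	12	/* TCPOLEN_TSTAMP_ALIGNED */
#define MD5_LEN	20	/* TCPOLEN_MD5SIG_ALIGNED */

static unsigned int opt_size(int tstamp_ok, int have_md5)
{
	unsigned int size = 0;

	if (tstamp_ok)
		size += TS_LEN;
	if (have_md5)
		size += MD5_LEN;
	return size;
}

int main(void)
{
	/* tcp_syn_options() never negotiates timestamps when a key is
	 * set, so (1, 1) cannot occur; the reachable cases are fixed
	 * sums whichever check runs first. */
	assert(opt_size(1, 0) == TS_LEN);
	assert(opt_size(0, 1) == MD5_LEN);
	assert(opt_size(0, 0) == 0);
	return 0;
}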
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv4/tcp_output.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 43849ed73b03..7ea65f70e5ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -662,6 +662,13 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->options = 0;
+ if (likely(tp->rx_opt.tstamp_ok)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
+ opts->tsecr = tp->rx_opt.ts_recent;
+ size += TCPOLEN_TSTAMP_ALIGNED;
+ }
+
#ifdef CONFIG_TCP_MD5SIG
opts->md5 = tp->af_specific->md5_lookup(sk, sk);
if (unlikely(opts->md5)) {
@@ -672,13 +679,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->md5 = NULL;
#endif
- if (likely(tp->rx_opt.tstamp_ok)) {
- opts->options |= OPTION_TS;
- opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
- opts->tsecr = tp->rx_opt.ts_recent;
- size += TCPOLEN_TSTAMP_ALIGNED;
- }
-
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
size += tcp_extopt_prepare(skb, 0, MAX_TCP_OPTION_SPACE - size,
opts, tcp_to_sk(tp));
--
2.15.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [RFC 11/14] tcp_md5: Move TCP-MD5 code out of TCP itself
2017-12-18 21:50 [RFC 00/14] Generic TCP-option framework and adoption for TCP-SMC and TCP-MD5 Christoph Paasch
` (9 preceding siblings ...)
2017-12-18 21:51 ` [RFC 10/14] tcp_md5: Check for TCP_MD5 after TCP Timestamps in tcp_established_options Christoph Paasch
@ 2017-12-18 21:51 ` Christoph Paasch
2018-01-02 19:39 ` Mat Martineau
2017-12-18 21:51 ` [RFC 12/14] tcp_md5: Use tcp_extra_options in output path Christoph Paasch
` (2 subsequent siblings)
13 siblings, 1 reply; 17+ messages in thread
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
This is all just copy-pasting the TCP-MD5 code into functions that are
placed in net/ipv4/tcp_md5.c.
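The new header follows the usual config-gating pattern: real declarations live
under CONFIG_TCP_MD5SIG, and static inline stubs keep every call site compiling
(with the checks folding away) when MD5 is off. A standalone sketch of that
pattern, with a generic name in place of the kernel one:

#include <stdbool.h>
#include <stdio.h>

/* Define this to model a CONFIG_TCP_MD5SIG=y build. */
/* #define CONFIG_TCP_MD5SIG 1 */

#ifdef CONFIG_TCP_MD5SIG
bool inbound_md5_hash(const void *sk, const void *skb);	/* real version */
#else
static inline bool inbound_md5_hash(const void *sk, const void *skb)
{
	(void)sk;
	(void)skb;
	return false;	/* feature compiled out: never drop the segment */
}
#endif

int main(void)
{
	printf("drop=%d\n", inbound_md5_hash(NULL, NULL));
	return 0;
}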
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/inet_diag.h | 1 +
include/linux/tcp_md5.h | 138 ++++++
include/net/tcp.h | 77 ----
net/ipv4/Makefile | 1 +
net/ipv4/tcp.c | 133 +-----
net/ipv4/tcp_diag.c | 81 +---
net/ipv4/tcp_input.c | 38 --
net/ipv4/tcp_ipv4.c | 520 ++-------------------
net/ipv4/tcp_md5.c | 1102 +++++++++++++++++++++++++++++++++++++++++++++
net/ipv4/tcp_minisocks.c | 27 +-
net/ipv4/tcp_output.c | 4 +-
net/ipv6/tcp_ipv6.c | 318 +------------
12 files changed, 1305 insertions(+), 1135 deletions(-)
create mode 100644 include/linux/tcp_md5.h
create mode 100644 net/ipv4/tcp_md5.c
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 39faaaf843e1..1ef6727e41c9 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -2,6 +2,7 @@
#ifndef _INET_DIAG_H_
#define _INET_DIAG_H_ 1
+#include <linux/user_namespace.h>
#include <uapi/linux/inet_diag.h>
struct net;
diff --git a/include/linux/tcp_md5.h b/include/linux/tcp_md5.h
new file mode 100644
index 000000000000..f6a681cdded4
--- /dev/null
+++ b/include/linux/tcp_md5.h
@@ -0,0 +1,138 @@
+#ifndef _LINUX_TCP_MD5_H
+#define _LINUX_TCP_MD5_H
+
+#include <linux/skbuff.h>
+
+#ifdef CONFIG_TCP_MD5SIG
+#include <linux/types.h>
+
+#include <net/tcp.h>
+
+union tcp_md5_addr {
+ struct in_addr a4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr a6;
+#endif
+};
+
+/* - key database */
+struct tcp_md5sig_key {
+ struct hlist_node node;
+ u8 keylen;
+ u8 family; /* AF_INET or AF_INET6 */
+ union tcp_md5_addr addr;
+ u8 prefixlen;
+ u8 key[TCP_MD5SIG_MAXKEYLEN];
+ struct rcu_head rcu;
+};
+
+/* - sock block */
+struct tcp_md5sig_info {
+ struct hlist_head head;
+ struct rcu_head rcu;
+};
+
+union tcp_md5sum_block {
+ struct tcp4_pseudohdr ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct tcp6_pseudohdr ip6;
+#endif
+};
+
+/* - pool: digest algorithm, hash description and scratch buffer */
+struct tcp_md5sig_pool {
+ struct ahash_request *md5_req;
+ void *scratch;
+};
+
+extern const struct tcp_sock_af_ops tcp_sock_ipv4_specific;
+extern const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
+extern const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
+
+/* - functions */
+int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
+ const struct sock *sk, const struct sk_buff *skb);
+
+struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk);
+
+void tcp_v4_md5_destroy_sock(struct sock *sk);
+
+int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+bool tcp_v4_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb);
+
+void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
+
+void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
+
+void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw);
+
+struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk);
+
+int tcp_v6_md5_hash_skb(char *md5_hash,
+ const struct tcp_md5sig_key *key,
+ const struct sock *sk,
+ const struct sk_buff *skb);
+
+bool tcp_v6_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb);
+
+static inline void tcp_md5_twsk_destructor(struct sock *sk)
+{
+ struct tcp_timewait_sock *twsk = tcp_twsk(sk);
+
+ if (twsk->tw_md5_key)
+ kfree_rcu(twsk->tw_md5_key, rcu);
+}
+
+static inline void tcp_md5_add_header_len(const struct sock *listener,
+ struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->af_specific->md5_lookup(listener, sk))
+ tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+}
+
+int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb);
+
+int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin);
+
+#else
+
+static inline bool tcp_v4_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ return false;
+}
+
+static inline bool tcp_v6_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ return false;
+}
+
+#endif
+
+#endif /* _LINUX_TCP_MD5_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a5c4856e25c7..e955c5f0997f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -406,7 +406,6 @@ void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc,
struct sock *sk);
-const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
/*
* TCP v4 functions exported for the inet6 API
@@ -1415,30 +1414,6 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
tp->retransmit_skb_hint = NULL;
}
-union tcp_md5_addr {
- struct in_addr a4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr a6;
-#endif
-};
-
-/* - key database */
-struct tcp_md5sig_key {
- struct hlist_node node;
- u8 keylen;
- u8 family; /* AF_INET or AF_INET6 */
- union tcp_md5_addr addr;
- u8 prefixlen;
- u8 key[TCP_MD5SIG_MAXKEYLEN];
- struct rcu_head rcu;
-};
-
-/* - sock block */
-struct tcp_md5sig_info {
- struct hlist_head head;
- struct rcu_head rcu;
-};
-
/* - pseudo header */
struct tcp4_pseudohdr {
__be32 saddr;
@@ -1455,58 +1430,6 @@ struct tcp6_pseudohdr {
__be32 protocol; /* including padding */
};
-union tcp_md5sum_block {
- struct tcp4_pseudohdr ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct tcp6_pseudohdr ip6;
-#endif
-};
-
-/* - pool: digest algorithm, hash description and scratch buffer */
-struct tcp_md5sig_pool {
- struct ahash_request *md5_req;
- void *scratch;
-};
-
-/* - functions */
-int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct sock *sk, const struct sk_buff *skb);
-int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
- gfp_t gfp);
-int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen);
-struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk);
-
-#ifdef CONFIG_TCP_MD5SIG
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family);
-#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
-#else
-static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
-{
- return NULL;
-}
-#define tcp_twsk_md5_key(twsk) NULL
-#endif
-
-bool tcp_alloc_md5sig_pool(void);
-
-struct tcp_md5sig_pool *tcp_get_md5sig_pool(void);
-static inline void tcp_put_md5sig_pool(void)
-{
- local_bh_enable();
-}
-
-int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
- unsigned int header_len);
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
- const struct tcp_md5sig_key *key);
-
/* From tcp_fastopen.c */
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c6c8ad1d4b6d..9262d9a01035 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_MD5SIG) += tcp_md5.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0a1cabee6d5e..29f3ce8a0b54 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,6 +271,7 @@
#include <linux/slab.h>
#include <linux/errqueue.h>
#include <linux/static_key.h>
+#include <linux/tcp_md5.h>
#include <net/icmp.h>
#include <net/inet_common.h>
@@ -3337,138 +3338,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
-#ifdef CONFIG_TCP_MD5SIG
-static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
-static DEFINE_MUTEX(tcp_md5sig_mutex);
-static bool tcp_md5sig_pool_populated = false;
-
-static void __tcp_alloc_md5sig_pool(void)
-{
- struct crypto_ahash *hash;
- int cpu;
-
- hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(hash))
- return;
-
- for_each_possible_cpu(cpu) {
- void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
- struct ahash_request *req;
-
- if (!scratch) {
- scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
- sizeof(struct tcphdr),
- GFP_KERNEL,
- cpu_to_node(cpu));
- if (!scratch)
- return;
- per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
- }
- if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
- continue;
-
- req = ahash_request_alloc(hash, GFP_KERNEL);
- if (!req)
- return;
-
- ahash_request_set_callback(req, 0, NULL, NULL);
-
- per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
- }
- /* before setting tcp_md5sig_pool_populated, we must commit all writes
- * to memory. See smp_rmb() in tcp_get_md5sig_pool()
- */
- smp_wmb();
- tcp_md5sig_pool_populated = true;
-}
-
-bool tcp_alloc_md5sig_pool(void)
-{
- if (unlikely(!tcp_md5sig_pool_populated)) {
- mutex_lock(&tcp_md5sig_mutex);
-
- if (!tcp_md5sig_pool_populated)
- __tcp_alloc_md5sig_pool();
-
- mutex_unlock(&tcp_md5sig_mutex);
- }
- return tcp_md5sig_pool_populated;
-}
-EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
-
-
-/**
- * tcp_get_md5sig_pool - get md5sig_pool for this user
- *
- * We use percpu structure, so if we succeed, we exit with preemption
- * and BH disabled, to make sure another thread or softirq handling
- * wont try to get same context.
- */
-struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
-{
- local_bh_disable();
-
- if (tcp_md5sig_pool_populated) {
- /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
- smp_rmb();
- return this_cpu_ptr(&tcp_md5sig_pool);
- }
- local_bh_enable();
- return NULL;
-}
-EXPORT_SYMBOL(tcp_get_md5sig_pool);
-
-int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
- const struct sk_buff *skb, unsigned int header_len)
-{
- struct scatterlist sg;
- const struct tcphdr *tp = tcp_hdr(skb);
- struct ahash_request *req = hp->md5_req;
- unsigned int i;
- const unsigned int head_data_len = skb_headlen(skb) > header_len ?
- skb_headlen(skb) - header_len : 0;
- const struct skb_shared_info *shi = skb_shinfo(skb);
- struct sk_buff *frag_iter;
-
- sg_init_table(&sg, 1);
-
- sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
- ahash_request_set_crypt(req, &sg, NULL, head_data_len);
- if (crypto_ahash_update(req))
- return 1;
-
- for (i = 0; i < shi->nr_frags; ++i) {
- const struct skb_frag_struct *f = &shi->frags[i];
- unsigned int offset = f->page_offset;
- struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
-
- sg_set_page(&sg, page, skb_frag_size(f),
- offset_in_page(offset));
- ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
- if (crypto_ahash_update(req))
- return 1;
- }
-
- skb_walk_frags(skb, frag_iter)
- if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
- return 1;
-
- return 0;
-}
-EXPORT_SYMBOL(tcp_md5_hash_skb_data);
-
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
-{
- struct scatterlist sg;
-
- sg_init_one(&sg, key->key, key->keylen);
- ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen);
- return crypto_ahash_update(hp->md5_req);
-}
-EXPORT_SYMBOL(tcp_md5_hash_key);
-
-#endif
-
struct hlist_head *tcp_extopt_get_list(const struct sock *sk)
{
if (sk_fullsock(sk))
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index abbf0edcf6c2..5cfe5dc8f8dd 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -15,6 +15,7 @@
#include <linux/inet_diag.h>
#include <linux/tcp.h>
+#include <linux/tcp_md5.h>
#include <net/netlink.h>
#include <net/tcp.h>
@@ -37,70 +38,14 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
tcp_get_info(sk, info);
}
-#ifdef CONFIG_TCP_MD5SIG
-static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
- const struct tcp_md5sig_key *key)
-{
- info->tcpm_family = key->family;
- info->tcpm_prefixlen = key->prefixlen;
- info->tcpm_keylen = key->keylen;
- memcpy(info->tcpm_key, key->key, key->keylen);
-
- if (key->family == AF_INET)
- info->tcpm_addr[0] = key->addr.a4.s_addr;
- #if IS_ENABLED(CONFIG_IPV6)
- else if (key->family == AF_INET6)
- memcpy(&info->tcpm_addr, &key->addr.a6,
- sizeof(info->tcpm_addr));
- #endif
-}
-
-static int tcp_diag_put_md5sig(struct sk_buff *skb,
- const struct tcp_md5sig_info *md5sig)
-{
- const struct tcp_md5sig_key *key;
- struct tcp_diag_md5sig *info;
- struct nlattr *attr;
- int md5sig_count = 0;
-
- hlist_for_each_entry_rcu(key, &md5sig->head, node)
- md5sig_count++;
- if (md5sig_count == 0)
- return 0;
-
- attr = nla_reserve(skb, INET_DIAG_MD5SIG,
- md5sig_count * sizeof(struct tcp_diag_md5sig));
- if (!attr)
- return -EMSGSIZE;
-
- info = nla_data(attr);
- memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
- hlist_for_each_entry_rcu(key, &md5sig->head, node) {
- tcp_diag_md5sig_fill(info++, key);
- if (--md5sig_count == 0)
- break;
- }
-
- return 0;
-}
-#endif
-
static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
- if (net_admin) {
- struct tcp_md5sig_info *md5sig;
- int err = 0;
-
- rcu_read_lock();
- md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
- if (md5sig)
- err = tcp_diag_put_md5sig(skb, md5sig);
- rcu_read_unlock();
- if (err < 0)
- return err;
- }
+ int err = tcp_md5_diag_get_aux(sk, net_admin, skb);
+
+ if (err < 0)
+ return err;
#endif
return 0;
@@ -111,21 +56,7 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
size_t size = 0;
#ifdef CONFIG_TCP_MD5SIG
- if (net_admin && sk_fullsock(sk)) {
- const struct tcp_md5sig_info *md5sig;
- const struct tcp_md5sig_key *key;
- size_t md5sig_count = 0;
-
- rcu_read_lock();
- md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
- if (md5sig) {
- hlist_for_each_entry_rcu(key, &md5sig->head, node)
- md5sig_count++;
- }
- rcu_read_unlock();
- size += nla_total_size(md5sig_count *
- sizeof(struct tcp_diag_md5sig));
- }
+ size += tcp_md5_diag_get_aux_size(sk, net_admin);
#endif
return size;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index af8f4f9fd098..db54bdbdee51 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3847,44 +3847,6 @@ static bool tcp_fast_parse_options(const struct net *net,
return false;
}
-#ifdef CONFIG_TCP_MD5SIG
-/*
- * Parse MD5 Signature option
- */
-const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
-{
- int length = (th->doff << 2) - sizeof(*th);
- const u8 *ptr = (const u8 *)(th + 1);
-
- /* If the TCP option is too short, we can short cut */
- if (length < TCPOLEN_MD5SIG)
- return NULL;
-
- while (length > 0) {
- int opcode = *ptr++;
- int opsize;
-
- switch (opcode) {
- case TCPOPT_EOL:
- return NULL;
- case TCPOPT_NOP:
- length--;
- continue;
- default:
- opsize = *ptr++;
- if (opsize < 2 || opsize > length)
- return NULL;
- if (opcode == TCPOPT_MD5SIG)
- return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
- }
- ptr += opsize - 2;
- length -= opsize;
- }
- return NULL;
-}
-EXPORT_SYMBOL(tcp_parse_md5sig_option);
-#endif
-
/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
*
* It is not fatal. If this ACK does _not_ change critical state (seqs, window)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 397975203e14..143e1f66a24a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -62,6 +62,7 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/tcp_md5.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -87,11 +88,6 @@
#include <trace/events/tcp.h>
-#ifdef CONFIG_TCP_MD5SIG
-static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
- __be32 daddr, __be32 saddr, const struct tcphdr *th);
-#endif
-
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
@@ -603,16 +599,13 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
__be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
struct hlist_head *extopt_list = NULL;
+ struct tcp_out_options opts;
struct ip_reply_arg arg;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key = NULL;
- const __u8 *hash_location = NULL;
- unsigned char newhash[16];
- int genhash;
- struct sock *sk1 = NULL;
-#endif
struct net *net;
int offset = 0;
+#ifdef CONFIG_TCP_MD5SIG
+ int ret;
+#endif
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -627,6 +620,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk)
extopt_list = tcp_extopt_get_list(sk);
+ memset(&opts, 0, sizeof(opts));
+
/* Swap the send and the receive. */
memset(&rep, 0, sizeof(rep));
rep.th.dest = th->source;
@@ -647,55 +642,28 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
arg.iov[0].iov_len = sizeof(rep.th);
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
-#ifdef CONFIG_TCP_MD5SIG
- rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
- if (sk && sk_fullsock(sk)) {
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
- &ip_hdr(skb)->saddr, AF_INET);
- } else if (hash_location) {
- /*
- * active side is lost. Try to find listening socket through
- * source port, and then find md5 key through listening socket.
- * we are not loose security here:
- * Incoming packet is checked with md5 hash with finding key,
- * no RST generated if md5 hash doesn't match.
- */
- sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
- ip_hdr(skb)->saddr,
- th->source, ip_hdr(skb)->daddr,
- ntohs(th->source), inet_iif(skb),
- tcp_v4_sdif(skb));
- /* don't send rst if it can't find key */
- if (!sk1)
- goto out;
-
- key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
- &ip_hdr(skb)->saddr, AF_INET);
- if (!key)
- goto out;
+#ifdef CONFIG_TCP_MD5SIG
+ ret = tcp_v4_md5_send_response_prepare(skb, 0,
+ MAX_TCP_OPTION_SPACE - arg.iov[0].iov_len,
+ &opts, sk);
- genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto out;
+ if (ret == -1)
+ return;
- }
+ arg.iov[0].iov_len += ret;
#endif
if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
unsigned int remaining;
- struct tcp_out_options opts;
int used;
remaining = sizeof(rep.opt);
#ifdef CONFIG_TCP_MD5SIG
- if (key)
+ if (opts.md5)
remaining -= TCPOLEN_MD5SIG_ALIGNED;
#endif
- memset(&opts, 0, sizeof(opts));
-
used = tcp_extopt_response_prepare(skb, TCPHDR_RST, remaining,
&opts, sk);
@@ -707,19 +675,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
}
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
- rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
- /* Update length and the length the header thinks exists */
- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
- rep.th.doff = arg.iov[0].iov_len / 4;
-
- tcp_v4_md5_hash_hdr((__u8 *)&rep.opt[offset],
- key, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &rep.th);
- }
+ tcp_v4_md5_send_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
@@ -750,11 +706,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
local_bh_enable();
-
-#ifdef CONFIG_TCP_MD5SIG
-out:
- rcu_read_unlock();
-#endif
}
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
@@ -772,17 +723,19 @@ static void tcp_v4_send_ack(const struct sock *sk,
__be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
struct hlist_head *extopt_list = NULL;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
-#endif
+ struct tcp_out_options opts;
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
int offset = 0;
+#ifdef CONFIG_TCP_MD5SIG
+ int ret;
+#endif
extopt_list = tcp_extopt_get_list(sk);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
+ memset(&opts, 0, sizeof(opts));
arg.iov[0].iov_base = (unsigned char *)&rep;
arg.iov[0].iov_len = sizeof(rep.th);
@@ -806,25 +759,24 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.th.window = htons(win);
#ifdef CONFIG_TCP_MD5SIG
- if (sk->sk_state == TCP_TIME_WAIT) {
- key = tcp_twsk_md5_key(tcp_twsk(sk));
- } else if (sk->sk_state == TCP_NEW_SYN_RECV) {
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
- AF_INET);
- } else {
- key = NULL; /* Should not happen */
- }
+ ret = tcp_v4_md5_send_response_prepare(skb, 0,
+ MAX_TCP_OPTION_SPACE - arg.iov[0].iov_len,
+ &opts, sk);
+
+ if (ret == -1)
+ return;
+
+ arg.iov[0].iov_len += ret;
#endif
if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
unsigned int remaining;
- struct tcp_out_options opts;
int used;
remaining = sizeof(rep.th) + sizeof(rep.opt) - arg.iov[0].iov_len;
#ifdef CONFIG_TCP_MD5SIG
- if (key)
+ if (opts.md5)
remaining -= TCPOLEN_MD5SIG_ALIGNED;
#endif
@@ -841,18 +793,11 @@ static void tcp_v4_send_ack(const struct sock *sk,
}
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
- rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
+ if (opts.md5) {
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
rep.th.doff = arg.iov[0].iov_len / 4;
-
- tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
- key, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &rep.th);
}
+ tcp_v4_md5_send_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
#endif
arg.flags = reply_flags;
@@ -961,374 +906,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
-#ifdef CONFIG_TCP_MD5SIG
-/*
- * RFC2385 MD5 checksumming requires a mapping of
- * IP address->MD5 Key.
- * We need to maintain these in the sk structure.
- */
-
-/* Find the Key structure for an address. */
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *key;
- const struct tcp_md5sig_info *md5sig;
- __be32 mask;
- struct tcp_md5sig_key *best_match = NULL;
- bool match;
-
- /* caller either holds rcu_read_lock() or socket lock */
- md5sig = rcu_dereference_check(tp->md5sig_info,
- lockdep_sock_is_held(sk));
- if (!md5sig)
- return NULL;
-
- hlist_for_each_entry_rcu(key, &md5sig->head, node) {
- if (key->family != family)
- continue;
-
- if (family == AF_INET) {
- mask = inet_make_mask(key->prefixlen);
- match = (key->addr.a4.s_addr & mask) ==
- (addr->a4.s_addr & mask);
-#if IS_ENABLED(CONFIG_IPV6)
- } else if (family == AF_INET6) {
- match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
- key->prefixlen);
-#endif
- } else {
- match = false;
- }
-
- if (match && (!best_match ||
- key->prefixlen > best_match->prefixlen))
- best_match = key;
- }
- return best_match;
-}
-EXPORT_SYMBOL(tcp_md5_do_lookup);
-
-static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family, u8 prefixlen)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *key;
- unsigned int size = sizeof(struct in_addr);
- const struct tcp_md5sig_info *md5sig;
-
- /* caller either holds rcu_read_lock() or socket lock */
- md5sig = rcu_dereference_check(tp->md5sig_info,
- lockdep_sock_is_held(sk));
- if (!md5sig)
- return NULL;
-#if IS_ENABLED(CONFIG_IPV6)
- if (family == AF_INET6)
- size = sizeof(struct in6_addr);
-#endif
- hlist_for_each_entry_rcu(key, &md5sig->head, node) {
- if (key->family != family)
- continue;
- if (!memcmp(&key->addr, addr, size) &&
- key->prefixlen == prefixlen)
- return key;
- }
- return NULL;
-}
-
-struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk)
-{
- const union tcp_md5_addr *addr;
-
- addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
- return tcp_md5_do_lookup(sk, addr, AF_INET);
-}
-EXPORT_SYMBOL(tcp_v4_md5_lookup);
-
-/* This can be called on a newly created socket, from other files */
-int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
- gfp_t gfp)
-{
- /* Add Key to the list */
- struct tcp_md5sig_key *key;
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_info *md5sig;
-
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
- if (key) {
- /* Pre-existing entry - just update that one. */
- memcpy(key->key, newkey, newkeylen);
- key->keylen = newkeylen;
- return 0;
- }
-
- md5sig = rcu_dereference_protected(tp->md5sig_info,
- lockdep_sock_is_held(sk));
- if (!md5sig) {
- md5sig = kmalloc(sizeof(*md5sig), gfp);
- if (!md5sig)
- return -ENOMEM;
-
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- INIT_HLIST_HEAD(&md5sig->head);
- rcu_assign_pointer(tp->md5sig_info, md5sig);
- }
-
- key = sock_kmalloc(sk, sizeof(*key), gfp);
- if (!key)
- return -ENOMEM;
- if (!tcp_alloc_md5sig_pool()) {
- sock_kfree_s(sk, key, sizeof(*key));
- return -ENOMEM;
- }
-
- memcpy(key->key, newkey, newkeylen);
- key->keylen = newkeylen;
- key->family = family;
- key->prefixlen = prefixlen;
- memcpy(&key->addr, addr,
- (family == AF_INET6) ? sizeof(struct in6_addr) :
- sizeof(struct in_addr));
- hlist_add_head_rcu(&key->node, &md5sig->head);
- return 0;
-}
-EXPORT_SYMBOL(tcp_md5_do_add);
-
-int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
- u8 prefixlen)
-{
- struct tcp_md5sig_key *key;
-
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
- if (!key)
- return -ENOENT;
- hlist_del_rcu(&key->node);
- atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
- kfree_rcu(key, rcu);
- return 0;
-}
-EXPORT_SYMBOL(tcp_md5_do_del);
-
-static void tcp_clear_md5_list(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *key;
- struct hlist_node *n;
- struct tcp_md5sig_info *md5sig;
-
- md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
-
- hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
- hlist_del_rcu(&key->node);
- atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
- kfree_rcu(key, rcu);
- }
-}
-
-static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
-{
- struct tcp_md5sig cmd;
- struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
- u8 prefixlen = 32;
-
- if (optlen < sizeof(cmd))
- return -EINVAL;
-
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
- return -EFAULT;
-
- if (sin->sin_family != AF_INET)
- return -EINVAL;
-
- if (optname == TCP_MD5SIG_EXT &&
- cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
- prefixlen = cmd.tcpm_prefixlen;
- if (prefixlen > 32)
- return -EINVAL;
- }
-
- if (!cmd.tcpm_keylen)
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen);
-
- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
- return -EINVAL;
-
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
- GFP_KERNEL);
-}
-
-static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
- __be32 daddr, __be32 saddr,
- const struct tcphdr *th, int nbytes)
-{
- struct tcp4_pseudohdr *bp;
- struct scatterlist sg;
- struct tcphdr *_th;
-
- bp = hp->scratch;
- bp->saddr = saddr;
- bp->daddr = daddr;
- bp->pad = 0;
- bp->protocol = IPPROTO_TCP;
- bp->len = cpu_to_be16(nbytes);
-
- _th = (struct tcphdr *)(bp + 1);
- memcpy(_th, th, sizeof(*th));
- _th->check = 0;
-
- sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL,
- sizeof(*bp) + sizeof(*th));
- return crypto_ahash_update(hp->md5_req);
-}
-
-static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
- __be32 daddr, __be32 saddr, const struct tcphdr *th)
-{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
- if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-
-int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct sock *sk,
- const struct sk_buff *skb)
-{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
- const struct tcphdr *th = tcp_hdr(skb);
- __be32 saddr, daddr;
-
- if (sk) { /* valid for establish/request sockets */
- saddr = sk->sk_rcv_saddr;
- daddr = sk->sk_daddr;
- } else {
- const struct iphdr *iph = ip_hdr(skb);
- saddr = iph->saddr;
- daddr = iph->daddr;
- }
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
-
- if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
-
-#endif
-
-/* Called with rcu_read_lock() */
-static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
-{
-#ifdef CONFIG_TCP_MD5SIG
- /*
- * This gets called for each TCP segment that arrives
- * so we want to be efficient.
- * We have 3 drop cases:
- * o No MD5 hash and one expected.
- * o MD5 hash and we're not expecting one.
- * o MD5 hash and its wrong.
- */
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct iphdr *iph = ip_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
- int genhash;
- unsigned char newhash[16];
-
- hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
- AF_INET);
- hash_location = tcp_parse_md5sig_option(th);
-
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return false;
-
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return true;
- }
-
- if (!hash_expected && hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
- return true;
- }
-
- /* Okay, so this is hash_expected and hash_location -
- * so we need to calculate the checksum.
- */
- genhash = tcp_v4_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
-
- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
- &iph->saddr, ntohs(th->source),
- &iph->daddr, ntohs(th->dest),
- genhash ? " tcp_v4_calc_md5_hash failed"
- : "");
- return true;
- }
- return false;
-#endif
- return false;
-}
-
static void tcp_v4_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -1404,9 +981,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
-#endif
struct ip_options_rcu *inet_opt;
if (sk_acceptq_is_full(sk))
@@ -1453,20 +1027,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
tcp_initialize_rcv_mss(newsk);
#ifdef CONFIG_TCP_MD5SIG
- /* Copy over the MD5 key from the original socket */
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
- AF_INET);
- if (key) {
- /*
- * We're using one, so create a matching key
- * on the newsk structure. If we fail to get
- * memory, then we end up not copying the key
- * across. Shucks.
- */
- tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
- AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
- sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
- }
+ tcp_v4_md5_syn_recv_sock(sk, newsk);
#endif
if (__inet_inherit_port(sk, newsk) < 0)
@@ -1930,14 +1491,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
};
EXPORT_SYMBOL(ipv4_specific);
-#ifdef CONFIG_TCP_MD5SIG
-static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
- .md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_parse = tcp_v4_parse_md5_keys,
-};
-#endif
-
/* NOTE: A lot of things set to zero explicitly by call to
* sk_alloc() so need not be done here.
*/
@@ -1980,12 +1533,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
tcp_extopt_destroy(sk);
#ifdef CONFIG_TCP_MD5SIG
- /* Clean up the MD5 key list, if any */
- if (tp->md5sig_info) {
- tcp_clear_md5_list(sk);
- kfree_rcu(tp->md5sig_info, rcu);
- tp->md5sig_info = NULL;
- }
+ tcp_v4_md5_destroy_sock(sk);
#endif
/* Clean up a referenced TCP bind bucket. */
diff --git a/net/ipv4/tcp_md5.c b/net/ipv4/tcp_md5.c
new file mode 100644
index 000000000000..a31b404e6dbf
--- /dev/null
+++ b/net/ipv4/tcp_md5.c
@@ -0,0 +1,1102 @@
+#include <linux/inet_diag.h>
+#include <linux/inetdevice.h>
+#include <linux/tcp.h>
+#include <linux/tcp_md5.h>
+
+#include <crypto/hash.h>
+
+#include <net/inet6_hashtables.h>
+
+static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
+static DEFINE_MUTEX(tcp_md5sig_mutex);
+static bool tcp_md5sig_pool_populated;
+
+#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
+
+static void __tcp_alloc_md5sig_pool(void)
+{
+ struct crypto_ahash *hash;
+ int cpu;
+
+ hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(hash))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
+ struct ahash_request *req;
+
+ if (!scratch) {
+ scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
+ sizeof(struct tcphdr),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!scratch)
+ return;
+ per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
+ }
+ if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
+ continue;
+
+ req = ahash_request_alloc(hash, GFP_KERNEL);
+ if (!req)
+ return;
+
+ ahash_request_set_callback(req, 0, NULL, NULL);
+
+ per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
+ }
+ /* before setting tcp_md5sig_pool_populated, we must commit all writes
+ * to memory. See smp_rmb() in tcp_get_md5sig_pool()
+ */
+ smp_wmb();
+ tcp_md5sig_pool_populated = true;
+}
+
+static bool tcp_alloc_md5sig_pool(void)
+{
+ if (unlikely(!tcp_md5sig_pool_populated)) {
+ mutex_lock(&tcp_md5sig_mutex);
+
+ if (!tcp_md5sig_pool_populated)
+ __tcp_alloc_md5sig_pool();
+
+ mutex_unlock(&tcp_md5sig_mutex);
+ }
+ return tcp_md5sig_pool_populated;
+}
+
+static void tcp_put_md5sig_pool(void)
+{
+ local_bh_enable();
+}
+
+/**
+ * tcp_get_md5sig_pool - get md5sig_pool for this user
+ *
+ * We use a percpu structure, so if we succeed, we exit with preemption
+ * and BH disabled, to make sure another thread or softirq handler
+ * won't try to get the same context.
+ */
+static struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
+{
+ local_bh_disable();
+
+ if (tcp_md5sig_pool_populated) {
+ /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
+ smp_rmb();
+ return this_cpu_ptr(&tcp_md5sig_pool);
+ }
+ local_bh_enable();
+ return NULL;
+}
+
+static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
+ const union tcp_md5_addr *addr,
+ int family, u8 prefixlen)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_key *key;
+ unsigned int size = sizeof(struct in_addr);
+ const struct tcp_md5sig_info *md5sig;
+
+ /* caller either holds rcu_read_lock() or socket lock */
+ md5sig = rcu_dereference_check(tp->md5sig_info,
+ lockdep_sock_is_held(sk));
+ if (!md5sig)
+ return NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6)
+ size = sizeof(struct in6_addr);
+#endif
+ hlist_for_each_entry_rcu(key, &md5sig->head, node) {
+ if (key->family != family)
+ continue;
+ if (!memcmp(&key->addr, addr, size) &&
+ key->prefixlen == prefixlen)
+ return key;
+ }
+ return NULL;
+}
+
+/* This can be called on a newly created socket, from other files */
+static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
+ int family, u8 prefixlen, const u8 *newkey,
+ u8 newkeylen, gfp_t gfp)
+{
+ /* Add Key to the list */
+ struct tcp_md5sig_key *key;
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_info *md5sig;
+
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ if (key) {
+ /* Pre-existing entry - just update that one. */
+ memcpy(key->key, newkey, newkeylen);
+ key->keylen = newkeylen;
+ return 0;
+ }
+
+ md5sig = rcu_dereference_protected(tp->md5sig_info,
+ lockdep_sock_is_held(sk));
+ if (!md5sig) {
+ md5sig = kmalloc(sizeof(*md5sig), gfp);
+ if (!md5sig)
+ return -ENOMEM;
+
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+ INIT_HLIST_HEAD(&md5sig->head);
+ rcu_assign_pointer(tp->md5sig_info, md5sig);
+ }
+
+ key = sock_kmalloc(sk, sizeof(*key), gfp);
+ if (!key)
+ return -ENOMEM;
+ if (!tcp_alloc_md5sig_pool()) {
+ sock_kfree_s(sk, key, sizeof(*key));
+ return -ENOMEM;
+ }
+
+ memcpy(key->key, newkey, newkeylen);
+ key->keylen = newkeylen;
+ key->family = family;
+ key->prefixlen = prefixlen;
+ memcpy(&key->addr, addr,
+ (family == AF_INET6) ? sizeof(struct in6_addr) :
+ sizeof(struct in_addr));
+ hlist_add_head_rcu(&key->node, &md5sig->head);
+ return 0;
+}
+
+static void tcp_clear_md5_list(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_key *key;
+ struct hlist_node *n;
+ struct tcp_md5sig_info *md5sig;
+
+ md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
+
+ hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
+ hlist_del_rcu(&key->node);
+ atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
+ kfree_rcu(key, rcu);
+ }
+}
+
+static int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
+ int family, u8 prefixlen)
+{
+ struct tcp_md5sig_key *key;
+
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ if (!key)
+ return -ENOENT;
+ hlist_del_rcu(&key->node);
+ atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
+ kfree_rcu(key, rcu);
+ return 0;
+}
+
+static int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
+ const struct tcp_md5sig_key *key)
+{
+ struct scatterlist sg;
+
+ sg_init_one(&sg, key->key, key->keylen);
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen);
+ return crypto_ahash_update(hp->md5_req);
+}
+
+static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
+ char __user *optval, int optlen)
+{
+ struct tcp_md5sig cmd;
+ struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
+ u8 prefixlen = 32;
+
+ if (optlen < sizeof(cmd))
+ return -EINVAL;
+
+ if (copy_from_user(&cmd, optval, sizeof(cmd)))
+ return -EFAULT;
+
+ if (sin->sin_family != AF_INET)
+ return -EINVAL;
+
+ if (optname == TCP_MD5SIG_EXT &&
+ cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
+ prefixlen = cmd.tcpm_prefixlen;
+ if (prefixlen > 32)
+ return -EINVAL;
+ }
+
+ if (!cmd.tcpm_keylen)
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
+ AF_INET, prefixlen);
+
+ if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+ return -EINVAL;
+
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
+ AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
+ GFP_KERNEL);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
+ char __user *optval, int optlen)
+{
+ struct tcp_md5sig cmd;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
+ u8 prefixlen;
+
+ if (optlen < sizeof(cmd))
+ return -EINVAL;
+
+ if (copy_from_user(&cmd, optval, sizeof(cmd)))
+ return -EFAULT;
+
+ if (sin6->sin6_family != AF_INET6)
+ return -EINVAL;
+
+ if (optname == TCP_MD5SIG_EXT &&
+ cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
+ prefixlen = cmd.tcpm_prefixlen;
+ if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
+ prefixlen > 32))
+ return -EINVAL;
+ } else {
+ prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
+ }
+
+ if (!cmd.tcpm_keylen) {
+ if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ AF_INET, prefixlen);
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+ AF_INET6, prefixlen);
+ }
+
+ if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+ return -EINVAL;
+
+ if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ AF_INET, prefixlen, cmd.tcpm_key,
+ cmd.tcpm_keylen, GFP_KERNEL);
+
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+ AF_INET6, prefixlen, cmd.tcpm_key,
+ cmd.tcpm_keylen, GFP_KERNEL);
+}
+#endif
+
+static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
+ __be32 daddr, __be32 saddr,
+ const struct tcphdr *th, int nbytes)
+{
+ struct tcp4_pseudohdr *bp;
+ struct scatterlist sg;
+ struct tcphdr *_th;
+
+ bp = hp->scratch;
+ bp->saddr = saddr;
+ bp->daddr = daddr;
+ bp->pad = 0;
+ bp->protocol = IPPROTO_TCP;
+ bp->len = cpu_to_be16(nbytes);
+
+ _th = (struct tcphdr *)(bp + 1);
+ memcpy(_th, th, sizeof(*th));
+ _th->check = 0;
+
+ sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+ sizeof(*bp) + sizeof(*th));
+ return crypto_ahash_update(hp->md5_req);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr,
+ const struct tcphdr *th, int nbytes)
+{
+ struct tcp6_pseudohdr *bp;
+ struct scatterlist sg;
+ struct tcphdr *_th;
+
+ bp = hp->scratch;
+ /* 1. TCP pseudo-header (RFC2460) */
+ bp->saddr = *saddr;
+ bp->daddr = *daddr;
+ bp->protocol = cpu_to_be32(IPPROTO_TCP);
+ bp->len = cpu_to_be32(nbytes);
+
+ _th = (struct tcphdr *)(bp + 1);
+ memcpy(_th, th, sizeof(*th));
+ _th->check = 0;
+
+ sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+ sizeof(*bp) + sizeof(*th));
+ return crypto_ahash_update(hp->md5_req);
+}
+#endif
+
+static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
+ __be32 daddr, __be32 saddr,
+ const struct tcphdr *th)
+{
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+ if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
+ const struct in6_addr *daddr,
+ struct in6_addr *saddr, const struct tcphdr *th)
+{
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+ if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+#endif
+
+/* RFC2385 MD5 checksumming requires a mapping of
+ * IP address->MD5 Key.
+ * We need to maintain these in the sk structure.
+ */
+
+/* Find the Key structure for an address. */
+static struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
+ const union tcp_md5_addr *addr,
+ int family)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_key *key;
+ const struct tcp_md5sig_info *md5sig;
+ __be32 mask;
+ struct tcp_md5sig_key *best_match = NULL;
+ bool match;
+
+ /* caller either holds rcu_read_lock() or socket lock */
+ md5sig = rcu_dereference_check(tp->md5sig_info,
+ lockdep_sock_is_held(sk));
+ if (!md5sig)
+ return NULL;
+
+ hlist_for_each_entry_rcu(key, &md5sig->head, node) {
+ if (key->family != family)
+ continue;
+
+ if (family == AF_INET) {
+ mask = inet_make_mask(key->prefixlen);
+ match = (key->addr.a4.s_addr & mask) ==
+ (addr->a4.s_addr & mask);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
+ key->prefixlen);
+#endif
+ } else {
+ match = false;
+ }
+
+ if (match && (!best_match ||
+ key->prefixlen > best_match->prefixlen))
+ best_match = key;
+ }
+ return best_match;
+}
+
+/* Parse MD5 Signature option */
+static const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
+{
+ int length = (th->doff << 2) - sizeof(*th);
+ const u8 *ptr = (const u8 *)(th + 1);
+
+ /* If the TCP option area is too short, we can short-cut */
+ if (length < TCPOLEN_MD5SIG)
+ return NULL;
+
+ while (length > 0) {
+ int opcode = *ptr++;
+ int opsize;
+
+ switch (opcode) {
+ case TCPOPT_EOL:
+ return NULL;
+ case TCPOPT_NOP:
+ length--;
+ continue;
+ default:
+ opsize = *ptr++;
+ if (opsize < 2 || opsize > length)
+ return NULL;
+ if (opcode == TCPOPT_MD5SIG)
+ return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
+ }
+ ptr += opsize - 2;
+ length -= opsize;
+ }
+ return NULL;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
+ const struct in6_addr *addr)
+{
+ return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
+}
+#endif
+
+static int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
+ const struct sk_buff *skb,
+ unsigned int header_len)
+{
+ struct scatterlist sg;
+ const struct tcphdr *tp = tcp_hdr(skb);
+ struct ahash_request *req = hp->md5_req;
+ unsigned int i;
+ const unsigned int head_data_len = skb_headlen(skb) > header_len ?
+ skb_headlen(skb) - header_len : 0;
+ const struct skb_shared_info *shi = skb_shinfo(skb);
+ struct sk_buff *frag_iter;
+
+ sg_init_table(&sg, 1);
+
+ sg_set_buf(&sg, ((u8 *)tp) + header_len, head_data_len);
+ ahash_request_set_crypt(req, &sg, NULL, head_data_len);
+ if (crypto_ahash_update(req))
+ return 1;
+
+ for (i = 0; i < shi->nr_frags; ++i) {
+ const struct skb_frag_struct *f = &shi->frags[i];
+ unsigned int offset = f->page_offset;
+ struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
+
+ sg_set_page(&sg, page, skb_frag_size(f),
+ offset_in_page(offset));
+ ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
+ if (crypto_ahash_update(req))
+ return 1;
+ }
+
+ skb_walk_frags(skb, frag_iter)
+ if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
+ return 1;
+
+ return 0;
+}
+
+int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
+ const __u8 *hash_location = NULL;
+
+ rcu_read_lock();
+ hash_location = tcp_parse_md5sig_option(th);
+ if (sk && sk_fullsock(sk)) {
+ opts->md5 = tcp_md5_do_lookup(sk,
+ (union tcp_md5_addr *)&iph->saddr,
+ AF_INET);
+ } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+
+ opts->md5 = tcp_twsk_md5_key(tcptw);
+ } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
+ opts->md5 = tcp_md5_do_lookup(sk,
+ (union tcp_md5_addr *)&iph->saddr,
+ AF_INET);
+ } else if (hash_location) {
+ unsigned char newhash[16];
+ struct sock *sk1;
+ int genhash;
+
+ /* The active side is gone. Try to find the listening socket
+ * through the source port, and then the md5 key through that
+ * socket. We do not lose any security here: the incoming packet
+ * is checked against the md5 hash of the key we find; no RST is
+ * generated if the hash doesn't match.
+ */
+ sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
+ &tcp_hashinfo, NULL, 0,
+ iph->saddr,
+ th->source, iph->daddr,
+ ntohs(th->source), inet_iif(skb),
+ tcp_v4_sdif(skb));
+ /* don't send rst if it can't find key */
+ if (!sk1)
+ goto out_err;
+
+ opts->md5 = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
+ &iph->saddr, AF_INET);
+ if (!opts->md5)
+ goto out_err;
+
+ genhash = tcp_v4_md5_hash_skb(newhash, opts->md5, NULL, skb);
+ if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ goto out_err;
+ }
+
+ if (opts->md5)
+ return TCPOLEN_MD5SIG_ALIGNED;
+
+ rcu_read_unlock();
+ return 0;
+
+out_err:
+ rcu_read_unlock();
+ return -1;
+}
+
+void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ if (opts->md5) {
+ *topt++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+
+ tcp_v4_md5_hash_hdr((__u8 *)topt, opts->md5,
+ ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, t1);
+ rcu_read_unlock();
+ }
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ const __u8 *hash_location = NULL;
+
+ rcu_read_lock();
+ hash_location = tcp_parse_md5sig_option(th);
+ if (sk && sk_fullsock(sk)) {
+ opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+
+ opts->md5 = tcp_twsk_md5_key(tcptw);
+ } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
+ opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ } else if (hash_location) {
+ unsigned char newhash[16];
+ struct sock *sk1;
+ int genhash;
+
+ /* The active side is gone. Try to find the listening socket
+ * through the source port, and then the md5 key through that
+ * socket. We do not lose any security here: the incoming packet
+ * is checked against the md5 hash of the key we find; no RST is
+ * generated if the hash doesn't match.
+ */
+ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+ &tcp_hashinfo, NULL, 0,
+ &ipv6h->saddr,
+ th->source, &ipv6h->daddr,
+ ntohs(th->source), tcp_v6_iif(skb),
+ tcp_v6_sdif(skb));
+ if (!sk1)
+ goto out_err;
+
+ opts->md5 = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+ if (!opts->md5)
+ goto out_err;
+
+ genhash = tcp_v6_md5_hash_skb(newhash, opts->md5, NULL, skb);
+ if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ goto out_err;
+ }
+
+ if (opts->md5)
+ return TCPOLEN_MD5SIG_ALIGNED;
+
+ rcu_read_unlock();
+ return 0;
+
+out_err:
+ rcu_read_unlock();
+ return -1;
+}
+EXPORT_SYMBOL_GPL(tcp_v6_md5_send_response_prepare);
+
+void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ if (opts->md5) {
+ *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
+ tcp_v6_md5_hash_hdr((__u8 *)topt, opts->md5,
+ &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, t1);
+
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL_GPL(tcp_v6_md5_send_response_write);
+#endif
+
+struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk)
+{
+ const union tcp_md5_addr *addr;
+
+ addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
+ return tcp_md5_do_lookup(sk, addr, AF_INET);
+}
+EXPORT_SYMBOL(tcp_v4_md5_lookup);
+
+int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
+ const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+ const struct tcphdr *th = tcp_hdr(skb);
+ __be32 saddr, daddr;
+
+ if (sk) { /* valid for establish/request sockets */
+ saddr = sk->sk_rcv_saddr;
+ daddr = sk->sk_daddr;
+ } else {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ }
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+
+ if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int tcp_v6_md5_hash_skb(char *md5_hash,
+ const struct tcp_md5sig_key *key,
+ const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct in6_addr *saddr, *daddr;
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ if (sk) { /* valid for establish/request sockets */
+ saddr = &sk->sk_v6_rcv_saddr;
+ daddr = &sk->sk_v6_daddr;
+ } else {
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+
+ saddr = &ip6h->saddr;
+ daddr = &ip6h->daddr;
+ }
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+
+ if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(tcp_v6_md5_hash_skb);
+#endif
+
+/* Called with rcu_read_lock() */
+bool tcp_v4_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ /* This gets called for each TCP segment that arrives
+ * so we want to be efficient.
+ * We have 3 drop cases:
+ * o No MD5 hash and one expected.
+ * o MD5 hash and we're not expecting one.
+ * o MD5 hash and it's wrong.
+ */
+ const __u8 *hash_location = NULL;
+ struct tcp_md5sig_key *hash_expected;
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ int genhash;
+ unsigned char newhash[16];
+
+ hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
+ AF_INET);
+ hash_location = tcp_parse_md5sig_option(th);
+
+ /* We've parsed the options - do we have a hash? */
+ if (!hash_expected && !hash_location)
+ return false;
+
+ if (hash_expected && !hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ return true;
+ }
+
+ if (!hash_expected && hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ return true;
+ }
+
+ /* Okay, we have both hash_expected and hash_location -
+ * so we need to calculate the checksum.
+ */
+ genhash = tcp_v4_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, skb);
+
+ if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
+ net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
+ &iph->saddr, ntohs(th->source),
+ &iph->daddr, ntohs(th->dest),
+ genhash ? " tcp_v4_calc_md5_hash failed"
+ : "");
+ return true;
+ }
+ return false;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+bool tcp_v6_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const __u8 *hash_location = NULL;
+ struct tcp_md5sig_key *hash_expected;
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ int genhash;
+ u8 newhash[16];
+
+ hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
+ hash_location = tcp_parse_md5sig_option(th);
+
+ /* We've parsed the options - do we have a hash? */
+ if (!hash_expected && !hash_location)
+ return false;
+
+ if (hash_expected && !hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ return true;
+ }
+
+ if (!hash_expected && hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ return true;
+ }
+
+ /* check the signature */
+ genhash = tcp_v6_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, skb);
+
+ if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
+ net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
+ genhash ? "failed" : "mismatch",
+ &ip6h->saddr, ntohs(th->source),
+ &ip6h->daddr, ntohs(th->dest));
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(tcp_v6_inbound_md5_hash);
+#endif
+
+void tcp_v4_md5_destroy_sock(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* Clean up the MD5 key list, if any */
+ if (tp->md5sig_info) {
+ tcp_clear_md5_list(sk);
+ kfree_rcu(tp->md5sig_info, rcu);
+ tp->md5sig_info = NULL;
+ }
+}
+
+void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct tcp_md5sig_key *key;
+
+ /* Copy over the MD5 key from the original socket */
+ key = tcp_md5_do_lookup(listener, (union tcp_md5_addr *)&inet->inet_daddr,
+ AF_INET);
+ if (key) {
+ /* We're using one, so create a matching key
+ * on the sk structure. If we fail to get
+ * memory, then we end up not copying the key
+ * across. Shucks.
+ */
+ tcp_md5_do_add(sk, (union tcp_md5_addr *)&inet->inet_daddr,
+ AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+ }
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk)
+{
+ struct tcp_md5sig_key *key;
+
+ /* Copy over the MD5 key from the original socket */
+ key = tcp_v6_md5_do_lookup(listener, &sk->sk_v6_daddr);
+ if (key) {
+ /* We're using one, so create a matching key
+ * on the newsk structure. If we fail to get
+ * memory, then we end up not copying the key
+ * across. Shucks.
+ */
+ tcp_md5_do_add(sk, (union tcp_md5_addr *)&sk->sk_v6_daddr,
+ AF_INET6, 128, key->key, key->keylen,
+ sk_gfp_mask(sk, GFP_ATOMIC));
+ }
+}
+EXPORT_SYMBOL_GPL(tcp_v6_md5_syn_recv_sock);
+
+struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk)
+{
+ return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
+}
+EXPORT_SYMBOL_GPL(tcp_v6_md5_lookup);
+#endif
+
+void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw)
+{
+ struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_key *key;
+
+ /* The timewait bucket does not have the key DB from the
+ * sock structure. We just make a quick copy of the
+ * md5 key being used (if indeed we are using one)
+ * so the timewait ack generating code has the key.
+ */
+ tcptw->tw_md5_key = NULL;
+ key = tp->af_specific->md5_lookup(sk, sk);
+ if (key) {
+ tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
+ BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
+ }
+}
+
+static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
+ const struct tcp_md5sig_key *key)
+{
+ info->tcpm_family = key->family;
+ info->tcpm_prefixlen = key->prefixlen;
+ info->tcpm_keylen = key->keylen;
+ memcpy(info->tcpm_key, key->key, key->keylen);
+
+ if (key->family == AF_INET)
+ info->tcpm_addr[0] = key->addr.a4.s_addr;
+ #if IS_ENABLED(CONFIG_IPV6)
+ else if (key->family == AF_INET6)
+ memcpy(&info->tcpm_addr, &key->addr.a6,
+ sizeof(info->tcpm_addr));
+ #endif
+}
+
+static int tcp_diag_put_md5sig(struct sk_buff *skb,
+ const struct tcp_md5sig_info *md5sig)
+{
+ const struct tcp_md5sig_key *key;
+ struct tcp_diag_md5sig *info;
+ struct nlattr *attr;
+ int md5sig_count = 0;
+
+ hlist_for_each_entry_rcu(key, &md5sig->head, node)
+ md5sig_count++;
+ if (md5sig_count == 0)
+ return 0;
+
+ attr = nla_reserve(skb, INET_DIAG_MD5SIG,
+ md5sig_count * sizeof(struct tcp_diag_md5sig));
+ if (!attr)
+ return -EMSGSIZE;
+
+ info = nla_data(attr);
+ memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
+ hlist_for_each_entry_rcu(key, &md5sig->head, node) {
+ tcp_diag_md5sig_fill(info++, key);
+ if (--md5sig_count == 0)
+ break;
+ }
+
+ return 0;
+}
+
+int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb)
+{
+ if (net_admin) {
+ struct tcp_md5sig_info *md5sig;
+ int err = 0;
+
+ rcu_read_lock();
+ md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
+ if (md5sig)
+ err = tcp_diag_put_md5sig(skb, md5sig);
+ rcu_read_unlock();
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tcp_md5_diag_get_aux);
+
+int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin)
+{
+ int size = 0;
+
+ if (net_admin && sk_fullsock(sk)) {
+ const struct tcp_md5sig_info *md5sig;
+ const struct tcp_md5sig_key *key;
+ size_t md5sig_count = 0;
+
+ rcu_read_lock();
+ md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
+ if (md5sig) {
+ hlist_for_each_entry_rcu(key, &md5sig->head, node)
+ md5sig_count++;
+ }
+ rcu_read_unlock();
+ size += nla_total_size(md5sig_count *
+ sizeof(struct tcp_diag_md5sig));
+ }
+
+ return size;
+}
+EXPORT_SYMBOL_GPL(tcp_md5_diag_get_aux_size);
+
+const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
+ .md5_lookup = tcp_v4_md5_lookup,
+ .calc_md5_hash = tcp_v4_md5_hash_skb,
+ .md5_parse = tcp_v4_parse_md5_keys,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
+ .md5_lookup = tcp_v6_md5_lookup,
+ .calc_md5_hash = tcp_v6_md5_hash_skb,
+ .md5_parse = tcp_v6_parse_md5_keys,
+};
+EXPORT_SYMBOL_GPL(tcp_sock_ipv6_specific);
+
+const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
+ .md5_lookup = tcp_v4_md5_lookup,
+ .calc_md5_hash = tcp_v4_md5_hash_skb,
+ .md5_parse = tcp_v6_parse_md5_keys,
+};
+EXPORT_SYMBOL_GPL(tcp_sock_ipv6_mapped_specific);
+#endif
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index aa2ff9aadad0..f33214b29167 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
+#include <linux/tcp_md5.h>
#include <linux/workqueue.h>
#include <linux/static_key.h>
#include <net/tcp.h>
@@ -295,21 +296,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
INIT_HLIST_HEAD(&tp->tcp_option_list);
}
#ifdef CONFIG_TCP_MD5SIG
- /*
- * The timewait bucket does not have the key DB from the
- * sock structure. We just make a quick copy of the
- * md5 key being used (if indeed we are using one)
- * so the timewait ack generating code has the key.
- */
- do {
- struct tcp_md5sig_key *key;
- tcptw->tw_md5_key = NULL;
- key = tp->af_specific->md5_lookup(sk, sk);
- if (key) {
- tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
- BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
- }
- } while (0);
+ tcp_md5_time_wait(sk, tw);
#endif
/* Get the TIME_WAIT timeout firing. */
@@ -346,13 +333,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
- struct tcp_timewait_sock *twsk = tcp_twsk(sk);
-
- if (twsk->tw_md5_key)
- kfree_rcu(twsk->tw_md5_key, rcu);
+ tcp_md5_twsk_destructor(sk);
#endif
- if (unlikely(!hlist_empty(&twsk->tcp_option_list)))
+ if (unlikely(!hlist_empty(&tcp_twsk(sk)->tcp_option_list)))
tcp_extopt_destroy(sk);
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
@@ -538,8 +522,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (newtp->af_specific->md5_lookup(sk, newsk))
- newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+ tcp_md5_add_header_len(sk, newsk);
#endif
if (unlikely(!hlist_empty(&treq->tcp_option_list)))
newtp->tcp_header_len += tcp_extopt_add_header(req_to_sk(req), newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7ea65f70e5ec..137645753abb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -42,6 +42,7 @@
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/static_key.h>
+#include <linux/tcp_md5.h>
#include <trace/events/tcp.h>
@@ -3238,8 +3239,7 @@ static void tcp_connect_init(struct sock *sk)
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
- if (tp->af_specific->md5_lookup(sk, sk))
- tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+ tcp_md5_add_header_len(sk, sk);
#endif
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 202a59511950..e9b72d794140 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -43,6 +43,7 @@
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
+#include <linux/tcp_md5.h>
#include <net/tcp.h>
#include <net/ndisc.h>
@@ -79,10 +80,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
-#ifdef CONFIG_TCP_MD5SIG
-static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
-static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
-#endif
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
@@ -501,218 +498,6 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
kfree_skb(inet_rsk(req)->pktopts);
}
-#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
- const struct in6_addr *addr)
-{
- return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
-}
-
-static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk)
-{
- return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
-}
-
-static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
-{
- struct tcp_md5sig cmd;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
- u8 prefixlen;
-
- if (optlen < sizeof(cmd))
- return -EINVAL;
-
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
- return -EFAULT;
-
- if (sin6->sin6_family != AF_INET6)
- return -EINVAL;
-
- if (optname == TCP_MD5SIG_EXT &&
- cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
- prefixlen = cmd.tcpm_prefixlen;
- if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
- prefixlen > 32))
- return -EINVAL;
- } else {
- prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
- }
-
- if (!cmd.tcpm_keylen) {
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen);
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen);
- }
-
- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
- return -EINVAL;
-
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen, cmd.tcpm_key,
- cmd.tcpm_keylen, GFP_KERNEL);
-
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, cmd.tcpm_key,
- cmd.tcpm_keylen, GFP_KERNEL);
-}
-
-static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr,
- const struct tcphdr *th, int nbytes)
-{
- struct tcp6_pseudohdr *bp;
- struct scatterlist sg;
- struct tcphdr *_th;
-
- bp = hp->scratch;
- /* 1. TCP pseudo-header (RFC2460) */
- bp->saddr = *saddr;
- bp->daddr = *daddr;
- bp->protocol = cpu_to_be32(IPPROTO_TCP);
- bp->len = cpu_to_be32(nbytes);
-
- _th = (struct tcphdr *)(bp + 1);
- memcpy(_th, th, sizeof(*th));
- _th->check = 0;
-
- sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL,
- sizeof(*bp) + sizeof(*th));
- return crypto_ahash_update(hp->md5_req);
-}
-
-static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct in6_addr *daddr, struct in6_addr *saddr,
- const struct tcphdr *th)
-{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
- if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-
-static int tcp_v6_md5_hash_skb(char *md5_hash,
- const struct tcp_md5sig_key *key,
- const struct sock *sk,
- const struct sk_buff *skb)
-{
- const struct in6_addr *saddr, *daddr;
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
- const struct tcphdr *th = tcp_hdr(skb);
-
- if (sk) { /* valid for establish/request sockets */
- saddr = &sk->sk_v6_rcv_saddr;
- daddr = &sk->sk_v6_daddr;
- } else {
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
- saddr = &ip6h->saddr;
- daddr = &ip6h->daddr;
- }
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
-
- if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-
-#endif
-
-static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
-{
-#ifdef CONFIG_TCP_MD5SIG
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
- int genhash;
- u8 newhash[16];
-
- hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
- hash_location = tcp_parse_md5sig_option(th);
-
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return false;
-
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return true;
- }
-
- if (!hash_expected && hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
- return true;
- }
-
- /* check the signature */
- genhash = tcp_v6_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
-
- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
- genhash ? "failed" : "mismatch",
- &ip6h->saddr, ntohs(th->source),
- &ip6h->daddr, ntohs(th->dest));
- return true;
- }
-#endif
- return false;
-}
-
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -787,56 +572,24 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
__be32 *topt;
struct hlist_head *extopt_list = NULL;
struct tcp_out_options extraopts;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key = NULL;
- const __u8 *hash_location = NULL;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-#endif
+
+ memset(&extraopts, 0, sizeof(extraopts));
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
- rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
- if (sk && sk_fullsock(sk)) {
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
- } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
- struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
-
- key = tcp_twsk_md5_key(tcptw);
- } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
- } else if (hash_location) {
- unsigned char newhash[16];
- struct sock *sk1 = NULL;
- int genhash;
-
- /* active side is lost. Try to find listening socket through
- * source port, and then find md5 key through listening socket.
- * we are not loose security here:
- * Incoming packet is checked with md5 hash with finding key,
- * no RST generated if md5 hash doesn't match.
- */
- sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, NULL, 0,
- &ipv6h->saddr,
- th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb),
- tcp_v6_sdif(skb));
- if (!sk1)
- goto out;
+{
+ int ret;
- key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
- if (!key)
- goto out;
+ ret = tcp_v6_md5_send_response_prepare(skb, 0,
+ MAX_TCP_OPTION_SPACE - tot_len,
+ &extraopts, sk);
- genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto out;
- }
+ if (ret == -1)
+ goto out;
- if (key)
- tot_len += TCPOLEN_MD5SIG_ALIGNED;
+ tot_len += ret;
+}
#endif
if (sk)
@@ -850,8 +603,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (!rst || !th->ack)
extraflags |= TCPHDR_ACK;
- memset(&extraopts, 0, sizeof(extraopts));
-
used = tcp_extopt_response_prepare(skb, extraflags, remaining,
&extraopts, sk);
@@ -889,13 +640,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
- *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
- tcp_v6_md5_hash_hdr((__u8 *)topt, key,
- &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, t1);
- }
+ if (extraopts.md5)
+ tcp_v6_md5_send_response_write(topt, skb, t1, &extraopts, sk);
#endif
if (unlikely(extopt_list && !hlist_empty(extopt_list)))
@@ -943,10 +689,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
out:
kfree_skb(buff);
-
-#ifdef CONFIG_TCP_MD5SIG
- rcu_read_unlock();
-#endif
}
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
@@ -1072,9 +814,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
-#endif
struct flowi6 fl6;
if (skb->protocol == htons(ETH_P_IP)) {
@@ -1219,18 +958,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
#ifdef CONFIG_TCP_MD5SIG
- /* Copy over the MD5 key from the original socket */
- key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
- if (key) {
- /* We're using one, so create a matching key
- * on the newsk structure. If we fail to get
- * memory, then we end up not copying the key
- * across. Shucks.
- */
- tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
- AF_INET6, 128, key->key, key->keylen,
- sk_gfp_mask(sk, GFP_ATOMIC));
- }
+ tcp_v6_md5_syn_recv_sock(sk, newsk);
#endif
if (__inet_inherit_port(sk, newsk) < 0) {
@@ -1692,14 +1420,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.mtu_reduced = tcp_v6_mtu_reduced,
};
-#ifdef CONFIG_TCP_MD5SIG
-static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
- .md5_lookup = tcp_v6_md5_lookup,
- .calc_md5_hash = tcp_v6_md5_hash_skb,
- .md5_parse = tcp_v6_parse_md5_keys,
-};
-#endif
-
/*
* TCP over IPv4 via INET6 API
*/
@@ -1722,14 +1442,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.mtu_reduced = tcp_v4_mtu_reduced,
};
-#ifdef CONFIG_TCP_MD5SIG
-static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
- .md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_parse = tcp_v6_parse_md5_keys,
-};
-#endif
-
/* NOTE: A lot of things set to zero explicitly by call to
* sk_alloc() so need not be done here.
*/
--
2.15.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [RFC 11/14] tcp_md5: Move TCP-MD5 code out of TCP itself
2017-12-18 21:51 ` [RFC 11/14] tcp_md5: Move TCP-MD5 code out of TCP itself Christoph Paasch
@ 2018-01-02 19:39 ` Mat Martineau
2018-01-05 1:15 ` Christoph Paasch
0 siblings, 1 reply; 17+ messages in thread
From: Mat Martineau @ 2018-01-02 19:39 UTC (permalink / raw)
To: Christoph Paasch; +Cc: netdev, Eric Dumazet, Alexei Starovoitov
Hi Christoph -
On Mon, 18 Dec 2017, Christoph Paasch wrote:
> This is all just copy-pasting the TCP_MD5-code into functions that are
> placed in net/ipv4/tcp_md5.c.
>
> Signed-off-by: Christoph Paasch <cpaasch@apple.com>
> Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
> ---
> include/linux/inet_diag.h | 1 +
> include/linux/tcp_md5.h | 138 ++++++
> include/net/tcp.h | 77 ----
> net/ipv4/Makefile | 1 +
> net/ipv4/tcp.c | 133 +-----
> net/ipv4/tcp_diag.c | 81 +---
> net/ipv4/tcp_input.c | 38 --
> net/ipv4/tcp_ipv4.c | 520 ++-------------------
> net/ipv4/tcp_md5.c | 1102 +++++++++++++++++++++++++++++++++++++++++++++
> net/ipv4/tcp_minisocks.c | 27 +-
> net/ipv4/tcp_output.c | 4 +-
> net/ipv6/tcp_ipv6.c | 318 +------------
> 12 files changed, 1305 insertions(+), 1135 deletions(-)
> create mode 100644 include/linux/tcp_md5.h
> create mode 100644 net/ipv4/tcp_md5.c
...
> diff --git a/include/linux/tcp_md5.h b/include/linux/tcp_md5.h
> new file mode 100644
> index 000000000000..f6a681cdded4
> --- /dev/null
> +++ b/include/linux/tcp_md5.h
> @@ -0,0 +1,138 @@
There's no license info in this new file. Take a look at the SPDX
identifiers recently added as the first line of some files (like
tcp_vegas.h) for one way to do it.
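For a new header like this one, that first line could be, for example:

/* SPDX-License-Identifier: GPL-2.0 */

(assuming GPL-2.0 is the license intended for these new files).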
> +#ifndef _LINUX_TCP_MD5_H
> +#define _LINUX_TCP_MD5_H
> +
> +#include <linux/skbuff.h>
> +
> +#ifdef CONFIG_TCP_MD5SIG
> +#include <linux/types.h>
> +
> +#include <net/tcp.h>
> +
> +union tcp_md5_addr {
> + struct in_addr a4;
> +#if IS_ENABLED(CONFIG_IPV6)
> + struct in6_addr a6;
> +#endif
> +};
> +
> +/* - key database */
> +struct tcp_md5sig_key {
> + struct hlist_node node;
> + u8 keylen;
> + u8 family; /* AF_INET or AF_INET6 */
> + union tcp_md5_addr addr;
> + u8 prefixlen;
> + u8 key[TCP_MD5SIG_MAXKEYLEN];
> + struct rcu_head rcu;
> +};
> +
> +/* - sock block */
> +struct tcp_md5sig_info {
> + struct hlist_head head;
> + struct rcu_head rcu;
> +};
> +
> +union tcp_md5sum_block {
> + struct tcp4_pseudohdr ip4;
> +#if IS_ENABLED(CONFIG_IPV6)
> + struct tcp6_pseudohdr ip6;
> +#endif
> +};
> +
> +/* - pool: digest algorithm, hash description and scratch buffer */
> +struct tcp_md5sig_pool {
> + struct ahash_request *md5_req;
> + void *scratch;
> +};
> +
> +extern const struct tcp_sock_af_ops tcp_sock_ipv4_specific;
> +extern const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
> +extern const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
> +
> +/* - functions */
> +int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
> + const struct sock *sk, const struct sk_buff *skb);
> +
> +struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
> + const struct sock *addr_sk);
> +
> +void tcp_v4_md5_destroy_sock(struct sock *sk);
> +
> +int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
> + unsigned int remaining,
> + struct tcp_out_options *opts,
> + const struct sock *sk);
> +
> +void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
> + struct tcphdr *t1,
> + struct tcp_out_options *opts,
> + const struct sock *sk);
> +
> +int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
> + unsigned int remaining,
> + struct tcp_out_options *opts,
> + const struct sock *sk);
> +
> +void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
> + struct tcphdr *t1,
> + struct tcp_out_options *opts,
> + const struct sock *sk);
> +
> +bool tcp_v4_inbound_md5_hash(const struct sock *sk,
> + const struct sk_buff *skb);
> +
> +void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
> +
> +void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
> +
> +void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw);
> +
> +struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
> + const struct sock *addr_sk);
> +
> +int tcp_v6_md5_hash_skb(char *md5_hash,
> + const struct tcp_md5sig_key *key,
> + const struct sock *sk,
> + const struct sk_buff *skb);
> +
> +bool tcp_v6_inbound_md5_hash(const struct sock *sk,
> + const struct sk_buff *skb);
> +
> +static inline void tcp_md5_twsk_destructor(struct sock *sk)
> +{
> + struct tcp_timewait_sock *twsk = tcp_twsk(sk);
> +
> + if (twsk->tw_md5_key)
> + kfree_rcu(twsk->tw_md5_key, rcu);
> +}
> +
> +static inline void tcp_md5_add_header_len(const struct sock *listener,
> + struct sock *sk)
> +{
> + struct tcp_sock *tp = tcp_sk(sk);
> +
> + if (tp->af_specific->md5_lookup(listener, sk))
> + tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
> +}
> +
> +int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb);
> +
> +int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin);
> +
> +#else
> +
> +static inline bool tcp_v4_inbound_md5_hash(const struct sock *sk,
> + const struct sk_buff *skb)
> +{
> + return false;
> +}
> +
> +static inline bool tcp_v6_inbound_md5_hash(const struct sock *sk,
> + const struct sk_buff *skb)
> +{
> + return false;
> +}
> +
> +#endif
> +
> +#endif /* _LINUX_TCP_MD5_H */
...
> diff --git a/net/ipv4/tcp_md5.c b/net/ipv4/tcp_md5.c
> new file mode 100644
> index 000000000000..a31b404e6dbf
> --- /dev/null
> +++ b/net/ipv4/tcp_md5.c
> @@ -0,0 +1,1102 @@
This new file needs license info too, maybe an SPDX identifier like
the one in tcp_input.c.
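For a .c file the identifier goes in a C++-style comment on the first
line, for example:

// SPDX-License-Identifier: GPL-2.0

(again assuming GPL-2.0 is the intended license).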
Regards,
Mat
> +#include <linux/inet_diag.h>
> +#include <linux/inetdevice.h>
> +#include <linux/tcp.h>
> +#include <linux/tcp_md5.h>
> +
> +#include <crypto/hash.h>
> +
> +#include <net/inet6_hashtables.h>
> +
> +static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
> +static DEFINE_MUTEX(tcp_md5sig_mutex);
> +static bool tcp_md5sig_pool_populated;
> +
> +#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
> +
> +static void __tcp_alloc_md5sig_pool(void)
> +{
> + struct crypto_ahash *hash;
> + int cpu;
> +
> + hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
> + if (IS_ERR(hash))
> + return;
> +
> + for_each_possible_cpu(cpu) {
> + void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
> + struct ahash_request *req;
> +
> + if (!scratch) {
> + scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
> + sizeof(struct tcphdr),
> + GFP_KERNEL,
> + cpu_to_node(cpu));
> + if (!scratch)
> + return;
> + per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
> + }
> + if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
> + continue;
> +
> + req = ahash_request_alloc(hash, GFP_KERNEL);
> + if (!req)
> + return;
> +
> + ahash_request_set_callback(req, 0, NULL, NULL);
> +
> + per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
> + }
> + /* before setting tcp_md5sig_pool_populated, we must commit all writes
> + * to memory. See smp_rmb() in tcp_get_md5sig_pool()
> + */
> + smp_wmb();
> + tcp_md5sig_pool_populated = true;
> +}
> +
> +static bool tcp_alloc_md5sig_pool(void)
> +{
> + if (unlikely(!tcp_md5sig_pool_populated)) {
> + mutex_lock(&tcp_md5sig_mutex);
> +
> + if (!tcp_md5sig_pool_populated)
> + __tcp_alloc_md5sig_pool();
> +
> + mutex_unlock(&tcp_md5sig_mutex);
> + }
> + return tcp_md5sig_pool_populated;
> +}
> +
> +static void tcp_put_md5sig_pool(void)
> +{
> + local_bh_enable();
> +}
> +
> +/**
> + * tcp_get_md5sig_pool - get md5sig_pool for this user
> + *
> + * We use a percpu structure, so if we succeed, we exit with preemption
> + * and BH disabled, to make sure another thread or softirq handling
> + * won't try to get the same context.
> + */
> +static struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
> +{
> + local_bh_disable();
> +
> + if (tcp_md5sig_pool_populated) {
> + /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
> + smp_rmb();
> + return this_cpu_ptr(&tcp_md5sig_pool);
> + }
> + local_bh_enable();
> + return NULL;
> +}
> +
> +static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
> + const union tcp_md5_addr *addr,
> + int family, u8 prefixlen)
> +{
> + const struct tcp_sock *tp = tcp_sk(sk);
> + struct tcp_md5sig_key *key;
> + unsigned int size = sizeof(struct in_addr);
> + const struct tcp_md5sig_info *md5sig;
> +
> + /* caller either holds rcu_read_lock() or socket lock */
> + md5sig = rcu_dereference_check(tp->md5sig_info,
> + lockdep_sock_is_held(sk));
> + if (!md5sig)
> + return NULL;
> +#if IS_ENABLED(CONFIG_IPV6)
> + if (family == AF_INET6)
> + size = sizeof(struct in6_addr);
> +#endif
> + hlist_for_each_entry_rcu(key, &md5sig->head, node) {
> + if (key->family != family)
> + continue;
> + if (!memcmp(&key->addr, addr, size) &&
> + key->prefixlen == prefixlen)
> + return key;
> + }
> + return NULL;
> +}
> +
> +/* This can be called on a newly created socket, from other files */
> +static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
> + int family, u8 prefixlen, const u8 *newkey,
> + u8 newkeylen, gfp_t gfp)
> +{
> + /* Add Key to the list */
> + struct tcp_md5sig_key *key;
> + struct tcp_sock *tp = tcp_sk(sk);
> + struct tcp_md5sig_info *md5sig;
> +
> + key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
> + if (key) {
> + /* Pre-existing entry - just update that one. */
> + memcpy(key->key, newkey, newkeylen);
> + key->keylen = newkeylen;
> + return 0;
> + }
> +
> + md5sig = rcu_dereference_protected(tp->md5sig_info,
> + lockdep_sock_is_held(sk));
> + if (!md5sig) {
> + md5sig = kmalloc(sizeof(*md5sig), gfp);
> + if (!md5sig)
> + return -ENOMEM;
> +
> + sk_nocaps_add(sk, NETIF_F_GSO_MASK);
> + INIT_HLIST_HEAD(&md5sig->head);
> + rcu_assign_pointer(tp->md5sig_info, md5sig);
> + }
> +
> + key = sock_kmalloc(sk, sizeof(*key), gfp);
> + if (!key)
> + return -ENOMEM;
> + if (!tcp_alloc_md5sig_pool()) {
> + sock_kfree_s(sk, key, sizeof(*key));
> + return -ENOMEM;
> + }
> +
> + memcpy(key->key, newkey, newkeylen);
> + key->keylen = newkeylen;
> + key->family = family;
> + key->prefixlen = prefixlen;
> + memcpy(&key->addr, addr,
> + (family == AF_INET6) ? sizeof(struct in6_addr) :
> + sizeof(struct in_addr));
> + hlist_add_head_rcu(&key->node, &md5sig->head);
> + return 0;
> +}
> +
> +static void tcp_clear_md5_list(struct sock *sk)
> +{
> + struct tcp_sock *tp = tcp_sk(sk);
> + struct tcp_md5sig_key *key;
> + struct hlist_node *n;
> + struct tcp_md5sig_info *md5sig;
> +
> + md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
> +
> + hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
> + hlist_del_rcu(&key->node);
> + atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
> + kfree_rcu(key, rcu);
> + }
> +}
> +
> +static int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
> + int family, u8 prefixlen)
> +{
> + struct tcp_md5sig_key *key;
> +
> + key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
> + if (!key)
> + return -ENOENT;
> + hlist_del_rcu(&key->node);
> + atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
> + kfree_rcu(key, rcu);
> + return 0;
> +}
> +
> +static int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
> + const struct tcp_md5sig_key *key)
> +{
> + struct scatterlist sg;
> +
> + sg_init_one(&sg, key->key, key->keylen);
> + ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen);
> + return crypto_ahash_update(hp->md5_req);
> +}
> +
> +static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
> + char __user *optval, int optlen)
> +{
> + struct tcp_md5sig cmd;
> + struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
> + u8 prefixlen = 32;
> +
> + if (optlen < sizeof(cmd))
> + return -EINVAL;
> +
> + if (copy_from_user(&cmd, optval, sizeof(cmd)))
> + return -EFAULT;
> +
> + if (sin->sin_family != AF_INET)
> + return -EINVAL;
> +
> + if (optname == TCP_MD5SIG_EXT &&
> + cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
> + prefixlen = cmd.tcpm_prefixlen;
> + if (prefixlen > 32)
> + return -EINVAL;
> + }
> +
> + if (!cmd.tcpm_keylen)
> + return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
> + AF_INET, prefixlen);
> +
> + if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
> + return -EINVAL;
> +
> + return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
> + AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
> + GFP_KERNEL);
> +}
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
> + char __user *optval, int optlen)
> +{
> + struct tcp_md5sig cmd;
> + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
> + u8 prefixlen;
> +
> + if (optlen < sizeof(cmd))
> + return -EINVAL;
> +
> + if (copy_from_user(&cmd, optval, sizeof(cmd)))
> + return -EFAULT;
> +
> + if (sin6->sin6_family != AF_INET6)
> + return -EINVAL;
> +
> + if (optname == TCP_MD5SIG_EXT &&
> + cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
> + prefixlen = cmd.tcpm_prefixlen;
> + if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
> + prefixlen > 32))
> + return -EINVAL;
> + } else {
> + prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
> + }
> +
> + if (!cmd.tcpm_keylen) {
> + if (ipv6_addr_v4mapped(&sin6->sin6_addr))
> + return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
> + AF_INET, prefixlen);
> + return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
> + AF_INET6, prefixlen);
> + }
> +
> + if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
> + return -EINVAL;
> +
> + if (ipv6_addr_v4mapped(&sin6->sin6_addr))
> + return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
> + AF_INET, prefixlen, cmd.tcpm_key,
> + cmd.tcpm_keylen, GFP_KERNEL);
> +
> + return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
> + AF_INET6, prefixlen, cmd.tcpm_key,
> + cmd.tcpm_keylen, GFP_KERNEL);
> +}
> +#endif
> +
> +static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
> + __be32 daddr, __be32 saddr,
> + const struct tcphdr *th, int nbytes)
> +{
> + struct tcp4_pseudohdr *bp;
> + struct scatterlist sg;
> + struct tcphdr *_th;
> +
> + bp = hp->scratch;
> + bp->saddr = saddr;
> + bp->daddr = daddr;
> + bp->pad = 0;
> + bp->protocol = IPPROTO_TCP;
> + bp->len = cpu_to_be16(nbytes);
> +
> + _th = (struct tcphdr *)(bp + 1);
> + memcpy(_th, th, sizeof(*th));
> + _th->check = 0;
> +
> + sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
> + ahash_request_set_crypt(hp->md5_req, &sg, NULL,
> + sizeof(*bp) + sizeof(*th));
> + return crypto_ahash_update(hp->md5_req);
> +}
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
> + const struct in6_addr *daddr,
> + const struct in6_addr *saddr,
> + const struct tcphdr *th, int nbytes)
> +{
> + struct tcp6_pseudohdr *bp;
> + struct scatterlist sg;
> + struct tcphdr *_th;
> +
> + bp = hp->scratch;
> + /* 1. TCP pseudo-header (RFC2460) */
> + bp->saddr = *saddr;
> + bp->daddr = *daddr;
> + bp->protocol = cpu_to_be32(IPPROTO_TCP);
> + bp->len = cpu_to_be32(nbytes);
> +
> + _th = (struct tcphdr *)(bp + 1);
> + memcpy(_th, th, sizeof(*th));
> + _th->check = 0;
> +
> + sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
> + ahash_request_set_crypt(hp->md5_req, &sg, NULL,
> + sizeof(*bp) + sizeof(*th));
> + return crypto_ahash_update(hp->md5_req);
> +}
> +#endif
> +
> +static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
> + __be32 daddr, __be32 saddr,
> + const struct tcphdr *th)
> +{
> + struct tcp_md5sig_pool *hp;
> + struct ahash_request *req;
> +
> + hp = tcp_get_md5sig_pool();
> + if (!hp)
> + goto clear_hash_noput;
> + req = hp->md5_req;
> +
> + if (crypto_ahash_init(req))
> + goto clear_hash;
> + if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
> + goto clear_hash;
> + if (tcp_md5_hash_key(hp, key))
> + goto clear_hash;
> + ahash_request_set_crypt(req, NULL, md5_hash, 0);
> + if (crypto_ahash_final(req))
> + goto clear_hash;
> +
> + tcp_put_md5sig_pool();
> + return 0;
> +
> +clear_hash:
> + tcp_put_md5sig_pool();
> +clear_hash_noput:
> + memset(md5_hash, 0, 16);
> + return 1;
> +}
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
> + const struct in6_addr *daddr,
> + struct in6_addr *saddr, const struct tcphdr *th)
> +{
> + struct tcp_md5sig_pool *hp;
> + struct ahash_request *req;
> +
> + hp = tcp_get_md5sig_pool();
> + if (!hp)
> + goto clear_hash_noput;
> + req = hp->md5_req;
> +
> + if (crypto_ahash_init(req))
> + goto clear_hash;
> + if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
> + goto clear_hash;
> + if (tcp_md5_hash_key(hp, key))
> + goto clear_hash;
> + ahash_request_set_crypt(req, NULL, md5_hash, 0);
> + if (crypto_ahash_final(req))
> + goto clear_hash;
> +
> + tcp_put_md5sig_pool();
> + return 0;
> +
> +clear_hash:
> + tcp_put_md5sig_pool();
> +clear_hash_noput:
> + memset(md5_hash, 0, 16);
> + return 1;
> +}
> +#endif
> +
> +/* RFC2385 MD5 checksumming requires a mapping of
> + * IP address->MD5 Key.
> + * We need to maintain these in the sk structure.
> + */
> +
> +/* Find the Key structure for an address. */
> +static struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
> + const union tcp_md5_addr *addr,
> + int family)
> +{
> + const struct tcp_sock *tp = tcp_sk(sk);
> + struct tcp_md5sig_key *key;
> + const struct tcp_md5sig_info *md5sig;
> + __be32 mask;
> + struct tcp_md5sig_key *best_match = NULL;
> + bool match;
> +
> + /* caller either holds rcu_read_lock() or socket lock */
> + md5sig = rcu_dereference_check(tp->md5sig_info,
> + lockdep_sock_is_held(sk));
> + if (!md5sig)
> + return NULL;
> +
> + hlist_for_each_entry_rcu(key, &md5sig->head, node) {
> + if (key->family != family)
> + continue;
> +
> + if (family == AF_INET) {
> + mask = inet_make_mask(key->prefixlen);
> + match = (key->addr.a4.s_addr & mask) ==
> + (addr->a4.s_addr & mask);
> +#if IS_ENABLED(CONFIG_IPV6)
> + } else if (family == AF_INET6) {
> + match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
> + key->prefixlen);
> +#endif
> + } else {
> + match = false;
> + }
> +
> + if (match && (!best_match ||
> + key->prefixlen > best_match->prefixlen))
> + best_match = key;
> + }
> + return best_match;
> +}
> +
> +/* Parse MD5 Signature option */
> +static const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
> +{
> + int length = (th->doff << 2) - sizeof(*th);
> + const u8 *ptr = (const u8 *)(th + 1);
> +
> + /* If the TCP option is too short, we can short cut */
> + if (length < TCPOLEN_MD5SIG)
> + return NULL;
> +
> + while (length > 0) {
> + int opcode = *ptr++;
> + int opsize;
> +
> + switch (opcode) {
> + case TCPOPT_EOL:
> + return NULL;
> + case TCPOPT_NOP:
> + length--;
> + continue;
> + default:
> + opsize = *ptr++;
> + if (opsize < 2 || opsize > length)
> + return NULL;
> + if (opcode == TCPOPT_MD5SIG)
> + return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
> + }
> + ptr += opsize - 2;
> + length -= opsize;
> + }
> + return NULL;
> +}
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
> + const struct in6_addr *addr)
> +{
> + return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
> +}
> +#endif
> +
> +static int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
> + const struct sk_buff *skb,
> + unsigned int header_len)
> +{
> + struct scatterlist sg;
> + const struct tcphdr *tp = tcp_hdr(skb);
> + struct ahash_request *req = hp->md5_req;
> + unsigned int i;
> + const unsigned int head_data_len = skb_headlen(skb) > header_len ?
> + skb_headlen(skb) - header_len : 0;
> + const struct skb_shared_info *shi = skb_shinfo(skb);
> + struct sk_buff *frag_iter;
> +
> + sg_init_table(&sg, 1);
> +
> + sg_set_buf(&sg, ((u8 *)tp) + header_len, head_data_len);
> + ahash_request_set_crypt(req, &sg, NULL, head_data_len);
> + if (crypto_ahash_update(req))
> + return 1;
> +
> + for (i = 0; i < shi->nr_frags; ++i) {
> + const struct skb_frag_struct *f = &shi->frags[i];
> + unsigned int offset = f->page_offset;
> + struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
> +
> + sg_set_page(&sg, page, skb_frag_size(f),
> + offset_in_page(offset));
> + ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
> + if (crypto_ahash_update(req))
> + return 1;
> + }
> +
> + skb_walk_frags(skb, frag_iter)
> + if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
> + return 1;
> +
> + return 0;
> +}
> +
> +int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
> + unsigned int remaining,
> + struct tcp_out_options *opts,
> + const struct sock *sk)
> +{
> + const struct tcphdr *th = tcp_hdr(skb);
> + const struct iphdr *iph = ip_hdr(skb);
> + const __u8 *hash_location = NULL;
> +
> + rcu_read_lock();
> + hash_location = tcp_parse_md5sig_option(th);
> + if (sk && sk_fullsock(sk)) {
> + opts->md5 = tcp_md5_do_lookup(sk,
> + (union tcp_md5_addr *)&iph->saddr,
> + AF_INET);
> + } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
> + struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
> +
> + opts->md5 = tcp_twsk_md5_key(tcptw);
> + } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
> + opts->md5 = tcp_md5_do_lookup(sk,
> + (union tcp_md5_addr *)&iph->saddr,
> + AF_INET);
> + } else if (hash_location) {
> + unsigned char newhash[16];
> + struct sock *sk1;
> + int genhash;
> +
> + /* active side is lost. Try to find listening socket through
> + * source port, and then find md5 key through listening socket.
> + * we are not loose security here:
> + * Incoming packet is checked with md5 hash with finding key,
> + * no RST generated if md5 hash doesn't match.
> + */
> + sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
> + &tcp_hashinfo, NULL, 0,
> + iph->saddr,
> + th->source, iph->daddr,
> + ntohs(th->source), inet_iif(skb),
> + tcp_v4_sdif(skb));
> + /* don't send rst if it can't find key */
> + if (!sk1)
> + goto out_err;
> +
> + opts->md5 = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
> + &iph->saddr, AF_INET);
> + if (!opts->md5)
> + goto out_err;
> +
> + genhash = tcp_v4_md5_hash_skb(newhash, opts->md5, NULL, skb);
> + if (genhash || memcmp(hash_location, newhash, 16) != 0)
> + goto out_err;
> + }
> +
> + if (opts->md5)
> + return TCPOLEN_MD5SIG_ALIGNED;
> +
> + rcu_read_unlock();
> + return 0;
> +
> +out_err:
> + rcu_read_unlock();
> + return -1;
> +}
> +
> +void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
> + struct tcphdr *t1,
> + struct tcp_out_options *opts,
> + const struct sock *sk)
> +{
> + if (opts->md5) {
> + *topt++ = htonl((TCPOPT_NOP << 24) |
> + (TCPOPT_NOP << 16) |
> + (TCPOPT_MD5SIG << 8) |
> + TCPOLEN_MD5SIG);
> +
> + tcp_v4_md5_hash_hdr((__u8 *)topt, opts->md5,
> + ip_hdr(skb)->saddr,
> + ip_hdr(skb)->daddr, t1);
> + rcu_read_unlock();
> + }
> +}
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
> + unsigned int remaining,
> + struct tcp_out_options *opts,
> + const struct sock *sk)
> +{
> + const struct tcphdr *th = tcp_hdr(skb);
> + struct ipv6hdr *ipv6h = ipv6_hdr(skb);
> + const __u8 *hash_location = NULL;
> +
> + rcu_read_lock();
> + hash_location = tcp_parse_md5sig_option(th);
> + if (sk && sk_fullsock(sk)) {
> + opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
> + } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
> + struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
> +
> + opts->md5 = tcp_twsk_md5_key(tcptw);
> + } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
> + opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
> + } else if (hash_location) {
> + unsigned char newhash[16];
> + struct sock *sk1;
> + int genhash;
> +
> + /* active side is lost. Try to find listening socket through
> + * source port, and then find md5 key through listening socket.
> + * we are not loose security here:
> + * Incoming packet is checked with md5 hash with finding key,
> + * no RST generated if md5 hash doesn't match.
> + */
> + sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
> + &tcp_hashinfo, NULL, 0,
> + &ipv6h->saddr,
> + th->source, &ipv6h->daddr,
> + ntohs(th->source), tcp_v6_iif(skb),
> + tcp_v6_sdif(skb));
> + if (!sk1)
> + goto out_err;
> +
> + opts->md5 = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
> + if (!opts->md5)
> + goto out_err;
> +
> + genhash = tcp_v6_md5_hash_skb(newhash, opts->md5, NULL, skb);
> + if (genhash || memcmp(hash_location, newhash, 16) != 0)
> + goto out_err;
> + }
> +
> + if (opts->md5)
> + return TCPOLEN_MD5SIG_ALIGNED;
> +
> + rcu_read_unlock();
> + return 0;
> +
> +out_err:
> + rcu_read_unlock();
> + return -1;
> +}
> +EXPORT_SYMBOL_GPL(tcp_v6_md5_send_response_prepare);
> +
> +void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
> + struct tcphdr *t1,
> + struct tcp_out_options *opts,
> + const struct sock *sk)
> +{
> + if (opts->md5) {
> + *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
> + (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
> + tcp_v6_md5_hash_hdr((__u8 *)topt, opts->md5,
> + &ipv6_hdr(skb)->saddr,
> + &ipv6_hdr(skb)->daddr, t1);
> +
> + rcu_read_unlock();
> + }
> +}
> +EXPORT_SYMBOL_GPL(tcp_v6_md5_send_response_write);
> +#endif
> +
> +struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
> + const struct sock *addr_sk)
> +{
> + const union tcp_md5_addr *addr;
> +
> + addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
> + return tcp_md5_do_lookup(sk, addr, AF_INET);
> +}
> +EXPORT_SYMBOL(tcp_v4_md5_lookup);
> +
> +int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
> + const struct sock *sk,
> + const struct sk_buff *skb)
> +{
> + struct tcp_md5sig_pool *hp;
> + struct ahash_request *req;
> + const struct tcphdr *th = tcp_hdr(skb);
> + __be32 saddr, daddr;
> +
> + if (sk) { /* valid for establish/request sockets */
> + saddr = sk->sk_rcv_saddr;
> + daddr = sk->sk_daddr;
> + } else {
> + const struct iphdr *iph = ip_hdr(skb);
> +
> + saddr = iph->saddr;
> + daddr = iph->daddr;
> + }
> +
> + hp = tcp_get_md5sig_pool();
> + if (!hp)
> + goto clear_hash_noput;
> + req = hp->md5_req;
> +
> + if (crypto_ahash_init(req))
> + goto clear_hash;
> +
> + if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
> + goto clear_hash;
> + if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
> + goto clear_hash;
> + if (tcp_md5_hash_key(hp, key))
> + goto clear_hash;
> + ahash_request_set_crypt(req, NULL, md5_hash, 0);
> + if (crypto_ahash_final(req))
> + goto clear_hash;
> +
> + tcp_put_md5sig_pool();
> + return 0;
> +
> +clear_hash:
> + tcp_put_md5sig_pool();
> +clear_hash_noput:
> + memset(md5_hash, 0, 16);
> + return 1;
> +}
> +EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +int tcp_v6_md5_hash_skb(char *md5_hash,
> + const struct tcp_md5sig_key *key,
> + const struct sock *sk,
> + const struct sk_buff *skb)
> +{
> + const struct in6_addr *saddr, *daddr;
> + struct tcp_md5sig_pool *hp;
> + struct ahash_request *req;
> + const struct tcphdr *th = tcp_hdr(skb);
> +
> + if (sk) { /* valid for establish/request sockets */
> + saddr = &sk->sk_v6_rcv_saddr;
> + daddr = &sk->sk_v6_daddr;
> + } else {
> + const struct ipv6hdr *ip6h = ipv6_hdr(skb);
> +
> + saddr = &ip6h->saddr;
> + daddr = &ip6h->daddr;
> + }
> +
> + hp = tcp_get_md5sig_pool();
> + if (!hp)
> + goto clear_hash_noput;
> + req = hp->md5_req;
> +
> + if (crypto_ahash_init(req))
> + goto clear_hash;
> +
> + if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
> + goto clear_hash;
> + if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
> + goto clear_hash;
> + if (tcp_md5_hash_key(hp, key))
> + goto clear_hash;
> + ahash_request_set_crypt(req, NULL, md5_hash, 0);
> + if (crypto_ahash_final(req))
> + goto clear_hash;
> +
> + tcp_put_md5sig_pool();
> + return 0;
> +
> +clear_hash:
> + tcp_put_md5sig_pool();
> +clear_hash_noput:
> + memset(md5_hash, 0, 16);
> + return 1;
> +}
> +EXPORT_SYMBOL_GPL(tcp_v6_md5_hash_skb);
> +#endif
> +
> +/* Called with rcu_read_lock() */
> +bool tcp_v4_inbound_md5_hash(const struct sock *sk,
> + const struct sk_buff *skb)
> +{
> + /* This gets called for each TCP segment that arrives
> + * so we want to be efficient.
> + * We have 3 drop cases:
> + * o No MD5 hash and one expected.
> + * o MD5 hash and we're not expecting one.
> + * o MD5 hash and it's wrong.
> + */
> + const __u8 *hash_location = NULL;
> + struct tcp_md5sig_key *hash_expected;
> + const struct iphdr *iph = ip_hdr(skb);
> + const struct tcphdr *th = tcp_hdr(skb);
> + int genhash;
> + unsigned char newhash[16];
> +
> + hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
> + AF_INET);
> + hash_location = tcp_parse_md5sig_option(th);
> +
> + /* We've parsed the options - do we have a hash? */
> + if (!hash_expected && !hash_location)
> + return false;
> +
> + if (hash_expected && !hash_location) {
> + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
> + return true;
> + }
> +
> + if (!hash_expected && hash_location) {
> + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
> + return true;
> + }
> +
> + /* Okay, so this is hash_expected and hash_location -
> + * so we need to calculate the checksum.
> + */
> + genhash = tcp_v4_md5_hash_skb(newhash,
> + hash_expected,
> + NULL, skb);
> +
> + if (genhash || memcmp(hash_location, newhash, 16) != 0) {
> + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
> + net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
> + &iph->saddr, ntohs(th->source),
> + &iph->daddr, ntohs(th->dest),
> + genhash ? " tcp_v4_calc_md5_hash failed"
> + : "");
> + return true;
> + }
> + return false;
> +}
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +bool tcp_v6_inbound_md5_hash(const struct sock *sk,
> + const struct sk_buff *skb)
> +{
> + const __u8 *hash_location = NULL;
> + struct tcp_md5sig_key *hash_expected;
> + const struct ipv6hdr *ip6h = ipv6_hdr(skb);
> + const struct tcphdr *th = tcp_hdr(skb);
> + int genhash;
> + u8 newhash[16];
> +
> + hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
> + hash_location = tcp_parse_md5sig_option(th);
> +
> + /* We've parsed the options - do we have a hash? */
> + if (!hash_expected && !hash_location)
> + return false;
> +
> + if (hash_expected && !hash_location) {
> + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
> + return true;
> + }
> +
> + if (!hash_expected && hash_location) {
> + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
> + return true;
> + }
> +
> + /* check the signature */
> + genhash = tcp_v6_md5_hash_skb(newhash,
> + hash_expected,
> + NULL, skb);
> +
> + if (genhash || memcmp(hash_location, newhash, 16) != 0) {
> + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
> + net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
> + genhash ? "failed" : "mismatch",
> + &ip6h->saddr, ntohs(th->source),
> + &ip6h->daddr, ntohs(th->dest));
> + return true;
> + }
> +
> + return false;
> +}
> +EXPORT_SYMBOL_GPL(tcp_v6_inbound_md5_hash);
> +#endif
> +
> +void tcp_v4_md5_destroy_sock(struct sock *sk)
> +{
> + struct tcp_sock *tp = tcp_sk(sk);
> +
> + /* Clean up the MD5 key list, if any */
> + if (tp->md5sig_info) {
> + tcp_clear_md5_list(sk);
> + kfree_rcu(tp->md5sig_info, rcu);
> + tp->md5sig_info = NULL;
> + }
> +}
> +
> +void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk)
> +{
> + struct inet_sock *inet = inet_sk(sk);
> + struct tcp_md5sig_key *key;
> +
> + /* Copy over the MD5 key from the original socket */
> + key = tcp_md5_do_lookup(listener, (union tcp_md5_addr *)&inet->inet_daddr,
> + AF_INET);
> + if (key) {
> + /* We're using one, so create a matching key
> + * on the sk structure. If we fail to get
> + * memory, then we end up not copying the key
> + * across. Shucks.
> + */
> + tcp_md5_do_add(sk, (union tcp_md5_addr *)&inet->inet_daddr,
> + AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
> + sk_nocaps_add(sk, NETIF_F_GSO_MASK);
> + }
> +}
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk)
> +{
> + struct tcp_md5sig_key *key;
> +
> + /* Copy over the MD5 key from the original socket */
> + key = tcp_v6_md5_do_lookup(listener, &sk->sk_v6_daddr);
> + if (key) {
> + /* We're using one, so create a matching key
> + * on the newsk structure. If we fail to get
> + * memory, then we end up not copying the key
> + * across. Shucks.
> + */
> + tcp_md5_do_add(sk, (union tcp_md5_addr *)&sk->sk_v6_daddr,
> + AF_INET6, 128, key->key, key->keylen,
> + sk_gfp_mask(sk, GFP_ATOMIC));
> + }
> +}
> +EXPORT_SYMBOL_GPL(tcp_v6_md5_syn_recv_sock);
> +
> +struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
> + const struct sock *addr_sk)
> +{
> + return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
> +}
> +EXPORT_SYMBOL_GPL(tcp_v6_md5_lookup);
> +#endif
> +
> +void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw)
> +{
> + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
> + struct tcp_sock *tp = tcp_sk(sk);
> + struct tcp_md5sig_key *key;
> +
> + /* The timewait bucket does not have the key DB from the
> + * sock structure. We just make a quick copy of the
> + * md5 key being used (if indeed we are using one)
> + * so the timewait ack generating code has the key.
> + */
> + tcptw->tw_md5_key = NULL;
> + key = tp->af_specific->md5_lookup(sk, sk);
> + if (key) {
> + tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
> + BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
> + }
> +}
> +
> +static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
> + const struct tcp_md5sig_key *key)
> +{
> + info->tcpm_family = key->family;
> + info->tcpm_prefixlen = key->prefixlen;
> + info->tcpm_keylen = key->keylen;
> + memcpy(info->tcpm_key, key->key, key->keylen);
> +
> + if (key->family == AF_INET)
> + info->tcpm_addr[0] = key->addr.a4.s_addr;
> + #if IS_ENABLED(CONFIG_IPV6)
> + else if (key->family == AF_INET6)
> + memcpy(&info->tcpm_addr, &key->addr.a6,
> + sizeof(info->tcpm_addr));
> + #endif
> +}
> +
> +static int tcp_diag_put_md5sig(struct sk_buff *skb,
> + const struct tcp_md5sig_info *md5sig)
> +{
> + const struct tcp_md5sig_key *key;
> + struct tcp_diag_md5sig *info;
> + struct nlattr *attr;
> + int md5sig_count = 0;
> +
> + hlist_for_each_entry_rcu(key, &md5sig->head, node)
> + md5sig_count++;
> + if (md5sig_count == 0)
> + return 0;
> +
> + attr = nla_reserve(skb, INET_DIAG_MD5SIG,
> + md5sig_count * sizeof(struct tcp_diag_md5sig));
> + if (!attr)
> + return -EMSGSIZE;
> +
> + info = nla_data(attr);
> + memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
> + hlist_for_each_entry_rcu(key, &md5sig->head, node) {
> + tcp_diag_md5sig_fill(info++, key);
> + if (--md5sig_count == 0)
> + break;
> + }
> +
> + return 0;
> +}
> +
> +int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb)
> +{
> + if (net_admin) {
> + struct tcp_md5sig_info *md5sig;
> + int err = 0;
> +
> + rcu_read_lock();
> + md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
> + if (md5sig)
> + err = tcp_diag_put_md5sig(skb, md5sig);
> + rcu_read_unlock();
> + if (err < 0)
> + return err;
> + }
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(tcp_md5_diag_get_aux);
> +
> +int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin)
> +{
> + int size = 0;
> +
> + if (net_admin && sk_fullsock(sk)) {
> + const struct tcp_md5sig_info *md5sig;
> + const struct tcp_md5sig_key *key;
> + size_t md5sig_count = 0;
> +
> + rcu_read_lock();
> + md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
> + if (md5sig) {
> + hlist_for_each_entry_rcu(key, &md5sig->head, node)
> + md5sig_count++;
> + }
> + rcu_read_unlock();
> + size += nla_total_size(md5sig_count *
> + sizeof(struct tcp_diag_md5sig));
> + }
> +
> + return size;
> +}
> +EXPORT_SYMBOL_GPL(tcp_md5_diag_get_aux_size);
> +
> +const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
> + .md5_lookup = tcp_v4_md5_lookup,
> + .calc_md5_hash = tcp_v4_md5_hash_skb,
> + .md5_parse = tcp_v4_parse_md5_keys,
> +};
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
> + .md5_lookup = tcp_v6_md5_lookup,
> + .calc_md5_hash = tcp_v6_md5_hash_skb,
> + .md5_parse = tcp_v6_parse_md5_keys,
> +};
> +EXPORT_SYMBOL_GPL(tcp_sock_ipv6_specific);
> +
> +const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
> + .md5_lookup = tcp_v4_md5_lookup,
> + .calc_md5_hash = tcp_v4_md5_hash_skb,
> + .md5_parse = tcp_v6_parse_md5_keys,
> +};
> +EXPORT_SYMBOL_GPL(tcp_sock_ipv6_mapped_specific);
> +#endif
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [RFC 11/14] tcp_md5: Move TCP-MD5 code out of TCP itself
2018-01-02 19:39 ` Mat Martineau
@ 2018-01-05 1:15 ` Christoph Paasch
0 siblings, 0 replies; 17+ messages in thread
From: Christoph Paasch @ 2018-01-05 1:15 UTC (permalink / raw)
To: Mat Martineau; +Cc: netdev, Eric Dumazet, Alexei Starovoitov
Hello Mat,
On 02/01/18 - 11:39:23, Mat Martineau wrote:
>
> Hi Christoph -
>
> On Mon, 18 Dec 2017, Christoph Paasch wrote:
>
> > This is all just copy-pasting the TCP_MD5-code into functions that are
> > placed in net/ipv4/tcp_md5.c.
> >
> > Signed-off-by: Christoph Paasch <cpaasch@apple.com>
> > Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
> > ---
> > include/linux/inet_diag.h | 1 +
> > include/linux/tcp_md5.h | 138 ++++++
> > include/net/tcp.h | 77 ----
> > net/ipv4/Makefile | 1 +
> > net/ipv4/tcp.c | 133 +-----
> > net/ipv4/tcp_diag.c | 81 +---
> > net/ipv4/tcp_input.c | 38 --
> > net/ipv4/tcp_ipv4.c | 520 ++-------------------
> > net/ipv4/tcp_md5.c | 1102 +++++++++++++++++++++++++++++++++++++++++++++
> > net/ipv4/tcp_minisocks.c | 27 +-
> > net/ipv4/tcp_output.c | 4 +-
> > net/ipv6/tcp_ipv6.c | 318 +------------
> > 12 files changed, 1305 insertions(+), 1135 deletions(-)
> > create mode 100644 include/linux/tcp_md5.h
> > create mode 100644 net/ipv4/tcp_md5.c
>
> ...
>
> > diff --git a/include/linux/tcp_md5.h b/include/linux/tcp_md5.h
> > new file mode 100644
> > index 000000000000..f6a681cdded4
> > --- /dev/null
> > +++ b/include/linux/tcp_md5.h
> > @@ -0,0 +1,138 @@
>
> There's no license info in this new file. Take a look at the SPDX
> identifiers recently added as the first line of some files (like
> tcp_vegas.h) for one way to do it.
Thanks, I added the SPDX-identifier line.
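(For a header that is the block-comment form as the first line, e.g.

	/* SPDX-License-Identifier: GPL-2.0 */

with GPL-2.0 here only as an example expression.)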
>
>
> > +#ifndef _LINUX_TCP_MD5_H
> > +#define _LINUX_TCP_MD5_H
> > +
> > +#include <linux/skbuff.h>
> > +
> > +#ifdef CONFIG_TCP_MD5SIG
> > +#include <linux/types.h>
> > +
> > +#include <net/tcp.h>
> > +
> > +union tcp_md5_addr {
> > + struct in_addr a4;
> > +#if IS_ENABLED(CONFIG_IPV6)
> > + struct in6_addr a6;
> > +#endif
> > +};
> > +
> > +/* - key database */
> > +struct tcp_md5sig_key {
> > + struct hlist_node node;
> > + u8 keylen;
> > + u8 family; /* AF_INET or AF_INET6 */
> > + union tcp_md5_addr addr;
> > + u8 prefixlen;
> > + u8 key[TCP_MD5SIG_MAXKEYLEN];
> > + struct rcu_head rcu;
> > +};
> > +
> > +/* - sock block */
> > +struct tcp_md5sig_info {
> > + struct hlist_head head;
> > + struct rcu_head rcu;
> > +};
> > +
> > +union tcp_md5sum_block {
> > + struct tcp4_pseudohdr ip4;
> > +#if IS_ENABLED(CONFIG_IPV6)
> > + struct tcp6_pseudohdr ip6;
> > +#endif
> > +};
> > +
> > +/* - pool: digest algorithm, hash description and scratch buffer */
> > +struct tcp_md5sig_pool {
> > + struct ahash_request *md5_req;
> > + void *scratch;
> > +};
> > +
> > +extern const struct tcp_sock_af_ops tcp_sock_ipv4_specific;
> > +extern const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
> > +extern const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
> > +
> > +/* - functions */
> > +int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
> > + const struct sock *sk, const struct sk_buff *skb);
> > +
> > +struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
> > + const struct sock *addr_sk);
> > +
> > +void tcp_v4_md5_destroy_sock(struct sock *sk);
> > +
> > +int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
> > + unsigned int remaining,
> > + struct tcp_out_options *opts,
> > + const struct sock *sk);
> > +
> > +void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
> > + struct tcphdr *t1,
> > + struct tcp_out_options *opts,
> > + const struct sock *sk);
> > +
> > +int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
> > + unsigned int remaining,
> > + struct tcp_out_options *opts,
> > + const struct sock *sk);
> > +
> > +void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
> > + struct tcphdr *t1,
> > + struct tcp_out_options *opts,
> > + const struct sock *sk);
> > +
> > +bool tcp_v4_inbound_md5_hash(const struct sock *sk,
> > + const struct sk_buff *skb);
> > +
> > +void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
> > +
> > +void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
> > +
> > +void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw);
> > +
> > +struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
> > + const struct sock *addr_sk);
> > +
> > +int tcp_v6_md5_hash_skb(char *md5_hash,
> > + const struct tcp_md5sig_key *key,
> > + const struct sock *sk,
> > + const struct sk_buff *skb);
> > +
> > +bool tcp_v6_inbound_md5_hash(const struct sock *sk,
> > + const struct sk_buff *skb);
> > +
> > +static inline void tcp_md5_twsk_destructor(struct sock *sk)
> > +{
> > + struct tcp_timewait_sock *twsk = tcp_twsk(sk);
> > +
> > + if (twsk->tw_md5_key)
> > + kfree_rcu(twsk->tw_md5_key, rcu);
> > +}
> > +
> > +static inline void tcp_md5_add_header_len(const struct sock *listener,
> > + struct sock *sk)
> > +{
> > + struct tcp_sock *tp = tcp_sk(sk);
> > +
> > + if (tp->af_specific->md5_lookup(listener, sk))
> > + tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
> > +}
> > +
> > +int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb);
> > +
> > +int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin);
> > +
> > +#else
> > +
> > +static inline bool tcp_v4_inbound_md5_hash(const struct sock *sk,
> > + const struct sk_buff *skb)
> > +{
> > + return false;
> > +}
> > +
> > +static inline bool tcp_v6_inbound_md5_hash(const struct sock *sk,
> > + const struct sk_buff *skb)
> > +{
> > + return false;
> > +}
> > +
> > +#endif
> > +
> > +#endif /* _LINUX_TCP_MD5_H */
>
> ...
>
> > diff --git a/net/ipv4/tcp_md5.c b/net/ipv4/tcp_md5.c
> > new file mode 100644
> > index 000000000000..a31b404e6dbf
> > --- /dev/null
> > +++ b/net/ipv4/tcp_md5.c
> > @@ -0,0 +1,1102 @@
>
> This new file needs license info too, maybe an SPDX identifier like
> tcp_input.c.
Same here, added the SPDX-line.
Thanks for spotting this.
Christoph
^ permalink raw reply [flat|nested] 17+ messages in thread
* [RFC 12/14] tcp_md5: Use tcp_extra_options in output path
2017-12-18 21:50 [RFC 00/14] Generic TCP-option framework and adoption for TCP-SMC and TCP-MD5 Christoph Paasch
` (10 preceding siblings ...)
2017-12-18 21:51 ` [RFC 11/14] tcp_md5: Move TCP-MD5 code out of TCP itself Christoph Paasch
@ 2017-12-18 21:51 ` Christoph Paasch
2017-12-18 21:51 ` [RFC 13/14] tcp_md5: Cleanup TCP-code Christoph Paasch
2017-12-18 21:51 ` [RFC 14/14] tcp_md5: Use TCP extra-options on the input path Christoph Paasch
13 siblings, 0 replies; 17+ messages in thread
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
This patch starts making use of the extra_option framework for TCP_MD5.
One tricky part is that extra_options are called at the end of
tcp_syn_options(), while TCP_MD5 is handled at the beginning.
TCP_MD5 is handled at the beginning because it needs to disable
TCP timestamps (for option-space reasons). So, in the _prepare function
of the extra options we need to undo the work that was done when
TCP timestamps were enabled.
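A minimal sketch of that undo step in the MD5 _prepare hook (illustrative
only; it assumes the OPTION_TS flag, the tcp_out_options layout and the
TCPOLEN_*_ALIGNED constants as used in net/ipv4/tcp_output.c, and is not
the literal patch code):

	static unsigned int md5_prepare_sketch(unsigned int remaining,
					       struct tcp_out_options *opts)
	{
		/* tcp_syn_options() may already have claimed space for
		 * timestamps; MD5 and timestamps don't both fit, so give
		 * that space back before claiming it for the MD5 option.
		 */
		if (opts->options & OPTION_TS) {
			opts->options &= ~OPTION_TS;
			remaining += TCPOLEN_TSTAMP_ALIGNED;
		}

		if (remaining < TCPOLEN_MD5SIG_ALIGNED)
			return 0;

		return TCPOLEN_MD5SIG_ALIGNED;	/* bytes consumed by MD5 */
	}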
Another thing to note is that in tcp_v4_send_reset (and its IPv6
counterpart), we previously looked up the listening socket (if sk ==
NULL) whenever there was an MD5 signature in the TCP-option space of
the incoming packet.
With the extra-option framework we can't do this anymore, because
extra-options are part of the TCP socket's tcp_option_list. If there is
no socket, we can't parse the option.
This shouldn't have an impact, because when we receive a segment and
there is no established socket, we will match on the listening socket
(if it's still there). Then, when we decide to respond with an RST in
tcp_rcv_state_process, we will pass the listening socket to
tcp_v4_send_reset() and thus will parse the TCP_MD5 option.
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/tcp.h | 10 +-
include/linux/tcp_md5.h | 64 -----
net/ipv4/tcp_ipv4.c | 55 ----
net/ipv4/tcp_md5.c | 696 +++++++++++++++++++++++++++++++++--------------
net/ipv4/tcp_minisocks.c | 12 -
net/ipv4/tcp_output.c | 68 +----
net/ipv6/tcp_ipv6.c | 23 --
7 files changed, 488 insertions(+), 440 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index b0b38f7100a4..034fbd9e0a38 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -127,11 +127,11 @@ struct tcp_out_options {
u16 mss; /* 0 to disable */
u8 ws; /* window scale, 0 to disable */
u8 num_sack_blocks; /* number of SACK blocks to include */
- u8 hash_size; /* bytes in hash_location */
- __u8 *hash_location; /* temporary pointer, overloaded */
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
+#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *md5; /* TCP_MD5 signature key */
+#endif
};
/* This is the max number of SACKS that we'll generate and process. It's safe
@@ -380,9 +380,6 @@ struct tcp_sock {
#ifdef CONFIG_TCP_MD5SIG
/* TCP AF-Specific parts; only used by MD5 Signature support so far */
const struct tcp_sock_af_ops *af_specific;
-
-/* TCP MD5 Signature Option information */
- struct tcp_md5sig_info __rcu *md5sig_info;
#endif
/* TCP fastopen related information */
@@ -440,9 +437,6 @@ struct tcp_timewait_sock {
long tw_ts_recent_stamp;
struct hlist_head tcp_option_list;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *tw_md5_key;
-#endif
};
static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
diff --git a/include/linux/tcp_md5.h b/include/linux/tcp_md5.h
index f6a681cdded4..8dee4fc3dc7f 100644
--- a/include/linux/tcp_md5.h
+++ b/include/linux/tcp_md5.h
@@ -26,25 +26,6 @@ struct tcp_md5sig_key {
struct rcu_head rcu;
};
-/* - sock block */
-struct tcp_md5sig_info {
- struct hlist_head head;
- struct rcu_head rcu;
-};
-
-union tcp_md5sum_block {
- struct tcp4_pseudohdr ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct tcp6_pseudohdr ip6;
-#endif
-};
-
-/* - pool: digest algorithm, hash description and scratch buffer */
-struct tcp_md5sig_pool {
- struct ahash_request *md5_req;
- void *scratch;
-};
-
extern const struct tcp_sock_af_ops tcp_sock_ipv4_specific;
extern const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
extern const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
@@ -56,37 +37,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
-void tcp_v4_md5_destroy_sock(struct sock *sk);
-
-int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
- unsigned int remaining,
- struct tcp_out_options *opts,
- const struct sock *sk);
-
-void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
- struct tcphdr *t1,
- struct tcp_out_options *opts,
- const struct sock *sk);
-
-int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
- unsigned int remaining,
- struct tcp_out_options *opts,
- const struct sock *sk);
-
-void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
- struct tcphdr *t1,
- struct tcp_out_options *opts,
- const struct sock *sk);
-
bool tcp_v4_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb);
-void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
-
-void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
-
-void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw);
-
struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
@@ -98,23 +51,6 @@ int tcp_v6_md5_hash_skb(char *md5_hash,
bool tcp_v6_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb);
-static inline void tcp_md5_twsk_destructor(struct sock *sk)
-{
- struct tcp_timewait_sock *twsk = tcp_twsk(sk);
-
- if (twsk->tw_md5_key)
- kfree_rcu(twsk->tw_md5_key, rcu);
-}
-
-static inline void tcp_md5_add_header_len(const struct sock *listener,
- struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (tp->af_specific->md5_lookup(listener, sk))
- tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
-}
-
int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb);
int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 143e1f66a24a..356bf41ec73a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -603,9 +603,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
struct ip_reply_arg arg;
struct net *net;
int offset = 0;
-#ifdef CONFIG_TCP_MD5SIG
- int ret;
-#endif
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -643,26 +640,11 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
-#ifdef CONFIG_TCP_MD5SIG
- ret = tcp_v4_md5_send_response_prepare(skb, 0,
- MAX_TCP_OPTION_SPACE - arg.iov[0].iov_len,
- &opts, sk);
-
- if (ret == -1)
- return;
-
- arg.iov[0].iov_len += ret;
-#endif
-
if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
unsigned int remaining;
int used;
remaining = sizeof(rep.opt);
-#ifdef CONFIG_TCP_MD5SIG
- if (opts.md5)
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
-#endif
used = tcp_extopt_response_prepare(skb, TCPHDR_RST, remaining,
&opts, sk);
@@ -674,9 +656,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
offset += used / 4;
}
-#ifdef CONFIG_TCP_MD5SIG
- tcp_v4_md5_send_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
-#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
@@ -727,9 +706,6 @@ static void tcp_v4_send_ack(const struct sock *sk,
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
int offset = 0;
-#ifdef CONFIG_TCP_MD5SIG
- int ret;
-#endif
extopt_list = tcp_extopt_get_list(sk);
@@ -758,28 +734,12 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.th.ack = 1;
rep.th.window = htons(win);
-#ifdef CONFIG_TCP_MD5SIG
- ret = tcp_v4_md5_send_response_prepare(skb, 0,
- MAX_TCP_OPTION_SPACE - arg.iov[0].iov_len,
- &opts, sk);
-
- if (ret == -1)
- return;
-
- arg.iov[0].iov_len += ret;
-#endif
-
if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
unsigned int remaining;
int used;
remaining = sizeof(rep.th) + sizeof(rep.opt) - arg.iov[0].iov_len;
-#ifdef CONFIG_TCP_MD5SIG
- if (opts.md5)
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
-#endif
-
memset(&opts, 0, sizeof(opts));
used = tcp_extopt_response_prepare(skb, TCPHDR_ACK, remaining,
&opts, sk);
@@ -792,14 +752,6 @@ static void tcp_v4_send_ack(const struct sock *sk,
offset += used / 4;
}
-#ifdef CONFIG_TCP_MD5SIG
- if (opts.md5) {
- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
- rep.th.doff = arg.iov[0].iov_len / 4;
- }
- tcp_v4_md5_send_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
-#endif
-
arg.flags = reply_flags;
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
@@ -1026,10 +978,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
tcp_initialize_rcv_mss(newsk);
-#ifdef CONFIG_TCP_MD5SIG
- tcp_v4_md5_syn_recv_sock(sk, newsk);
-#endif
-
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
@@ -1532,9 +1480,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
tcp_extopt_destroy(sk);
-#ifdef CONFIG_TCP_MD5SIG
- tcp_v4_md5_destroy_sock(sk);
-#endif
/* Clean up a referenced TCP bind bucket. */
if (inet_csk(sk)->icsk_bind_hash)
diff --git a/net/ipv4/tcp_md5.c b/net/ipv4/tcp_md5.c
index a31b404e6dbf..64e5b4420ce9 100644
--- a/net/ipv4/tcp_md5.c
+++ b/net/ipv4/tcp_md5.c
@@ -7,11 +7,119 @@
#include <net/inet6_hashtables.h>
+struct tcp_md5sig_info {
+ struct hlist_head head;
+ struct rcu_head rcu;
+};
+
+union tcp_md5sum_block {
+ struct tcp4_pseudohdr ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct tcp6_pseudohdr ip6;
+#endif
+};
+
+/* - pool: digest algorithm, hash description and scratch buffer */
+struct tcp_md5sig_pool {
+ struct ahash_request *md5_req;
+ void *scratch;
+};
+
static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
static bool tcp_md5sig_pool_populated;
-#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
+static unsigned int tcp_md5_extopt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+
+static __be32 *tcp_md5_extopt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct sock *sk,
+ struct tcp_extopt_store *store);
+
+static int tcp_md5_send_response_prepare(struct sk_buff *orig, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+
+static __be32 *tcp_md5_send_response_write(__be32 *ptr, struct sk_buff *orig,
+ struct tcphdr *th,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+
+static int tcp_md5_extopt_add_header_len(const struct sock *orig,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+
+static struct tcp_extopt_store *tcp_md5_extopt_copy(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store);
+
+static struct tcp_extopt_store *tcp_md5_extopt_move(struct sock *from,
+ struct sock *to,
+ struct tcp_extopt_store *store);
+
+static void tcp_md5_extopt_destroy(struct tcp_extopt_store *store);
+
+struct tcp_md5_extopt {
+ struct tcp_extopt_store store;
+ struct tcp_md5sig_info __rcu *md5sig_info;
+ struct sock *sk;
+ struct rcu_head rcu;
+};
+
+static const struct tcp_extopt_ops tcp_md5_extra_ops = {
+ .option_kind = TCPOPT_MD5SIG,
+ .prepare = tcp_md5_extopt_prepare,
+ .write = tcp_md5_extopt_write,
+ .response_prepare = tcp_md5_send_response_prepare,
+ .response_write = tcp_md5_send_response_write,
+ .add_header_len = tcp_md5_extopt_add_header_len,
+ .copy = tcp_md5_extopt_copy,
+ .move = tcp_md5_extopt_move,
+ .destroy = tcp_md5_extopt_destroy,
+ .owner = THIS_MODULE,
+};
+
+static struct tcp_md5_extopt *tcp_extopt_to_md5(struct tcp_extopt_store *store)
+{
+ return container_of(store, struct tcp_md5_extopt, store);
+}
+
+static struct tcp_md5_extopt *tcp_md5_opt_find(const struct sock *sk)
+{
+ struct tcp_extopt_store *ext_opt;
+
+ ext_opt = tcp_extopt_find_kind(TCPOPT_MD5SIG, sk);
+
+ return tcp_extopt_to_md5(ext_opt);
+}
+
+static int tcp_md5_register(struct sock *sk,
+ struct tcp_md5_extopt *md5_opt)
+{
+ return tcp_register_extopt(&md5_opt->store, sk);
+}
+
+static struct tcp_md5_extopt *tcp_md5_alloc_store(struct sock *sk)
+{
+ struct tcp_md5_extopt *md5_opt;
+
+ md5_opt = kzalloc(sizeof(*md5_opt), GFP_ATOMIC);
+ if (!md5_opt)
+ return NULL;
+
+ md5_opt->store.ops = &tcp_md5_extra_ops;
+ md5_opt->sk = sk;
+
+ return md5_opt;
+}
static void __tcp_alloc_md5sig_pool(void)
{
@@ -91,18 +199,18 @@ static struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
return NULL;
}
-static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
+static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct tcp_md5_extopt *md5_opt,
const union tcp_md5_addr *addr,
int family, u8 prefixlen)
{
- const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
unsigned int size = sizeof(struct in_addr);
const struct tcp_md5sig_info *md5sig;
+ const struct sock *sk = md5_opt->sk;
/* caller either holds rcu_read_lock() or socket lock */
- md5sig = rcu_dereference_check(tp->md5sig_info,
- lockdep_sock_is_held(sk));
+ md5sig = rcu_dereference_check(md5_opt->md5sig_info,
+ sk_fullsock(sk) && lockdep_sock_is_held(sk));
if (!md5sig)
return NULL;
#if IS_ENABLED(CONFIG_IPV6)
@@ -125,11 +233,26 @@ static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
u8 newkeylen, gfp_t gfp)
{
/* Add Key to the list */
- struct tcp_md5sig_key *key;
- struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_info *md5sig;
+ struct tcp_md5_extopt *md5_opt;
+ struct tcp_md5sig_key *key;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ md5_opt = tcp_md5_opt_find(sk);
+ if (!md5_opt) {
+ int ret;
+
+ md5_opt = tcp_md5_alloc_store(sk);
+ if (!md5_opt)
+ return -ENOMEM;
+
+ ret = tcp_md5_register(sk, md5_opt);
+ if (ret) {
+ kfree(md5_opt);
+ return ret;
+ }
+ }
+
+ key = tcp_md5_do_lookup_exact(md5_opt, addr, family, prefixlen);
if (key) {
/* Pre-existing entry - just update that one. */
memcpy(key->key, newkey, newkeylen);
@@ -137,8 +260,8 @@ static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
return 0;
}
- md5sig = rcu_dereference_protected(tp->md5sig_info,
- lockdep_sock_is_held(sk));
+ md5sig = rcu_dereference_protected(md5_opt->md5sig_info,
+ sk_fullsock(sk) && lockdep_sock_is_held(sk));
if (!md5sig) {
md5sig = kmalloc(sizeof(*md5sig), gfp);
if (!md5sig)
@@ -146,7 +269,7 @@ static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
sk_nocaps_add(sk, NETIF_F_GSO_MASK);
INIT_HLIST_HEAD(&md5sig->head);
- rcu_assign_pointer(tp->md5sig_info, md5sig);
+ rcu_assign_pointer(md5_opt->md5sig_info, md5sig);
}
key = sock_kmalloc(sk, sizeof(*key), gfp);
@@ -168,18 +291,18 @@ static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
return 0;
}
-static void tcp_clear_md5_list(struct sock *sk)
+static void tcp_clear_md5_list(struct tcp_md5_extopt *md5_opt)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_info *md5sig;
struct tcp_md5sig_key *key;
struct hlist_node *n;
- struct tcp_md5sig_info *md5sig;
- md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
+ md5sig = rcu_dereference_protected(md5_opt->md5sig_info, 1);
hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
hlist_del_rcu(&key->node);
- atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
+ if (md5_opt->sk && sk_fullsock(md5_opt->sk))
+ atomic_sub(sizeof(*key), &md5_opt->sk->sk_omem_alloc);
kfree_rcu(key, rcu);
}
}
@@ -187,9 +310,14 @@ static void tcp_clear_md5_list(struct sock *sk)
static int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
int family, u8 prefixlen)
{
+ struct tcp_md5_extopt *md5_opt;
struct tcp_md5sig_key *key;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ md5_opt = tcp_md5_opt_find(sk);
+ if (!md5_opt)
+ return -ENOENT;
+
+ key = tcp_md5_do_lookup_exact(md5_opt, addr, family, prefixlen);
if (!key)
return -ENOENT;
hlist_del_rcu(&key->node);
@@ -421,16 +549,20 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
const union tcp_md5_addr *addr,
int family)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *key;
+ struct tcp_md5sig_key *best_match = NULL;
const struct tcp_md5sig_info *md5sig;
+ struct tcp_md5_extopt *md5_opt;
+ struct tcp_md5sig_key *key;
__be32 mask;
- struct tcp_md5sig_key *best_match = NULL;
bool match;
+ md5_opt = tcp_md5_opt_find(sk);
+ if (!md5_opt)
+ return NULL;
+
/* caller either holds rcu_read_lock() or socket lock */
- md5sig = rcu_dereference_check(tp->md5sig_info,
- lockdep_sock_is_held(sk));
+ md5sig = rcu_dereference_check(md5_opt->md5sig_info,
+ sk_fullsock(sk) && lockdep_sock_is_held(sk));
if (!md5sig)
return NULL;
@@ -538,75 +670,30 @@ static int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
return 0;
}
-int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
- unsigned int remaining,
- struct tcp_out_options *opts,
- const struct sock *sk)
+static int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
{
- const struct tcphdr *th = tcp_hdr(skb);
const struct iphdr *iph = ip_hdr(skb);
- const __u8 *hash_location = NULL;
rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
- if (sk && sk_fullsock(sk)) {
- opts->md5 = tcp_md5_do_lookup(sk,
- (union tcp_md5_addr *)&iph->saddr,
- AF_INET);
- } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
- struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
-
- opts->md5 = tcp_twsk_md5_key(tcptw);
- } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
- opts->md5 = tcp_md5_do_lookup(sk,
- (union tcp_md5_addr *)&iph->saddr,
- AF_INET);
- } else if (hash_location) {
- unsigned char newhash[16];
- struct sock *sk1;
- int genhash;
-
- /* active side is lost. Try to find listening socket through
- * source port, and then find md5 key through listening socket.
- * we are not loose security here:
- * Incoming packet is checked with md5 hash with finding key,
- * no RST generated if md5 hash doesn't match.
- */
- sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, NULL, 0,
- iph->saddr,
- th->source, iph->daddr,
- ntohs(th->source), inet_iif(skb),
- tcp_v4_sdif(skb));
- /* don't send rst if it can't find key */
- if (!sk1)
- goto out_err;
-
- opts->md5 = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
- &iph->saddr, AF_INET);
- if (!opts->md5)
- goto out_err;
-
- genhash = tcp_v4_md5_hash_skb(newhash, opts->md5, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto out_err;
- }
+ opts->md5 = tcp_md5_do_lookup(sk,
+ (union tcp_md5_addr *)&iph->saddr,
+ AF_INET);
if (opts->md5)
+ /* rcu_read_unlock() is in _response_write */
return TCPOLEN_MD5SIG_ALIGNED;
rcu_read_unlock();
return 0;
-
-out_err:
- rcu_read_unlock();
- return -1;
}
-void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
- struct tcphdr *t1,
- struct tcp_out_options *opts,
- const struct sock *sk)
+static __be32 *tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
{
if (opts->md5) {
*topt++ = htonl((TCPOPT_NOP << 24) |
@@ -617,75 +704,39 @@ void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
tcp_v4_md5_hash_hdr((__u8 *)topt, opts->md5,
ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr, t1);
+
+ topt += 4;
+
+ /* Unlocking from _response_prepare */
rcu_read_unlock();
}
+
+ return topt;
}
#if IS_ENABLED(CONFIG_IPV6)
-int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
- unsigned int remaining,
- struct tcp_out_options *opts,
- const struct sock *sk)
+static int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
{
- const struct tcphdr *th = tcp_hdr(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- const __u8 *hash_location = NULL;
rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
- if (sk && sk_fullsock(sk)) {
- opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
- } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
- struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
-
- opts->md5 = tcp_twsk_md5_key(tcptw);
- } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
- opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
- } else if (hash_location) {
- unsigned char newhash[16];
- struct sock *sk1;
- int genhash;
-
- /* active side is lost. Try to find listening socket through
- * source port, and then find md5 key through listening socket.
- * we are not loose security here:
- * Incoming packet is checked with md5 hash with finding key,
- * no RST generated if md5 hash doesn't match.
- */
- sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, NULL, 0,
- &ipv6h->saddr,
- th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb),
- tcp_v6_sdif(skb));
- if (!sk1)
- goto out_err;
-
- opts->md5 = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
- if (!opts->md5)
- goto out_err;
-
- genhash = tcp_v6_md5_hash_skb(newhash, opts->md5, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto out_err;
- }
+ opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
if (opts->md5)
+ /* rcu_read_unlock() is in _response_write */
return TCPOLEN_MD5SIG_ALIGNED;
rcu_read_unlock();
return 0;
-
-out_err:
- rcu_read_unlock();
- return -1;
}
-EXPORT_SYMBOL_GPL(tcp_v6_md5_send_response_prepare);
-void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
- struct tcphdr *t1,
- struct tcp_out_options *opts,
- const struct sock *sk)
+static __be32 *tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
{
if (opts->md5) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
@@ -694,12 +745,45 @@ void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, t1);
+ topt += 4;
+
+ /* Unlocking from _response_prepare */
rcu_read_unlock();
}
+
+ return topt;
}
-EXPORT_SYMBOL_GPL(tcp_v6_md5_send_response_write);
#endif
+static int tcp_md5_send_response_prepare(struct sk_buff *orig, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (orig->protocol != htons(ETH_P_IP))
+ return tcp_v6_md5_send_response_prepare(orig, flags, remaining,
+ opts, sk);
+ else
+#endif
+ return tcp_v4_md5_send_response_prepare(orig, flags, remaining,
+ opts, sk);
+}
+
+static __be32 *tcp_md5_send_response_write(__be32 *ptr, struct sk_buff *orig,
+ struct tcphdr *th,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (orig->protocol != htons(ETH_P_IP))
+ return tcp_v6_md5_send_response_write(ptr, orig, th, opts, sk);
+#endif
+ return tcp_v4_md5_send_response_write(ptr, orig, th, opts, sk);
+}
+
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
@@ -909,59 +993,6 @@ bool tcp_v6_inbound_md5_hash(const struct sock *sk,
return false;
}
EXPORT_SYMBOL_GPL(tcp_v6_inbound_md5_hash);
-#endif
-
-void tcp_v4_md5_destroy_sock(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- /* Clean up the MD5 key list, if any */
- if (tp->md5sig_info) {
- tcp_clear_md5_list(sk);
- kfree_rcu(tp->md5sig_info, rcu);
- tp->md5sig_info = NULL;
- }
-}
-
-void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
- struct tcp_md5sig_key *key;
-
- /* Copy over the MD5 key from the original socket */
- key = tcp_md5_do_lookup(listener, (union tcp_md5_addr *)&inet->inet_daddr,
- AF_INET);
- if (key) {
- /* We're using one, so create a matching key
- * on the sk structure. If we fail to get
- * memory, then we end up not copying the key
- * across. Shucks.
- */
- tcp_md5_do_add(sk, (union tcp_md5_addr *)&inet->inet_daddr,
- AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- }
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk)
-{
- struct tcp_md5sig_key *key;
-
- /* Copy over the MD5 key from the original socket */
- key = tcp_v6_md5_do_lookup(listener, &sk->sk_v6_daddr);
- if (key) {
- /* We're using one, so create a matching key
- * on the newsk structure. If we fail to get
- * memory, then we end up not copying the key
- * across. Shucks.
- */
- tcp_md5_do_add(sk, (union tcp_md5_addr *)&sk->sk_v6_daddr,
- AF_INET6, 128, key->key, key->keylen,
- sk_gfp_mask(sk, GFP_ATOMIC));
- }
-}
-EXPORT_SYMBOL_GPL(tcp_v6_md5_syn_recv_sock);
struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
@@ -971,25 +1002,6 @@ struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
EXPORT_SYMBOL_GPL(tcp_v6_md5_lookup);
#endif
-void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw)
-{
- struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *key;
-
- /* The timewait bucket does not have the key DB from the
- * sock structure. We just make a quick copy of the
- * md5 key being used (if indeed we are using one)
- * so the timewait ack generating code has the key.
- */
- tcptw->tw_md5_key = NULL;
- key = tp->af_specific->md5_lookup(sk, sk);
- if (key) {
- tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
- BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
- }
-}
-
static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
const struct tcp_md5sig_key *key)
{
@@ -1039,13 +1051,17 @@ static int tcp_diag_put_md5sig(struct sk_buff *skb,
int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb)
{
if (net_admin) {
+ struct tcp_md5_extopt *md5_opt;
struct tcp_md5sig_info *md5sig;
int err = 0;
rcu_read_lock();
- md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
- if (md5sig)
- err = tcp_diag_put_md5sig(skb, md5sig);
+ md5_opt = tcp_md5_opt_find(sk);
+ if (md5_opt) {
+ md5sig = rcu_dereference(md5_opt->md5sig_info);
+ if (md5sig)
+ err = tcp_diag_put_md5sig(skb, md5sig);
+ }
rcu_read_unlock();
if (err < 0)
return err;
@@ -1060,15 +1076,19 @@ int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin)
int size = 0;
if (net_admin && sk_fullsock(sk)) {
+ struct tcp_md5_extopt *md5_opt;
const struct tcp_md5sig_info *md5sig;
const struct tcp_md5sig_key *key;
size_t md5sig_count = 0;
rcu_read_lock();
- md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
- if (md5sig) {
- hlist_for_each_entry_rcu(key, &md5sig->head, node)
- md5sig_count++;
+ md5_opt = tcp_md5_opt_find(sk);
+ if (md5_opt) {
+ md5sig = rcu_dereference(md5_opt->md5sig_info);
+ if (md5sig) {
+ hlist_for_each_entry_rcu(key, &md5sig->head, node)
+ md5sig_count++;
+ }
}
rcu_read_unlock();
size += nla_total_size(md5sig_count *
@@ -1079,6 +1099,260 @@ int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin)
}
EXPORT_SYMBOL_GPL(tcp_md5_diag_get_aux_size);
+static int tcp_md5_extopt_add_header_len(const struct sock *orig,
+ const struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->af_specific->md5_lookup(orig, sk))
+ return TCPOLEN_MD5SIG_ALIGNED;
+
+ return 0;
+}
+
+static unsigned int tcp_md5_extopt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ int ret = 0;
+
+ if (sk_fullsock(sk)) {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ opts->md5 = tp->af_specific->md5_lookup(sk, sk);
+ } else {
+ struct request_sock *req = inet_reqsk(sk);
+ struct sock *listener = req->rsk_listener;
+
+ /* Coming from tcp_make_synack, unlock is in
+ * tcp_md5_extopt_write
+ */
+ rcu_read_lock();
+
+ opts->md5 = tcp_rsk(req)->af_specific->req_md5_lookup(listener, sk);
+
+ if (!opts->md5)
+ rcu_read_unlock();
+ }
+
+ if (unlikely(opts->md5)) {
+ ret = TCPOLEN_MD5SIG_ALIGNED;
+ opts->options |= OPTION_MD5;
+
+ /* Don't use TCP timestamps with TCP_MD5 */
+ if ((opts->options & OPTION_TS)) {
+ ret -= TCPOLEN_TSTAMP_ALIGNED;
+
+ /* When TS are enabled, Linux puts the SACK_OK
+ * next to the timestamp option, thus not accounting
+ * for its space. Here, we disable timestamps, thus
+ * we need to account for the space.
+ */
+ if (opts->options & OPTION_SACK_ADVERTISE)
+ ret += TCPOLEN_SACKPERM_ALIGNED;
+ }
+
+ opts->options &= ~OPTION_TS;
+ opts->tsval = 0;
+ opts->tsecr = 0;
+
+ if (!sk_fullsock(sk)) {
+ struct request_sock *req = inet_reqsk(sk);
+
+ inet_rsk(req)->tstamp_ok = 0;
+ }
+ }
+
+ return ret;
+}
+
+static __be32 *tcp_md5_extopt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ if (unlikely(OPTION_MD5 & opts->options)) {
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct in6_addr *addr6;
+
+ if (sk_fullsock(sk)) {
+ addr6 = &sk->sk_v6_daddr;
+ } else {
+ BUG_ON(sk->sk_state != TCP_NEW_SYN_RECV);
+ addr6 = &inet_rsk(inet_reqsk(sk))->ir_v6_rmt_addr;
+ }
+#endif
+
+ *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
+
+ if (sk_fullsock(sk))
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+
+ /* Calculate the MD5 hash, as we have all we need now */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 && !ipv6_addr_v4mapped(addr6))
+ tcp_v6_md5_hash_skb((__u8 *)ptr, opts->md5, sk, skb);
+ else
+#endif
+ tcp_v4_md5_hash_skb((__u8 *)ptr, opts->md5, sk, skb);
+
+ ptr += 4;
+
+ /* Coming from tcp_make_synack */
+ if (!sk_fullsock(sk))
+ rcu_read_unlock();
+ }
+
+ return ptr;
+}
+
+static struct tcp_md5_extopt *__tcp_md5_extopt_copy(struct request_sock *req,
+ const struct tcp_md5sig_key *key,
+ const union tcp_md5_addr *addr,
+ int family)
+{
+ struct tcp_md5_extopt *md5_opt = NULL;
+ struct tcp_md5sig_info *md5sig;
+ struct tcp_md5sig_key *newkey;
+
+ md5_opt = tcp_md5_alloc_store(req_to_sk(req));
+ if (!md5_opt)
+ goto err;
+
+ md5sig = kmalloc(sizeof(*md5sig), GFP_ATOMIC);
+ if (!md5sig)
+ goto err_md5sig;
+
+ INIT_HLIST_HEAD(&md5sig->head);
+ rcu_assign_pointer(md5_opt->md5sig_info, md5sig);
+
+ newkey = kmalloc(sizeof(*newkey), GFP_ATOMIC);
+ if (!newkey)
+ goto err_newkey;
+
+ memcpy(newkey->key, key->key, key->keylen);
+ newkey->keylen = key->keylen;
+ newkey->family = family;
+ newkey->prefixlen = 32;
+ memcpy(&newkey->addr, addr,
+ (family == AF_INET6) ? sizeof(struct in6_addr) :
+ sizeof(struct in_addr));
+ hlist_add_head_rcu(&newkey->node, &md5sig->head);
+
+ return md5_opt;
+
+err_newkey:
+ kfree(md5sig);
+err_md5sig:
+ kfree_rcu(md5_opt, rcu);
+err:
+ return NULL;
+}
+
+static struct tcp_extopt_store *tcp_md5_v4_extopt_copy(const struct sock *listener,
+ struct request_sock *req)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct tcp_md5sig_key *key;
+
+ /* Copy over the MD5 key from the original socket */
+ key = tcp_md5_do_lookup(listener,
+ (union tcp_md5_addr *)&ireq->ir_rmt_addr,
+ AF_INET);
+ if (!key)
+ return NULL;
+
+ return (struct tcp_extopt_store *)__tcp_md5_extopt_copy(req, key,
+ (union tcp_md5_addr *)&ireq->ir_rmt_addr,
+ AF_INET);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct tcp_extopt_store *tcp_md5_v6_extopt_copy(const struct sock *listener,
+ struct request_sock *req)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct tcp_md5sig_key *key;
+
+ /* Copy over the MD5 key from the original socket */
+ key = tcp_v6_md5_do_lookup(listener, &ireq->ir_v6_rmt_addr);
+ if (!key)
+ return NULL;
+
+ return (struct tcp_extopt_store *)__tcp_md5_extopt_copy(req, key,
+ (union tcp_md5_addr *)&ireq->ir_v6_rmt_addr,
+ AF_INET6);
+}
+#endif
+
+/* We are creating a new request-socket, based on the listener's key that
+ * matches the IP-address. Thus, we need to create a new tcp_extopt_store, and
+ * store the matching key in there for the request-sock.
+ */
+static struct tcp_extopt_store *tcp_md5_extopt_copy(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ if (ireq->ireq_family == AF_INET6)
+ return tcp_md5_v6_extopt_copy(listener, req);
+#endif
+ return tcp_md5_v4_extopt_copy(listener, req);
+}
+
+/* Moving from a request-sock to a full socket means we need to account for
+ * the memory and set GSO-flags. When moving from a full socket to a time-wait
+ * socket we also need to adjust the memory accounting.
+ */
+static struct tcp_extopt_store *tcp_md5_extopt_move(struct sock *from,
+ struct sock *to,
+ struct tcp_extopt_store *store)
+{
+ struct tcp_md5_extopt *md5_opt = tcp_extopt_to_md5(store);
+ unsigned int size = sizeof(struct tcp_md5sig_key);
+
+ if (sk_fullsock(to)) {
+ /* From request-sock to full socket */
+
+ if (size > sysctl_optmem_max ||
+ atomic_read(&to->sk_omem_alloc) + size >= sysctl_optmem_max) {
+ tcp_md5_extopt_destroy(store);
+ return NULL;
+ }
+
+ sk_nocaps_add(to, NETIF_F_GSO_MASK);
+ atomic_add(size, &to->sk_omem_alloc);
+ } else if (sk_fullsock(from)) {
+ /* From full socket to time-wait-socket */
+ atomic_sub(size, &from->sk_omem_alloc);
+ }
+
+ md5_opt->sk = to;
+
+ return store;
+}
+
+static void tcp_md5_extopt_destroy(struct tcp_extopt_store *store)
+{
+ struct tcp_md5_extopt *md5_opt = tcp_extopt_to_md5(store);
+
+ /* Clean up the MD5 key list, if any */
+ if (md5_opt) {
+ tcp_clear_md5_list(md5_opt);
+ kfree_rcu(md5_opt->md5sig_info, rcu);
+ md5_opt->md5sig_info = NULL;
+
+ kfree_rcu(md5_opt, rcu);
+ }
+}
+
const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f33214b29167..3da1c823240b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -22,7 +22,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
-#include <linux/tcp_md5.h>
#include <linux/workqueue.h>
#include <linux/static_key.h>
#include <net/tcp.h>
@@ -295,9 +294,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcp_extopt_move(sk, (struct sock *)tw);
INIT_HLIST_HEAD(&tp->tcp_option_list);
}
-#ifdef CONFIG_TCP_MD5SIG
- tcp_md5_time_wait(sk, tw);
-#endif
/* Get the TIME_WAIT timeout firing. */
if (timeo < rto)
@@ -332,10 +328,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
void tcp_twsk_destructor(struct sock *sk)
{
-#ifdef CONFIG_TCP_MD5SIG
- tcp_md5_twsk_destructor(sk);
-#endif
-
if (unlikely(!hlist_empty(&tcp_twsk(sk)->tcp_option_list)))
tcp_extopt_destroy(sk);
}
@@ -520,10 +512,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tcp_header_len = sizeof(struct tcphdr);
}
newtp->tsoffset = treq->ts_off;
-#ifdef CONFIG_TCP_MD5SIG
- newtp->md5sig_info = NULL; /*XXX*/
- tcp_md5_add_header_len(sk, newsk);
-#endif
if (unlikely(!hlist_empty(&treq->tcp_option_list)))
newtp->tcp_header_len += tcp_extopt_add_header(req_to_sk(req), newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 137645753abb..41bd8a791b0d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -42,7 +42,6 @@
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/static_key.h>
-#include <linux/tcp_md5.h>
#include <trace/events/tcp.h>
@@ -424,14 +423,6 @@ static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
extopt_list = tcp_extopt_get_list(sk);
- if (unlikely(OPTION_MD5 & options)) {
- *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
- /* overload cookie hash location */
- opts->hash_location = (__u8 *)ptr;
- ptr += 4;
- }
-
if (unlikely(opts->mss)) {
*ptr++ = htonl((TCPOPT_MSS << 24) |
(TCPOLEN_MSS << 16) |
@@ -527,14 +518,6 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
unsigned int remaining = MAX_TCP_OPTION_SPACE;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
-#ifdef CONFIG_TCP_MD5SIG
- opts->md5 = tp->af_specific->md5_lookup(sk, sk);
- if (opts->md5) {
- opts->options |= OPTION_MD5;
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
- }
-#endif
-
/* We always get an MSS option. The option bytes which will be seen in
* normal data packets should timestamps be used, must be in the MSS
* advertised. But we subtract them from tp->mss_cache so that
@@ -547,7 +530,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !opts->md5)) {
+ if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
@@ -596,20 +579,6 @@ static unsigned int tcp_synack_options(const struct sock *sk,
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
-#ifdef CONFIG_TCP_MD5SIG
- if (opts->md5) {
- opts->options |= OPTION_MD5;
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
-
- /* We can't fit any SACK blocks in a packet with MD5 + TS
- * options. There was discussion about disabling SACK
- * rather than TS in order to fit in better with old,
- * buggy kernels, but that was deemed to be unnecessary.
- */
- ireq->tstamp_ok &= !ireq->sack_ok;
- }
-#endif
-
/* We always send an MSS option. */
opts->mss = mss;
remaining -= TCPOLEN_MSS_ALIGNED;
@@ -670,16 +639,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
size += TCPOLEN_TSTAMP_ALIGNED;
}
-#ifdef CONFIG_TCP_MD5SIG
- opts->md5 = tp->af_specific->md5_lookup(sk, sk);
- if (unlikely(opts->md5)) {
- opts->options |= OPTION_MD5;
- size += TCPOLEN_MD5SIG_ALIGNED;
- }
-#else
- opts->md5 = NULL;
-#endif
-
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
size += tcp_extopt_prepare(skb, 0, MAX_TCP_OPTION_SPACE - size,
opts, tcp_to_sk(tp));
@@ -1082,14 +1041,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->window = htons(min(tp->rcv_wnd, 65535U));
}
tcp_options_write((__be32 *)(th + 1), skb, sk, &opts);
-#ifdef CONFIG_TCP_MD5SIG
- /* Calculate the MD5 hash, as we have all we need now */
- if (opts.md5) {
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- tp->af_specific->calc_md5_hash(opts.hash_location,
- opts.md5, sk, skb);
- }
-#endif
icsk->icsk_af_ops->send_check(sk, skb);
@@ -3159,10 +3110,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
#endif
skb->skb_mstamp = tcp_clock_us();
-#ifdef CONFIG_TCP_MD5SIG
- rcu_read_lock();
- opts.md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
-#endif
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts,
foc) + sizeof(*th);
@@ -3189,15 +3136,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
tcp_options_write((__be32 *)(th + 1), skb, req_to_sk(req), &opts);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
-#ifdef CONFIG_TCP_MD5SIG
- /* Okay, we have all we need - do the md5 hash if needed */
- if (opts.md5)
- tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
- opts.md5,
- req_to_sk(req), skb);
- rcu_read_unlock();
-#endif
-
/* Do not fool tcpdump (if any), clean our debris */
skb->tstamp = 0;
return skb;
@@ -3238,10 +3176,6 @@ static void tcp_connect_init(struct sock *sk)
if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
-#ifdef CONFIG_TCP_MD5SIG
- tcp_md5_add_header_len(sk, sk);
-#endif
-
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
tp->tcp_header_len += tcp_extopt_add_header(sk, sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e9b72d794140..16cbd6ec2063 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -577,20 +577,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
-#ifdef CONFIG_TCP_MD5SIG
-{
- int ret;
-
- ret = tcp_v6_md5_send_response_prepare(skb, 0,
- MAX_TCP_OPTION_SPACE - tot_len,
- &extraopts, sk);
-
- if (ret == -1)
- goto out;
-
- tot_len += ret;
-}
-#endif
if (sk)
extopt_list = tcp_extopt_get_list(sk);
@@ -639,11 +625,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
*topt++ = htonl(tsecr);
}
-#ifdef CONFIG_TCP_MD5SIG
- if (extraopts.md5)
- tcp_v6_md5_send_response_write(topt, skb, t1, &extraopts, sk);
-#endif
-
if (unlikely(extopt_list && !hlist_empty(extopt_list)))
tcp_extopt_response_write(topt, skb, t1, &extraopts, sk);
@@ -957,10 +938,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
-#ifdef CONFIG_TCP_MD5SIG
- tcp_v6_md5_syn_recv_sock(sk, newsk);
-#endif
-
if (__inet_inherit_port(sk, newsk) < 0) {
inet_csk_prepare_forced_close(newsk);
tcp_done(newsk);
--
2.15.0
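
A condensed usage sketch of the _response_prepare()/_response_write()
pair above (caller shape and elisions are assumed, not taken verbatim
from this series; the framework normally supplies the tcp_extopt_store
argument, NULL is passed here purely for illustration). Note that the
rcu_read_lock() taken in prepare stays held across both calls only when
an MD5 key was found:

static void tcp_send_response_sketch(const struct sock *sk, struct sk_buff *skb)
{
	struct tcp_out_options opts = {};
	unsigned int tot_len = sizeof(struct tcphdr);
	struct tcphdr *t1;
	__be32 *topt;

	/* Takes rcu_read_lock(). Returns TCPOLEN_MD5SIG_ALIGNED and keeps
	 * the lock held if a key matched; otherwise unlocks and returns 0.
	 */
	tot_len += tcp_md5_send_response_prepare(skb, 0,
						 MAX_TCP_OPTION_SPACE - tot_len,
						 &opts, sk, NULL);

	/* ... allocate the reply skb, point t1 at its tcphdr and topt at
	 * the option space, as tcp_v{4,6}_send_response() does ...
	 */

	/* Writes the NOP/NOP/MD5SIG option plus the 16-byte hash, advances
	 * topt by four words, and (only if a key was found) drops the
	 * rcu_read_lock() taken in prepare.
	 */
	topt = tcp_md5_send_response_write(topt, skb, t1, &opts, sk, NULL);
}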
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [RFC 13/14] tcp_md5: Cleanup TCP-code
2017-12-18 21:50 [RFC 00/14] Generic TCP-option framework and adoption for TCP-SMC and TCP-MD5 Christoph Paasch
` (11 preceding siblings ...)
2017-12-18 21:51 ` [RFC 12/14] tcp_md5: Use tcp_extra_options in output path Christoph Paasch
@ 2017-12-18 21:51 ` Christoph Paasch
2017-12-18 21:51 ` [RFC 14/14] tcp_md5: Use TCP extra-options on the input path Christoph Paasch
13 siblings, 0 replies; 17+ messages in thread
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
Now that we have consolidated the TCP_MD5 output path, we can clean up
TCP and its MD5 callbacks.
These callbacks exist solely to handle the different address families
(v4, v6 and v4mapped).
Now that the TCP_MD5 code is isolated, it is acceptable to add a bit
more complexity inside tcp_md5.c to handle these address families, with
the benefit of getting rid of the callbacks in tcp_sock, together with
their assignments in tcp_v4/6_connect,...
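The dispatch that replaces them is small enough to live in tcp_md5.c
itself. A minimal sketch (hypothetical helper name, condensed from
tcp_md5_extopt_add_header_len() in the diff below):

static struct tcp_md5sig_key *tcp_md5_lookup_sk(const struct sock *orig,
						const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	/* Real v6 destinations use the v6 lookup; v4-mapped ones fall
	 * through to the v4 lookup, which is what the removed
	 * tcp_sock_ipv6_mapped_specific ops used to express.
	 */
	if (sk->sk_family == AF_INET6 &&
	    !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		return tcp_v6_md5_lookup(orig, sk);
#endif
	return tcp_v4_md5_lookup(orig, sk);
}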
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/tcp.h | 5 -
include/linux/tcp_md5.h | 18 +--
include/net/tcp.h | 24 ----
net/ipv4/tcp.c | 2 +-
net/ipv4/tcp_ipv4.c | 8 --
net/ipv4/tcp_md5.c | 340 ++++++++++++++++++++++--------------------------
net/ipv6/tcp_ipv6.c | 17 ---
7 files changed, 155 insertions(+), 259 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 034fbd9e0a38..5278387fabe7 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -377,11 +377,6 @@ struct tcp_sock {
* while socket was owned by user.
*/
-#ifdef CONFIG_TCP_MD5SIG
-/* TCP AF-Specific parts; only used by MD5 Signature support so far */
- const struct tcp_sock_af_ops *af_specific;
-#endif
-
/* TCP fastopen related information */
struct tcp_fastopen_request *fastopen_req;
/* fastopen_rsk points to request_sock that resulted in this big
diff --git a/include/linux/tcp_md5.h b/include/linux/tcp_md5.h
index 8dee4fc3dc7f..509fc36335e7 100644
--- a/include/linux/tcp_md5.h
+++ b/include/linux/tcp_md5.h
@@ -26,28 +26,14 @@ struct tcp_md5sig_key {
struct rcu_head rcu;
};
-extern const struct tcp_sock_af_ops tcp_sock_ipv4_specific;
-extern const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
-extern const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
-
/* - functions */
-int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct sock *sk, const struct sk_buff *skb);
-struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk);
+int tcp_md5_parse_keys(struct sock *sk, int optname, char __user *optval,
+ int optlen);
bool tcp_v4_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb);
-struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk);
-
-int tcp_v6_md5_hash_skb(char *md5_hash,
- const struct tcp_md5sig_key *key,
- const struct sock *sk,
- const struct sk_buff *skb);
-
bool tcp_v6_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e955c5f0997f..baf0a6989a79 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1729,32 +1729,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb);
-/* TCP af-specific functions */
-struct tcp_sock_af_ops {
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *(*md5_lookup) (const struct sock *sk,
- const struct sock *addr_sk);
- int (*calc_md5_hash)(char *location,
- const struct tcp_md5sig_key *md5,
- const struct sock *sk,
- const struct sk_buff *skb);
- int (*md5_parse)(struct sock *sk,
- int optname,
- char __user *optval,
- int optlen);
-#endif
-};
-
struct tcp_request_sock_ops {
u16 mss_clamp;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *(*req_md5_lookup)(const struct sock *sk,
- const struct sock *addr_sk);
- int (*calc_md5_hash) (char *location,
- const struct tcp_md5sig_key *md5,
- const struct sock *sk,
- const struct sk_buff *skb);
-#endif
void (*init_req)(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 29f3ce8a0b54..8b6f5efe9509 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2795,7 +2795,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_MD5SIG:
case TCP_MD5SIG_EXT:
/* Read the IP->Key mappings from userspace */
- err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
+ err = tcp_md5_parse_keys(sk, optname, optval, optlen);
break;
#endif
case TCP_USER_TIMEOUT:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 356bf41ec73a..670d7751f814 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -889,10 +889,6 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.mss_clamp = TCP_MSS_DEFAULT,
-#ifdef CONFIG_TCP_MD5SIG
- .req_md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_md5_hash_skb,
-#endif
.init_req = tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v4_init_sequence,
@@ -1450,10 +1446,6 @@ static int tcp_v4_init_sock(struct sock *sk)
icsk->icsk_af_ops = &ipv4_specific;
-#ifdef CONFIG_TCP_MD5SIG
- tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
-#endif
-
return 0;
}
diff --git a/net/ipv4/tcp_md5.c b/net/ipv4/tcp_md5.c
index 64e5b4420ce9..052f5a587783 100644
--- a/net/ipv4/tcp_md5.c
+++ b/net/ipv4/tcp_md5.c
@@ -336,12 +336,13 @@ static int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
return crypto_ahash_update(hp->md5_req);
}
-static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
+int tcp_md5_parse_keys(struct sock *sk, int optname, char __user *optval,
+ int optlen)
{
+ u8 prefixlen = 32, maxprefixlen;
+ union tcp_md5_addr *tcpmd5addr;
struct tcp_md5sig cmd;
- struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
- u8 prefixlen = 32;
+ unsigned short family;
if (optlen < sizeof(cmd))
return -EINVAL;
@@ -349,76 +350,48 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
if (copy_from_user(&cmd, optval, sizeof(cmd)))
return -EFAULT;
- if (sin->sin_family != AF_INET)
- return -EINVAL;
-
- if (optname == TCP_MD5SIG_EXT &&
- cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
- prefixlen = cmd.tcpm_prefixlen;
- if (prefixlen > 32)
- return -EINVAL;
- }
+ family = cmd.tcpm_addr.ss_family;
- if (!cmd.tcpm_keylen)
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen);
-
- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+ if (family != AF_INET && family != AF_INET6)
return -EINVAL;
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
- GFP_KERNEL);
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
-{
- struct tcp_md5sig cmd;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
- u8 prefixlen;
-
- if (optlen < sizeof(cmd))
+ if (sk->sk_family != family)
return -EINVAL;
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
- return -EFAULT;
+ if (family == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
- if (sin6->sin6_family != AF_INET6)
- return -EINVAL;
+ if (!ipv6_addr_v4mapped(&sin6->sin6_addr)) {
+ tcpmd5addr = (union tcp_md5_addr *)&sin6->sin6_addr;
+ maxprefixlen = 128;
+ } else {
+ tcpmd5addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
+ family = AF_INET;
+ maxprefixlen = 32;
+ }
+ } else {
+ struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
+
+ tcpmd5addr = (union tcp_md5_addr *)&sin->sin_addr;
+ maxprefixlen = 32;
+ }
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
prefixlen = cmd.tcpm_prefixlen;
- if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
- prefixlen > 32))
+ if (prefixlen > maxprefixlen)
return -EINVAL;
- } else {
- prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
}
- if (!cmd.tcpm_keylen) {
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen);
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen);
- }
+ if (!cmd.tcpm_keylen)
+ return tcp_md5_do_del(sk, tcpmd5addr, family, prefixlen);
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen, cmd.tcpm_key,
- cmd.tcpm_keylen, GFP_KERNEL);
-
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, cmd.tcpm_key,
+ return tcp_md5_do_add(sk, tcpmd5addr, family, prefixlen, cmd.tcpm_key,
cmd.tcpm_keylen, GFP_KERNEL);
}
-#endif
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
__be32 daddr, __be32 saddr,
@@ -670,6 +643,102 @@ static int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
return 0;
}
+static int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
+ const struct sock *sk, const struct sk_buff *skb)
+{
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+ const struct tcphdr *th = tcp_hdr(skb);
+ __be32 saddr, daddr;
+
+ if (sk) { /* valid for establish/request sockets */
+ saddr = sk->sk_rcv_saddr;
+ daddr = sk->sk_daddr;
+ } else {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ }
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+
+ if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int tcp_v6_md5_hash_skb(char *md5_hash,
+ const struct tcp_md5sig_key *key,
+ const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct in6_addr *saddr, *daddr;
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ if (sk) { /* valid for establish/request sockets */
+ saddr = &sk->sk_v6_rcv_saddr;
+ daddr = &sk->sk_v6_daddr;
+ } else {
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+
+ saddr = &ip6h->saddr;
+ daddr = &ip6h->daddr;
+ }
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+
+ if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+#endif
+
static int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
unsigned int remaining,
struct tcp_out_options *opts,
@@ -784,114 +853,14 @@ static __be32 *tcp_md5_send_response_write(__be32 *ptr, struct sk_buff *orig,
return tcp_v4_md5_send_response_write(ptr, orig, th, opts, sk);
}
-struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk)
+static struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk)
{
const union tcp_md5_addr *addr;
addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
return tcp_md5_do_lookup(sk, addr, AF_INET);
}
-EXPORT_SYMBOL(tcp_v4_md5_lookup);
-
-int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct sock *sk,
- const struct sk_buff *skb)
-{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
- const struct tcphdr *th = tcp_hdr(skb);
- __be32 saddr, daddr;
-
- if (sk) { /* valid for establish/request sockets */
- saddr = sk->sk_rcv_saddr;
- daddr = sk->sk_daddr;
- } else {
- const struct iphdr *iph = ip_hdr(skb);
-
- saddr = iph->saddr;
- daddr = iph->daddr;
- }
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
-
- if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
-
-#if IS_ENABLED(CONFIG_IPV6)
-int tcp_v6_md5_hash_skb(char *md5_hash,
- const struct tcp_md5sig_key *key,
- const struct sock *sk,
- const struct sk_buff *skb)
-{
- const struct in6_addr *saddr, *daddr;
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
- const struct tcphdr *th = tcp_hdr(skb);
-
- if (sk) { /* valid for establish/request sockets */
- saddr = &sk->sk_v6_rcv_saddr;
- daddr = &sk->sk_v6_daddr;
- } else {
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-
- saddr = &ip6h->saddr;
- daddr = &ip6h->daddr;
- }
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
-
- if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-EXPORT_SYMBOL_GPL(tcp_v6_md5_hash_skb);
-#endif
/* Called with rcu_read_lock() */
bool tcp_v4_inbound_md5_hash(const struct sock *sk,
@@ -994,8 +963,8 @@ bool tcp_v6_inbound_md5_hash(const struct sock *sk,
}
EXPORT_SYMBOL_GPL(tcp_v6_inbound_md5_hash);
-struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk)
+static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk)
{
return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}
@@ -1103,10 +1072,17 @@ static int tcp_md5_extopt_add_header_len(const struct sock *orig,
const struct sock *sk,
struct tcp_extopt_store *store)
{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (tp->af_specific->md5_lookup(orig, sk))
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
+ if (tcp_v6_md5_lookup(orig, sk))
+ return TCPOLEN_MD5SIG_ALIGNED;
+ } else
+#endif
+{
+ if (tcp_v4_md5_lookup(orig, sk))
return TCPOLEN_MD5SIG_ALIGNED;
+}
return 0;
}
@@ -1120,19 +1096,29 @@ static unsigned int tcp_md5_extopt_prepare(struct sk_buff *skb, u8 flags,
int ret = 0;
if (sk_fullsock(sk)) {
- struct tcp_sock *tp = tcp_sk(sk);
-
- opts->md5 = tp->af_specific->md5_lookup(sk, sk);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
+ opts->md5 = tcp_v6_md5_lookup(sk, sk);
+ else
+#endif
+ opts->md5 = tcp_v4_md5_lookup(sk, sk);
} else {
struct request_sock *req = inet_reqsk(sk);
struct sock *listener = req->rsk_listener;
+ struct inet_request_sock *ireq = inet_rsk(req);
/* Coming from tcp_make_synack, unlock is in
* tcp_md5_extopt_write
*/
rcu_read_lock();
- opts->md5 = tcp_rsk(req)->af_specific->req_md5_lookup(listener, sk);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ireq->ireq_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&ireq->ir_v6_rmt_addr))
+ opts->md5 = tcp_v6_md5_lookup(listener, sk);
+ else
+#endif
+ opts->md5 = tcp_v4_md5_lookup(listener, sk);
if (!opts->md5)
rcu_read_unlock();
@@ -1352,25 +1338,3 @@ static void tcp_md5_extopt_destroy(struct tcp_extopt_store *store)
kfree_rcu(md5_opt, rcu);
}
}
-
-const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
- .md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_parse = tcp_v4_parse_md5_keys,
-};
-
-#if IS_ENABLED(CONFIG_IPV6)
-const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
- .md5_lookup = tcp_v6_md5_lookup,
- .calc_md5_hash = tcp_v6_md5_hash_skb,
- .md5_parse = tcp_v6_parse_md5_keys,
-};
-EXPORT_SYMBOL_GPL(tcp_sock_ipv6_specific);
-
-const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
- .md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_parse = tcp_v6_parse_md5_keys,
-};
-EXPORT_SYMBOL_GPL(tcp_sock_ipv6_mapped_specific);
-#endif
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 16cbd6ec2063..890616fc5591 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -208,9 +208,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
icsk->icsk_af_ops = &ipv6_mapped;
sk->sk_backlog_rcv = tcp_v4_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
- tp->af_specific = &tcp_sock_ipv6_mapped_specific;
-#endif
err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
@@ -218,9 +215,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
icsk->icsk_ext_hdr_len = exthdrlen;
icsk->icsk_af_ops = &ipv6_specific;
sk->sk_backlog_rcv = tcp_v6_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
- tp->af_specific = &tcp_sock_ipv6_specific;
-#endif
goto failure;
}
np->saddr = sk->sk_v6_rcv_saddr;
@@ -543,10 +537,6 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
sizeof(struct ipv6hdr),
-#ifdef CONFIG_TCP_MD5SIG
- .req_md5_lookup = tcp_v6_md5_lookup,
- .calc_md5_hash = tcp_v6_md5_hash_skb,
-#endif
.init_req = tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v6_init_sequence,
@@ -821,9 +811,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
- newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
-#endif
newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
@@ -1430,10 +1417,6 @@ static int tcp_v6_init_sock(struct sock *sk)
icsk->icsk_af_ops = &ipv6_specific;
-#ifdef CONFIG_TCP_MD5SIG
- tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
-#endif
-
return 0;
}
--
2.15.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [RFC 14/14] tcp_md5: Use TCP extra-options on the input path
2017-12-18 21:50 [RFC 00/14] Generic TCP-option framework and adoption for TCP-SMC and TCP-MD5 Christoph Paasch
` (12 preceding siblings ...)
2017-12-18 21:51 ` [RFC 13/14] tcp_md5: Cleanup TCP-code Christoph Paasch
@ 2017-12-18 21:51 ` Christoph Paasch
13 siblings, 0 replies; 17+ messages in thread
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
The inbound checks are now done through the extra-option framework. For
TCP MD5 this means that the check happens a bit later in the input path
than before.
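A hypothetical illustration of how the framework's ->check() hook can
replace the hard-coded calls in tcp_v{4,6}_rcv(); the iteration helper
and the list-member names are assumed here and are not part of this
patch:

static bool tcp_extopt_check_all(struct sock *sk, const struct sk_buff *skb,
				 struct tcp_options_received *opt_rx)
{
	struct tcp_extopt_store *entry;

	/* Walk the socket's registered extra options and let each one
	 * veto the segment; returning true tells the caller to drop the
	 * skb, which is where TCP-MD5's check now runs.
	 */
	hlist_for_each_entry_rcu(entry, tcp_extopt_get_list(sk), list)
		if (entry->ops->check &&
		    entry->ops->check(sk, skb, opt_rx, entry))
			return true;

	return false;
}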
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/tcp_md5.h | 23 +----------------------
net/ipv4/tcp_input.c | 8 --------
net/ipv4/tcp_ipv4.c | 9 ---------
net/ipv4/tcp_md5.c | 29 ++++++++++++++++++++++++-----
net/ipv6/tcp_ipv6.c | 9 ---------
5 files changed, 25 insertions(+), 53 deletions(-)
diff --git a/include/linux/tcp_md5.h b/include/linux/tcp_md5.h
index 509fc36335e7..bef277f55b36 100644
--- a/include/linux/tcp_md5.h
+++ b/include/linux/tcp_md5.h
@@ -31,30 +31,9 @@ struct tcp_md5sig_key {
int tcp_md5_parse_keys(struct sock *sk, int optname, char __user *optval,
int optlen);
-bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb);
-
-bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb);
-
int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb);
int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin);
-#else
-
-static inline bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
-{
- return false;
-}
-
-static inline bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
-{
- return false;
-}
-
-#endif
-
+#endif /* CONFIG_TCP_MD5SIG */
#endif /* _LINUX_TCP_MD5_H */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index db54bdbdee51..e4de06e28a85 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3754,14 +3754,6 @@ void tcp_parse_options(const struct net *net,
TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
}
break;
-#ifdef CONFIG_TCP_MD5SIG
- case TCPOPT_MD5SIG:
- /*
- * The MD5 Hash has already been
- * checked (see tcp_v{4,6}_do_rcv()).
- */
- break;
-#endif
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 670d7751f814..707ad1a343ba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -62,7 +62,6 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
-#include <linux/tcp_md5.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -1249,11 +1248,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
struct sock *nsk;
sk = req->rsk_listener;
- if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
- sk_drops_add(sk, skb);
- reqsk_put(req);
- goto discard_it;
- }
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
@@ -1293,9 +1287,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- if (tcp_v4_inbound_md5_hash(sk, skb))
- goto discard_and_relse;
-
nf_reset(skb);
if (tcp_filter(sk, skb))
diff --git a/net/ipv4/tcp_md5.c b/net/ipv4/tcp_md5.c
index 052f5a587783..723320d0741a 100644
--- a/net/ipv4/tcp_md5.c
+++ b/net/ipv4/tcp_md5.c
@@ -29,6 +29,10 @@ static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
static bool tcp_md5sig_pool_populated;
+static bool tcp_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct tcp_extopt_store *store);
+
static unsigned int tcp_md5_extopt_prepare(struct sk_buff *skb, u8 flags,
unsigned int remaining,
struct tcp_out_options *opts,
@@ -76,6 +80,7 @@ struct tcp_md5_extopt {
static const struct tcp_extopt_ops tcp_md5_extra_ops = {
.option_kind = TCPOPT_MD5SIG,
+ .check = tcp_inbound_md5_hash,
.prepare = tcp_md5_extopt_prepare,
.write = tcp_md5_extopt_write,
.response_prepare = tcp_md5_send_response_prepare,
@@ -863,8 +868,8 @@ static struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
}
/* Called with rcu_read_lock() */
-bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
+static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
{
/* This gets called for each TCP segment that arrives
* so we want to be efficient.
@@ -918,8 +923,8 @@ bool tcp_v4_inbound_md5_hash(const struct sock *sk,
}
#if IS_ENABLED(CONFIG_IPV6)
-bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
+static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
{
const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
@@ -961,7 +966,6 @@ bool tcp_v6_inbound_md5_hash(const struct sock *sk,
return false;
}
-EXPORT_SYMBOL_GPL(tcp_v6_inbound_md5_hash);
static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
@@ -971,6 +975,21 @@ static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
EXPORT_SYMBOL_GPL(tcp_v6_md5_lookup);
#endif
+static bool tcp_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct tcp_extopt_store *store)
+{
+ if (skb->protocol == htons(ETH_P_IP)) {
+ return tcp_v4_inbound_md5_hash(sk, skb);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ return tcp_v6_inbound_md5_hash(sk, skb);
+#endif
+ }
+
+ return false;
+}
+
static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
const struct tcp_md5sig_key *key)
{
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 890616fc5591..f5dc730d3abc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -43,7 +43,6 @@
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
-#include <linux/tcp_md5.h>
#include <net/tcp.h>
#include <net/ndisc.h>
@@ -1173,11 +1172,6 @@ static int tcp_v6_rcv(struct sk_buff *skb)
struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v6_inbound_md5_hash(sk, skb)) {
- sk_drops_add(sk, skb);
- reqsk_put(req);
- goto discard_it;
- }
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
@@ -1214,9 +1208,6 @@ static int tcp_v6_rcv(struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- if (tcp_v6_inbound_md5_hash(sk, skb))
- goto discard_and_relse;
-
if (tcp_filter(sk, skb))
goto discard_and_relse;
th = (const struct tcphdr *)skb->data;
--
2.15.0
^ permalink raw reply related [flat|nested] 17+ messages in thread