* [PATCHv3 net-next 01/14] net: sched: fix coding style issues
From: Alexander Aring @ 2017-12-18 22:45 UTC (permalink / raw)
To: jhs; +Cc: xiyou.wangcong, jiri, davem, netdev, kernel, Alexander Aring
In-Reply-To: <20171218224513.29836-1-aring@mojatatu.com>
This patch fix checkpatch issues for upcomming patches according to the
sched api file. It changes mostly how to check on null pointer.
Signed-off-by: Alexander Aring <aring@mojatatu.com>
---
net/sched/sch_api.c | 2 +-
net/sched/sch_cbq.c | 12 ++++++------
net/sched/sch_gred.c | 7 ++++---
net/sched/sch_hfsc.c | 2 +-
net/sched/sch_multiq.c | 2 +-
net/sched/sch_tbf.c | 2 +-
6 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a904276b657d..b54917f4ad87 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -669,7 +669,7 @@ int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
unsigned int size = 4;
clhash->hash = qdisc_class_hash_alloc(size);
- if (clhash->hash == NULL)
+ if (!clhash->hash)
return -ENOMEM;
clhash->hashsize = size;
clhash->hashmask = size - 1;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 525eb3a6d625..0692fe35f4ec 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1150,12 +1150,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
if (err < 0)
return err;
- if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
+ if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE])
return -EINVAL;
r = nla_data(tb[TCA_CBQ_RATE]);
- if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
+ q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB]);
+ if (!q->link.R_tab)
return -EINVAL;
err = tcf_block_get(&q->link.block, &q->link.filter_list, sch);
@@ -1460,7 +1461,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
struct cbq_class *parent;
struct qdisc_rate_table *rtab = NULL;
- if (opt == NULL)
+ if (!opt)
return -EINVAL;
err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
@@ -1532,8 +1533,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (parentid == TC_H_ROOT)
return -EINVAL;
- if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
- tb[TCA_CBQ_LSSOPT] == NULL)
+ if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT])
return -EINVAL;
rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
@@ -1565,7 +1565,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (parentid) {
parent = cbq_class_lookup(q, parentid);
err = -EINVAL;
- if (parent == NULL)
+ if (!parent)
goto failure;
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index bc30f9186ac6..ccd1a00e2a9a 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -306,12 +306,13 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
struct tc_gred_sopt *sopt;
int i;
- if (dps == NULL)
+ if (!dps)
return -EINVAL;
sopt = nla_data(dps);
- if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
+ if (sopt->DPs > MAX_DPs || sopt->DPs == 0 ||
+ sopt->def_DP >= sopt->DPs)
return -EINVAL;
sch_tree_lock(sch);
@@ -470,7 +471,7 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
struct nlattr *tb[TCA_GRED_MAX + 1];
int err;
- if (opt == NULL)
+ if (!opt)
return -EINVAL;
err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d04068a97d81..94db20352f37 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1396,7 +1396,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
- if (opt == NULL || nla_len(opt) < sizeof(*qopt))
+ if (!opt || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
qopt = nla_data(opt);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 012216386c0b..37195e0c64ba 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -243,7 +243,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
q->queues = NULL;
- if (opt == NULL)
+ if (!opt)
return -EINVAL;
err = tcf_block_get(&q->block, &q->filter_list, sch);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 120f4f365967..e8f3345674c5 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -428,7 +428,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
q->qdisc = &noop_qdisc;
- if (opt == NULL)
+ if (!opt)
return -EINVAL;
q->t_c = ktime_get_ns();
--
2.11.0
^ permalink raw reply related
* [PATCHv3 net-next 00/14] net: sched: sch: introduce extack support
From: Alexander Aring @ 2017-12-18 22:44 UTC (permalink / raw)
To: jhs
Cc: xiyou.wangcong, jiri, davem, netdev, kernel, Alexander Aring,
David Ahern
Hi,
this patch series basically add support for extack in common qdisc handling.
Additional it adds extack pointer to common qdisc callback handling this
offers per qdisc implementation to setting the extack message for each
failure over netlink.
The extack message will be set deeper in qdisc functions but going not
deeper as net core api. For qdisc module callback handling, the extack
will not be set. This will be part of per qdisc extack handling.
I also want to prepare patches to handle extack per qdisc module...
so there will come a lot of more patches, just cut them down to make
it reviewable.
There are some above 80-chars width warnings, which I ignore because
it looks more ugly otherwise.
This patch-series based on patches by David Ahren which gave me some
hints how to deal with extack support.
Cc: David Ahern <dsahern@gmail.com>
changes since v3:
- remove patch 2/2 lib: nlattr: set extack msg if validate_nla fails since
David Ahren has a better solution
- Remove check on net admin permission since -EPERM indicates it already
- Change rtab to "rate table" - this is what it's stands for
- Fix cbs *not* support messages
- Fix tcf block error message for allocation, allocation will be still there
because there are multiple places which returns -ENOMEM
- Finnally also took care about sch_atm, sorry somehow I forgot this one and
I hope I didn't forgot any sch implementation to add new callback parameters
changes since v2:
- add fix coding style patch to catch all checkpatch warnings
- add patch for setting netlink extack msg if validate_nla fails
- changes in handle generic qdisc errors
- remove NL_SET_ERR_MSG from memory allocation errors
- remove NL_SET_ERR_MSG from device not found
- change STAB to table size
- add various new patches to add extack support for common
TC functions like qdisc_get_rtab, tcf_block_get, qdisc_alloc
and qdisc_create_dflt - users which are interessted in the
detailed error messages can assign extack, otherwise NULL.
- Add sch_cbq as example for qdisc_ops callback: init,
qdisc_class_ops callbacks: change and graft
- Add sch_cbs as example for qdisc_ops callback: change
- Add sch_drr as example for qdisc_class ops callbacks: tcf_block
- Alex
Alexander Aring (14):
net: sched: fix coding style issues
net: sched: sch_api: handle generic qdisc errors
net: sched: sch: add extack for init callback
net: sched: sch: add extack for change qdisc ops
net: sched: sch: add extack to change class
net: sched: sch: add extack for block callback
net: sched: sch: add extack for graft callback
net: sch: api: add extack support in qdisc_get_rtab
net: sch: api: add extack support in tcf_block_get
net: sch: api: add extack support in qdisc_alloc
net: sch: api: add extack support in qdisc_create_dflt
net: sch: sch_cbq: add extack support
net: sch: sch_cbs: add extack support
net: sch: sch_drr: add extack support
include/net/pkt_cls.h | 6 +-
include/net/pkt_sched.h | 6 +-
include/net/sch_generic.h | 21 ++++--
net/sched/act_police.c | 4 +-
net/sched/cls_api.c | 17 +++--
net/sched/sch_api.c | 172 ++++++++++++++++++++++++++++++++--------------
net/sched/sch_atm.c | 23 ++++---
net/sched/sch_cbq.c | 78 ++++++++++++++-------
net/sched/sch_cbs.c | 31 ++++++---
net/sched/sch_choke.c | 8 ++-
net/sched/sch_codel.c | 8 ++-
net/sched/sch_drr.c | 40 +++++++----
net/sched/sch_dsmark.c | 19 +++--
net/sched/sch_fifo.c | 11 +--
net/sched/sch_fq.c | 8 ++-
net/sched/sch_fq_codel.c | 13 ++--
net/sched/sch_generic.c | 25 ++++---
net/sched/sch_gred.c | 13 ++--
net/sched/sch_hfsc.c | 28 ++++----
net/sched/sch_hhf.c | 8 ++-
net/sched/sch_htb.c | 29 ++++----
net/sched/sch_ingress.c | 20 ++++--
net/sched/sch_mq.c | 8 ++-
net/sched/sch_mqprio.c | 7 +-
net/sched/sch_multiq.c | 19 ++---
net/sched/sch_netem.c | 10 +--
net/sched/sch_pie.c | 8 ++-
net/sched/sch_plug.c | 6 +-
net/sched/sch_prio.c | 18 +++--
net/sched/sch_qfq.c | 22 +++---
net/sched/sch_red.c | 13 ++--
net/sched/sch_sfb.c | 20 +++---
net/sched/sch_sfq.c | 8 ++-
net/sched/sch_tbf.c | 21 +++---
net/sched/sch_teql.c | 3 +-
35 files changed, 486 insertions(+), 265 deletions(-)
--
2.11.0
^ permalink raw reply
* Re: [PATCH 1/3] kallsyms: don't leak address when symbol not found
From: Tobin C. Harding @ 2017-12-18 22:41 UTC (permalink / raw)
To: Felix Fietkau, kernel-hardening
Cc: Steven Rostedt, Tycho Andersen, Linus Torvalds, Kees Cook,
Andrew Morton, Daniel Borkmann, Masahiro Yamada,
Alexei Starovoitov, linux-kernel, Network Development
In-Reply-To: <5b86abe3-2c9d-1396-777d-a5a8f19555ef@nbd.name>
On Mon, Dec 18, 2017, at 20:55, Felix Fietkau wrote:
> On 2017-12-18 00:53, Tobin C. Harding wrote:
> > Currently if kallsyms_lookup() fails to find the symbol then the address
> > is printed. This potentially leaks sensitive information. Instead of
> > printing the address we can return an error, giving the calling code the
> > option to print the address or print some sanitized message.
> >
> > Return error instead of printing address to argument buffer. Leave
> > buffer in a sane state.
> >
> > Signed-off-by: Tobin C. Harding <me@tobin.cc>
> I think there should be a way to keep the old behavior for debugging.
That was the intended use of
EXPORT_SYMBOL(string_is_no_symbol);
in patch 2 of this series. Then if debugging behaviour is adversely
effected one could use string_is_no_symbol() on a case by case basis to
add back in the original behaviour.
Current suggestion on list is to remove this function. Do you have a use
case in mind where debugging will break? We could add a fix to this
series if so. Otherwise next version will likely drop
string_is_no_symbol()
thanks,
Tobin.
^ permalink raw reply
* Re: [PATCH 3/3] trace: print address if symbol not found
From: Tobin C. Harding @ 2017-12-18 22:35 UTC (permalink / raw)
To: Steven Rostedt
Cc: kernel-hardening, Tycho Andersen, Linus Torvalds, Kees Cook,
Andrew Morton, Daniel Borkmann, Masahiro Yamada,
Alexei Starovoitov, linux-kernel, Network Development
In-Reply-To: <20171218114947.2c11211a@gandalf.local.home>
On Mon, Dec 18, 2017 at 11:49:47AM -0500, Steven Rostedt wrote:
> On Mon, 18 Dec 2017 10:53:32 +1100
> "Tobin C. Harding" <me@tobin.cc> wrote:
>
> > Fixes behaviour modified by: commit bd6b239cdbb2 ("kallsyms: don't leak
> > address when symbol not found")
> >
> > Previous patch changed behaviour of kallsyms function sprint_symbol() to
> > return an error code instead of printing the address if a symbol was not
> > found. Ftrace relies on the original behaviour. We should not break
> > tracing when applying the previous patch. We can maintain the original
> > behaviour by checking the return code on calls to sprint_symbol() and
> > friends.
> >
> > Check return code and print actual address on error (i.e symbol not
> > found).
> >
> > Signed-off-by: Tobin C. Harding <me@tobin.cc>
> > ---
> > kernel/trace/trace.h | 24 ++++++++++++++++++++++++
> > kernel/trace/trace_events_hist.c | 6 +++---
> > 2 files changed, 27 insertions(+), 3 deletions(-)
> >
> > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> > index 2a6d0325a761..881b1a577d75 100644
> > --- a/kernel/trace/trace.h
> > +++ b/kernel/trace/trace.h
> > @@ -1814,4 +1814,28 @@ static inline void trace_event_eval_update(struct trace_eval_map **map, int len)
> >
> > extern struct trace_iterator *tracepoint_print_iter;
> >
> > +static inline int
> > +trace_sprint_symbol(char *buffer, unsigned long address)
> > +{
> > + int ret;
> > +
> > + ret = sprint_symbol(buffer, address);
> > + if (ret == -1)
> > + ret = sprintf(buffer, "0x%lx", address);
> > +
> > + return ret;
> > +}
> > +
> > +static inline int
> > +trace_sprint_symbol_no_offset(char *buffer, unsigned long address)
> > +{
> > + int ret;
> > +
> > + ret = sprint_symbol_no_offset(buffer, address);
> > + if (ret == -1)
> > + ret = sprintf(buffer, "0x%lx", address);
> > +
> > + return ret;
> > +}
> > +
> > #endif /* _LINUX_KERNEL_TRACE_H */
> > diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> > index 1e1558c99d56..3e28522a76f4 100644
> > --- a/kernel/trace/trace_events_hist.c
> > +++ b/kernel/trace/trace_events_hist.c
> > @@ -982,7 +982,7 @@ static void hist_trigger_stacktrace_print(struct seq_file *m,
> > return;
> >
> > seq_printf(m, "%*c", 1 + spaces, ' ');
> > - sprint_symbol(str, stacktrace_entries[i]);
> > + trace_sprint_symbol_addr(str, stacktrace_entries[i]);
>
> Hmm, where is trace_sprint_symbol_addr() defined?
>
> -- Steve
Also, I missed one in kernel/trace/trace_output.c
Added for next version.
thanks,
Tobin.
^ permalink raw reply
* [Patch net-next] net_sched: properly check for empty skb array on error path
From: Cong Wang @ 2017-12-18 22:34 UTC (permalink / raw)
To: netdev; +Cc: Cong Wang, John Fastabend
First, the check of &q->ring.queue against NULL is wrong, it
is always false. We should check the value rather than the address.
Secondly, we need the same check in pfifo_fast_reset() too,
as both ->reset() and ->destroy() are called in qdisc_destroy().
Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
net/sched/sch_generic.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 981c08fe810b..876fab2604b8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -659,6 +659,12 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
struct skb_array *q = band2list(priv, band);
struct sk_buff *skb;
+ /* NULL ring is possible if destroy path is due to a failed
+ * skb_array_init() in pfifo_fast_init() case.
+ */
+ if (!q->ring.queue)
+ continue;
+
while ((skb = skb_array_consume_bh(q)) != NULL)
kfree_skb(skb);
}
@@ -719,7 +725,7 @@ static void pfifo_fast_destroy(struct Qdisc *sch)
/* NULL ring is possible if destroy path is due to a failed
* skb_array_init() in pfifo_fast_init() case.
*/
- if (!&q->ring.queue)
+ if (!q->ring.queue)
continue;
/* Destroy ring but no need to kfree_skb because a call to
* pfifo_fast_reset() has already done that work.
--
2.13.0
^ permalink raw reply related
* [PATCH 2/2] net: stmmac: Fix bad RX timestamp extraction
From: Fredrik Hallenberg @ 2017-12-18 22:34 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: Jose Abreu, David S . Miller, Giuseppe Cavallaro,
Alexandre Torgue, Fredrik Hallenberg
In-Reply-To: <20171218223400.29644-1-megahallon@gmail.com>
As noted in dwmac4_wrback_get_rx_timestamp_status the timestamp is found
in the context descriptor following the current descriptor. However the
current code looks for the context descriptor in the current
descriptor, which will always fail.
Signed-off-by: Fredrik Hallenberg <megahallon@gmail.com>
---
drivers/net/ethernet/stmicro/stmmac/common.h | 2 +-
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 5 +++--
drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 3 ++-
drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 2 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
5 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e1e5ac053..ce2ea2d49 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -409,7 +409,7 @@ struct stmmac_desc_ops {
/* get timestamp value */
u64(*get_timestamp) (void *desc, u32 ats);
/* get rx timestamp status */
- int (*get_rx_timestamp_status) (void *desc, u32 ats);
+ int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
/* Display ring */
void (*display_ring)(void *head, unsigned int size, bool rx);
/* set MSS via context descriptor */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 4b286e27c..7e089bf90 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc)
return ret;
}
-static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
+static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
+ u32 ats)
{
struct dma_desc *p = (struct dma_desc *)desc;
int ret = -EINVAL;
@@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
/* Check if timestamp is OK from context descriptor */
do {
- ret = dwmac4_rx_check_timestamp(desc);
+ ret = dwmac4_rx_check_timestamp(next_desc);
if (ret < 0)
goto exit;
i++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 7546b3664..2a828a312 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
return ns;
}
-static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats)
+static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
+ u32 ats)
{
if (ats) {
struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index f817f8f36..db4cee57b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
return ns;
}
-static int ndesc_get_rx_timestamp_status(void *desc, u32 ats)
+static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
{
struct dma_desc *p = (struct dma_desc *)desc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d7250539d..337d53d12 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -482,7 +482,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
desc = np;
/* Check if timestamp is available */
- if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) {
+ if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
shhwtstamp = skb_hwtstamps(skb);
--
2.15.1
^ permalink raw reply related
* [PATCH 1/2] net: stmmac: Fix TX timestamp calculation
From: Fredrik Hallenberg @ 2017-12-18 22:33 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: Jose Abreu, David S . Miller, Giuseppe Cavallaro,
Alexandre Torgue, Fredrik Hallenberg
When using GMAC4 the value written in PTP_SSIR should be shifted however
the shifted value is also used in subsequent calculations which results
in a bad timestamp value.
Signed-off-by: Fredrik Hallenberg <megahallon@gmail.com>
---
drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 721b61655..08c19ebd5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
{
u32 value = readl(ioaddr + PTP_TCR);
unsigned long data;
+ u32 reg_value;
/* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second
* formula = (1/ptp_clock) * 1000000000
@@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
data &= PTP_SSIR_SSINC_MASK;
+ reg_value = data;
if (gmac4)
- data = data << GMAC4_PTP_SSIR_SSINC_SHIFT;
+ reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;
- writel(data, ioaddr + PTP_SSIR);
+ writel(reg_value, ioaddr + PTP_SSIR);
return data;
}
--
2.15.1
^ permalink raw reply related
* Re: r8169 regression: UDP packets dropped intermittantly
From: Jonathan Woithe @ 2017-12-18 22:32 UTC (permalink / raw)
To: Holger Hoffstätte; +Cc: netdev, linux-kernel
In-Reply-To: <c47cfb79-afb9-e39b-4861-7c534c55bb70@applied-asynchrony.com>
Hi Holger
On Mon, Dec 18, 2017 at 02:38:53PM +0100, Holger Hoffstätte wrote:
> On 12/18/17 06:49, Jonathan Woithe wrote:
> > Resend to netdev. LKML CCed in case anyone in the wider kernel community
> > can suggest a way forward. Please CC responses if replying only to LKML.
> >
> > It seems that this 4+ year old regression in the r8169 driver (documented in
> > this thread on netdev beginning on 9 March 2013) will never be fixed,
> > despite the identification of the commit which broke it. Cards using this
> > driver will therefore remain unusable for certain workloads utilising UDP.
> (snip)
>
> Since I've seen your postings several times now with no comment or resolution
> I've decided to try your reproducer on my own systems. In short, I cannot
> reproduce any packet loss, despite having 2 (cheap) 1Gb switches between the
> two machines. Both are running 4.14.7.
Thanks for trying the test program on your system. The result indicates
that the problem might be specific to the behaviour of a particular network
variant of the r8169 chip. The systems we use are all equipped with a
PCI Netgear GA311 card, which identifies as
05:01.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL-8169
Gigabit Ethernet (rev 10)
Subsystem: Netgear GA311
Respective IDs are
05:01.0 0200: 10ec:8169 (rev 10)
Subsystem: 1385:311a
> Both NICs are onboard PCIe
This is a significant difference between your test systems and ours: the
cards we are using are PCI and are not onboard.
> Nevertheless your reproducer runs forever and all I see is 6 bytes
> request, 14 bytes response, with no drops. Not one. I tried in both
> directions - no difference.
That's very interesting. On the system noted above with the GA311 the
packet sequence certainly works most of the time. However, within an hour
the 14 byte response will not be seen by the system which sent the 6 byte
request. The slave sees the 6 byte request and sends the 14 byte response:
the problem is in the master (the system sending the 6 byte request). The
NIC in the slave or kernel version running on the slave does not affect the
result.
> I realize this doesn't actually solve your immediate problem, but it is
> nevertheless an indicator that whatever you have been observing is caused
> by something else.
The inability to trigger the problem on your systems could be due to the
NICs in use. That is an obvious difference between our system (which
reliably experiences the problem) and yours (which doesn't). This may
indicate that only certain variants of the r8169 chip are affected, which
obviously complicates things.
In any case, this tester (and the production program with which the problem
was first noticed) work perfectly until commit
da78dbff2e05630921c551dbbc70a4b7981a8fff (identified with git bisect).
Furthermore, when the pre-da78dbff...981a8fff driver was ported to 4.3 as a
test the problem was resolved, verified over a week of continuous testing;
the standard 4.3 reliably triggered the problem within minutes. Of course
the ported driver isn't a viable long term solution since it's essentially
an out of tree driver.
It's hard to see how this problem is unrelated to da78dbff...981a8fff.
Before this commit, everything worked fine. While keeping everything else on
the system unchanged, applying this single commit to the r8169 driver causes
the problem.
Thank you again for running the tests.
Regards
jonathan
^ permalink raw reply
* Re: [trivial PATCH] treewide: Align function definition open/close braces
From: Alexandre Belloni @ 2017-12-18 22:15 UTC (permalink / raw)
To: Joe Perches
Cc: linux-rtc, alsa-devel, linuxppc-dev, Jiri Kosina, linux-scsi,
MPT-FusionLinux.pdl, acpi4asus-user, linux-wireless, linux-kernel,
dri-devel, platform-driver-x86, linux-xfs, linux-acpi,
linux-audit, amd-gfx, netdev, linux-fsdevel, Linus Torvalds,
ocfs2-devel, linux-media
In-Reply-To: <1513556924.31581.51.camel@perches.com>
On 17/12/2017 at 16:28:44 -0800, Joe Perches wrote:
> Some functions definitions have either the initial open brace and/or
> the closing brace outside of column 1.
>
> Move those braces to column 1.
>
> This allows various function analyzers like gnu complexity to work
> properly for these modified functions.
>
> Miscellanea:
>
> o Remove extra trailing ; and blank line from xfs_agf_verify
>
> Signed-off-by: Joe Perches <joe@perches.com>
For RTC:
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
--
Alexandre Belloni, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
^ permalink raw reply
* Re: [trivial PATCH] treewide: Align function definition open/close braces
From: Dave Chinner @ 2017-12-18 22:10 UTC (permalink / raw)
To: Joe Perches
Cc: Jiri Kosina, Linus Torvalds, linux-kernel, linux-acpi, amd-gfx,
dri-devel, linux-media, MPT-FusionLinux.pdl, linux-scsi, netdev,
linux-wireless, acpi4asus-user, platform-driver-x86, linux-rtc,
linux-fsdevel, ocfs2-devel, linux-xfs, linux-audit, alsa-devel,
linuxppc-dev
In-Reply-To: <1513556924.31581.51.camel@perches.com>
On Sun, Dec 17, 2017 at 04:28:44PM -0800, Joe Perches wrote:
> Some functions definitions have either the initial open brace and/or
> the closing brace outside of column 1.
>
> Move those braces to column 1.
>
> This allows various function analyzers like gnu complexity to work
> properly for these modified functions.
>
> Miscellanea:
>
> o Remove extra trailing ; and blank line from xfs_agf_verify
>
> Signed-off-by: Joe Perches <joe@perches.com>
> ---
....
XFS bits look fine.
Acked-by: Dave Chinner <dchinner@redhat.com>
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply
* Re: [PATCH] net: thunderx: add support for rgmii internal delay
From: Tim Harvey @ 2017-12-18 22:10 UTC (permalink / raw)
To: Andrew Lunn, Sunil Goutham; +Cc: netdev
In-Reply-To: <20171214084525.GA19186@lunn.ch>
On Thu, Dec 14, 2017 at 12:45 AM, Andrew Lunn <andrew@lunn.ch> wrote:
> On Wed, Dec 13, 2017 at 03:28:33PM -0800, Tim Harvey wrote:
>> On Wed, Dec 13, 2017 at 3:10 AM, Andrew Lunn <andrew@lunn.ch> wrote:
>> >> +void xcv_init_hw(int phy_mode)
>> >> {
>> >> u64 cfg;
>> >>
>> >> @@ -81,12 +81,31 @@ void xcv_init_hw(void)
>> >> /* Wait for DLL to lock */
>> >> msleep(1);
>> >>
>> >> - /* Configure DLL - enable or bypass
>> >> - * TX no bypass, RX bypass
>> >> - */
>> >> + /* enable/bypass DLL providing MAC based internal TX/RX delays */
>> >> cfg = readq_relaxed(xcv->reg_base + XCV_DLL_CTL);
>> >> - cfg &= ~0xFF03;
>> >> - cfg |= CLKRX_BYP;
>> >> + cfg &= ~0xffff00;
>> >> + switch (phy_mode) {
>> >> + /* RX and TX delays are added by the MAC */
>> >> + case PHY_INTERFACE_MODE_RGMII:
>> >> + break;
>> >> + /* internal RX and TX delays provided by the PHY */
>> >> + case PHY_INTERFACE_MODE_RGMII_ID:
>> >> + cfg |= CLKRX_BYP;
>> >> + cfg |= CLKTX_BYP;
>> >> + break;
>> >> + /* internal RX delay provided by the PHY, the MAC
>> >> + * should not add an RX delay in this case
>> >> + */
>> >> + case PHY_INTERFACE_MODE_RGMII_RXID:
>> >> + cfg |= CLKRX_BYP;
>> >> + break;
>> >> + /* internal TX delay provided by the PHY, the MAC
>> >> + * should not add an TX delay in this case
>> >> + */
>> >> + case PHY_INTERFACE_MODE_RGMII_TXID:
>> >> + cfg |= CLKRX_BYP;
>> >> + break;
>> >> + }
>> >
>> > Hi Tim
>> >
>> > This i don't get. Normally, you leave the PHY to handle delays, if
>> > needed. The MAC should not add any. Here you seem to assume a delay is
>> > always needed, and if the PHY is not providing it, the MAC should.
>> >
>> > Andrew
>>
>> Andrew,
>>
>> The thunder RGX inserts a delay via an on-board DLL. The 'bypass'
>> register will bypass this DLL and not insert a delay from the MAC
>> side. By default out of reset CLKTX_BYP=1 causing the RGX transmit
>> interface to not introduce a delay and CLKRX_BYP=0 causing the RGX
>> receive interface to introduce a delay.
>
> Hi Tim
>
> So the MAC by default is doing PHY_INTERFACE_MODE_RGMII_RXID. And it
> calls phy_connect_direct() passing PHY_INTERFACE_MODE_RGMII. It does
> not get anything from device tree. So it looks like we have a chance
> to clean this up.
>
> So the correct thing to do is set the MAC to PHY_INTERFACE_MODE_RGMII,
> i.e. no delays. By default call phy_connect_direct()
> PHY_INTERFACE_MODE_RGMII_RXID. That should give you the same behaviour
> as today.
I don't understand - PHY_INTERFACE_MODE_RGMII means delays are added by the MAC
The way I see it today the driver is making an assumption that is not
always correct. What is the right way to configure a MAC when no
phy-mode is present in the dts? I assumed it would be RGMII_ID such
that the MAC introduces no delay.
>
> Then add code to look in device tree, to find a per board setting. In
> your case, you want PHY_INTERFACE_MODE_RGMII_ID. And make sure the PHY
> driver respects the value passed.
>
> Andrew
>
Should I be attempting to make the default if no phy-mode is in the
dts be PHY_INTERFACE_MODE_RGMII_RXID so that existing boards do not
break (as I assume they configure the phy's that way in firmware).
My original goal was to make the bgx driver flexible for different
delay configurations as well as allow phy drivers to be used. However
I found that the dp83867 driver doesn't work with my board anyway as
it issues a soft reset that disables CLKOUT which I setup in firmware
and require. Is it standard for phy drivers to issue hard or soft
resets during init and if so how do boards deal with custom LED or
CLKOUT configs as those don't seem to be supported by phy drivers? I
only have experience with phy drivers that support an optional hard
reset and if you don't want to reset any custom regs you simply don't
expose the phy_rst gpio (assuming there is on) to the driver by not
defining it in device-tree.
Tim
^ permalink raw reply
* Re: [trivial PATCH] treewide: Align function definition open/close braces
From: Darrick J. Wong @ 2017-12-18 22:08 UTC (permalink / raw)
To: Joe Perches
Cc: linux-rtc, alsa-devel, linuxppc-dev, Jiri Kosina, linux-scsi,
MPT-FusionLinux.pdl, acpi4asus-user, linux-wireless, linux-kernel,
dri-devel, platform-driver-x86, linux-xfs, linux-acpi,
linux-audit, amd-gfx, netdev, linux-fsdevel, Linus Torvalds,
ocfs2-devel, linux-media
In-Reply-To: <1513556924.31581.51.camel@perches.com>
On Sun, Dec 17, 2017 at 04:28:44PM -0800, Joe Perches wrote:
> Some functions definitions have either the initial open brace and/or
> the closing brace outside of column 1.
>
> Move those braces to column 1.
>
> This allows various function analyzers like gnu complexity to work
> properly for these modified functions.
>
> Miscellanea:
>
> o Remove extra trailing ; and blank line from xfs_agf_verify
>
> Signed-off-by: Joe Perches <joe@perches.com>
> ---
> git diff -w shows no difference other than the above 'Miscellanea'
>
> (this is against -next, but it applies against Linus' tree
> with a couple offsets)
>
> arch/x86/include/asm/atomic64_32.h | 2 +-
> drivers/acpi/custom_method.c | 2 +-
> drivers/acpi/fan.c | 2 +-
> drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +-
> drivers/media/i2c/msp3400-kthreads.c | 2 +-
> drivers/message/fusion/mptsas.c | 2 +-
> drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c | 2 +-
> drivers/net/wireless/ath/ath9k/xmit.c | 2 +-
> drivers/platform/x86/eeepc-laptop.c | 2 +-
> drivers/rtc/rtc-ab-b5ze-s3.c | 2 +-
> drivers/scsi/dpt_i2o.c | 2 +-
> drivers/scsi/sym53c8xx_2/sym_glue.c | 2 +-
> fs/locks.c | 2 +-
> fs/ocfs2/stack_user.c | 2 +-
> fs/xfs/libxfs/xfs_alloc.c | 5 ++---
> fs/xfs/xfs_export.c | 2 +-
> kernel/audit.c | 6 +++---
> kernel/trace/trace_printk.c | 4 ++--
> lib/raid6/sse2.c | 14 +++++++-------
> sound/soc/fsl/fsl_dma.c | 2 +-
> 20 files changed, 30 insertions(+), 31 deletions(-)
>
> diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
> index 97c46b8169b7..d4d4883080fa 100644
> --- a/arch/x86/include/asm/atomic64_32.h
> +++ b/arch/x86/include/asm/atomic64_32.h
> @@ -122,7 +122,7 @@ static inline long long atomic64_read(const atomic64_t *v)
> long long r;
> alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
> return r;
> - }
> +}
>
> /**
> * atomic64_add_return - add and return
> diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c
> index c68e72414a67..e967c1173ba3 100644
> --- a/drivers/acpi/custom_method.c
> +++ b/drivers/acpi/custom_method.c
> @@ -94,7 +94,7 @@ static void __exit acpi_custom_method_exit(void)
> {
> if (cm_dentry)
> debugfs_remove(cm_dentry);
> - }
> +}
>
> module_init(acpi_custom_method_init);
> module_exit(acpi_custom_method_exit);
> diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
> index 6cf4988206f2..3563103590c6 100644
> --- a/drivers/acpi/fan.c
> +++ b/drivers/acpi/fan.c
> @@ -219,7 +219,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
> return fan_set_state_acpi4(device, state);
> else
> return fan_set_state(device, state);
> - }
> +}
>
> static const struct thermal_cooling_device_ops fan_cooling_ops = {
> .get_max_state = fan_get_max_state,
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
> index d1488d5ee028..1e0d1e7c5324 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> @@ -461,7 +461,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
> ******************************************************************************/
>
> struct dc *dc_create(const struct dc_init_data *init_params)
> - {
> +{
> struct dc *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
> unsigned int full_pipe_count;
>
> diff --git a/drivers/media/i2c/msp3400-kthreads.c b/drivers/media/i2c/msp3400-kthreads.c
> index 4dd01e9f553b..dc6cb8d475b3 100644
> --- a/drivers/media/i2c/msp3400-kthreads.c
> +++ b/drivers/media/i2c/msp3400-kthreads.c
> @@ -885,7 +885,7 @@ static int msp34xxg_modus(struct i2c_client *client)
> }
>
> static void msp34xxg_set_source(struct i2c_client *client, u16 reg, int in)
> - {
> +{
> struct msp_state *state = to_state(i2c_get_clientdata(client));
> int source, matrix;
>
> diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
> index 345f6035599e..69a62d23514b 100644
> --- a/drivers/message/fusion/mptsas.c
> +++ b/drivers/message/fusion/mptsas.c
> @@ -2968,7 +2968,7 @@ mptsas_exp_repmanufacture_info(MPT_ADAPTER *ioc,
> mutex_unlock(&ioc->sas_mgmt.mutex);
> out:
> return ret;
> - }
> +}
>
> static void
> mptsas_parse_device_info(struct sas_identify *identify,
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
> index 3dd973475125..0ea141ece19e 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
> @@ -603,7 +603,7 @@ static struct uni_table_desc *nx_get_table_desc(const u8 *unirom, int section)
>
> static int
> netxen_nic_validate_header(struct netxen_adapter *adapter)
> - {
> +{
> const u8 *unirom = adapter->fw->data;
> struct uni_table_desc *directory = (struct uni_table_desc *) &unirom[0];
> u32 fw_file_size = adapter->fw->size;
> diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
> index bd438062a6db..baedc7186b10 100644
> --- a/drivers/net/wireless/ath/ath9k/xmit.c
> +++ b/drivers/net/wireless/ath/ath9k/xmit.c
> @@ -196,7 +196,7 @@ ath_tid_pull(struct ath_atx_tid *tid)
> }
>
> return skb;
> - }
> +}
>
> static struct sk_buff *ath_tid_dequeue(struct ath_atx_tid *tid)
> {
> diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
> index 5a681962899c..4c38904a8a32 100644
> --- a/drivers/platform/x86/eeepc-laptop.c
> +++ b/drivers/platform/x86/eeepc-laptop.c
> @@ -492,7 +492,7 @@ static void eeepc_platform_exit(struct eeepc_laptop *eeepc)
> * potentially bad time, such as a timer interrupt.
> */
> static void tpd_led_update(struct work_struct *work)
> - {
> +{
> struct eeepc_laptop *eeepc;
>
> eeepc = container_of(work, struct eeepc_laptop, tpd_led_work);
> diff --git a/drivers/rtc/rtc-ab-b5ze-s3.c b/drivers/rtc/rtc-ab-b5ze-s3.c
> index a319bf1e49de..ef5c16dfabfa 100644
> --- a/drivers/rtc/rtc-ab-b5ze-s3.c
> +++ b/drivers/rtc/rtc-ab-b5ze-s3.c
> @@ -648,7 +648,7 @@ static int abb5zes3_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
> ret);
>
> return ret;
> - }
> +}
>
> /* Enable or disable battery low irq generation */
> static inline int _abb5zes3_rtc_battery_low_irq_enable(struct regmap *regmap,
> diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
> index fd172b0890d3..a00d822e3142 100644
> --- a/drivers/scsi/dpt_i2o.c
> +++ b/drivers/scsi/dpt_i2o.c
> @@ -3524,7 +3524,7 @@ static int adpt_i2o_systab_send(adpt_hba* pHba)
> #endif
>
> return ret;
> - }
> +}
>
>
> /*============================================================================
> diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
> index 791a2182de53..7320d5fe4cbc 100644
> --- a/drivers/scsi/sym53c8xx_2/sym_glue.c
> +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
> @@ -1393,7 +1393,7 @@ static struct Scsi_Host *sym_attach(struct scsi_host_template *tpnt, int unit,
> scsi_host_put(shost);
>
> return NULL;
> - }
> +}
>
>
> /*
> diff --git a/fs/locks.c b/fs/locks.c
> index 21b4dfa289ee..d2399d001afe 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -559,7 +559,7 @@ static const struct lock_manager_operations lease_manager_ops = {
> * Initialize a lease, use the default lock manager operations
> */
> static int lease_init(struct file *filp, long type, struct file_lock *fl)
> - {
> +{
> if (assign_type(fl, type) != 0)
> return -EINVAL;
>
> diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
> index dae9eb7c441e..d2fb97b173da 100644
> --- a/fs/ocfs2/stack_user.c
> +++ b/fs/ocfs2/stack_user.c
> @@ -398,7 +398,7 @@ static int ocfs2_control_do_setnode_msg(struct file *file,
>
> static int ocfs2_control_do_setversion_msg(struct file *file,
> struct ocfs2_control_message_setv *msg)
> - {
> +{
> long major, minor;
> char *ptr = NULL;
> struct ocfs2_control_private *p = file->private_data;
> diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
> index 0da80019a917..217108f765d5 100644
> --- a/fs/xfs/libxfs/xfs_alloc.c
> +++ b/fs/xfs/libxfs/xfs_alloc.c
> @@ -2401,7 +2401,7 @@ static bool
> xfs_agf_verify(
> struct xfs_mount *mp,
> struct xfs_buf *bp)
> - {
> +{
> struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
>
> if (xfs_sb_version_hascrc(&mp->m_sb)) {
> @@ -2449,8 +2449,7 @@ xfs_agf_verify(
> be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
> return false;
>
> - return true;;
> -
> + return true;
> }
>
> static void
> diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
> index fe1bfee35898..7d5c355d78b5 100644
> --- a/fs/xfs/xfs_export.c
> +++ b/fs/xfs/xfs_export.c
> @@ -122,7 +122,7 @@ xfs_nfs_get_inode(
> struct super_block *sb,
> u64 ino,
> u32 generation)
> - {
> +{
> xfs_mount_t *mp = XFS_M(sb);
> xfs_inode_t *ip;
> int error;
The xfs bits look ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
--D
> diff --git a/kernel/audit.c b/kernel/audit.c
> index 227db99b0f19..d97e8f0f73ca 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -443,15 +443,15 @@ static int audit_set_failure(u32 state)
> * Drop any references inside the auditd connection tracking struct and free
> * the memory.
> */
> - static void auditd_conn_free(struct rcu_head *rcu)
> - {
> +static void auditd_conn_free(struct rcu_head *rcu)
> +{
> struct auditd_connection *ac;
>
> ac = container_of(rcu, struct auditd_connection, rcu);
> put_pid(ac->pid);
> put_net(ac->net);
> kfree(ac);
> - }
> +}
>
> /**
> * auditd_set - Set/Reset the auditd connection state
> diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
> index ad1d6164e946..50f44b7b2b32 100644
> --- a/kernel/trace/trace_printk.c
> +++ b/kernel/trace/trace_printk.c
> @@ -196,7 +196,7 @@ struct notifier_block module_trace_bprintk_format_nb = {
> };
>
> int __trace_bprintk(unsigned long ip, const char *fmt, ...)
> - {
> +{
> int ret;
> va_list ap;
>
> @@ -214,7 +214,7 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...)
> EXPORT_SYMBOL_GPL(__trace_bprintk);
>
> int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap)
> - {
> +{
> if (unlikely(!fmt))
> return 0;
>
> diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
> index 1d2276b007ee..8191e1d0d2fb 100644
> --- a/lib/raid6/sse2.c
> +++ b/lib/raid6/sse2.c
> @@ -91,7 +91,7 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
>
> static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
> size_t bytes, void **ptrs)
> - {
> +{
> u8 **dptr = (u8 **)ptrs;
> u8 *p, *q;
> int d, z, z0;
> @@ -200,9 +200,9 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
> kernel_fpu_end();
> }
>
> - static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
> +static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
> size_t bytes, void **ptrs)
> - {
> +{
> u8 **dptr = (u8 **)ptrs;
> u8 *p, *q;
> int d, z, z0;
> @@ -265,7 +265,7 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
>
> asm volatile("sfence" : : : "memory");
> kernel_fpu_end();
> - }
> +}
>
> const struct raid6_calls raid6_sse2x2 = {
> raid6_sse22_gen_syndrome,
> @@ -366,9 +366,9 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
> kernel_fpu_end();
> }
>
> - static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
> +static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
> size_t bytes, void **ptrs)
> - {
> +{
> u8 **dptr = (u8 **)ptrs;
> u8 *p, *q;
> int d, z, z0;
> @@ -471,7 +471,7 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
> }
> asm volatile("sfence" : : : "memory");
> kernel_fpu_end();
> - }
> +}
>
>
> const struct raid6_calls raid6_sse2x4 = {
> diff --git a/sound/soc/fsl/fsl_dma.c b/sound/soc/fsl/fsl_dma.c
> index 0c11f434a374..ec619f51d336 100644
> --- a/sound/soc/fsl/fsl_dma.c
> +++ b/sound/soc/fsl/fsl_dma.c
> @@ -879,7 +879,7 @@ static const struct snd_pcm_ops fsl_dma_ops = {
> };
>
> static int fsl_soc_dma_probe(struct platform_device *pdev)
> - {
> +{
> struct dma_object *dma;
> struct device_node *np = pdev->dev.of_node;
> struct device_node *ssi_np;
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [trivial PATCH] treewide: Align function definition open/close braces
From: Alex Deucher @ 2017-12-18 22:07 UTC (permalink / raw)
To: Joe Perches
Cc: linux-rtc-u79uwXL29TY76Z2rM5mHXA,
alsa-devel-K7yf7f+aM1XWsZ/bQMPhNw@public.gmane.org, Jiri Kosina,
Linux SCSI List, MPT-FusionLinux.pdl-dY08KVG/lbpWk0Htik3J/w,
linuxppc-dev, acpi4asus-user-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
Linux Wireless List, LKML, Maling list - DRI developers,
platform-driver-x86-u79uwXL29TY76Z2rM5mHXA,
linux-xfs-u79uwXL29TY76Z2rM5mHXA, Linux ACPI,
linux-audit-H+wXaHxf7aLQT0dZR+AlfA, amd-gfx list,
Network Development, linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
Linus Torvalds, ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g, linux-media
In-Reply-To: <1513556924.31581.51.camel-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
On Sun, Dec 17, 2017 at 7:28 PM, Joe Perches <joe@perches.com> wrote:
> Some functions definitions have either the initial open brace and/or
> the closing brace outside of column 1.
>
> Move those braces to column 1.
>
> This allows various function analyzers like gnu complexity to work
> properly for these modified functions.
>
> Miscellanea:
>
> o Remove extra trailing ; and blank line from xfs_agf_verify
>
> Signed-off-by: Joe Perches <joe@perches.com>
> ---
> git diff -w shows no difference other than the above 'Miscellanea'
>
> (this is against -next, but it applies against Linus' tree
> with a couple offsets)
>
> arch/x86/include/asm/atomic64_32.h | 2 +-
> drivers/acpi/custom_method.c | 2 +-
> drivers/acpi/fan.c | 2 +-
> drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +-
For amdgpu:
Acked-by: Alex Deucher <alexander.deucher@amd.com>
> drivers/media/i2c/msp3400-kthreads.c | 2 +-
> drivers/message/fusion/mptsas.c | 2 +-
> drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c | 2 +-
> drivers/net/wireless/ath/ath9k/xmit.c | 2 +-
> drivers/platform/x86/eeepc-laptop.c | 2 +-
> drivers/rtc/rtc-ab-b5ze-s3.c | 2 +-
> drivers/scsi/dpt_i2o.c | 2 +-
> drivers/scsi/sym53c8xx_2/sym_glue.c | 2 +-
> fs/locks.c | 2 +-
> fs/ocfs2/stack_user.c | 2 +-
> fs/xfs/libxfs/xfs_alloc.c | 5 ++---
> fs/xfs/xfs_export.c | 2 +-
> kernel/audit.c | 6 +++---
> kernel/trace/trace_printk.c | 4 ++--
> lib/raid6/sse2.c | 14 +++++++-------
> sound/soc/fsl/fsl_dma.c | 2 +-
> 20 files changed, 30 insertions(+), 31 deletions(-)
>
> diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
> index 97c46b8169b7..d4d4883080fa 100644
> --- a/arch/x86/include/asm/atomic64_32.h
> +++ b/arch/x86/include/asm/atomic64_32.h
> @@ -122,7 +122,7 @@ static inline long long atomic64_read(const atomic64_t *v)
> long long r;
> alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
> return r;
> - }
> +}
>
> /**
> * atomic64_add_return - add and return
> diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c
> index c68e72414a67..e967c1173ba3 100644
> --- a/drivers/acpi/custom_method.c
> +++ b/drivers/acpi/custom_method.c
> @@ -94,7 +94,7 @@ static void __exit acpi_custom_method_exit(void)
> {
> if (cm_dentry)
> debugfs_remove(cm_dentry);
> - }
> +}
>
> module_init(acpi_custom_method_init);
> module_exit(acpi_custom_method_exit);
> diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
> index 6cf4988206f2..3563103590c6 100644
> --- a/drivers/acpi/fan.c
> +++ b/drivers/acpi/fan.c
> @@ -219,7 +219,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
> return fan_set_state_acpi4(device, state);
> else
> return fan_set_state(device, state);
> - }
> +}
>
> static const struct thermal_cooling_device_ops fan_cooling_ops = {
> .get_max_state = fan_get_max_state,
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
> index d1488d5ee028..1e0d1e7c5324 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> @@ -461,7 +461,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
> ******************************************************************************/
>
> struct dc *dc_create(const struct dc_init_data *init_params)
> - {
> +{
> struct dc *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
> unsigned int full_pipe_count;
>
> diff --git a/drivers/media/i2c/msp3400-kthreads.c b/drivers/media/i2c/msp3400-kthreads.c
> index 4dd01e9f553b..dc6cb8d475b3 100644
> --- a/drivers/media/i2c/msp3400-kthreads.c
> +++ b/drivers/media/i2c/msp3400-kthreads.c
> @@ -885,7 +885,7 @@ static int msp34xxg_modus(struct i2c_client *client)
> }
>
> static void msp34xxg_set_source(struct i2c_client *client, u16 reg, int in)
> - {
> +{
> struct msp_state *state = to_state(i2c_get_clientdata(client));
> int source, matrix;
>
> diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
> index 345f6035599e..69a62d23514b 100644
> --- a/drivers/message/fusion/mptsas.c
> +++ b/drivers/message/fusion/mptsas.c
> @@ -2968,7 +2968,7 @@ mptsas_exp_repmanufacture_info(MPT_ADAPTER *ioc,
> mutex_unlock(&ioc->sas_mgmt.mutex);
> out:
> return ret;
> - }
> +}
>
> static void
> mptsas_parse_device_info(struct sas_identify *identify,
> diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
> index 3dd973475125..0ea141ece19e 100644
> --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
> +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
> @@ -603,7 +603,7 @@ static struct uni_table_desc *nx_get_table_desc(const u8 *unirom, int section)
>
> static int
> netxen_nic_validate_header(struct netxen_adapter *adapter)
> - {
> +{
> const u8 *unirom = adapter->fw->data;
> struct uni_table_desc *directory = (struct uni_table_desc *) &unirom[0];
> u32 fw_file_size = adapter->fw->size;
> diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
> index bd438062a6db..baedc7186b10 100644
> --- a/drivers/net/wireless/ath/ath9k/xmit.c
> +++ b/drivers/net/wireless/ath/ath9k/xmit.c
> @@ -196,7 +196,7 @@ ath_tid_pull(struct ath_atx_tid *tid)
> }
>
> return skb;
> - }
> +}
>
> static struct sk_buff *ath_tid_dequeue(struct ath_atx_tid *tid)
> {
> diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
> index 5a681962899c..4c38904a8a32 100644
> --- a/drivers/platform/x86/eeepc-laptop.c
> +++ b/drivers/platform/x86/eeepc-laptop.c
> @@ -492,7 +492,7 @@ static void eeepc_platform_exit(struct eeepc_laptop *eeepc)
> * potentially bad time, such as a timer interrupt.
> */
> static void tpd_led_update(struct work_struct *work)
> - {
> +{
> struct eeepc_laptop *eeepc;
>
> eeepc = container_of(work, struct eeepc_laptop, tpd_led_work);
> diff --git a/drivers/rtc/rtc-ab-b5ze-s3.c b/drivers/rtc/rtc-ab-b5ze-s3.c
> index a319bf1e49de..ef5c16dfabfa 100644
> --- a/drivers/rtc/rtc-ab-b5ze-s3.c
> +++ b/drivers/rtc/rtc-ab-b5ze-s3.c
> @@ -648,7 +648,7 @@ static int abb5zes3_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
> ret);
>
> return ret;
> - }
> +}
>
> /* Enable or disable battery low irq generation */
> static inline int _abb5zes3_rtc_battery_low_irq_enable(struct regmap *regmap,
> diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
> index fd172b0890d3..a00d822e3142 100644
> --- a/drivers/scsi/dpt_i2o.c
> +++ b/drivers/scsi/dpt_i2o.c
> @@ -3524,7 +3524,7 @@ static int adpt_i2o_systab_send(adpt_hba* pHba)
> #endif
>
> return ret;
> - }
> +}
>
>
> /*============================================================================
> diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
> index 791a2182de53..7320d5fe4cbc 100644
> --- a/drivers/scsi/sym53c8xx_2/sym_glue.c
> +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
> @@ -1393,7 +1393,7 @@ static struct Scsi_Host *sym_attach(struct scsi_host_template *tpnt, int unit,
> scsi_host_put(shost);
>
> return NULL;
> - }
> +}
>
>
> /*
> diff --git a/fs/locks.c b/fs/locks.c
> index 21b4dfa289ee..d2399d001afe 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -559,7 +559,7 @@ static const struct lock_manager_operations lease_manager_ops = {
> * Initialize a lease, use the default lock manager operations
> */
> static int lease_init(struct file *filp, long type, struct file_lock *fl)
> - {
> +{
> if (assign_type(fl, type) != 0)
> return -EINVAL;
>
> diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
> index dae9eb7c441e..d2fb97b173da 100644
> --- a/fs/ocfs2/stack_user.c
> +++ b/fs/ocfs2/stack_user.c
> @@ -398,7 +398,7 @@ static int ocfs2_control_do_setnode_msg(struct file *file,
>
> static int ocfs2_control_do_setversion_msg(struct file *file,
> struct ocfs2_control_message_setv *msg)
> - {
> +{
> long major, minor;
> char *ptr = NULL;
> struct ocfs2_control_private *p = file->private_data;
> diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
> index 0da80019a917..217108f765d5 100644
> --- a/fs/xfs/libxfs/xfs_alloc.c
> +++ b/fs/xfs/libxfs/xfs_alloc.c
> @@ -2401,7 +2401,7 @@ static bool
> xfs_agf_verify(
> struct xfs_mount *mp,
> struct xfs_buf *bp)
> - {
> +{
> struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
>
> if (xfs_sb_version_hascrc(&mp->m_sb)) {
> @@ -2449,8 +2449,7 @@ xfs_agf_verify(
> be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
> return false;
>
> - return true;;
> -
> + return true;
> }
>
> static void
> diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
> index fe1bfee35898..7d5c355d78b5 100644
> --- a/fs/xfs/xfs_export.c
> +++ b/fs/xfs/xfs_export.c
> @@ -122,7 +122,7 @@ xfs_nfs_get_inode(
> struct super_block *sb,
> u64 ino,
> u32 generation)
> - {
> +{
> xfs_mount_t *mp = XFS_M(sb);
> xfs_inode_t *ip;
> int error;
> diff --git a/kernel/audit.c b/kernel/audit.c
> index 227db99b0f19..d97e8f0f73ca 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -443,15 +443,15 @@ static int audit_set_failure(u32 state)
> * Drop any references inside the auditd connection tracking struct and free
> * the memory.
> */
> - static void auditd_conn_free(struct rcu_head *rcu)
> - {
> +static void auditd_conn_free(struct rcu_head *rcu)
> +{
> struct auditd_connection *ac;
>
> ac = container_of(rcu, struct auditd_connection, rcu);
> put_pid(ac->pid);
> put_net(ac->net);
> kfree(ac);
> - }
> +}
>
> /**
> * auditd_set - Set/Reset the auditd connection state
> diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
> index ad1d6164e946..50f44b7b2b32 100644
> --- a/kernel/trace/trace_printk.c
> +++ b/kernel/trace/trace_printk.c
> @@ -196,7 +196,7 @@ struct notifier_block module_trace_bprintk_format_nb = {
> };
>
> int __trace_bprintk(unsigned long ip, const char *fmt, ...)
> - {
> +{
> int ret;
> va_list ap;
>
> @@ -214,7 +214,7 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...)
> EXPORT_SYMBOL_GPL(__trace_bprintk);
>
> int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap)
> - {
> +{
> if (unlikely(!fmt))
> return 0;
>
> diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
> index 1d2276b007ee..8191e1d0d2fb 100644
> --- a/lib/raid6/sse2.c
> +++ b/lib/raid6/sse2.c
> @@ -91,7 +91,7 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
>
> static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
> size_t bytes, void **ptrs)
> - {
> +{
> u8 **dptr = (u8 **)ptrs;
> u8 *p, *q;
> int d, z, z0;
> @@ -200,9 +200,9 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
> kernel_fpu_end();
> }
>
> - static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
> +static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
> size_t bytes, void **ptrs)
> - {
> +{
> u8 **dptr = (u8 **)ptrs;
> u8 *p, *q;
> int d, z, z0;
> @@ -265,7 +265,7 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
>
> asm volatile("sfence" : : : "memory");
> kernel_fpu_end();
> - }
> +}
>
> const struct raid6_calls raid6_sse2x2 = {
> raid6_sse22_gen_syndrome,
> @@ -366,9 +366,9 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
> kernel_fpu_end();
> }
>
> - static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
> +static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
> size_t bytes, void **ptrs)
> - {
> +{
> u8 **dptr = (u8 **)ptrs;
> u8 *p, *q;
> int d, z, z0;
> @@ -471,7 +471,7 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
> }
> asm volatile("sfence" : : : "memory");
> kernel_fpu_end();
> - }
> +}
>
>
> const struct raid6_calls raid6_sse2x4 = {
> diff --git a/sound/soc/fsl/fsl_dma.c b/sound/soc/fsl/fsl_dma.c
> index 0c11f434a374..ec619f51d336 100644
> --- a/sound/soc/fsl/fsl_dma.c
> +++ b/sound/soc/fsl/fsl_dma.c
> @@ -879,7 +879,7 @@ static const struct snd_pcm_ops fsl_dma_ops = {
> };
>
> static int fsl_soc_dma_probe(struct platform_device *pdev)
> - {
> +{
> struct dma_object *dma;
> struct device_node *np = pdev->dev.of_node;
> struct device_node *ssi_np;
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply
* [RFC 12/14] tcp_md5: Use tcp_extra_options in output path
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
In-Reply-To: <20171218215109.38700-1-cpaasch@apple.com>
This patch starts making use of the extra_option framework for TCP_MD5.
One tricky part is that extra_options are called at the end of the
tcp_syn_options(), while TCP_MD5 is called at the beginning.
TCP_MD5 is called at the beginning because it wants to disable
TCP-timestamps (for option-space reasons). So, in the _prepare-function
of the extra options we need to undo the work that was done when
enabling TCP timestamps.
Another thing to note is that in tcp_v4_send_reset (and its IPv6
counterpart), we were looking previously for the listening-socket (if sk
== NULL) in case there was an MD5 signature in the TCP-option space of
the incoming packet.
With the extra-option framework we can't do this anymore, because
extra-options are part of the TCP-socket's tcp_option_list. If there is
no socket, it means we can't parse the option.
This shouldn't have an impact, because when we receive a segment and
there is not established socket, we will match on the listening socket
(if it's still there). Then, when we decide to respond with a RST in
tcp_rcv_state_process, we will give to tcp_v4_send_reset() the
listening-socket and thus will parse the TCP_MD5 option.
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/tcp.h | 10 +-
include/linux/tcp_md5.h | 64 -----
net/ipv4/tcp_ipv4.c | 55 ----
net/ipv4/tcp_md5.c | 696 +++++++++++++++++++++++++++++++++--------------
net/ipv4/tcp_minisocks.c | 12 -
net/ipv4/tcp_output.c | 68 +----
net/ipv6/tcp_ipv6.c | 23 --
7 files changed, 488 insertions(+), 440 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index b0b38f7100a4..034fbd9e0a38 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -127,11 +127,11 @@ struct tcp_out_options {
u16 mss; /* 0 to disable */
u8 ws; /* window scale, 0 to disable */
u8 num_sack_blocks; /* number of SACK blocks to include */
- u8 hash_size; /* bytes in hash_location */
- __u8 *hash_location; /* temporary pointer, overloaded */
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
+#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *md5; /* TCP_MD5 signature key */
+#endif
};
/* This is the max number of SACKS that we'll generate and process. It's safe
@@ -380,9 +380,6 @@ struct tcp_sock {
#ifdef CONFIG_TCP_MD5SIG
/* TCP AF-Specific parts; only used by MD5 Signature support so far */
const struct tcp_sock_af_ops *af_specific;
-
-/* TCP MD5 Signature Option information */
- struct tcp_md5sig_info __rcu *md5sig_info;
#endif
/* TCP fastopen related information */
@@ -440,9 +437,6 @@ struct tcp_timewait_sock {
long tw_ts_recent_stamp;
struct hlist_head tcp_option_list;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *tw_md5_key;
-#endif
};
static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
diff --git a/include/linux/tcp_md5.h b/include/linux/tcp_md5.h
index f6a681cdded4..8dee4fc3dc7f 100644
--- a/include/linux/tcp_md5.h
+++ b/include/linux/tcp_md5.h
@@ -26,25 +26,6 @@ struct tcp_md5sig_key {
struct rcu_head rcu;
};
-/* - sock block */
-struct tcp_md5sig_info {
- struct hlist_head head;
- struct rcu_head rcu;
-};
-
-union tcp_md5sum_block {
- struct tcp4_pseudohdr ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct tcp6_pseudohdr ip6;
-#endif
-};
-
-/* - pool: digest algorithm, hash description and scratch buffer */
-struct tcp_md5sig_pool {
- struct ahash_request *md5_req;
- void *scratch;
-};
-
extern const struct tcp_sock_af_ops tcp_sock_ipv4_specific;
extern const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
extern const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
@@ -56,37 +37,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
-void tcp_v4_md5_destroy_sock(struct sock *sk);
-
-int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
- unsigned int remaining,
- struct tcp_out_options *opts,
- const struct sock *sk);
-
-void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
- struct tcphdr *t1,
- struct tcp_out_options *opts,
- const struct sock *sk);
-
-int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
- unsigned int remaining,
- struct tcp_out_options *opts,
- const struct sock *sk);
-
-void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
- struct tcphdr *t1,
- struct tcp_out_options *opts,
- const struct sock *sk);
-
bool tcp_v4_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb);
-void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
-
-void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
-
-void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw);
-
struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
@@ -98,23 +51,6 @@ int tcp_v6_md5_hash_skb(char *md5_hash,
bool tcp_v6_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb);
-static inline void tcp_md5_twsk_destructor(struct sock *sk)
-{
- struct tcp_timewait_sock *twsk = tcp_twsk(sk);
-
- if (twsk->tw_md5_key)
- kfree_rcu(twsk->tw_md5_key, rcu);
-}
-
-static inline void tcp_md5_add_header_len(const struct sock *listener,
- struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (tp->af_specific->md5_lookup(listener, sk))
- tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
-}
-
int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb);
int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 143e1f66a24a..356bf41ec73a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -603,9 +603,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
struct ip_reply_arg arg;
struct net *net;
int offset = 0;
-#ifdef CONFIG_TCP_MD5SIG
- int ret;
-#endif
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -643,26 +640,11 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
-#ifdef CONFIG_TCP_MD5SIG
- ret = tcp_v4_md5_send_response_prepare(skb, 0,
- MAX_TCP_OPTION_SPACE - arg.iov[0].iov_len,
- &opts, sk);
-
- if (ret == -1)
- return;
-
- arg.iov[0].iov_len += ret;
-#endif
-
if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
unsigned int remaining;
int used;
remaining = sizeof(rep.opt);
-#ifdef CONFIG_TCP_MD5SIG
- if (opts.md5)
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
-#endif
used = tcp_extopt_response_prepare(skb, TCPHDR_RST, remaining,
&opts, sk);
@@ -674,9 +656,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
offset += used / 4;
}
-#ifdef CONFIG_TCP_MD5SIG
- tcp_v4_md5_send_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
-#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
@@ -727,9 +706,6 @@ static void tcp_v4_send_ack(const struct sock *sk,
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
int offset = 0;
-#ifdef CONFIG_TCP_MD5SIG
- int ret;
-#endif
extopt_list = tcp_extopt_get_list(sk);
@@ -758,28 +734,12 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.th.ack = 1;
rep.th.window = htons(win);
-#ifdef CONFIG_TCP_MD5SIG
- ret = tcp_v4_md5_send_response_prepare(skb, 0,
- MAX_TCP_OPTION_SPACE - arg.iov[0].iov_len,
- &opts, sk);
-
- if (ret == -1)
- return;
-
- arg.iov[0].iov_len += ret;
-#endif
-
if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
unsigned int remaining;
int used;
remaining = sizeof(rep.th) + sizeof(rep.opt) - arg.iov[0].iov_len;
-#ifdef CONFIG_TCP_MD5SIG
- if (opts.md5)
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
-#endif
-
memset(&opts, 0, sizeof(opts));
used = tcp_extopt_response_prepare(skb, TCPHDR_ACK, remaining,
&opts, sk);
@@ -792,14 +752,6 @@ static void tcp_v4_send_ack(const struct sock *sk,
offset += used / 4;
}
-#ifdef CONFIG_TCP_MD5SIG
- if (opts.md5) {
- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
- rep.th.doff = arg.iov[0].iov_len / 4;
- }
- tcp_v4_md5_send_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
-#endif
-
arg.flags = reply_flags;
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
@@ -1026,10 +978,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
tcp_initialize_rcv_mss(newsk);
-#ifdef CONFIG_TCP_MD5SIG
- tcp_v4_md5_syn_recv_sock(sk, newsk);
-#endif
-
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
@@ -1532,9 +1480,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
tcp_extopt_destroy(sk);
-#ifdef CONFIG_TCP_MD5SIG
- tcp_v4_md5_destroy_sock(sk);
-#endif
/* Clean up a referenced TCP bind bucket. */
if (inet_csk(sk)->icsk_bind_hash)
diff --git a/net/ipv4/tcp_md5.c b/net/ipv4/tcp_md5.c
index a31b404e6dbf..64e5b4420ce9 100644
--- a/net/ipv4/tcp_md5.c
+++ b/net/ipv4/tcp_md5.c
@@ -7,11 +7,119 @@
#include <net/inet6_hashtables.h>
+struct tcp_md5sig_info {
+ struct hlist_head head;
+ struct rcu_head rcu;
+};
+
+union tcp_md5sum_block {
+ struct tcp4_pseudohdr ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct tcp6_pseudohdr ip6;
+#endif
+};
+
+/* - pool: digest algorithm, hash description and scratch buffer */
+struct tcp_md5sig_pool {
+ struct ahash_request *md5_req;
+ void *scratch;
+};
+
static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
static bool tcp_md5sig_pool_populated;
-#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
+static unsigned int tcp_md5_extopt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+
+static __be32 *tcp_md5_extopt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct sock *sk,
+ struct tcp_extopt_store *store);
+
+static int tcp_md5_send_response_prepare(struct sk_buff *orig, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+
+static __be32 *tcp_md5_send_response_write(__be32 *ptr, struct sk_buff *orig,
+ struct tcphdr *th,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+
+static int tcp_md5_extopt_add_header_len(const struct sock *orig,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+
+static struct tcp_extopt_store *tcp_md5_extopt_copy(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store);
+
+static struct tcp_extopt_store *tcp_md5_extopt_move(struct sock *from,
+ struct sock *to,
+ struct tcp_extopt_store *store);
+
+static void tcp_md5_extopt_destroy(struct tcp_extopt_store *store);
+
+struct tcp_md5_extopt {
+ struct tcp_extopt_store store;
+ struct tcp_md5sig_info __rcu *md5sig_info;
+ struct sock *sk;
+ struct rcu_head rcu;
+};
+
+static const struct tcp_extopt_ops tcp_md5_extra_ops = {
+ .option_kind = TCPOPT_MD5SIG,
+ .prepare = tcp_md5_extopt_prepare,
+ .write = tcp_md5_extopt_write,
+ .response_prepare = tcp_md5_send_response_prepare,
+ .response_write = tcp_md5_send_response_write,
+ .add_header_len = tcp_md5_extopt_add_header_len,
+ .copy = tcp_md5_extopt_copy,
+ .move = tcp_md5_extopt_move,
+ .destroy = tcp_md5_extopt_destroy,
+ .owner = THIS_MODULE,
+};
+
+static struct tcp_md5_extopt *tcp_extopt_to_md5(struct tcp_extopt_store *store)
+{
+ return container_of(store, struct tcp_md5_extopt, store);
+}
+
+static struct tcp_md5_extopt *tcp_md5_opt_find(const struct sock *sk)
+{
+ struct tcp_extopt_store *ext_opt;
+
+ ext_opt = tcp_extopt_find_kind(TCPOPT_MD5SIG, sk);
+
+ return tcp_extopt_to_md5(ext_opt);
+}
+
+static int tcp_md5_register(struct sock *sk,
+ struct tcp_md5_extopt *md5_opt)
+{
+ return tcp_register_extopt(&md5_opt->store, sk);
+}
+
+static struct tcp_md5_extopt *tcp_md5_alloc_store(struct sock *sk)
+{
+ struct tcp_md5_extopt *md5_opt;
+
+ md5_opt = kzalloc(sizeof(*md5_opt), GFP_ATOMIC);
+ if (!md5_opt)
+ return NULL;
+
+ md5_opt->store.ops = &tcp_md5_extra_ops;
+ md5_opt->sk = sk;
+
+ return md5_opt;
+}
static void __tcp_alloc_md5sig_pool(void)
{
@@ -91,18 +199,18 @@ static struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
return NULL;
}
-static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
+static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct tcp_md5_extopt *md5_opt,
const union tcp_md5_addr *addr,
int family, u8 prefixlen)
{
- const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
unsigned int size = sizeof(struct in_addr);
const struct tcp_md5sig_info *md5sig;
+ const struct sock *sk = md5_opt->sk;
/* caller either holds rcu_read_lock() or socket lock */
- md5sig = rcu_dereference_check(tp->md5sig_info,
- lockdep_sock_is_held(sk));
+ md5sig = rcu_dereference_check(md5_opt->md5sig_info,
+ sk_fullsock(sk) && lockdep_sock_is_held(sk));
if (!md5sig)
return NULL;
#if IS_ENABLED(CONFIG_IPV6)
@@ -125,11 +233,26 @@ static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
u8 newkeylen, gfp_t gfp)
{
/* Add Key to the list */
- struct tcp_md5sig_key *key;
- struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_info *md5sig;
+ struct tcp_md5_extopt *md5_opt;
+ struct tcp_md5sig_key *key;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ md5_opt = tcp_md5_opt_find(sk);
+ if (!md5_opt) {
+ int ret;
+
+ md5_opt = tcp_md5_alloc_store(sk);
+ if (!md5_opt)
+ return -ENOMEM;
+
+ ret = tcp_md5_register(sk, md5_opt);
+ if (ret) {
+ kfree(md5_opt);
+ return ret;
+ }
+ }
+
+ key = tcp_md5_do_lookup_exact(md5_opt, addr, family, prefixlen);
if (key) {
/* Pre-existing entry - just update that one. */
memcpy(key->key, newkey, newkeylen);
@@ -137,8 +260,8 @@ static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
return 0;
}
- md5sig = rcu_dereference_protected(tp->md5sig_info,
- lockdep_sock_is_held(sk));
+ md5sig = rcu_dereference_protected(md5_opt->md5sig_info,
+ sk_fullsock(sk) && lockdep_sock_is_held(sk));
if (!md5sig) {
md5sig = kmalloc(sizeof(*md5sig), gfp);
if (!md5sig)
@@ -146,7 +269,7 @@ static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
sk_nocaps_add(sk, NETIF_F_GSO_MASK);
INIT_HLIST_HEAD(&md5sig->head);
- rcu_assign_pointer(tp->md5sig_info, md5sig);
+ rcu_assign_pointer(md5_opt->md5sig_info, md5sig);
}
key = sock_kmalloc(sk, sizeof(*key), gfp);
@@ -168,18 +291,18 @@ static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
return 0;
}
-static void tcp_clear_md5_list(struct sock *sk)
+static void tcp_clear_md5_list(struct tcp_md5_extopt *md5_opt)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_info *md5sig;
struct tcp_md5sig_key *key;
struct hlist_node *n;
- struct tcp_md5sig_info *md5sig;
- md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
+ md5sig = rcu_dereference_protected(md5_opt->md5sig_info, 1);
hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
hlist_del_rcu(&key->node);
- atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
+ if (md5_opt->sk && sk_fullsock(md5_opt->sk))
+ atomic_sub(sizeof(*key), &md5_opt->sk->sk_omem_alloc);
kfree_rcu(key, rcu);
}
}
@@ -187,9 +310,14 @@ static void tcp_clear_md5_list(struct sock *sk)
static int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
int family, u8 prefixlen)
{
+ struct tcp_md5_extopt *md5_opt;
struct tcp_md5sig_key *key;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ md5_opt = tcp_md5_opt_find(sk);
+ if (!md5_opt)
+ return -ENOENT;
+
+ key = tcp_md5_do_lookup_exact(md5_opt, addr, family, prefixlen);
if (!key)
return -ENOENT;
hlist_del_rcu(&key->node);
@@ -421,16 +549,20 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
const union tcp_md5_addr *addr,
int family)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *key;
+ struct tcp_md5sig_key *best_match = NULL;
const struct tcp_md5sig_info *md5sig;
+ struct tcp_md5_extopt *md5_opt;
+ struct tcp_md5sig_key *key;
__be32 mask;
- struct tcp_md5sig_key *best_match = NULL;
bool match;
+ md5_opt = tcp_md5_opt_find(sk);
+ if (!md5_opt)
+ return NULL;
+
/* caller either holds rcu_read_lock() or socket lock */
- md5sig = rcu_dereference_check(tp->md5sig_info,
- lockdep_sock_is_held(sk));
+ md5sig = rcu_dereference_check(md5_opt->md5sig_info,
+ sk_fullsock(sk) && lockdep_sock_is_held(sk));
if (!md5sig)
return NULL;
@@ -538,75 +670,30 @@ static int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
return 0;
}
-int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
- unsigned int remaining,
- struct tcp_out_options *opts,
- const struct sock *sk)
+static int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
{
- const struct tcphdr *th = tcp_hdr(skb);
const struct iphdr *iph = ip_hdr(skb);
- const __u8 *hash_location = NULL;
rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
- if (sk && sk_fullsock(sk)) {
- opts->md5 = tcp_md5_do_lookup(sk,
- (union tcp_md5_addr *)&iph->saddr,
- AF_INET);
- } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
- struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
-
- opts->md5 = tcp_twsk_md5_key(tcptw);
- } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
- opts->md5 = tcp_md5_do_lookup(sk,
- (union tcp_md5_addr *)&iph->saddr,
- AF_INET);
- } else if (hash_location) {
- unsigned char newhash[16];
- struct sock *sk1;
- int genhash;
-
- /* active side is lost. Try to find listening socket through
- * source port, and then find md5 key through listening socket.
- * we are not loose security here:
- * Incoming packet is checked with md5 hash with finding key,
- * no RST generated if md5 hash doesn't match.
- */
- sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, NULL, 0,
- iph->saddr,
- th->source, iph->daddr,
- ntohs(th->source), inet_iif(skb),
- tcp_v4_sdif(skb));
- /* don't send rst if it can't find key */
- if (!sk1)
- goto out_err;
-
- opts->md5 = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
- &iph->saddr, AF_INET);
- if (!opts->md5)
- goto out_err;
-
- genhash = tcp_v4_md5_hash_skb(newhash, opts->md5, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto out_err;
- }
+ opts->md5 = tcp_md5_do_lookup(sk,
+ (union tcp_md5_addr *)&iph->saddr,
+ AF_INET);
if (opts->md5)
+ /* rcu_read_unlock() is in _response_write */
return TCPOLEN_MD5SIG_ALIGNED;
rcu_read_unlock();
return 0;
-
-out_err:
- rcu_read_unlock();
- return -1;
}
-void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
- struct tcphdr *t1,
- struct tcp_out_options *opts,
- const struct sock *sk)
+static __be32 *tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
{
if (opts->md5) {
*topt++ = htonl((TCPOPT_NOP << 24) |
@@ -617,75 +704,39 @@ void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
tcp_v4_md5_hash_hdr((__u8 *)topt, opts->md5,
ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr, t1);
+
+ topt += 4;
+
+ /* Unlocking from _response_prepare */
rcu_read_unlock();
}
+
+ return topt;
}
#if IS_ENABLED(CONFIG_IPV6)
-int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
- unsigned int remaining,
- struct tcp_out_options *opts,
- const struct sock *sk)
+static int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
{
- const struct tcphdr *th = tcp_hdr(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- const __u8 *hash_location = NULL;
rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
- if (sk && sk_fullsock(sk)) {
- opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
- } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
- struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
-
- opts->md5 = tcp_twsk_md5_key(tcptw);
- } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
- opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
- } else if (hash_location) {
- unsigned char newhash[16];
- struct sock *sk1;
- int genhash;
-
- /* active side is lost. Try to find listening socket through
- * source port, and then find md5 key through listening socket.
- * we are not loose security here:
- * Incoming packet is checked with md5 hash with finding key,
- * no RST generated if md5 hash doesn't match.
- */
- sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, NULL, 0,
- &ipv6h->saddr,
- th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb),
- tcp_v6_sdif(skb));
- if (!sk1)
- goto out_err;
-
- opts->md5 = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
- if (!opts->md5)
- goto out_err;
-
- genhash = tcp_v6_md5_hash_skb(newhash, opts->md5, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto out_err;
- }
+ opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
if (opts->md5)
+ /* rcu_read_unlock() is in _response_write */
return TCPOLEN_MD5SIG_ALIGNED;
rcu_read_unlock();
return 0;
-
-out_err:
- rcu_read_unlock();
- return -1;
}
-EXPORT_SYMBOL_GPL(tcp_v6_md5_send_response_prepare);
-void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
- struct tcphdr *t1,
- struct tcp_out_options *opts,
- const struct sock *sk)
+static __be32 *tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
{
if (opts->md5) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
@@ -694,12 +745,45 @@ void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, t1);
+ topt += 4;
+
+ /* Unlocking from _response_prepare */
rcu_read_unlock();
}
+
+ return topt;
}
-EXPORT_SYMBOL_GPL(tcp_v6_md5_send_response_write);
#endif
+static int tcp_md5_send_response_prepare(struct sk_buff *orig, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (orig->protocol != htons(ETH_P_IP))
+ return tcp_v6_md5_send_response_prepare(orig, flags, remaining,
+ opts, sk);
+ else
+#endif
+ return tcp_v4_md5_send_response_prepare(orig, flags, remaining,
+ opts, sk);
+}
+
+static __be32 *tcp_md5_send_response_write(__be32 *ptr, struct sk_buff *orig,
+ struct tcphdr *th,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (orig->protocol != htons(ETH_P_IP))
+ return tcp_v6_md5_send_response_write(ptr, orig, th, opts, sk);
+#endif
+ return tcp_v4_md5_send_response_write(ptr, orig, th, opts, sk);
+}
+
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
@@ -909,59 +993,6 @@ bool tcp_v6_inbound_md5_hash(const struct sock *sk,
return false;
}
EXPORT_SYMBOL_GPL(tcp_v6_inbound_md5_hash);
-#endif
-
-void tcp_v4_md5_destroy_sock(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- /* Clean up the MD5 key list, if any */
- if (tp->md5sig_info) {
- tcp_clear_md5_list(sk);
- kfree_rcu(tp->md5sig_info, rcu);
- tp->md5sig_info = NULL;
- }
-}
-
-void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
- struct tcp_md5sig_key *key;
-
- /* Copy over the MD5 key from the original socket */
- key = tcp_md5_do_lookup(listener, (union tcp_md5_addr *)&inet->inet_daddr,
- AF_INET);
- if (key) {
- /* We're using one, so create a matching key
- * on the sk structure. If we fail to get
- * memory, then we end up not copying the key
- * across. Shucks.
- */
- tcp_md5_do_add(sk, (union tcp_md5_addr *)&inet->inet_daddr,
- AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- }
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk)
-{
- struct tcp_md5sig_key *key;
-
- /* Copy over the MD5 key from the original socket */
- key = tcp_v6_md5_do_lookup(listener, &sk->sk_v6_daddr);
- if (key) {
- /* We're using one, so create a matching key
- * on the newsk structure. If we fail to get
- * memory, then we end up not copying the key
- * across. Shucks.
- */
- tcp_md5_do_add(sk, (union tcp_md5_addr *)&sk->sk_v6_daddr,
- AF_INET6, 128, key->key, key->keylen,
- sk_gfp_mask(sk, GFP_ATOMIC));
- }
-}
-EXPORT_SYMBOL_GPL(tcp_v6_md5_syn_recv_sock);
struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
@@ -971,25 +1002,6 @@ struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
EXPORT_SYMBOL_GPL(tcp_v6_md5_lookup);
#endif
-void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw)
-{
- struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *key;
-
- /* The timewait bucket does not have the key DB from the
- * sock structure. We just make a quick copy of the
- * md5 key being used (if indeed we are using one)
- * so the timewait ack generating code has the key.
- */
- tcptw->tw_md5_key = NULL;
- key = tp->af_specific->md5_lookup(sk, sk);
- if (key) {
- tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
- BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
- }
-}
-
static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
const struct tcp_md5sig_key *key)
{
@@ -1039,13 +1051,17 @@ static int tcp_diag_put_md5sig(struct sk_buff *skb,
int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb)
{
if (net_admin) {
+ struct tcp_md5_extopt *md5_opt;
struct tcp_md5sig_info *md5sig;
int err = 0;
rcu_read_lock();
- md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
- if (md5sig)
- err = tcp_diag_put_md5sig(skb, md5sig);
+ md5_opt = tcp_md5_opt_find(sk);
+ if (md5_opt) {
+ md5sig = rcu_dereference(md5_opt->md5sig_info);
+ if (md5sig)
+ err = tcp_diag_put_md5sig(skb, md5sig);
+ }
rcu_read_unlock();
if (err < 0)
return err;
@@ -1060,15 +1076,19 @@ int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin)
int size = 0;
if (net_admin && sk_fullsock(sk)) {
+ struct tcp_md5_extopt *md5_opt;
const struct tcp_md5sig_info *md5sig;
const struct tcp_md5sig_key *key;
size_t md5sig_count = 0;
rcu_read_lock();
- md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
- if (md5sig) {
- hlist_for_each_entry_rcu(key, &md5sig->head, node)
- md5sig_count++;
+ md5_opt = tcp_md5_opt_find(sk);
+ if (md5_opt) {
+ md5sig = rcu_dereference(md5_opt->md5sig_info);
+ if (md5sig) {
+ hlist_for_each_entry_rcu(key, &md5sig->head, node)
+ md5sig_count++;
+ }
}
rcu_read_unlock();
size += nla_total_size(md5sig_count *
@@ -1079,6 +1099,260 @@ int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin)
}
EXPORT_SYMBOL_GPL(tcp_md5_diag_get_aux_size);
+static int tcp_md5_extopt_add_header_len(const struct sock *orig,
+ const struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->af_specific->md5_lookup(orig, sk))
+ return TCPOLEN_MD5SIG_ALIGNED;
+
+ return 0;
+}
+
+static unsigned int tcp_md5_extopt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ int ret = 0;
+
+ if (sk_fullsock(sk)) {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ opts->md5 = tp->af_specific->md5_lookup(sk, sk);
+ } else {
+ struct request_sock *req = inet_reqsk(sk);
+ struct sock *listener = req->rsk_listener;
+
+ /* Coming from tcp_make_synack, unlock is in
+ * tcp_md5_extopt_write
+ */
+ rcu_read_lock();
+
+ opts->md5 = tcp_rsk(req)->af_specific->req_md5_lookup(listener, sk);
+
+ if (!opts->md5)
+ rcu_read_unlock();
+ }
+
+ if (unlikely(opts->md5)) {
+ ret = TCPOLEN_MD5SIG_ALIGNED;
+ opts->options |= OPTION_MD5;
+
+ /* Don't use TCP timestamps with TCP_MD5 */
+ if ((opts->options & OPTION_TS)) {
+ ret -= TCPOLEN_TSTAMP_ALIGNED;
+
+ /* When TS are enabled, Linux puts the SACK_OK
+ * next to the timestamp option, thus not accounting
+ * for its space. Here, we disable timestamps, thus
+ * we need to account for the space.
+ */
+ if (opts->options & OPTION_SACK_ADVERTISE)
+ ret += TCPOLEN_SACKPERM_ALIGNED;
+ }
+
+ opts->options &= ~OPTION_TS;
+ opts->tsval = 0;
+ opts->tsecr = 0;
+
+ if (!sk_fullsock(sk)) {
+ struct request_sock *req = inet_reqsk(sk);
+
+ inet_rsk(req)->tstamp_ok = 0;
+ }
+ }
+
+ return ret;
+}
+
+static __be32 *tcp_md5_extopt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ if (unlikely(OPTION_MD5 & opts->options)) {
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct in6_addr *addr6;
+
+ if (sk_fullsock(sk)) {
+ addr6 = &sk->sk_v6_daddr;
+ } else {
+ BUG_ON(sk->sk_state != TCP_NEW_SYN_RECV);
+ addr6 = &inet_rsk(inet_reqsk(sk))->ir_v6_rmt_addr;
+ }
+#endif
+
+ *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
+
+ if (sk_fullsock(sk))
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+
+ /* Calculate the MD5 hash, as we have all we need now */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 && !ipv6_addr_v4mapped(addr6))
+ tcp_v6_md5_hash_skb((__u8 *)ptr, opts->md5, sk, skb);
+ else
+#endif
+ tcp_v4_md5_hash_skb((__u8 *)ptr, opts->md5, sk, skb);
+
+ ptr += 4;
+
+ /* Coming from tcp_make_synack */
+ if (!sk_fullsock(sk))
+ rcu_read_unlock();
+ }
+
+ return ptr;
+}
+
+static struct tcp_md5_extopt *__tcp_md5_extopt_copy(struct request_sock *req,
+ const struct tcp_md5sig_key *key,
+ const union tcp_md5_addr *addr,
+ int family)
+{
+ struct tcp_md5_extopt *md5_opt = NULL;
+ struct tcp_md5sig_info *md5sig;
+ struct tcp_md5sig_key *newkey;
+
+ md5_opt = tcp_md5_alloc_store(req_to_sk(req));
+ if (!md5_opt)
+ goto err;
+
+ md5sig = kmalloc(sizeof(*md5sig), GFP_ATOMIC);
+ if (!md5sig)
+ goto err_md5sig;
+
+ INIT_HLIST_HEAD(&md5sig->head);
+ rcu_assign_pointer(md5_opt->md5sig_info, md5sig);
+
+ newkey = kmalloc(sizeof(*newkey), GFP_ATOMIC);
+ if (!newkey)
+ goto err_newkey;
+
+ memcpy(newkey->key, key->key, key->keylen);
+ newkey->keylen = key->keylen;
+ newkey->family = family;
+ newkey->prefixlen = 32;
+ memcpy(&newkey->addr, addr,
+ (family == AF_INET6) ? sizeof(struct in6_addr) :
+ sizeof(struct in_addr));
+ hlist_add_head_rcu(&newkey->node, &md5sig->head);
+
+ return md5_opt;
+
+err_newkey:
+ kfree(md5sig);
+err_md5sig:
+ kfree_rcu(md5_opt, rcu);
+err:
+ return NULL;
+}
+
+static struct tcp_extopt_store *tcp_md5_v4_extopt_copy(const struct sock *listener,
+ struct request_sock *req)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct tcp_md5sig_key *key;
+
+ /* Copy over the MD5 key from the original socket */
+ key = tcp_md5_do_lookup(listener,
+ (union tcp_md5_addr *)&ireq->ir_rmt_addr,
+ AF_INET);
+ if (!key)
+ return NULL;
+
+ return (struct tcp_extopt_store *)__tcp_md5_extopt_copy(req, key,
+ (union tcp_md5_addr *)&ireq->ir_rmt_addr,
+ AF_INET);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct tcp_extopt_store *tcp_md5_v6_extopt_copy(const struct sock *listener,
+ struct request_sock *req)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct tcp_md5sig_key *key;
+
+ /* Copy over the MD5 key from the original socket */
+ key = tcp_v6_md5_do_lookup(listener, &ireq->ir_v6_rmt_addr);
+ if (!key)
+ return NULL;
+
+ return (struct tcp_extopt_store *)__tcp_md5_extopt_copy(req, key,
+ (union tcp_md5_addr *)&ireq->ir_v6_rmt_addr,
+ AF_INET6);
+}
+#endif
+
+/* We are creating a new request-socket, based on the listener's key that
+ * matches the IP-address. Thus, we need to create a new tcp_extopt_store, and
+ * store the matching key in there for the request-sock.
+ */
+static struct tcp_extopt_store *tcp_md5_extopt_copy(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ if (ireq->ireq_family == AF_INET6)
+ return tcp_md5_v6_extopt_copy(listener, req);
+#endif
+ return tcp_md5_v4_extopt_copy(listener, req);
+}
+
+/* Moving from a request-sock to a full socket means we need to account for
+ * the memory and set GSO-flags. When moving from a full socket to ta time-wait
+ * socket we also need to adjust the memory accounting.
+ */
+static struct tcp_extopt_store *tcp_md5_extopt_move(struct sock *from,
+ struct sock *to,
+ struct tcp_extopt_store *store)
+{
+ struct tcp_md5_extopt *md5_opt = tcp_extopt_to_md5(store);
+ unsigned int size = sizeof(struct tcp_md5sig_key);
+
+ if (sk_fullsock(to)) {
+ /* From request-sock to full socket */
+
+ if (size > sysctl_optmem_max ||
+ atomic_read(&to->sk_omem_alloc) + size >= sysctl_optmem_max) {
+ tcp_md5_extopt_destroy(store);
+ return NULL;
+ }
+
+ sk_nocaps_add(to, NETIF_F_GSO_MASK);
+ atomic_add(size, &to->sk_omem_alloc);
+ } else if (sk_fullsock(from)) {
+ /* From full socket to time-wait-socket */
+ atomic_sub(size, &from->sk_omem_alloc);
+ }
+
+ md5_opt->sk = to;
+
+ return store;
+}
+
+static void tcp_md5_extopt_destroy(struct tcp_extopt_store *store)
+{
+ struct tcp_md5_extopt *md5_opt = tcp_extopt_to_md5(store);
+
+ /* Clean up the MD5 key list, if any */
+ if (md5_opt) {
+ tcp_clear_md5_list(md5_opt);
+ kfree_rcu(md5_opt->md5sig_info, rcu);
+ md5_opt->md5sig_info = NULL;
+
+ kfree_rcu(md5_opt, rcu);
+ }
+}
+
const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f33214b29167..3da1c823240b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -22,7 +22,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
-#include <linux/tcp_md5.h>
#include <linux/workqueue.h>
#include <linux/static_key.h>
#include <net/tcp.h>
@@ -295,9 +294,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcp_extopt_move(sk, (struct sock *)tw);
INIT_HLIST_HEAD(&tp->tcp_option_list);
}
-#ifdef CONFIG_TCP_MD5SIG
- tcp_md5_time_wait(sk, tw);
-#endif
/* Get the TIME_WAIT timeout firing. */
if (timeo < rto)
@@ -332,10 +328,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
void tcp_twsk_destructor(struct sock *sk)
{
-#ifdef CONFIG_TCP_MD5SIG
- tcp_md5_twsk_destructor(sk);
-#endif
-
if (unlikely(!hlist_empty(&tcp_twsk(sk)->tcp_option_list)))
tcp_extopt_destroy(sk);
}
@@ -520,10 +512,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tcp_header_len = sizeof(struct tcphdr);
}
newtp->tsoffset = treq->ts_off;
-#ifdef CONFIG_TCP_MD5SIG
- newtp->md5sig_info = NULL; /*XXX*/
- tcp_md5_add_header_len(sk, newsk);
-#endif
if (unlikely(!hlist_empty(&treq->tcp_option_list)))
newtp->tcp_header_len += tcp_extopt_add_header(req_to_sk(req), newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 137645753abb..41bd8a791b0d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -42,7 +42,6 @@
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/static_key.h>
-#include <linux/tcp_md5.h>
#include <trace/events/tcp.h>
@@ -424,14 +423,6 @@ static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
extopt_list = tcp_extopt_get_list(sk);
- if (unlikely(OPTION_MD5 & options)) {
- *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
- /* overload cookie hash location */
- opts->hash_location = (__u8 *)ptr;
- ptr += 4;
- }
-
if (unlikely(opts->mss)) {
*ptr++ = htonl((TCPOPT_MSS << 24) |
(TCPOLEN_MSS << 16) |
@@ -527,14 +518,6 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
unsigned int remaining = MAX_TCP_OPTION_SPACE;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
-#ifdef CONFIG_TCP_MD5SIG
- opts->md5 = tp->af_specific->md5_lookup(sk, sk);
- if (opts->md5) {
- opts->options |= OPTION_MD5;
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
- }
-#endif
-
/* We always get an MSS option. The option bytes which will be seen in
* normal data packets should timestamps be used, must be in the MSS
* advertised. But we subtract them from tp->mss_cache so that
@@ -547,7 +530,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !opts->md5)) {
+ if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
@@ -596,20 +579,6 @@ static unsigned int tcp_synack_options(const struct sock *sk,
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
-#ifdef CONFIG_TCP_MD5SIG
- if (opts->md5) {
- opts->options |= OPTION_MD5;
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
-
- /* We can't fit any SACK blocks in a packet with MD5 + TS
- * options. There was discussion about disabling SACK
- * rather than TS in order to fit in better with old,
- * buggy kernels, but that was deemed to be unnecessary.
- */
- ireq->tstamp_ok &= !ireq->sack_ok;
- }
-#endif
-
/* We always send an MSS option. */
opts->mss = mss;
remaining -= TCPOLEN_MSS_ALIGNED;
@@ -670,16 +639,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
size += TCPOLEN_TSTAMP_ALIGNED;
}
-#ifdef CONFIG_TCP_MD5SIG
- opts->md5 = tp->af_specific->md5_lookup(sk, sk);
- if (unlikely(opts->md5)) {
- opts->options |= OPTION_MD5;
- size += TCPOLEN_MD5SIG_ALIGNED;
- }
-#else
- opts->md5 = NULL;
-#endif
-
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
size += tcp_extopt_prepare(skb, 0, MAX_TCP_OPTION_SPACE - size,
opts, tcp_to_sk(tp));
@@ -1082,14 +1041,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->window = htons(min(tp->rcv_wnd, 65535U));
}
tcp_options_write((__be32 *)(th + 1), skb, sk, &opts);
-#ifdef CONFIG_TCP_MD5SIG
- /* Calculate the MD5 hash, as we have all we need now */
- if (opts.md5) {
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- tp->af_specific->calc_md5_hash(opts.hash_location,
- opts.md5, sk, skb);
- }
-#endif
icsk->icsk_af_ops->send_check(sk, skb);
@@ -3159,10 +3110,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
#endif
skb->skb_mstamp = tcp_clock_us();
-#ifdef CONFIG_TCP_MD5SIG
- rcu_read_lock();
- opts.md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
-#endif
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts,
foc) + sizeof(*th);
@@ -3189,15 +3136,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
tcp_options_write((__be32 *)(th + 1), skb, req_to_sk(req), &opts);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
-#ifdef CONFIG_TCP_MD5SIG
- /* Okay, we have all we need - do the md5 hash if needed */
- if (opts.md5)
- tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
- opts.md5,
- req_to_sk(req), skb);
- rcu_read_unlock();
-#endif
-
/* Do not fool tcpdump (if any), clean our debris */
skb->tstamp = 0;
return skb;
@@ -3238,10 +3176,6 @@ static void tcp_connect_init(struct sock *sk)
if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
-#ifdef CONFIG_TCP_MD5SIG
- tcp_md5_add_header_len(sk, sk);
-#endif
-
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
tp->tcp_header_len += tcp_extopt_add_header(sk, sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e9b72d794140..16cbd6ec2063 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -577,20 +577,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
-#ifdef CONFIG_TCP_MD5SIG
-{
- int ret;
-
- ret = tcp_v6_md5_send_response_prepare(skb, 0,
- MAX_TCP_OPTION_SPACE - tot_len,
- &extraopts, sk);
-
- if (ret == -1)
- goto out;
-
- tot_len += ret;
-}
-#endif
if (sk)
extopt_list = tcp_extopt_get_list(sk);
@@ -639,11 +625,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
*topt++ = htonl(tsecr);
}
-#ifdef CONFIG_TCP_MD5SIG
- if (extraopts.md5)
- tcp_v6_md5_send_response_write(topt, skb, t1, &extraopts, sk);
-#endif
-
if (unlikely(extopt_list && !hlist_empty(extopt_list)))
tcp_extopt_response_write(topt, skb, t1, &extraopts, sk);
@@ -957,10 +938,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
-#ifdef CONFIG_TCP_MD5SIG
- tcp_v6_md5_syn_recv_sock(sk, newsk);
-#endif
-
if (__inet_inherit_port(sk, newsk) < 0) {
inet_csk_prepare_forced_close(newsk);
tcp_done(newsk);
--
2.15.0
^ permalink raw reply related
* [RFC 05/14] tcp: Register handlers for extra TCP options
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
In-Reply-To: <20171218215109.38700-1-cpaasch@apple.com>
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Allow additional TCP options to be handled by registered hook
functions.
Registered options have a priority that determines the order in which
options are prepared and written. Lower priority numbers are handled
first.
Option parsing will call the provided 'parse' function when a TCP option
number is not recognized by the normal option parsing code.
After parsing, there are two places where we post-process the options.
First, a 'check' callback that allows to drop the packet based on the
parsed options (e.g., useful for TCP MD5SIG). Then, a 'post_process'
function that gets called after other validity checks (aka, in-window,
PAWS,...). This post_process function can then update other state for
this particular extra-option.
In the output-path, the 'prepare' function determines the required space
for registered options and store associated data. 'write' adds the option
to the TCP header.
These additional TCP-options are stored in hlists of the TCP-socket. To
pass the state and options around during the 3-way handshake and in
time-wait state, the hlists are also on the tcp_request_sock and
tcp_timewait_sock.
The list is copied from the listener to the request-socket (calling into
the 'copy' callback). Then, moved from the request-socket to the
TCP-socket and finally to the time-wait socket.
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
---
drivers/infiniband/hw/cxgb4/cm.c | 2 +-
include/linux/tcp.h | 28 ++++
include/net/tcp.h | 110 ++++++++++++-
net/ipv4/syncookies.c | 6 +-
net/ipv4/tcp.c | 327 ++++++++++++++++++++++++++++++++++++++-
net/ipv4/tcp_input.c | 49 +++++-
net/ipv4/tcp_ipv4.c | 98 +++++++++---
net/ipv4/tcp_minisocks.c | 32 +++-
net/ipv4/tcp_output.c | 40 ++---
net/ipv6/syncookies.c | 6 +-
net/ipv6/tcp_ipv6.c | 32 ++++
11 files changed, 676 insertions(+), 54 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 21db3b48a617..a1ea5583f07b 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3746,7 +3746,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
*/
memset(&tmp_opt, 0, sizeof(tmp_opt));
tcp_clear_options(&tmp_opt);
- tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL, NULL);
req = __skb_push(skb, sizeof(*req));
memset(req, 0, sizeof(*req));
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4f93f0953c41..4756bd2c4b54 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -115,6 +115,24 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
#endif
}
+#define OPTION_SACK_ADVERTISE (1 << 0)
+#define OPTION_TS (1 << 1)
+#define OPTION_MD5 (1 << 2)
+#define OPTION_WSCALE (1 << 3)
+#define OPTION_FAST_OPEN_COOKIE (1 << 8)
+#define OPTION_SMC (1 << 9)
+
+struct tcp_out_options {
+ u16 options; /* bit field of OPTION_* */
+ u16 mss; /* 0 to disable */
+ u8 ws; /* window scale, 0 to disable */
+ u8 num_sack_blocks; /* number of SACK blocks to include */
+ u8 hash_size; /* bytes in hash_location */
+ __u8 *hash_location; /* temporary pointer, overloaded */
+ __u32 tsval, tsecr; /* need to include OPTION_TS */
+ struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
+};
+
/* This is the max number of SACKS that we'll generate and process. It's safe
* to increase this, although since:
* size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
@@ -137,6 +155,7 @@ struct tcp_request_sock {
* FastOpen it's the seq#
* after data-in-SYN.
*/
+ struct hlist_head tcp_option_list;
};
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -373,6 +392,8 @@ struct tcp_sock {
*/
struct request_sock *fastopen_rsk;
u32 *saved_syn;
+
+ struct hlist_head tcp_option_list;
};
enum tsq_enum {
@@ -400,6 +421,11 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk)
return (struct tcp_sock *)sk;
}
+static inline struct sock *tcp_to_sk(const struct tcp_sock *tp)
+{
+ return (struct sock *)tp;
+}
+
struct tcp_timewait_sock {
struct inet_timewait_sock tw_sk;
#define tw_rcv_nxt tw_sk.__tw_common.skc_tw_rcv_nxt
@@ -412,6 +438,8 @@ struct tcp_timewait_sock {
u32 tw_last_oow_ack_time;
long tw_ts_recent_stamp;
+
+ struct hlist_head tcp_option_list;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *tw_md5_key;
#endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6939e69d3c37..ac62ceff9815 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -202,6 +202,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_FASTOPEN_BASE 2
#define TCPOLEN_EXP_FASTOPEN_BASE 4
#define TCPOLEN_EXP_SMC_BASE 6
+#define TCPOLEN_EXP_BASE 6
/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED 12
@@ -403,7 +404,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
- int estab, struct tcp_fastopen_cookie *foc);
+ int estab, struct tcp_fastopen_cookie *foc,
+ struct sock *sk);
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
/*
@@ -2063,4 +2065,110 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
#if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc;
#endif
+
+struct tcp_extopt_store;
+
+struct tcp_extopt_ops {
+ u32 option_kind;
+ unsigned char priority;
+ void (*parse)(int opsize, const unsigned char *opptr,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct sock *sk,
+ struct tcp_extopt_store *store);
+ bool (*check)(struct sock *sk,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct tcp_extopt_store *store);
+ void (*post_process)(struct sock *sk,
+ struct tcp_options_received *opt_rx,
+ struct tcp_extopt_store *store);
+ /* Return the number of bytes consumed */
+ unsigned int (*prepare)(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+ __be32 *(*write)(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts, struct sock *sk,
+ struct tcp_extopt_store *store);
+ int (*response_prepare)(struct sk_buff *orig, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+ __be32 *(*response_write)(__be32 *ptr, struct sk_buff *orig,
+ struct tcphdr *th,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+ int (*add_header_len)(const struct sock *orig,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+ struct tcp_extopt_store *(*copy)(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *from);
+ struct tcp_extopt_store *(*move)(struct sock *from, struct sock *to,
+ struct tcp_extopt_store *store);
+ void (*destroy)(struct tcp_extopt_store *store);
+ struct module *owner;
+};
+
+/* The tcp_extopt_store is the generic structure that will be added to the
+ * list of TCP extra-options.
+ *
+ * Protocols using the framework can create a wrapper structure around it that
+ * stores protocol-specific state. The tcp_extopt-functions will provide
+ * tcp_extopt_store though, so the protocol can use container_of to get
+ * access to the wrapper structure containing the state.
+ */
+struct tcp_extopt_store {
+ struct hlist_node list;
+ const struct tcp_extopt_ops *ops;
+};
+
+struct hlist_head *tcp_extopt_get_list(const struct sock *sk);
+
+struct tcp_extopt_store *tcp_extopt_find_kind(u32 kind, const struct sock *sk);
+
+void tcp_extopt_parse(u32 opcode, int opsize, const unsigned char *opptr,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx, struct sock *sk);
+
+bool tcp_extopt_check(struct sock *sk, const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx);
+
+void tcp_extopt_post_process(struct sock *sk,
+ struct tcp_options_received *opt_rx);
+
+unsigned int tcp_extopt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+void tcp_extopt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts, struct sock *sk);
+
+int tcp_extopt_response_prepare(struct sk_buff *orig, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+void tcp_extopt_response_write(__be32 *ptr, struct sk_buff *orig,
+ struct tcphdr *th, struct tcp_out_options *opts,
+ const struct sock *sk);
+
+int tcp_extopt_add_header(const struct sock *orig, const struct sock *sk);
+
+/* Socket lock must be held when calling this function */
+int tcp_register_extopt(struct tcp_extopt_store *store, struct sock *sk);
+
+void tcp_extopt_copy(struct sock *listener, struct request_sock *req,
+ struct tcp_options_received *opt);
+
+void tcp_extopt_move(struct sock *from, struct sock *to);
+
+void tcp_extopt_destroy(struct sock *sk);
+
#endif /* _TCP_H */
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index fda37f2862c9..8373abf19440 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -313,7 +313,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, sk);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
tsoff = secure_tcp_ts_off(sock_net(sk),
@@ -325,6 +325,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
goto out;
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+ tcp_extopt_check(sk, skb, &tcp_opt))
+ goto out;
+
ret = NULL;
req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
if (!req)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c470fec9062f..17f38afb4212 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -416,6 +416,7 @@ void tcp_init_sock(struct sock *sk)
tcp_init_xmit_timers(sk);
INIT_LIST_HEAD(&tp->tsq_node);
INIT_LIST_HEAD(&tp->tsorted_sent_queue);
+ INIT_HLIST_HEAD(&tp->tcp_option_list);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -3473,6 +3474,331 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
#endif
+struct hlist_head *tcp_extopt_get_list(const struct sock *sk)
+{
+ if (sk_fullsock(sk))
+ return &tcp_sk(sk)->tcp_option_list;
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
+ return &tcp_rsk(inet_reqsk(sk))->tcp_option_list;
+ else if (sk->sk_state == TCP_TIME_WAIT)
+ return &tcp_twsk(sk)->tcp_option_list;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_get_list);
+
+/* Caller must ensure that rcu is locked */
+struct tcp_extopt_store *tcp_extopt_find_kind(u32 kind, const struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (entry->ops->option_kind == kind)
+ return entry;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_find_kind);
+
+void tcp_extopt_parse(u32 opcode, int opsize, const unsigned char *opptr,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx, struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+
+ rcu_read_lock();
+ entry = tcp_extopt_find_kind(opcode, sk);
+
+ if (entry && entry->ops->parse)
+ entry->ops->parse(opsize, opptr, skb, opt_rx, sk, entry);
+ rcu_read_unlock();
+}
+
+bool tcp_extopt_check(struct sock *sk, const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ bool drop = false;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ bool ret = false;
+
+ if (entry->ops->check)
+ ret = entry->ops->check(sk, skb, opt_rx, entry);
+
+ if (ret)
+ drop = true;
+ }
+ rcu_read_unlock();
+
+ return drop;
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_check);
+
+void tcp_extopt_post_process(struct sock *sk,
+ struct tcp_options_received *opt_rx)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (entry->ops->post_process)
+ entry->ops->post_process(sk, opt_rx, entry);
+ }
+ rcu_read_unlock();
+}
+
+unsigned int tcp_extopt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ unsigned int used = 0;
+
+ if (!sk)
+ return 0;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (unlikely(!entry->ops->prepare))
+ continue;
+
+ used += entry->ops->prepare(skb, flags, remaining - used, opts,
+ sk, entry);
+ }
+ rcu_read_unlock();
+
+ return roundup(used, 4);
+}
+
+void tcp_extopt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts, struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+
+ if (!sk)
+ return;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (unlikely(!entry->ops->write))
+ continue;
+
+ ptr = entry->ops->write(ptr, skb, opts, sk, entry);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_write);
+
+int tcp_extopt_response_prepare(struct sk_buff *orig, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ unsigned int used = 0;
+
+ if (!sk)
+ return 0;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ int ret;
+
+ if (unlikely(!entry->ops->response_prepare))
+ continue;
+
+ ret = entry->ops->response_prepare(orig, flags,
+ remaining - used, opts,
+ sk, entry);
+
+ used += ret;
+ }
+ rcu_read_unlock();
+
+ return roundup(used, 4);
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_response_prepare);
+
+void tcp_extopt_response_write(__be32 *ptr, struct sk_buff *orig,
+ struct tcphdr *th, struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+
+ if (!sk)
+ return;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (unlikely(!entry->ops->response_write))
+ continue;
+
+ ptr = entry->ops->response_write(ptr, orig, th, opts, sk, entry);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_response_write);
+
+int tcp_extopt_add_header(const struct sock *orig, const struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ int tcp_header_len = 0;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (unlikely(!entry->ops->add_header_len))
+ continue;
+
+ tcp_header_len += entry->ops->add_header_len(orig, sk, entry);
+ }
+ rcu_read_unlock();
+
+ return tcp_header_len;
+}
+
+/* Socket lock must be held when calling this function */
+int tcp_register_extopt(struct tcp_extopt_store *store, struct sock *sk)
+{
+ struct hlist_node *add_before = NULL;
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ int ret = 0;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ if (!store->ops->option_kind)
+ return -EINVAL;
+
+ if (!try_module_get(store->ops->owner))
+ return -ENOENT;
+
+ hlist_for_each_entry_rcu(entry, lhead, list) {
+ if (entry->ops->option_kind == store->ops->option_kind) {
+ pr_notice("Option kind %u already registered\n",
+ store->ops->option_kind);
+ module_put(store->ops->owner);
+ return -EEXIST;
+ }
+
+ if (entry->ops->priority <= store->ops->priority)
+ add_before = &entry->list;
+ }
+
+ if (add_before)
+ hlist_add_behind_rcu(&store->list, add_before);
+ else
+ hlist_add_head_rcu(&store->list, lhead);
+
+ pr_debug("Option kind %u registered\n", store->ops->option_kind);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tcp_register_extopt);
+
+void tcp_extopt_copy(struct sock *listener, struct request_sock *req,
+ struct tcp_options_received *opt)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *from, *to;
+
+ from = tcp_extopt_get_list(listener);
+ to = tcp_extopt_get_list(req_to_sk(req));
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, from, list) {
+ struct tcp_extopt_store *new;
+
+ if (!try_module_get(entry->ops->owner)) {
+ pr_err("%s Module get failed while copying\n", __func__);
+ continue;
+ }
+
+ new = entry->ops->copy(listener, req, opt, entry);
+ if (!new) {
+ module_put(entry->ops->owner);
+ continue;
+ }
+
+ hlist_add_tail_rcu(&new->list, to);
+ }
+ rcu_read_unlock();
+}
+
+void tcp_extopt_move(struct sock *from, struct sock *to)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lfrom, *lto;
+ struct hlist_node *tmp;
+
+ lfrom = tcp_extopt_get_list(from);
+ lto = tcp_extopt_get_list(to);
+
+ rcu_read_lock();
+ hlist_for_each_entry_safe(entry, tmp, lfrom, list) {
+ hlist_del_rcu(&entry->list);
+
+ if (entry->ops->move) {
+ entry = entry->ops->move(from, to, entry);
+ if (!entry)
+ continue;
+ }
+
+ hlist_add_tail_rcu(&entry->list, lto);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_move);
+
+void tcp_extopt_destroy(struct sock *sk)
+{
+ struct tcp_extopt_store *entry;
+ struct hlist_head *lhead;
+ struct hlist_node *tmp;
+
+ lhead = tcp_extopt_get_list(sk);
+
+ rcu_read_lock();
+ hlist_for_each_entry_safe(entry, tmp, lhead, list) {
+ struct module *owner = entry->ops->owner;
+
+ hlist_del_rcu(&entry->list);
+
+ entry->ops->destroy(entry);
+
+ module_put(owner);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_destroy);
+
void tcp_done(struct sock *sk)
{
struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
@@ -3622,7 +3948,6 @@ void __init tcp_init(void)
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
}
-
cnt = tcp_hashinfo.ehash_mask + 1;
sysctl_tcp_max_orphans = cnt / 2;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5c35fd568b13..1950ff80fb3f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3696,7 +3696,7 @@ static int smc_parse_options(const struct tcphdr *th,
void tcp_parse_options(const struct net *net,
const struct sk_buff *skb,
struct tcp_options_received *opt_rx, int estab,
- struct tcp_fastopen_cookie *foc)
+ struct tcp_fastopen_cookie *foc, struct sock *sk)
{
const unsigned char *ptr;
const struct tcphdr *th = tcp_hdr(skb);
@@ -3796,9 +3796,18 @@ void tcp_parse_options(const struct net *net,
tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true);
- else
- smc_parse_options(th, opt_rx, ptr,
- opsize);
+ else if (smc_parse_options(th, opt_rx, ptr,
+ opsize))
+ break;
+ else if (opsize >= TCPOLEN_EXP_BASE)
+ tcp_extopt_parse(get_unaligned_be32(ptr),
+ opsize, ptr, skb,
+ opt_rx, sk);
+ break;
+
+ default:
+ tcp_extopt_parse(opcode, opsize, ptr, skb,
+ opt_rx, sk);
break;
}
@@ -3849,11 +3858,13 @@ static bool tcp_fast_parse_options(const struct net *net,
goto extra_opt_check;
}
- tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
+ tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL, tcp_to_sk(tp));
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
extra_opt_check:
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ return tcp_extopt_check(tcp_to_sk(tp), skb, &tp->rx_opt);
return false;
}
@@ -5327,6 +5338,9 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tp->rx_opt.saw_tstamp = 0;
+ if (!hlist_empty(&tp->tcp_option_list))
+ goto slow_path;
+
/* pred_flags is 0xS?10 << 16 + snd_wnd
* if header_prediction is to be made
* 'S' will always be tp->tcp_header_len >> 2
@@ -5514,7 +5528,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
/* Get original SYNACK MSS value if user MSS sets mss_clamp */
tcp_clear_options(&opt);
opt.user_mss = opt.mss_clamp = 0;
- tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL, sk);
mss = opt.mss_clamp;
}
@@ -5577,10 +5591,14 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
int saved_clamp = tp->rx_opt.mss_clamp;
bool fastopen_fail;
- tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
+ tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc, sk);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+ tcp_extopt_check(sk, skb, &tp->rx_opt))
+ goto discard;
+
if (th->ack) {
/* rfc793:
* "If the state is SYN-SENT then
@@ -5663,6 +5681,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->tcp_header_len = sizeof(struct tcphdr);
}
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ tcp_extopt_post_process(sk, &tp->rx_opt);
+
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
@@ -5756,6 +5777,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_ecn_rcv_syn(tp, th);
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ tcp_extopt_post_process(sk, &tp->rx_opt);
+
tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
@@ -6239,12 +6263,17 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->af_specific = af_ops;
tcp_rsk(req)->ts_off = 0;
+ INIT_HLIST_HEAD(&tcp_rsk(req)->tcp_option_list);
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
tmp_opt.user_mss = tp->rx_opt.user_mss;
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
- want_cookie ? NULL : &foc);
+ want_cookie ? NULL : &foc, sk);
+
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+ tcp_extopt_check(sk, skb, &tmp_opt))
+ goto drop_and_free;
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -6305,6 +6334,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_reqsk_record_syn(sk, req, skb);
fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
}
+
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ tcp_extopt_copy(sk, req, &tmp_opt);
+
if (fastopen_sk) {
af_ops->send_synack(fastopen_sk, dst, &fl, req,
&foc, TCP_SYNACK_FASTOPEN);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 94e28350f420..dee296097b8f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -600,10 +600,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
-#ifdef CONFIG_TCP_MD5SIG
- __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
-#endif
+ __be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
+ struct hlist_head *extopt_list = NULL;
struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key = NULL;
@@ -613,6 +612,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
struct sock *sk1 = NULL;
#endif
struct net *net;
+ int offset = 0;
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -624,6 +624,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
return;
+ if (sk)
+ extopt_list = tcp_extopt_get_list(sk);
+
/* Swap the send and the receive. */
memset(&rep, 0, sizeof(rep));
rep.th.dest = th->source;
@@ -678,19 +681,44 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
goto out;
}
+#endif
+
+ if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
+ unsigned int remaining;
+ struct tcp_out_options opts;
+ int used;
+ remaining = sizeof(rep.opt);
+#ifdef CONFIG_TCP_MD5SIG
+ if (key)
+ remaining -= TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
+ memset(&opts, 0, sizeof(opts));
+
+ used = tcp_extopt_response_prepare(skb, TCPHDR_RST, remaining,
+ &opts, sk);
+
+ arg.iov[0].iov_len += used;
+ rep.th.doff = arg.iov[0].iov_len / 4;
+
+ tcp_extopt_response_write(&rep.opt[0], skb, &rep.th, &opts, sk);
+ offset += used / 4;
+ }
+
+#ifdef CONFIG_TCP_MD5SIG
if (key) {
- rep.opt[0] = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
+ rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
/* Update length and the length the header thinks exists */
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
rep.th.doff = arg.iov[0].iov_len / 4;
- tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
- key, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &rep.th);
+ tcp_v4_md5_hash_hdr((__u8 *)&rep.opt[offset],
+ key, ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, &rep.th);
}
#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
@@ -742,14 +770,14 @@ static void tcp_v4_send_ack(const struct sock *sk,
const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
- __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
-#ifdef CONFIG_TCP_MD5SIG
- + (TCPOLEN_MD5SIG_ALIGNED >> 2)
-#endif
- ];
+ __be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
+ struct hlist_head *extopt_list = NULL;
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
+ int offset = 0;
+
+ extopt_list = tcp_extopt_get_list(sk);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -763,6 +791,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.opt[1] = htonl(tsval);
rep.opt[2] = htonl(tsecr);
arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
+ offset += 3;
}
/* Swap the send and the receive. */
@@ -774,22 +803,45 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.th.ack = 1;
rep.th.window = htons(win);
+ if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
+ unsigned int remaining;
+ struct tcp_out_options opts;
+ int used;
+
+ remaining = sizeof(rep.th) + sizeof(rep.opt) - arg.iov[0].iov_len;
+
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
- int offset = (tsecr) ? 3 : 0;
+ if (key)
+ remaining -= TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
+ memset(&opts, 0, sizeof(opts));
+ used = tcp_extopt_response_prepare(skb, TCPHDR_ACK, remaining,
+ &opts, sk);
+
+ arg.iov[0].iov_len += used;
+ rep.th.doff = arg.iov[0].iov_len / 4;
+ tcp_extopt_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
+
+ offset += used / 4;
+ }
+
+#ifdef CONFIG_TCP_MD5SIG
+ if (key) {
rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) |
TCPOLEN_MD5SIG);
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
- rep.th.doff = arg.iov[0].iov_len/4;
+ rep.th.doff = arg.iov[0].iov_len / 4;
tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
key, ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr, &rep.th);
}
#endif
+
arg.flags = reply_flags;
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
@@ -893,6 +945,9 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
+ tcp_extopt_destroy(req_to_sk(req));
+
kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
@@ -1410,6 +1465,11 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (likely(*own_req)) {
tcp_move_syn(newtp, req);
ireq->ireq_opt = NULL;
+
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list))) {
+ tcp_extopt_move(req_to_sk(req), newsk);
+ INIT_HLIST_HEAD(&tcp_rsk(req)->tcp_option_list);
+ }
} else {
newinet->inet_opt = NULL;
}
@@ -1907,6 +1967,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* Cleans up our, hopefully empty, out_of_order_queue. */
skb_rbtree_purge(&tp->out_of_order_queue);
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ tcp_extopt_destroy(sk);
#ifdef CONFIG_TCP_MD5SIG
/* Clean up the MD5 key list, if any */
if (tp->md5sig_info) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a8384b0c11f8..676ad7ca13ad 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -95,9 +95,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
bool paws_reject = false;
- tmp_opt.saw_tstamp = 0;
+ tcp_clear_options(&tmp_opt);
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
- tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL,
+ (struct sock *)tw);
if (tmp_opt.saw_tstamp) {
if (tmp_opt.rcv_tsecr)
@@ -108,6 +109,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
}
}
+ if (unlikely(!hlist_empty(&tcptw->tcp_option_list)) &&
+ tcp_extopt_check((struct sock *)tw, skb, &tmp_opt))
+ return TCP_TW_SUCCESS;
+
if (tw->tw_substate == TCP_FIN_WAIT2) {
/* Just repeat all the checks of tcp_rcv_state_process() */
@@ -251,7 +256,7 @@ EXPORT_SYMBOL(tcp_timewait_state_process);
void tcp_time_wait(struct sock *sk, int state, int timeo)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
struct inet_timewait_sock *tw;
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
@@ -271,6 +276,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
tcptw->tw_ts_offset = tp->tsoffset;
tcptw->tw_last_oow_ack_time = 0;
+ INIT_HLIST_HEAD(&tcptw->tcp_option_list);
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
@@ -284,6 +290,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
}
#endif
+ if (unlikely(!hlist_empty(&tp->tcp_option_list))) {
+ tcp_extopt_move(sk, (struct sock *)tw);
+ INIT_HLIST_HEAD(&tp->tcp_option_list);
+ }
#ifdef CONFIG_TCP_MD5SIG
/*
* The timewait bucket does not have the key DB from the
@@ -341,6 +351,9 @@ void tcp_twsk_destructor(struct sock *sk)
if (twsk->tw_md5_key)
kfree_rcu(twsk->tw_md5_key, rcu);
#endif
+
+ if (unlikely(!hlist_empty(&twsk->tcp_option_list)))
+ tcp_extopt_destroy(sk);
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
@@ -470,6 +483,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
INIT_LIST_HEAD(&newtp->tsq_node);
INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
+ INIT_HLIST_HEAD(&newtp->tcp_option_list);
tcp_init_wl(newtp, treq->rcv_isn);
@@ -545,6 +559,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
if (newtp->af_specific->md5_lookup(sk, newsk))
newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
+ if (unlikely(!hlist_empty(&treq->tcp_option_list)))
+ newtp->tcp_header_len += tcp_extopt_add_header(req_to_sk(req), newsk);
+
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
@@ -587,9 +604,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
bool paws_reject = false;
bool own_req;
- tmp_opt.saw_tstamp = 0;
+ tcp_clear_options(&tmp_opt);
if (th->doff > (sizeof(struct tcphdr)>>2)) {
- tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL,
+ req_to_sk(req));
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;
@@ -604,6 +622,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
}
}
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)) &&
+ tcp_extopt_check(req_to_sk(req), skb, &tmp_opt))
+ return NULL;
+
/* Check for pure retransmitted SYN. */
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
flg == TCP_FLAG_SYN &&
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index efe599a41e36..6804a9325107 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -398,13 +398,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
return tp->snd_una != tp->snd_up;
}
-#define OPTION_SACK_ADVERTISE (1 << 0)
-#define OPTION_TS (1 << 1)
-#define OPTION_MD5 (1 << 2)
-#define OPTION_WSCALE (1 << 3)
-#define OPTION_FAST_OPEN_COOKIE (1 << 8)
-#define OPTION_SMC (1 << 9)
-
static void smc_options_write(__be32 *ptr, u16 *options)
{
#if IS_ENABLED(CONFIG_SMC)
@@ -420,17 +413,6 @@ static void smc_options_write(__be32 *ptr, u16 *options)
#endif
}
-struct tcp_out_options {
- u16 options; /* bit field of OPTION_* */
- u16 mss; /* 0 to disable */
- u8 ws; /* window scale, 0 to disable */
- u8 num_sack_blocks; /* number of SACK blocks to include */
- u8 hash_size; /* bytes in hash_location */
- __u8 *hash_location; /* temporary pointer, overloaded */
- __u32 tsval, tsecr; /* need to include OPTION_TS */
- struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
-};
-
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
@@ -447,12 +429,15 @@ struct tcp_out_options {
static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
struct tcp_out_options *opts)
{
+ struct hlist_head *extopt_list;
u16 options = opts->options; /* mungable copy */
struct tcp_sock *tp = NULL;
if (sk_fullsock(sk))
tp = tcp_sk(sk);
+ extopt_list = tcp_extopt_get_list(sk);
+
if (unlikely(OPTION_MD5 & options)) {
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
@@ -543,6 +528,9 @@ static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
}
smc_options_write(ptr, &options);
+
+ if (unlikely(!hlist_empty(extopt_list)))
+ tcp_extopt_write(ptr, skb, opts, sk);
}
static void smc_set_option(const struct tcp_sock *tp,
@@ -645,6 +633,10 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
smc_set_option(tp, opts, &remaining);
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN, remaining,
+ opts, tcp_to_sk(tp));
+
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -708,6 +700,11 @@ static unsigned int tcp_synack_options(const struct sock *sk,
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
+ remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN | TCPHDR_ACK,
+ remaining, opts,
+ req_to_sk(req));
+
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -741,6 +738,10 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
size += TCPOLEN_TSTAMP_ALIGNED;
}
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ size += tcp_extopt_prepare(skb, 0, MAX_TCP_OPTION_SPACE - size,
+ opts, tcp_to_sk(tp));
+
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
if (unlikely(eff_sacks)) {
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
@@ -3303,6 +3304,9 @@ static void tcp_connect_init(struct sock *sk)
tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+ tp->tcp_header_len += tcp_extopt_add_header(sk, sk);
+
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index e7a3a6b6cf56..d0716c7e9390 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -162,7 +162,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, sk);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
tsoff = secure_tcpv6_ts_off(sock_net(sk),
@@ -174,6 +174,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
goto out;
+ if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+ tcp_extopt_check(sk, skb, &tcp_opt))
+ goto out;
+
ret = NULL;
req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
if (!req)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7178476b3d2f..5af5dcc1ac83 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -500,6 +500,9 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
+ tcp_extopt_destroy(req_to_sk(req));
+
kfree(inet_rsk(req)->ipv6_opt);
kfree_skb(inet_rsk(req)->pktopts);
}
@@ -789,6 +792,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
unsigned int tot_len = sizeof(struct tcphdr);
struct dst_entry *dst;
__be32 *topt;
+ struct hlist_head *extopt_list = NULL;
+ struct tcp_out_options extraopts;
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
@@ -797,6 +802,25 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
+ if (sk)
+ extopt_list = tcp_extopt_get_list(sk);
+
+ if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
+ unsigned int remaining = MAX_TCP_OPTION_SPACE - tot_len;
+ u8 extraflags = rst ? TCPHDR_RST : 0;
+ int used;
+
+ if (!rst || !th->ack)
+ extraflags |= TCPHDR_ACK;
+
+ memset(&extraopts, 0, sizeof(extraopts));
+
+ used = tcp_extopt_response_prepare(skb, extraflags, remaining,
+ &extraopts, sk);
+
+ tot_len += used;
+ }
+
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
GFP_ATOMIC);
if (!buff)
@@ -837,6 +861,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
#endif
+ if (unlikely(extopt_list && !hlist_empty(extopt_list)))
+ tcp_extopt_response_write(topt, skb, t1, &extraopts, sk);
+
memset(&fl6, 0, sizeof(fl6));
fl6.daddr = ipv6_hdr(skb)->saddr;
fl6.saddr = ipv6_hdr(skb)->daddr;
@@ -1231,6 +1258,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
skb_set_owner_r(newnp->pktoptions, newsk);
}
}
+
+ if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list))) {
+ tcp_extopt_move(req_to_sk(req), newsk);
+ INIT_HLIST_HEAD(&tcp_rsk(req)->tcp_option_list);
+ }
}
return newsk;
--
2.15.0
^ permalink raw reply related
* Re: [trivial PATCH] treewide: Align function definition open/close braces
From: Paul Moore @ 2017-12-18 22:04 UTC (permalink / raw)
To: Joe Perches
Cc: Jiri Kosina, Linus Torvalds, linux-rtc, alsa-devel, linuxppc-dev,
linux-scsi, netdev, acpi4asus-user, linux-wireless, linux-kernel,
dri-devel, platform-driver-x86, linux-xfs, linux-acpi,
linux-audit, amd-gfx, linux-fsdevel, MPT-FusionLinux.pdl,
ocfs2-devel, linux-media
In-Reply-To: <1513556924.31581.51.camel@perches.com>
On Sun, Dec 17, 2017 at 7:28 PM, Joe Perches <joe@perches.com> wrote:
> Some functions definitions have either the initial open brace and/or
> the closing brace outside of column 1.
>
> Move those braces to column 1.
>
> This allows various function analyzers like gnu complexity to work
> properly for these modified functions.
>
> Miscellanea:
>
> o Remove extra trailing ; and blank line from xfs_agf_verify
>
> Signed-off-by: Joe Perches <joe@perches.com>
> ---
> git diff -w shows no difference other than the above 'Miscellanea'
>
> (this is against -next, but it applies against Linus' tree
> with a couple offsets)
>
> arch/x86/include/asm/atomic64_32.h | 2 +-
> drivers/acpi/custom_method.c | 2 +-
> drivers/acpi/fan.c | 2 +-
> drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +-
> drivers/media/i2c/msp3400-kthreads.c | 2 +-
> drivers/message/fusion/mptsas.c | 2 +-
> drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c | 2 +-
> drivers/net/wireless/ath/ath9k/xmit.c | 2 +-
> drivers/platform/x86/eeepc-laptop.c | 2 +-
> drivers/rtc/rtc-ab-b5ze-s3.c | 2 +-
> drivers/scsi/dpt_i2o.c | 2 +-
> drivers/scsi/sym53c8xx_2/sym_glue.c | 2 +-
> fs/locks.c | 2 +-
> fs/ocfs2/stack_user.c | 2 +-
> fs/xfs/libxfs/xfs_alloc.c | 5 ++---
> fs/xfs/xfs_export.c | 2 +-
> kernel/audit.c | 6 +++---
> kernel/trace/trace_printk.c | 4 ++--
> lib/raid6/sse2.c | 14 +++++++-------
> sound/soc/fsl/fsl_dma.c | 2 +-
> 20 files changed, 30 insertions(+), 31 deletions(-)
For the audit bits ...
Acked-by: Paul Moore <paul@paul-moore.com>
--
paul moore
www.paul-moore.com
^ permalink raw reply
* Re: thunderx sgmii interface hang
From: Tim Harvey @ 2017-12-18 21:53 UTC (permalink / raw)
To: Andrew Lunn, Sunil Goutham; +Cc: netdev
In-Reply-To: <20171213194347.GA932@lunn.ch>
On Wed, Dec 13, 2017 at 11:43 AM, Andrew Lunn <andrew@lunn.ch> wrote:
>> The nic appears to work fine (pings, TCP etc) up until a performance
>> test is attempted.
>> When an iperf bandwidth test is attempted the nic ends up in a state
>> where truncated-ip packets are being sent out (per a tcpdump from
>> another board):
>
> Hi Tim
>
> Are pause frames supported? Have you tried turning them off?
>
> Can you reproduce the issue with UDP? Or is it TCP only?
>
Andrew,
Pause frames don't appear to be supported yet and the issue occurs
when using UDP as well as TCP. I'm not clear what the best way to
troubleshoot this is.
Sunil, have any others reported this issue? I do not have a Cavium
CN80xx/CN81xx reference board that has SGMII.
Regards,
Tim
^ permalink raw reply
* [RFC 11/14] tcp_md5: Move TCP-MD5 code out of TCP itself
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
In-Reply-To: <20171218215109.38700-1-cpaasch@apple.com>
This is all just copy-pasting the TCP_MD5-code into functions that are
placed in net/ipv4/tcp_md5.c.
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/inet_diag.h | 1 +
include/linux/tcp_md5.h | 138 ++++++
include/net/tcp.h | 77 ----
net/ipv4/Makefile | 1 +
net/ipv4/tcp.c | 133 +-----
net/ipv4/tcp_diag.c | 81 +---
net/ipv4/tcp_input.c | 38 --
net/ipv4/tcp_ipv4.c | 520 ++-------------------
net/ipv4/tcp_md5.c | 1102 +++++++++++++++++++++++++++++++++++++++++++++
net/ipv4/tcp_minisocks.c | 27 +-
net/ipv4/tcp_output.c | 4 +-
net/ipv6/tcp_ipv6.c | 318 +------------
12 files changed, 1305 insertions(+), 1135 deletions(-)
create mode 100644 include/linux/tcp_md5.h
create mode 100644 net/ipv4/tcp_md5.c
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 39faaaf843e1..1ef6727e41c9 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -2,6 +2,7 @@
#ifndef _INET_DIAG_H_
#define _INET_DIAG_H_ 1
+#include <linux/user_namespace.h>
#include <uapi/linux/inet_diag.h>
struct net;
diff --git a/include/linux/tcp_md5.h b/include/linux/tcp_md5.h
new file mode 100644
index 000000000000..f6a681cdded4
--- /dev/null
+++ b/include/linux/tcp_md5.h
@@ -0,0 +1,138 @@
+#ifndef _LINUX_TCP_MD5_H
+#define _LINUX_TCP_MD5_H
+
+#include <linux/skbuff.h>
+
+#ifdef CONFIG_TCP_MD5SIG
+#include <linux/types.h>
+
+#include <net/tcp.h>
+
+union tcp_md5_addr {
+ struct in_addr a4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr a6;
+#endif
+};
+
+/* - key database */
+struct tcp_md5sig_key {
+ struct hlist_node node;
+ u8 keylen;
+ u8 family; /* AF_INET or AF_INET6 */
+ union tcp_md5_addr addr;
+ u8 prefixlen;
+ u8 key[TCP_MD5SIG_MAXKEYLEN];
+ struct rcu_head rcu;
+};
+
+/* - sock block */
+struct tcp_md5sig_info {
+ struct hlist_head head;
+ struct rcu_head rcu;
+};
+
+union tcp_md5sum_block {
+ struct tcp4_pseudohdr ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct tcp6_pseudohdr ip6;
+#endif
+};
+
+/* - pool: digest algorithm, hash description and scratch buffer */
+struct tcp_md5sig_pool {
+ struct ahash_request *md5_req;
+ void *scratch;
+};
+
+extern const struct tcp_sock_af_ops tcp_sock_ipv4_specific;
+extern const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
+extern const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
+
+/* - functions */
+int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
+ const struct sock *sk, const struct sk_buff *skb);
+
+struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk);
+
+void tcp_v4_md5_destroy_sock(struct sock *sk);
+
+int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk);
+
+bool tcp_v4_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb);
+
+void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
+
+void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk);
+
+void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw);
+
+struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk);
+
+int tcp_v6_md5_hash_skb(char *md5_hash,
+ const struct tcp_md5sig_key *key,
+ const struct sock *sk,
+ const struct sk_buff *skb);
+
+bool tcp_v6_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb);
+
+static inline void tcp_md5_twsk_destructor(struct sock *sk)
+{
+ struct tcp_timewait_sock *twsk = tcp_twsk(sk);
+
+ if (twsk->tw_md5_key)
+ kfree_rcu(twsk->tw_md5_key, rcu);
+}
+
+static inline void tcp_md5_add_header_len(const struct sock *listener,
+ struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->af_specific->md5_lookup(listener, sk))
+ tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+}
+
+int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb);
+
+int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin);
+
+#else
+
+static inline bool tcp_v4_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ return false;
+}
+
+static inline bool tcp_v6_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ return false;
+}
+
+#endif
+
+#endif /* _LINUX_TCP_MD5_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a5c4856e25c7..e955c5f0997f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -406,7 +406,6 @@ void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc,
struct sock *sk);
-const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
/*
* TCP v4 functions exported for the inet6 API
@@ -1415,30 +1414,6 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
tp->retransmit_skb_hint = NULL;
}
-union tcp_md5_addr {
- struct in_addr a4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr a6;
-#endif
-};
-
-/* - key database */
-struct tcp_md5sig_key {
- struct hlist_node node;
- u8 keylen;
- u8 family; /* AF_INET or AF_INET6 */
- union tcp_md5_addr addr;
- u8 prefixlen;
- u8 key[TCP_MD5SIG_MAXKEYLEN];
- struct rcu_head rcu;
-};
-
-/* - sock block */
-struct tcp_md5sig_info {
- struct hlist_head head;
- struct rcu_head rcu;
-};
-
/* - pseudo header */
struct tcp4_pseudohdr {
__be32 saddr;
@@ -1455,58 +1430,6 @@ struct tcp6_pseudohdr {
__be32 protocol; /* including padding */
};
-union tcp_md5sum_block {
- struct tcp4_pseudohdr ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct tcp6_pseudohdr ip6;
-#endif
-};
-
-/* - pool: digest algorithm, hash description and scratch buffer */
-struct tcp_md5sig_pool {
- struct ahash_request *md5_req;
- void *scratch;
-};
-
-/* - functions */
-int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct sock *sk, const struct sk_buff *skb);
-int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
- gfp_t gfp);
-int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen);
-struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk);
-
-#ifdef CONFIG_TCP_MD5SIG
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family);
-#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
-#else
-static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
-{
- return NULL;
-}
-#define tcp_twsk_md5_key(twsk) NULL
-#endif
-
-bool tcp_alloc_md5sig_pool(void);
-
-struct tcp_md5sig_pool *tcp_get_md5sig_pool(void);
-static inline void tcp_put_md5sig_pool(void)
-{
- local_bh_enable();
-}
-
-int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
- unsigned int header_len);
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
- const struct tcp_md5sig_key *key);
-
/* From tcp_fastopen.c */
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c6c8ad1d4b6d..9262d9a01035 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_MD5SIG) += tcp_md5.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0a1cabee6d5e..29f3ce8a0b54 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,6 +271,7 @@
#include <linux/slab.h>
#include <linux/errqueue.h>
#include <linux/static_key.h>
+#include <linux/tcp_md5.h>
#include <net/icmp.h>
#include <net/inet_common.h>
@@ -3337,138 +3338,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
-#ifdef CONFIG_TCP_MD5SIG
-static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
-static DEFINE_MUTEX(tcp_md5sig_mutex);
-static bool tcp_md5sig_pool_populated = false;
-
-static void __tcp_alloc_md5sig_pool(void)
-{
- struct crypto_ahash *hash;
- int cpu;
-
- hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(hash))
- return;
-
- for_each_possible_cpu(cpu) {
- void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
- struct ahash_request *req;
-
- if (!scratch) {
- scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
- sizeof(struct tcphdr),
- GFP_KERNEL,
- cpu_to_node(cpu));
- if (!scratch)
- return;
- per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
- }
- if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
- continue;
-
- req = ahash_request_alloc(hash, GFP_KERNEL);
- if (!req)
- return;
-
- ahash_request_set_callback(req, 0, NULL, NULL);
-
- per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
- }
- /* before setting tcp_md5sig_pool_populated, we must commit all writes
- * to memory. See smp_rmb() in tcp_get_md5sig_pool()
- */
- smp_wmb();
- tcp_md5sig_pool_populated = true;
-}
-
-bool tcp_alloc_md5sig_pool(void)
-{
- if (unlikely(!tcp_md5sig_pool_populated)) {
- mutex_lock(&tcp_md5sig_mutex);
-
- if (!tcp_md5sig_pool_populated)
- __tcp_alloc_md5sig_pool();
-
- mutex_unlock(&tcp_md5sig_mutex);
- }
- return tcp_md5sig_pool_populated;
-}
-EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
-
-
-/**
- * tcp_get_md5sig_pool - get md5sig_pool for this user
- *
- * We use percpu structure, so if we succeed, we exit with preemption
- * and BH disabled, to make sure another thread or softirq handling
- * wont try to get same context.
- */
-struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
-{
- local_bh_disable();
-
- if (tcp_md5sig_pool_populated) {
- /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
- smp_rmb();
- return this_cpu_ptr(&tcp_md5sig_pool);
- }
- local_bh_enable();
- return NULL;
-}
-EXPORT_SYMBOL(tcp_get_md5sig_pool);
-
-int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
- const struct sk_buff *skb, unsigned int header_len)
-{
- struct scatterlist sg;
- const struct tcphdr *tp = tcp_hdr(skb);
- struct ahash_request *req = hp->md5_req;
- unsigned int i;
- const unsigned int head_data_len = skb_headlen(skb) > header_len ?
- skb_headlen(skb) - header_len : 0;
- const struct skb_shared_info *shi = skb_shinfo(skb);
- struct sk_buff *frag_iter;
-
- sg_init_table(&sg, 1);
-
- sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
- ahash_request_set_crypt(req, &sg, NULL, head_data_len);
- if (crypto_ahash_update(req))
- return 1;
-
- for (i = 0; i < shi->nr_frags; ++i) {
- const struct skb_frag_struct *f = &shi->frags[i];
- unsigned int offset = f->page_offset;
- struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
-
- sg_set_page(&sg, page, skb_frag_size(f),
- offset_in_page(offset));
- ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
- if (crypto_ahash_update(req))
- return 1;
- }
-
- skb_walk_frags(skb, frag_iter)
- if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
- return 1;
-
- return 0;
-}
-EXPORT_SYMBOL(tcp_md5_hash_skb_data);
-
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
-{
- struct scatterlist sg;
-
- sg_init_one(&sg, key->key, key->keylen);
- ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen);
- return crypto_ahash_update(hp->md5_req);
-}
-EXPORT_SYMBOL(tcp_md5_hash_key);
-
-#endif
-
struct hlist_head *tcp_extopt_get_list(const struct sock *sk)
{
if (sk_fullsock(sk))
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index abbf0edcf6c2..5cfe5dc8f8dd 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -15,6 +15,7 @@
#include <linux/inet_diag.h>
#include <linux/tcp.h>
+#include <linux/tcp_md5.h>
#include <net/netlink.h>
#include <net/tcp.h>
@@ -37,70 +38,14 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
tcp_get_info(sk, info);
}
-#ifdef CONFIG_TCP_MD5SIG
-static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
- const struct tcp_md5sig_key *key)
-{
- info->tcpm_family = key->family;
- info->tcpm_prefixlen = key->prefixlen;
- info->tcpm_keylen = key->keylen;
- memcpy(info->tcpm_key, key->key, key->keylen);
-
- if (key->family == AF_INET)
- info->tcpm_addr[0] = key->addr.a4.s_addr;
- #if IS_ENABLED(CONFIG_IPV6)
- else if (key->family == AF_INET6)
- memcpy(&info->tcpm_addr, &key->addr.a6,
- sizeof(info->tcpm_addr));
- #endif
-}
-
-static int tcp_diag_put_md5sig(struct sk_buff *skb,
- const struct tcp_md5sig_info *md5sig)
-{
- const struct tcp_md5sig_key *key;
- struct tcp_diag_md5sig *info;
- struct nlattr *attr;
- int md5sig_count = 0;
-
- hlist_for_each_entry_rcu(key, &md5sig->head, node)
- md5sig_count++;
- if (md5sig_count == 0)
- return 0;
-
- attr = nla_reserve(skb, INET_DIAG_MD5SIG,
- md5sig_count * sizeof(struct tcp_diag_md5sig));
- if (!attr)
- return -EMSGSIZE;
-
- info = nla_data(attr);
- memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
- hlist_for_each_entry_rcu(key, &md5sig->head, node) {
- tcp_diag_md5sig_fill(info++, key);
- if (--md5sig_count == 0)
- break;
- }
-
- return 0;
-}
-#endif
-
static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
- if (net_admin) {
- struct tcp_md5sig_info *md5sig;
- int err = 0;
-
- rcu_read_lock();
- md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
- if (md5sig)
- err = tcp_diag_put_md5sig(skb, md5sig);
- rcu_read_unlock();
- if (err < 0)
- return err;
- }
+ int err = tcp_md5_diag_get_aux(sk, net_admin, skb);
+
+ if (err < 0)
+ return err;
#endif
return 0;
@@ -111,21 +56,7 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
size_t size = 0;
#ifdef CONFIG_TCP_MD5SIG
- if (net_admin && sk_fullsock(sk)) {
- const struct tcp_md5sig_info *md5sig;
- const struct tcp_md5sig_key *key;
- size_t md5sig_count = 0;
-
- rcu_read_lock();
- md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
- if (md5sig) {
- hlist_for_each_entry_rcu(key, &md5sig->head, node)
- md5sig_count++;
- }
- rcu_read_unlock();
- size += nla_total_size(md5sig_count *
- sizeof(struct tcp_diag_md5sig));
- }
+ size += tcp_md5_diag_get_aux_size(sk, net_admin);
#endif
return size;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index af8f4f9fd098..db54bdbdee51 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3847,44 +3847,6 @@ static bool tcp_fast_parse_options(const struct net *net,
return false;
}
-#ifdef CONFIG_TCP_MD5SIG
-/*
- * Parse MD5 Signature option
- */
-const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
-{
- int length = (th->doff << 2) - sizeof(*th);
- const u8 *ptr = (const u8 *)(th + 1);
-
- /* If the TCP option is too short, we can short cut */
- if (length < TCPOLEN_MD5SIG)
- return NULL;
-
- while (length > 0) {
- int opcode = *ptr++;
- int opsize;
-
- switch (opcode) {
- case TCPOPT_EOL:
- return NULL;
- case TCPOPT_NOP:
- length--;
- continue;
- default:
- opsize = *ptr++;
- if (opsize < 2 || opsize > length)
- return NULL;
- if (opcode == TCPOPT_MD5SIG)
- return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
- }
- ptr += opsize - 2;
- length -= opsize;
- }
- return NULL;
-}
-EXPORT_SYMBOL(tcp_parse_md5sig_option);
-#endif
-
/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
*
* It is not fatal. If this ACK does _not_ change critical state (seqs, window)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 397975203e14..143e1f66a24a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -62,6 +62,7 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/tcp_md5.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -87,11 +88,6 @@
#include <trace/events/tcp.h>
-#ifdef CONFIG_TCP_MD5SIG
-static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
- __be32 daddr, __be32 saddr, const struct tcphdr *th);
-#endif
-
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
@@ -603,16 +599,13 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
__be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
struct hlist_head *extopt_list = NULL;
+ struct tcp_out_options opts;
struct ip_reply_arg arg;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key = NULL;
- const __u8 *hash_location = NULL;
- unsigned char newhash[16];
- int genhash;
- struct sock *sk1 = NULL;
-#endif
struct net *net;
int offset = 0;
+#ifdef CONFIG_TCP_MD5SIG
+ int ret;
+#endif
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -627,6 +620,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk)
extopt_list = tcp_extopt_get_list(sk);
+ memset(&opts, 0, sizeof(opts));
+
/* Swap the send and the receive. */
memset(&rep, 0, sizeof(rep));
rep.th.dest = th->source;
@@ -647,55 +642,28 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
arg.iov[0].iov_len = sizeof(rep.th);
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
-#ifdef CONFIG_TCP_MD5SIG
- rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
- if (sk && sk_fullsock(sk)) {
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
- &ip_hdr(skb)->saddr, AF_INET);
- } else if (hash_location) {
- /*
- * active side is lost. Try to find listening socket through
- * source port, and then find md5 key through listening socket.
- * we are not loose security here:
- * Incoming packet is checked with md5 hash with finding key,
- * no RST generated if md5 hash doesn't match.
- */
- sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
- ip_hdr(skb)->saddr,
- th->source, ip_hdr(skb)->daddr,
- ntohs(th->source), inet_iif(skb),
- tcp_v4_sdif(skb));
- /* don't send rst if it can't find key */
- if (!sk1)
- goto out;
-
- key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
- &ip_hdr(skb)->saddr, AF_INET);
- if (!key)
- goto out;
+#ifdef CONFIG_TCP_MD5SIG
+ ret = tcp_v4_md5_send_response_prepare(skb, 0,
+ MAX_TCP_OPTION_SPACE - arg.iov[0].iov_len,
+ &opts, sk);
- genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto out;
+ if (ret == -1)
+ return;
- }
+ arg.iov[0].iov_len += ret;
#endif
if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
unsigned int remaining;
- struct tcp_out_options opts;
int used;
remaining = sizeof(rep.opt);
#ifdef CONFIG_TCP_MD5SIG
- if (key)
+ if (opts.md5)
remaining -= TCPOLEN_MD5SIG_ALIGNED;
#endif
- memset(&opts, 0, sizeof(opts));
-
used = tcp_extopt_response_prepare(skb, TCPHDR_RST, remaining,
&opts, sk);
@@ -707,19 +675,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
}
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
- rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
- /* Update length and the length the header thinks exists */
- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
- rep.th.doff = arg.iov[0].iov_len / 4;
-
- tcp_v4_md5_hash_hdr((__u8 *)&rep.opt[offset],
- key, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &rep.th);
- }
+ tcp_v4_md5_send_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
@@ -750,11 +706,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
local_bh_enable();
-
-#ifdef CONFIG_TCP_MD5SIG
-out:
- rcu_read_unlock();
-#endif
}
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
@@ -772,17 +723,19 @@ static void tcp_v4_send_ack(const struct sock *sk,
__be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
struct hlist_head *extopt_list = NULL;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
-#endif
+ struct tcp_out_options opts;
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
int offset = 0;
+#ifdef CONFIG_TCP_MD5SIG
+ int ret;
+#endif
extopt_list = tcp_extopt_get_list(sk);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
+ memset(&opts, 0, sizeof(opts));
arg.iov[0].iov_base = (unsigned char *)&rep;
arg.iov[0].iov_len = sizeof(rep.th);
@@ -806,25 +759,24 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.th.window = htons(win);
#ifdef CONFIG_TCP_MD5SIG
- if (sk->sk_state == TCP_TIME_WAIT) {
- key = tcp_twsk_md5_key(tcp_twsk(sk));
- } else if (sk->sk_state == TCP_NEW_SYN_RECV) {
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
- AF_INET);
- } else {
- key = NULL; /* Should not happen */
- }
+ ret = tcp_v4_md5_send_response_prepare(skb, 0,
+ MAX_TCP_OPTION_SPACE - arg.iov[0].iov_len,
+ &opts, sk);
+
+ if (ret == -1)
+ return;
+
+ arg.iov[0].iov_len += ret;
#endif
if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
unsigned int remaining;
- struct tcp_out_options opts;
int used;
remaining = sizeof(rep.th) + sizeof(rep.opt) - arg.iov[0].iov_len;
#ifdef CONFIG_TCP_MD5SIG
- if (key)
+ if (opts.md5)
remaining -= TCPOLEN_MD5SIG_ALIGNED;
#endif
@@ -841,18 +793,11 @@ static void tcp_v4_send_ack(const struct sock *sk,
}
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
- rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
+ if (opts.md5) {
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
rep.th.doff = arg.iov[0].iov_len / 4;
-
- tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
- key, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &rep.th);
}
+ tcp_v4_md5_send_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
#endif
arg.flags = reply_flags;
@@ -961,374 +906,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
-#ifdef CONFIG_TCP_MD5SIG
-/*
- * RFC2385 MD5 checksumming requires a mapping of
- * IP address->MD5 Key.
- * We need to maintain these in the sk structure.
- */
-
-/* Find the Key structure for an address. */
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *key;
- const struct tcp_md5sig_info *md5sig;
- __be32 mask;
- struct tcp_md5sig_key *best_match = NULL;
- bool match;
-
- /* caller either holds rcu_read_lock() or socket lock */
- md5sig = rcu_dereference_check(tp->md5sig_info,
- lockdep_sock_is_held(sk));
- if (!md5sig)
- return NULL;
-
- hlist_for_each_entry_rcu(key, &md5sig->head, node) {
- if (key->family != family)
- continue;
-
- if (family == AF_INET) {
- mask = inet_make_mask(key->prefixlen);
- match = (key->addr.a4.s_addr & mask) ==
- (addr->a4.s_addr & mask);
-#if IS_ENABLED(CONFIG_IPV6)
- } else if (family == AF_INET6) {
- match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
- key->prefixlen);
-#endif
- } else {
- match = false;
- }
-
- if (match && (!best_match ||
- key->prefixlen > best_match->prefixlen))
- best_match = key;
- }
- return best_match;
-}
-EXPORT_SYMBOL(tcp_md5_do_lookup);
-
-static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family, u8 prefixlen)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *key;
- unsigned int size = sizeof(struct in_addr);
- const struct tcp_md5sig_info *md5sig;
-
- /* caller either holds rcu_read_lock() or socket lock */
- md5sig = rcu_dereference_check(tp->md5sig_info,
- lockdep_sock_is_held(sk));
- if (!md5sig)
- return NULL;
-#if IS_ENABLED(CONFIG_IPV6)
- if (family == AF_INET6)
- size = sizeof(struct in6_addr);
-#endif
- hlist_for_each_entry_rcu(key, &md5sig->head, node) {
- if (key->family != family)
- continue;
- if (!memcmp(&key->addr, addr, size) &&
- key->prefixlen == prefixlen)
- return key;
- }
- return NULL;
-}
-
-struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk)
-{
- const union tcp_md5_addr *addr;
-
- addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
- return tcp_md5_do_lookup(sk, addr, AF_INET);
-}
-EXPORT_SYMBOL(tcp_v4_md5_lookup);
-
-/* This can be called on a newly created socket, from other files */
-int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
- gfp_t gfp)
-{
- /* Add Key to the list */
- struct tcp_md5sig_key *key;
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_info *md5sig;
-
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
- if (key) {
- /* Pre-existing entry - just update that one. */
- memcpy(key->key, newkey, newkeylen);
- key->keylen = newkeylen;
- return 0;
- }
-
- md5sig = rcu_dereference_protected(tp->md5sig_info,
- lockdep_sock_is_held(sk));
- if (!md5sig) {
- md5sig = kmalloc(sizeof(*md5sig), gfp);
- if (!md5sig)
- return -ENOMEM;
-
- sk_nocaps_add(sk, NETIF_F_GSO_MASK);
- INIT_HLIST_HEAD(&md5sig->head);
- rcu_assign_pointer(tp->md5sig_info, md5sig);
- }
-
- key = sock_kmalloc(sk, sizeof(*key), gfp);
- if (!key)
- return -ENOMEM;
- if (!tcp_alloc_md5sig_pool()) {
- sock_kfree_s(sk, key, sizeof(*key));
- return -ENOMEM;
- }
-
- memcpy(key->key, newkey, newkeylen);
- key->keylen = newkeylen;
- key->family = family;
- key->prefixlen = prefixlen;
- memcpy(&key->addr, addr,
- (family == AF_INET6) ? sizeof(struct in6_addr) :
- sizeof(struct in_addr));
- hlist_add_head_rcu(&key->node, &md5sig->head);
- return 0;
-}
-EXPORT_SYMBOL(tcp_md5_do_add);
-
-int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
- u8 prefixlen)
-{
- struct tcp_md5sig_key *key;
-
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
- if (!key)
- return -ENOENT;
- hlist_del_rcu(&key->node);
- atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
- kfree_rcu(key, rcu);
- return 0;
-}
-EXPORT_SYMBOL(tcp_md5_do_del);
-
-static void tcp_clear_md5_list(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *key;
- struct hlist_node *n;
- struct tcp_md5sig_info *md5sig;
-
- md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
-
- hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
- hlist_del_rcu(&key->node);
- atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
- kfree_rcu(key, rcu);
- }
-}
-
-static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
-{
- struct tcp_md5sig cmd;
- struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
- u8 prefixlen = 32;
-
- if (optlen < sizeof(cmd))
- return -EINVAL;
-
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
- return -EFAULT;
-
- if (sin->sin_family != AF_INET)
- return -EINVAL;
-
- if (optname == TCP_MD5SIG_EXT &&
- cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
- prefixlen = cmd.tcpm_prefixlen;
- if (prefixlen > 32)
- return -EINVAL;
- }
-
- if (!cmd.tcpm_keylen)
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen);
-
- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
- return -EINVAL;
-
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
- GFP_KERNEL);
-}
-
-static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
- __be32 daddr, __be32 saddr,
- const struct tcphdr *th, int nbytes)
-{
- struct tcp4_pseudohdr *bp;
- struct scatterlist sg;
- struct tcphdr *_th;
-
- bp = hp->scratch;
- bp->saddr = saddr;
- bp->daddr = daddr;
- bp->pad = 0;
- bp->protocol = IPPROTO_TCP;
- bp->len = cpu_to_be16(nbytes);
-
- _th = (struct tcphdr *)(bp + 1);
- memcpy(_th, th, sizeof(*th));
- _th->check = 0;
-
- sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL,
- sizeof(*bp) + sizeof(*th));
- return crypto_ahash_update(hp->md5_req);
-}
-
-static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
- __be32 daddr, __be32 saddr, const struct tcphdr *th)
-{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
- if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-
-int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct sock *sk,
- const struct sk_buff *skb)
-{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
- const struct tcphdr *th = tcp_hdr(skb);
- __be32 saddr, daddr;
-
- if (sk) { /* valid for establish/request sockets */
- saddr = sk->sk_rcv_saddr;
- daddr = sk->sk_daddr;
- } else {
- const struct iphdr *iph = ip_hdr(skb);
- saddr = iph->saddr;
- daddr = iph->daddr;
- }
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
-
- if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
-
-#endif
-
-/* Called with rcu_read_lock() */
-static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
-{
-#ifdef CONFIG_TCP_MD5SIG
- /*
- * This gets called for each TCP segment that arrives
- * so we want to be efficient.
- * We have 3 drop cases:
- * o No MD5 hash and one expected.
- * o MD5 hash and we're not expecting one.
- * o MD5 hash and its wrong.
- */
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct iphdr *iph = ip_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
- int genhash;
- unsigned char newhash[16];
-
- hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
- AF_INET);
- hash_location = tcp_parse_md5sig_option(th);
-
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return false;
-
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return true;
- }
-
- if (!hash_expected && hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
- return true;
- }
-
- /* Okay, so this is hash_expected and hash_location -
- * so we need to calculate the checksum.
- */
- genhash = tcp_v4_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
-
- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
- &iph->saddr, ntohs(th->source),
- &iph->daddr, ntohs(th->dest),
- genhash ? " tcp_v4_calc_md5_hash failed"
- : "");
- return true;
- }
- return false;
-#endif
- return false;
-}
-
static void tcp_v4_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -1404,9 +981,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
-#endif
struct ip_options_rcu *inet_opt;
if (sk_acceptq_is_full(sk))
@@ -1453,20 +1027,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
tcp_initialize_rcv_mss(newsk);
#ifdef CONFIG_TCP_MD5SIG
- /* Copy over the MD5 key from the original socket */
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
- AF_INET);
- if (key) {
- /*
- * We're using one, so create a matching key
- * on the newsk structure. If we fail to get
- * memory, then we end up not copying the key
- * across. Shucks.
- */
- tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
- AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
- sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
- }
+ tcp_v4_md5_syn_recv_sock(sk, newsk);
#endif
if (__inet_inherit_port(sk, newsk) < 0)
@@ -1930,14 +1491,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
};
EXPORT_SYMBOL(ipv4_specific);
-#ifdef CONFIG_TCP_MD5SIG
-static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
- .md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_parse = tcp_v4_parse_md5_keys,
-};
-#endif
-
/* NOTE: A lot of things set to zero explicitly by call to
* sk_alloc() so need not be done here.
*/
@@ -1980,12 +1533,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
tcp_extopt_destroy(sk);
#ifdef CONFIG_TCP_MD5SIG
- /* Clean up the MD5 key list, if any */
- if (tp->md5sig_info) {
- tcp_clear_md5_list(sk);
- kfree_rcu(tp->md5sig_info, rcu);
- tp->md5sig_info = NULL;
- }
+ tcp_v4_md5_destroy_sock(sk);
#endif
/* Clean up a referenced TCP bind bucket. */
diff --git a/net/ipv4/tcp_md5.c b/net/ipv4/tcp_md5.c
new file mode 100644
index 000000000000..a31b404e6dbf
--- /dev/null
+++ b/net/ipv4/tcp_md5.c
@@ -0,0 +1,1102 @@
+#include <linux/inet_diag.h>
+#include <linux/inetdevice.h>
+#include <linux/tcp.h>
+#include <linux/tcp_md5.h>
+
+#include <crypto/hash.h>
+
+#include <net/inet6_hashtables.h>
+
+static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
+static DEFINE_MUTEX(tcp_md5sig_mutex);
+static bool tcp_md5sig_pool_populated;
+
+#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
+
+static void __tcp_alloc_md5sig_pool(void)
+{
+ struct crypto_ahash *hash;
+ int cpu;
+
+ hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(hash))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
+ struct ahash_request *req;
+
+ if (!scratch) {
+ scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
+ sizeof(struct tcphdr),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!scratch)
+ return;
+ per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
+ }
+ if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
+ continue;
+
+ req = ahash_request_alloc(hash, GFP_KERNEL);
+ if (!req)
+ return;
+
+ ahash_request_set_callback(req, 0, NULL, NULL);
+
+ per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
+ }
+ /* before setting tcp_md5sig_pool_populated, we must commit all writes
+ * to memory. See smp_rmb() in tcp_get_md5sig_pool()
+ */
+ smp_wmb();
+ tcp_md5sig_pool_populated = true;
+}
+
+static bool tcp_alloc_md5sig_pool(void)
+{
+ if (unlikely(!tcp_md5sig_pool_populated)) {
+ mutex_lock(&tcp_md5sig_mutex);
+
+ if (!tcp_md5sig_pool_populated)
+ __tcp_alloc_md5sig_pool();
+
+ mutex_unlock(&tcp_md5sig_mutex);
+ }
+ return tcp_md5sig_pool_populated;
+}
+
+static void tcp_put_md5sig_pool(void)
+{
+ local_bh_enable();
+}
+
+/**
+ * tcp_get_md5sig_pool - get md5sig_pool for this user
+ *
+ * We use percpu structure, so if we succeed, we exit with preemption
+ * and BH disabled, to make sure another thread or softirq handling
+ * wont try to get same context.
+ */
+static struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
+{
+ local_bh_disable();
+
+ if (tcp_md5sig_pool_populated) {
+ /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
+ smp_rmb();
+ return this_cpu_ptr(&tcp_md5sig_pool);
+ }
+ local_bh_enable();
+ return NULL;
+}
+
+static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
+ const union tcp_md5_addr *addr,
+ int family, u8 prefixlen)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_key *key;
+ unsigned int size = sizeof(struct in_addr);
+ const struct tcp_md5sig_info *md5sig;
+
+ /* caller either holds rcu_read_lock() or socket lock */
+ md5sig = rcu_dereference_check(tp->md5sig_info,
+ lockdep_sock_is_held(sk));
+ if (!md5sig)
+ return NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6)
+ size = sizeof(struct in6_addr);
+#endif
+ hlist_for_each_entry_rcu(key, &md5sig->head, node) {
+ if (key->family != family)
+ continue;
+ if (!memcmp(&key->addr, addr, size) &&
+ key->prefixlen == prefixlen)
+ return key;
+ }
+ return NULL;
+}
+
+/* This can be called on a newly created socket, from other files */
+static int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
+ int family, u8 prefixlen, const u8 *newkey,
+ u8 newkeylen, gfp_t gfp)
+{
+ /* Add Key to the list */
+ struct tcp_md5sig_key *key;
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_info *md5sig;
+
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ if (key) {
+ /* Pre-existing entry - just update that one. */
+ memcpy(key->key, newkey, newkeylen);
+ key->keylen = newkeylen;
+ return 0;
+ }
+
+ md5sig = rcu_dereference_protected(tp->md5sig_info,
+ lockdep_sock_is_held(sk));
+ if (!md5sig) {
+ md5sig = kmalloc(sizeof(*md5sig), gfp);
+ if (!md5sig)
+ return -ENOMEM;
+
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+ INIT_HLIST_HEAD(&md5sig->head);
+ rcu_assign_pointer(tp->md5sig_info, md5sig);
+ }
+
+ key = sock_kmalloc(sk, sizeof(*key), gfp);
+ if (!key)
+ return -ENOMEM;
+ if (!tcp_alloc_md5sig_pool()) {
+ sock_kfree_s(sk, key, sizeof(*key));
+ return -ENOMEM;
+ }
+
+ memcpy(key->key, newkey, newkeylen);
+ key->keylen = newkeylen;
+ key->family = family;
+ key->prefixlen = prefixlen;
+ memcpy(&key->addr, addr,
+ (family == AF_INET6) ? sizeof(struct in6_addr) :
+ sizeof(struct in_addr));
+ hlist_add_head_rcu(&key->node, &md5sig->head);
+ return 0;
+}
+
+static void tcp_clear_md5_list(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_key *key;
+ struct hlist_node *n;
+ struct tcp_md5sig_info *md5sig;
+
+ md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
+
+ hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
+ hlist_del_rcu(&key->node);
+ atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
+ kfree_rcu(key, rcu);
+ }
+}
+
+static int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
+ int family, u8 prefixlen)
+{
+ struct tcp_md5sig_key *key;
+
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ if (!key)
+ return -ENOENT;
+ hlist_del_rcu(&key->node);
+ atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
+ kfree_rcu(key, rcu);
+ return 0;
+}
+
+static int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
+ const struct tcp_md5sig_key *key)
+{
+ struct scatterlist sg;
+
+ sg_init_one(&sg, key->key, key->keylen);
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen);
+ return crypto_ahash_update(hp->md5_req);
+}
+
+static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
+ char __user *optval, int optlen)
+{
+ struct tcp_md5sig cmd;
+ struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
+ u8 prefixlen = 32;
+
+ if (optlen < sizeof(cmd))
+ return -EINVAL;
+
+ if (copy_from_user(&cmd, optval, sizeof(cmd)))
+ return -EFAULT;
+
+ if (sin->sin_family != AF_INET)
+ return -EINVAL;
+
+ if (optname == TCP_MD5SIG_EXT &&
+ cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
+ prefixlen = cmd.tcpm_prefixlen;
+ if (prefixlen > 32)
+ return -EINVAL;
+ }
+
+ if (!cmd.tcpm_keylen)
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
+ AF_INET, prefixlen);
+
+ if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+ return -EINVAL;
+
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
+ AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
+ GFP_KERNEL);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
+ char __user *optval, int optlen)
+{
+ struct tcp_md5sig cmd;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
+ u8 prefixlen;
+
+ if (optlen < sizeof(cmd))
+ return -EINVAL;
+
+ if (copy_from_user(&cmd, optval, sizeof(cmd)))
+ return -EFAULT;
+
+ if (sin6->sin6_family != AF_INET6)
+ return -EINVAL;
+
+ if (optname == TCP_MD5SIG_EXT &&
+ cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
+ prefixlen = cmd.tcpm_prefixlen;
+ if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
+ prefixlen > 32))
+ return -EINVAL;
+ } else {
+ prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
+ }
+
+ if (!cmd.tcpm_keylen) {
+ if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ AF_INET, prefixlen);
+ return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+ AF_INET6, prefixlen);
+ }
+
+ if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+ return -EINVAL;
+
+ if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ AF_INET, prefixlen, cmd.tcpm_key,
+ cmd.tcpm_keylen, GFP_KERNEL);
+
+ return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+ AF_INET6, prefixlen, cmd.tcpm_key,
+ cmd.tcpm_keylen, GFP_KERNEL);
+}
+#endif
+
+static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
+ __be32 daddr, __be32 saddr,
+ const struct tcphdr *th, int nbytes)
+{
+ struct tcp4_pseudohdr *bp;
+ struct scatterlist sg;
+ struct tcphdr *_th;
+
+ bp = hp->scratch;
+ bp->saddr = saddr;
+ bp->daddr = daddr;
+ bp->pad = 0;
+ bp->protocol = IPPROTO_TCP;
+ bp->len = cpu_to_be16(nbytes);
+
+ _th = (struct tcphdr *)(bp + 1);
+ memcpy(_th, th, sizeof(*th));
+ _th->check = 0;
+
+ sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+ sizeof(*bp) + sizeof(*th));
+ return crypto_ahash_update(hp->md5_req);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr,
+ const struct tcphdr *th, int nbytes)
+{
+ struct tcp6_pseudohdr *bp;
+ struct scatterlist sg;
+ struct tcphdr *_th;
+
+ bp = hp->scratch;
+ /* 1. TCP pseudo-header (RFC2460) */
+ bp->saddr = *saddr;
+ bp->daddr = *daddr;
+ bp->protocol = cpu_to_be32(IPPROTO_TCP);
+ bp->len = cpu_to_be32(nbytes);
+
+ _th = (struct tcphdr *)(bp + 1);
+ memcpy(_th, th, sizeof(*th));
+ _th->check = 0;
+
+ sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+ sizeof(*bp) + sizeof(*th));
+ return crypto_ahash_update(hp->md5_req);
+}
+#endif
+
+static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
+ __be32 daddr, __be32 saddr,
+ const struct tcphdr *th)
+{
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+ if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
+ const struct in6_addr *daddr,
+ struct in6_addr *saddr, const struct tcphdr *th)
+{
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+ if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+#endif
+
+/* RFC2385 MD5 checksumming requires a mapping of
+ * IP address->MD5 Key.
+ * We need to maintain these in the sk structure.
+ */
+
+/* Find the Key structure for an address. */
+static struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
+ const union tcp_md5_addr *addr,
+ int family)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_key *key;
+ const struct tcp_md5sig_info *md5sig;
+ __be32 mask;
+ struct tcp_md5sig_key *best_match = NULL;
+ bool match;
+
+ /* caller either holds rcu_read_lock() or socket lock */
+ md5sig = rcu_dereference_check(tp->md5sig_info,
+ lockdep_sock_is_held(sk));
+ if (!md5sig)
+ return NULL;
+
+ hlist_for_each_entry_rcu(key, &md5sig->head, node) {
+ if (key->family != family)
+ continue;
+
+ if (family == AF_INET) {
+ mask = inet_make_mask(key->prefixlen);
+ match = (key->addr.a4.s_addr & mask) ==
+ (addr->a4.s_addr & mask);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
+ key->prefixlen);
+#endif
+ } else {
+ match = false;
+ }
+
+ if (match && (!best_match ||
+ key->prefixlen > best_match->prefixlen))
+ best_match = key;
+ }
+ return best_match;
+}
+
+/* Parse MD5 Signature option */
+static const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
+{
+ int length = (th->doff << 2) - sizeof(*th);
+ const u8 *ptr = (const u8 *)(th + 1);
+
+ /* If the TCP option is too short, we can short cut */
+ if (length < TCPOLEN_MD5SIG)
+ return NULL;
+
+ while (length > 0) {
+ int opcode = *ptr++;
+ int opsize;
+
+ switch (opcode) {
+ case TCPOPT_EOL:
+ return NULL;
+ case TCPOPT_NOP:
+ length--;
+ continue;
+ default:
+ opsize = *ptr++;
+ if (opsize < 2 || opsize > length)
+ return NULL;
+ if (opcode == TCPOPT_MD5SIG)
+ return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
+ }
+ ptr += opsize - 2;
+ length -= opsize;
+ }
+ return NULL;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
+ const struct in6_addr *addr)
+{
+ return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
+}
+#endif
+
+static int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
+ const struct sk_buff *skb,
+ unsigned int header_len)
+{
+ struct scatterlist sg;
+ const struct tcphdr *tp = tcp_hdr(skb);
+ struct ahash_request *req = hp->md5_req;
+ unsigned int i;
+ const unsigned int head_data_len = skb_headlen(skb) > header_len ?
+ skb_headlen(skb) - header_len : 0;
+ const struct skb_shared_info *shi = skb_shinfo(skb);
+ struct sk_buff *frag_iter;
+
+ sg_init_table(&sg, 1);
+
+ sg_set_buf(&sg, ((u8 *)tp) + header_len, head_data_len);
+ ahash_request_set_crypt(req, &sg, NULL, head_data_len);
+ if (crypto_ahash_update(req))
+ return 1;
+
+ for (i = 0; i < shi->nr_frags; ++i) {
+ const struct skb_frag_struct *f = &shi->frags[i];
+ unsigned int offset = f->page_offset;
+ struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
+
+ sg_set_page(&sg, page, skb_frag_size(f),
+ offset_in_page(offset));
+ ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
+ if (crypto_ahash_update(req))
+ return 1;
+ }
+
+ skb_walk_frags(skb, frag_iter)
+ if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
+ return 1;
+
+ return 0;
+}
+
+int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
+ const __u8 *hash_location = NULL;
+
+ rcu_read_lock();
+ hash_location = tcp_parse_md5sig_option(th);
+ if (sk && sk_fullsock(sk)) {
+ opts->md5 = tcp_md5_do_lookup(sk,
+ (union tcp_md5_addr *)&iph->saddr,
+ AF_INET);
+ } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+
+ opts->md5 = tcp_twsk_md5_key(tcptw);
+ } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
+ opts->md5 = tcp_md5_do_lookup(sk,
+ (union tcp_md5_addr *)&iph->saddr,
+ AF_INET);
+ } else if (hash_location) {
+ unsigned char newhash[16];
+ struct sock *sk1;
+ int genhash;
+
+ /* active side is lost. Try to find listening socket through
+ * source port, and then find md5 key through listening socket.
+ * we are not loose security here:
+ * Incoming packet is checked with md5 hash with finding key,
+ * no RST generated if md5 hash doesn't match.
+ */
+ sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
+ &tcp_hashinfo, NULL, 0,
+ iph->saddr,
+ th->source, iph->daddr,
+ ntohs(th->source), inet_iif(skb),
+ tcp_v4_sdif(skb));
+ /* don't send rst if it can't find key */
+ if (!sk1)
+ goto out_err;
+
+ opts->md5 = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
+ &iph->saddr, AF_INET);
+ if (!opts->md5)
+ goto out_err;
+
+ genhash = tcp_v4_md5_hash_skb(newhash, opts->md5, NULL, skb);
+ if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ goto out_err;
+ }
+
+ if (opts->md5)
+ return TCPOLEN_MD5SIG_ALIGNED;
+
+ rcu_read_unlock();
+ return 0;
+
+out_err:
+ rcu_read_unlock();
+ return -1;
+}
+
+void tcp_v4_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ if (opts->md5) {
+ *topt++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+
+ tcp_v4_md5_hash_hdr((__u8 *)topt, opts->md5,
+ ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, t1);
+ rcu_read_unlock();
+ }
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+int tcp_v6_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ const __u8 *hash_location = NULL;
+
+ rcu_read_lock();
+ hash_location = tcp_parse_md5sig_option(th);
+ if (sk && sk_fullsock(sk)) {
+ opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+
+ opts->md5 = tcp_twsk_md5_key(tcptw);
+ } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
+ opts->md5 = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ } else if (hash_location) {
+ unsigned char newhash[16];
+ struct sock *sk1;
+ int genhash;
+
+ /* active side is lost. Try to find listening socket through
+ * source port, and then find md5 key through listening socket.
+ * we are not loose security here:
+ * Incoming packet is checked with md5 hash with finding key,
+ * no RST generated if md5 hash doesn't match.
+ */
+ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+ &tcp_hashinfo, NULL, 0,
+ &ipv6h->saddr,
+ th->source, &ipv6h->daddr,
+ ntohs(th->source), tcp_v6_iif(skb),
+ tcp_v6_sdif(skb));
+ if (!sk1)
+ goto out_err;
+
+ opts->md5 = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+ if (!opts->md5)
+ goto out_err;
+
+ genhash = tcp_v6_md5_hash_skb(newhash, opts->md5, NULL, skb);
+ if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ goto out_err;
+ }
+
+ if (opts->md5)
+ return TCPOLEN_MD5SIG_ALIGNED;
+
+ rcu_read_unlock();
+ return 0;
+
+out_err:
+ rcu_read_unlock();
+ return -1;
+}
+EXPORT_SYMBOL_GPL(tcp_v6_md5_send_response_prepare);
+
+void tcp_v6_md5_send_response_write(__be32 *topt, struct sk_buff *skb,
+ struct tcphdr *t1,
+ struct tcp_out_options *opts,
+ const struct sock *sk)
+{
+ if (opts->md5) {
+ *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
+ tcp_v6_md5_hash_hdr((__u8 *)topt, opts->md5,
+ &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, t1);
+
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL_GPL(tcp_v6_md5_send_response_write);
+#endif
+
+struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk)
+{
+ const union tcp_md5_addr *addr;
+
+ addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
+ return tcp_md5_do_lookup(sk, addr, AF_INET);
+}
+EXPORT_SYMBOL(tcp_v4_md5_lookup);
+
+int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
+ const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+ const struct tcphdr *th = tcp_hdr(skb);
+ __be32 saddr, daddr;
+
+ if (sk) { /* valid for establish/request sockets */
+ saddr = sk->sk_rcv_saddr;
+ daddr = sk->sk_daddr;
+ } else {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ }
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+
+ if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int tcp_v6_md5_hash_skb(char *md5_hash,
+ const struct tcp_md5sig_key *key,
+ const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct in6_addr *saddr, *daddr;
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ if (sk) { /* valid for establish/request sockets */
+ saddr = &sk->sk_v6_rcv_saddr;
+ daddr = &sk->sk_v6_daddr;
+ } else {
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+
+ saddr = &ip6h->saddr;
+ daddr = &ip6h->daddr;
+ }
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+
+ if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(tcp_v6_md5_hash_skb);
+#endif
+
+/* Called with rcu_read_lock() */
+bool tcp_v4_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ /* This gets called for each TCP segment that arrives
+ * so we want to be efficient.
+ * We have 3 drop cases:
+ * o No MD5 hash and one expected.
+ * o MD5 hash and we're not expecting one.
+ * o MD5 hash and its wrong.
+ */
+ const __u8 *hash_location = NULL;
+ struct tcp_md5sig_key *hash_expected;
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ int genhash;
+ unsigned char newhash[16];
+
+ hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
+ AF_INET);
+ hash_location = tcp_parse_md5sig_option(th);
+
+ /* We've parsed the options - do we have a hash? */
+ if (!hash_expected && !hash_location)
+ return false;
+
+ if (hash_expected && !hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ return true;
+ }
+
+ if (!hash_expected && hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ return true;
+ }
+
+ /* Okay, so this is hash_expected and hash_location -
+ * so we need to calculate the checksum.
+ */
+ genhash = tcp_v4_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, skb);
+
+ if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
+ net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
+ &iph->saddr, ntohs(th->source),
+ &iph->daddr, ntohs(th->dest),
+ genhash ? " tcp_v4_calc_md5_hash failed"
+ : "");
+ return true;
+ }
+ return false;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+bool tcp_v6_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const __u8 *hash_location = NULL;
+ struct tcp_md5sig_key *hash_expected;
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ int genhash;
+ u8 newhash[16];
+
+ hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
+ hash_location = tcp_parse_md5sig_option(th);
+
+ /* We've parsed the options - do we have a hash? */
+ if (!hash_expected && !hash_location)
+ return false;
+
+ if (hash_expected && !hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ return true;
+ }
+
+ if (!hash_expected && hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ return true;
+ }
+
+ /* check the signature */
+ genhash = tcp_v6_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, skb);
+
+ if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
+ net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
+ genhash ? "failed" : "mismatch",
+ &ip6h->saddr, ntohs(th->source),
+ &ip6h->daddr, ntohs(th->dest));
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(tcp_v6_inbound_md5_hash);
+#endif
+
+void tcp_v4_md5_destroy_sock(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* Clean up the MD5 key list, if any */
+ if (tp->md5sig_info) {
+ tcp_clear_md5_list(sk);
+ kfree_rcu(tp->md5sig_info, rcu);
+ tp->md5sig_info = NULL;
+ }
+}
+
+void tcp_v4_md5_syn_recv_sock(const struct sock *listener, struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct tcp_md5sig_key *key;
+
+ /* Copy over the MD5 key from the original socket */
+ key = tcp_md5_do_lookup(listener, (union tcp_md5_addr *)&inet->inet_daddr,
+ AF_INET);
+ if (key) {
+ /* We're using one, so create a matching key
+ * on the sk structure. If we fail to get
+ * memory, then we end up not copying the key
+ * across. Shucks.
+ */
+ tcp_md5_do_add(sk, (union tcp_md5_addr *)&inet->inet_daddr,
+ AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+ }
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+void tcp_v6_md5_syn_recv_sock(const struct sock *listener, struct sock *sk)
+{
+ struct tcp_md5sig_key *key;
+
+ /* Copy over the MD5 key from the original socket */
+ key = tcp_v6_md5_do_lookup(listener, &sk->sk_v6_daddr);
+ if (key) {
+ /* We're using one, so create a matching key
+ * on the newsk structure. If we fail to get
+ * memory, then we end up not copying the key
+ * across. Shucks.
+ */
+ tcp_md5_do_add(sk, (union tcp_md5_addr *)&sk->sk_v6_daddr,
+ AF_INET6, 128, key->key, key->keylen,
+ sk_gfp_mask(sk, GFP_ATOMIC));
+ }
+}
+EXPORT_SYMBOL_GPL(tcp_v6_md5_syn_recv_sock);
+
+struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk)
+{
+ return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
+}
+EXPORT_SYMBOL_GPL(tcp_v6_md5_lookup);
+#endif
+
+void tcp_md5_time_wait(struct sock *sk, struct inet_timewait_sock *tw)
+{
+ struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_key *key;
+
+ /* The timewait bucket does not have the key DB from the
+ * sock structure. We just make a quick copy of the
+ * md5 key being used (if indeed we are using one)
+ * so the timewait ack generating code has the key.
+ */
+ tcptw->tw_md5_key = NULL;
+ key = tp->af_specific->md5_lookup(sk, sk);
+ if (key) {
+ tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
+ BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
+ }
+}
+
+static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
+ const struct tcp_md5sig_key *key)
+{
+ info->tcpm_family = key->family;
+ info->tcpm_prefixlen = key->prefixlen;
+ info->tcpm_keylen = key->keylen;
+ memcpy(info->tcpm_key, key->key, key->keylen);
+
+ if (key->family == AF_INET)
+ info->tcpm_addr[0] = key->addr.a4.s_addr;
+ #if IS_ENABLED(CONFIG_IPV6)
+ else if (key->family == AF_INET6)
+ memcpy(&info->tcpm_addr, &key->addr.a6,
+ sizeof(info->tcpm_addr));
+ #endif
+}
+
+static int tcp_diag_put_md5sig(struct sk_buff *skb,
+ const struct tcp_md5sig_info *md5sig)
+{
+ const struct tcp_md5sig_key *key;
+ struct tcp_diag_md5sig *info;
+ struct nlattr *attr;
+ int md5sig_count = 0;
+
+ hlist_for_each_entry_rcu(key, &md5sig->head, node)
+ md5sig_count++;
+ if (md5sig_count == 0)
+ return 0;
+
+ attr = nla_reserve(skb, INET_DIAG_MD5SIG,
+ md5sig_count * sizeof(struct tcp_diag_md5sig));
+ if (!attr)
+ return -EMSGSIZE;
+
+ info = nla_data(attr);
+ memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
+ hlist_for_each_entry_rcu(key, &md5sig->head, node) {
+ tcp_diag_md5sig_fill(info++, key);
+ if (--md5sig_count == 0)
+ break;
+ }
+
+ return 0;
+}
+
+int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb)
+{
+ if (net_admin) {
+ struct tcp_md5sig_info *md5sig;
+ int err = 0;
+
+ rcu_read_lock();
+ md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
+ if (md5sig)
+ err = tcp_diag_put_md5sig(skb, md5sig);
+ rcu_read_unlock();
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tcp_md5_diag_get_aux);
+
+int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin)
+{
+ int size = 0;
+
+ if (net_admin && sk_fullsock(sk)) {
+ const struct tcp_md5sig_info *md5sig;
+ const struct tcp_md5sig_key *key;
+ size_t md5sig_count = 0;
+
+ rcu_read_lock();
+ md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
+ if (md5sig) {
+ hlist_for_each_entry_rcu(key, &md5sig->head, node)
+ md5sig_count++;
+ }
+ rcu_read_unlock();
+ size += nla_total_size(md5sig_count *
+ sizeof(struct tcp_diag_md5sig));
+ }
+
+ return size;
+}
+EXPORT_SYMBOL_GPL(tcp_md5_diag_get_aux_size);
+
+const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
+ .md5_lookup = tcp_v4_md5_lookup,
+ .calc_md5_hash = tcp_v4_md5_hash_skb,
+ .md5_parse = tcp_v4_parse_md5_keys,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
+ .md5_lookup = tcp_v6_md5_lookup,
+ .calc_md5_hash = tcp_v6_md5_hash_skb,
+ .md5_parse = tcp_v6_parse_md5_keys,
+};
+EXPORT_SYMBOL_GPL(tcp_sock_ipv6_specific);
+
+const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
+ .md5_lookup = tcp_v4_md5_lookup,
+ .calc_md5_hash = tcp_v4_md5_hash_skb,
+ .md5_parse = tcp_v6_parse_md5_keys,
+};
+EXPORT_SYMBOL_GPL(tcp_sock_ipv6_mapped_specific);
+#endif
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index aa2ff9aadad0..f33214b29167 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
+#include <linux/tcp_md5.h>
#include <linux/workqueue.h>
#include <linux/static_key.h>
#include <net/tcp.h>
@@ -295,21 +296,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
INIT_HLIST_HEAD(&tp->tcp_option_list);
}
#ifdef CONFIG_TCP_MD5SIG
- /*
- * The timewait bucket does not have the key DB from the
- * sock structure. We just make a quick copy of the
- * md5 key being used (if indeed we are using one)
- * so the timewait ack generating code has the key.
- */
- do {
- struct tcp_md5sig_key *key;
- tcptw->tw_md5_key = NULL;
- key = tp->af_specific->md5_lookup(sk, sk);
- if (key) {
- tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
- BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
- }
- } while (0);
+ tcp_md5_time_wait(sk, tw);
#endif
/* Get the TIME_WAIT timeout firing. */
@@ -346,13 +333,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
- struct tcp_timewait_sock *twsk = tcp_twsk(sk);
-
- if (twsk->tw_md5_key)
- kfree_rcu(twsk->tw_md5_key, rcu);
+ tcp_md5_twsk_destructor(sk);
#endif
- if (unlikely(!hlist_empty(&twsk->tcp_option_list)))
+ if (unlikely(!hlist_empty(&tcp_twsk(sk)->tcp_option_list)))
tcp_extopt_destroy(sk);
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
@@ -538,8 +522,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (newtp->af_specific->md5_lookup(sk, newsk))
- newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+ tcp_md5_add_header_len(sk, newsk);
#endif
if (unlikely(!hlist_empty(&treq->tcp_option_list)))
newtp->tcp_header_len += tcp_extopt_add_header(req_to_sk(req), newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7ea65f70e5ec..137645753abb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -42,6 +42,7 @@
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/static_key.h>
+#include <linux/tcp_md5.h>
#include <trace/events/tcp.h>
@@ -3238,8 +3239,7 @@ static void tcp_connect_init(struct sock *sk)
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
- if (tp->af_specific->md5_lookup(sk, sk))
- tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+ tcp_md5_add_header_len(sk, sk);
#endif
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 202a59511950..e9b72d794140 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -43,6 +43,7 @@
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
+#include <linux/tcp_md5.h>
#include <net/tcp.h>
#include <net/ndisc.h>
@@ -79,10 +80,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
-#ifdef CONFIG_TCP_MD5SIG
-static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
-static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
-#endif
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
@@ -501,218 +498,6 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
kfree_skb(inet_rsk(req)->pktopts);
}
-#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
- const struct in6_addr *addr)
-{
- return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
-}
-
-static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk)
-{
- return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
-}
-
-static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
-{
- struct tcp_md5sig cmd;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
- u8 prefixlen;
-
- if (optlen < sizeof(cmd))
- return -EINVAL;
-
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
- return -EFAULT;
-
- if (sin6->sin6_family != AF_INET6)
- return -EINVAL;
-
- if (optname == TCP_MD5SIG_EXT &&
- cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
- prefixlen = cmd.tcpm_prefixlen;
- if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
- prefixlen > 32))
- return -EINVAL;
- } else {
- prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
- }
-
- if (!cmd.tcpm_keylen) {
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen);
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen);
- }
-
- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
- return -EINVAL;
-
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen, cmd.tcpm_key,
- cmd.tcpm_keylen, GFP_KERNEL);
-
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, cmd.tcpm_key,
- cmd.tcpm_keylen, GFP_KERNEL);
-}
-
-static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr,
- const struct tcphdr *th, int nbytes)
-{
- struct tcp6_pseudohdr *bp;
- struct scatterlist sg;
- struct tcphdr *_th;
-
- bp = hp->scratch;
- /* 1. TCP pseudo-header (RFC2460) */
- bp->saddr = *saddr;
- bp->daddr = *daddr;
- bp->protocol = cpu_to_be32(IPPROTO_TCP);
- bp->len = cpu_to_be32(nbytes);
-
- _th = (struct tcphdr *)(bp + 1);
- memcpy(_th, th, sizeof(*th));
- _th->check = 0;
-
- sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL,
- sizeof(*bp) + sizeof(*th));
- return crypto_ahash_update(hp->md5_req);
-}
-
-static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct in6_addr *daddr, struct in6_addr *saddr,
- const struct tcphdr *th)
-{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
- if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-
-static int tcp_v6_md5_hash_skb(char *md5_hash,
- const struct tcp_md5sig_key *key,
- const struct sock *sk,
- const struct sk_buff *skb)
-{
- const struct in6_addr *saddr, *daddr;
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
- const struct tcphdr *th = tcp_hdr(skb);
-
- if (sk) { /* valid for establish/request sockets */
- saddr = &sk->sk_v6_rcv_saddr;
- daddr = &sk->sk_v6_daddr;
- } else {
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
- saddr = &ip6h->saddr;
- daddr = &ip6h->daddr;
- }
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
-
- if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-
-#endif
-
-static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
-{
-#ifdef CONFIG_TCP_MD5SIG
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
- int genhash;
- u8 newhash[16];
-
- hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
- hash_location = tcp_parse_md5sig_option(th);
-
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return false;
-
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return true;
- }
-
- if (!hash_expected && hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
- return true;
- }
-
- /* check the signature */
- genhash = tcp_v6_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
-
- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
- genhash ? "failed" : "mismatch",
- &ip6h->saddr, ntohs(th->source),
- &ip6h->daddr, ntohs(th->dest));
- return true;
- }
-#endif
- return false;
-}
-
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -787,56 +572,24 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
__be32 *topt;
struct hlist_head *extopt_list = NULL;
struct tcp_out_options extraopts;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key = NULL;
- const __u8 *hash_location = NULL;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-#endif
+
+ memset(&extraopts, 0, sizeof(extraopts));
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
- rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
- if (sk && sk_fullsock(sk)) {
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
- } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
- struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
-
- key = tcp_twsk_md5_key(tcptw);
- } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
- } else if (hash_location) {
- unsigned char newhash[16];
- struct sock *sk1 = NULL;
- int genhash;
-
- /* active side is lost. Try to find listening socket through
- * source port, and then find md5 key through listening socket.
- * we are not loose security here:
- * Incoming packet is checked with md5 hash with finding key,
- * no RST generated if md5 hash doesn't match.
- */
- sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, NULL, 0,
- &ipv6h->saddr,
- th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb),
- tcp_v6_sdif(skb));
- if (!sk1)
- goto out;
+{
+ int ret;
- key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
- if (!key)
- goto out;
+ ret = tcp_v6_md5_send_response_prepare(skb, 0,
+ MAX_TCP_OPTION_SPACE - tot_len,
+ &extraopts, sk);
- genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto out;
- }
+ if (ret == -1)
+ goto out;
- if (key)
- tot_len += TCPOLEN_MD5SIG_ALIGNED;
+ tot_len += ret;
+}
#endif
if (sk)
@@ -850,8 +603,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (!rst || !th->ack)
extraflags |= TCPHDR_ACK;
- memset(&extraopts, 0, sizeof(extraopts));
-
used = tcp_extopt_response_prepare(skb, extraflags, remaining,
&extraopts, sk);
@@ -889,13 +640,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
- *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
- tcp_v6_md5_hash_hdr((__u8 *)topt, key,
- &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, t1);
- }
+ if (extraopts.md5)
+ tcp_v6_md5_send_response_write(topt, skb, t1, &extraopts, sk);
#endif
if (unlikely(extopt_list && !hlist_empty(extopt_list)))
@@ -943,10 +689,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
out:
kfree_skb(buff);
-
-#ifdef CONFIG_TCP_MD5SIG
- rcu_read_unlock();
-#endif
}
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
@@ -1072,9 +814,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
-#endif
struct flowi6 fl6;
if (skb->protocol == htons(ETH_P_IP)) {
@@ -1219,18 +958,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
#ifdef CONFIG_TCP_MD5SIG
- /* Copy over the MD5 key from the original socket */
- key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
- if (key) {
- /* We're using one, so create a matching key
- * on the newsk structure. If we fail to get
- * memory, then we end up not copying the key
- * across. Shucks.
- */
- tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
- AF_INET6, 128, key->key, key->keylen,
- sk_gfp_mask(sk, GFP_ATOMIC));
- }
+ tcp_v6_md5_syn_recv_sock(sk, newsk);
#endif
if (__inet_inherit_port(sk, newsk) < 0) {
@@ -1692,14 +1420,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.mtu_reduced = tcp_v6_mtu_reduced,
};
-#ifdef CONFIG_TCP_MD5SIG
-static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
- .md5_lookup = tcp_v6_md5_lookup,
- .calc_md5_hash = tcp_v6_md5_hash_skb,
- .md5_parse = tcp_v6_parse_md5_keys,
-};
-#endif
-
/*
* TCP over IPv4 via INET6 API
*/
@@ -1722,14 +1442,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.mtu_reduced = tcp_v4_mtu_reduced,
};
-#ifdef CONFIG_TCP_MD5SIG
-static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
- .md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_parse = tcp_v6_parse_md5_keys,
-};
-#endif
-
/* NOTE: A lot of things set to zero explicitly by call to
* sk_alloc() so need not be done here.
*/
--
2.15.0
^ permalink raw reply related
* [RFC 14/14] tcp_md5: Use TCP extra-options on the input path
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
In-Reply-To: <20171218215109.38700-1-cpaasch@apple.com>
The checks are now being done through the extra-option framework. For
TCP MD5 this means that the check happens a bit later than usual.
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/tcp_md5.h | 23 +----------------------
net/ipv4/tcp_input.c | 8 --------
net/ipv4/tcp_ipv4.c | 9 ---------
net/ipv4/tcp_md5.c | 29 ++++++++++++++++++++++++-----
net/ipv6/tcp_ipv6.c | 9 ---------
5 files changed, 25 insertions(+), 53 deletions(-)
diff --git a/include/linux/tcp_md5.h b/include/linux/tcp_md5.h
index 509fc36335e7..bef277f55b36 100644
--- a/include/linux/tcp_md5.h
+++ b/include/linux/tcp_md5.h
@@ -31,30 +31,9 @@ struct tcp_md5sig_key {
int tcp_md5_parse_keys(struct sock *sk, int optname, char __user *optval,
int optlen);
-bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb);
-
-bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb);
-
int tcp_md5_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb);
int tcp_md5_diag_get_aux_size(struct sock *sk, bool net_admin);
-#else
-
-static inline bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
-{
- return false;
-}
-
-static inline bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
-{
- return false;
-}
-
-#endif
-
+#endif /* CONFIG_TCP_MD5SIG */
#endif /* _LINUX_TCP_MD5_H */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index db54bdbdee51..e4de06e28a85 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3754,14 +3754,6 @@ void tcp_parse_options(const struct net *net,
TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
}
break;
-#ifdef CONFIG_TCP_MD5SIG
- case TCPOPT_MD5SIG:
- /*
- * The MD5 Hash has already been
- * checked (see tcp_v{4,6}_do_rcv()).
- */
- break;
-#endif
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 670d7751f814..707ad1a343ba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -62,7 +62,6 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
-#include <linux/tcp_md5.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -1249,11 +1248,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
struct sock *nsk;
sk = req->rsk_listener;
- if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
- sk_drops_add(sk, skb);
- reqsk_put(req);
- goto discard_it;
- }
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
@@ -1293,9 +1287,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- if (tcp_v4_inbound_md5_hash(sk, skb))
- goto discard_and_relse;
-
nf_reset(skb);
if (tcp_filter(sk, skb))
diff --git a/net/ipv4/tcp_md5.c b/net/ipv4/tcp_md5.c
index 052f5a587783..723320d0741a 100644
--- a/net/ipv4/tcp_md5.c
+++ b/net/ipv4/tcp_md5.c
@@ -29,6 +29,10 @@ static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
static bool tcp_md5sig_pool_populated;
+static bool tcp_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct tcp_extopt_store *store);
+
static unsigned int tcp_md5_extopt_prepare(struct sk_buff *skb, u8 flags,
unsigned int remaining,
struct tcp_out_options *opts,
@@ -76,6 +80,7 @@ struct tcp_md5_extopt {
static const struct tcp_extopt_ops tcp_md5_extra_ops = {
.option_kind = TCPOPT_MD5SIG,
+ .check = tcp_inbound_md5_hash,
.prepare = tcp_md5_extopt_prepare,
.write = tcp_md5_extopt_write,
.response_prepare = tcp_md5_send_response_prepare,
@@ -863,8 +868,8 @@ static struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
}
/* Called with rcu_read_lock() */
-bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
+static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
{
/* This gets called for each TCP segment that arrives
* so we want to be efficient.
@@ -918,8 +923,8 @@ bool tcp_v4_inbound_md5_hash(const struct sock *sk,
}
#if IS_ENABLED(CONFIG_IPV6)
-bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
+static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
{
const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
@@ -961,7 +966,6 @@ bool tcp_v6_inbound_md5_hash(const struct sock *sk,
return false;
}
-EXPORT_SYMBOL_GPL(tcp_v6_inbound_md5_hash);
static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
@@ -971,6 +975,21 @@ static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
EXPORT_SYMBOL_GPL(tcp_v6_md5_lookup);
#endif
+static bool tcp_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct tcp_extopt_store *store)
+{
+ if (skb->protocol == htons(ETH_P_IP)) {
+ return tcp_v4_inbound_md5_hash(sk, skb);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ return tcp_v6_inbound_md5_hash(sk, skb);
+#endif
+ }
+
+ return false;
+}
+
static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
const struct tcp_md5sig_key *key)
{
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 890616fc5591..f5dc730d3abc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -43,7 +43,6 @@
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
-#include <linux/tcp_md5.h>
#include <net/tcp.h>
#include <net/ndisc.h>
@@ -1173,11 +1172,6 @@ static int tcp_v6_rcv(struct sk_buff *skb)
struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v6_inbound_md5_hash(sk, skb)) {
- sk_drops_add(sk, skb);
- reqsk_put(req);
- goto discard_it;
- }
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
@@ -1214,9 +1208,6 @@ static int tcp_v6_rcv(struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- if (tcp_v6_inbound_md5_hash(sk, skb))
- goto discard_and_relse;
-
if (tcp_filter(sk, skb))
goto discard_and_relse;
th = (const struct tcphdr *)skb->data;
--
2.15.0
^ permalink raw reply related
* [RFC 09/14] tcp_md5: Detect key inside tcp_v6_send_response instead of passing it as an argument
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
In-Reply-To: <20171218215109.38700-1-cpaasch@apple.com>
We want to move all the TCP-MD5 code to a single place which enables us
to factor the TCP-MD5 code out of the TCP-stack into the extra-option
framework.
Detection of whether or not to drop the segment (as done in
tcp_v6_send_reset()) has now been moved to tcp_v6_send_response().
So we needed to adapt the latter so that it can handle the case where we
want to exit without sending anything.
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv6/tcp_ipv6.c | 119 +++++++++++++++++++++++++---------------------------
1 file changed, 57 insertions(+), 62 deletions(-)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5af5dcc1ac83..202a59511950 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -82,12 +82,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
-#else
-static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
- const struct in6_addr *addr)
-{
- return NULL;
-}
#endif
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
@@ -780,12 +774,11 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
- int oif, struct tcp_md5sig_key *key, int rst,
- u8 tclass, __be32 label)
+ int oif, int rst, u8 tclass, __be32 label)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
- struct sk_buff *buff;
+ struct sk_buff *buff = NULL;
struct flowi6 fl6;
struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
struct sock *ctl_sk = net->ipv6.tcp_sk;
@@ -794,10 +787,54 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
__be32 *topt;
struct hlist_head *extopt_list = NULL;
struct tcp_out_options extraopts;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key = NULL;
+ const __u8 *hash_location = NULL;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+#endif
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
+ rcu_read_lock();
+ hash_location = tcp_parse_md5sig_option(th);
+ if (sk && sk_fullsock(sk)) {
+ key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ } else if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+
+ key = tcp_twsk_md5_key(tcptw);
+ } else if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
+ key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ } else if (hash_location) {
+ unsigned char newhash[16];
+ struct sock *sk1 = NULL;
+ int genhash;
+
+ /* active side is lost. Try to find listening socket through
+ * source port, and then find md5 key through listening socket.
+ * we are not loose security here:
+ * Incoming packet is checked with md5 hash with finding key,
+ * no RST generated if md5 hash doesn't match.
+ */
+ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+ &tcp_hashinfo, NULL, 0,
+ &ipv6h->saddr,
+ th->source, &ipv6h->daddr,
+ ntohs(th->source), tcp_v6_iif(skb),
+ tcp_v6_sdif(skb));
+ if (!sk1)
+ goto out;
+
+ key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+ if (!key)
+ goto out;
+
+ genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
+ if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ goto out;
+ }
+
if (key)
tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
@@ -824,7 +861,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
GFP_ATOMIC);
if (!buff)
- return;
+ goto out;
skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
@@ -901,24 +938,21 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
- return;
+ buff = NULL;
}
+out:
kfree_skb(buff);
+
+#ifdef CONFIG_TCP_MD5SIG
+ rcu_read_unlock();
+#endif
}
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
u32 seq = 0, ack_seq = 0;
- struct tcp_md5sig_key *key = NULL;
-#ifdef CONFIG_TCP_MD5SIG
- const __u8 *hash_location = NULL;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- unsigned char newhash[16];
- int genhash;
- struct sock *sk1 = NULL;
-#endif
int oif = 0;
if (th->rst)
@@ -930,38 +964,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (!sk && !ipv6_unicast_destination(skb))
return;
-#ifdef CONFIG_TCP_MD5SIG
- rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
- if (sk && sk_fullsock(sk)) {
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
- } else if (hash_location) {
- /*
- * active side is lost. Try to find listening socket through
- * source port, and then find md5 key through listening socket.
- * we are not loose security here:
- * Incoming packet is checked with md5 hash with finding key,
- * no RST generated if md5 hash doesn't match.
- */
- sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, NULL, 0,
- &ipv6h->saddr,
- th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb),
- tcp_v6_sdif(skb));
- if (!sk1)
- goto out;
-
- key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
- if (!key)
- goto out;
-
- genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto out;
- }
-#endif
-
if (th->ack)
seq = ntohl(th->ack_seq);
else
@@ -973,20 +975,14 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
trace_tcp_send_reset(sk, skb);
}
- tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
-
-#ifdef CONFIG_TCP_MD5SIG
-out:
- rcu_read_unlock();
-#endif
+ tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, 0, 0);
}
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
- struct tcp_md5sig_key *key, u8 tclass,
- __be32 label)
+ u8 tclass, __be32 label)
{
- tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
+ tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
tclass, label);
}
@@ -998,7 +994,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
- tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
+ tcptw->tw_ts_recent, tw->tw_bound_dev_if,
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
inet_twsk_put(tw);
@@ -1021,7 +1017,6 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0);
}
--
2.15.0
^ permalink raw reply related
* [RFC 08/14] tcp_md5: Detect key inside tcp_v4_send_ack instead of passing it as an argument
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
In-Reply-To: <20171218215109.38700-1-cpaasch@apple.com>
This will simplify to consolidate the TCP_MD5-code into a single place.
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv4/tcp_ipv4.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dee296097b8f..397975203e14 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -764,7 +764,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
static void tcp_v4_send_ack(const struct sock *sk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
- struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
{
const struct tcphdr *th = tcp_hdr(skb);
@@ -773,6 +772,9 @@ static void tcp_v4_send_ack(const struct sock *sk,
__be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
struct hlist_head *extopt_list = NULL;
+#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_md5sig_key *key;
+#endif
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
int offset = 0;
@@ -803,6 +805,17 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.th.ack = 1;
rep.th.window = htons(win);
+#ifdef CONFIG_TCP_MD5SIG
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ key = tcp_twsk_md5_key(tcp_twsk(sk));
+ } else if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
+ AF_INET);
+ } else {
+ key = NULL; /* Should not happen */
+ }
+#endif
+
if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
unsigned int remaining;
struct tcp_out_options opts;
@@ -872,7 +885,6 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
tw->tw_bound_dev_if,
- tcp_twsk_md5_key(tcptw),
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
tw->tw_tos
);
@@ -900,8 +912,6 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
- AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
}
--
2.15.0
^ permalink raw reply related
* [RFC 13/14] tcp_md5: Cleanup TCP-code
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
In-Reply-To: <20171218215109.38700-1-cpaasch@apple.com>
Now that we have consolidated the TCP_MD5 output path, we can cleanup
TCP and its callbacks to MD5.
These callbacks are solely there to handle the different
address-familiese (v4, v6 and v4mapped).
Now that we have isolated the TCP_MD5-code it is acceptable to add a bit
more complexity inside tcp_md5.c to handle these address-families at the
benefit of getting rid of these callbacks in tcp_sock, together with its
assignments in tcp_v4/6_connect,...
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/tcp.h | 5 -
include/linux/tcp_md5.h | 18 +--
include/net/tcp.h | 24 ----
net/ipv4/tcp.c | 2 +-
net/ipv4/tcp_ipv4.c | 8 --
net/ipv4/tcp_md5.c | 340 ++++++++++++++++++++++--------------------------
net/ipv6/tcp_ipv6.c | 17 ---
7 files changed, 155 insertions(+), 259 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 034fbd9e0a38..5278387fabe7 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -377,11 +377,6 @@ struct tcp_sock {
* while socket was owned by user.
*/
-#ifdef CONFIG_TCP_MD5SIG
-/* TCP AF-Specific parts; only used by MD5 Signature support so far */
- const struct tcp_sock_af_ops *af_specific;
-#endif
-
/* TCP fastopen related information */
struct tcp_fastopen_request *fastopen_req;
/* fastopen_rsk points to request_sock that resulted in this big
diff --git a/include/linux/tcp_md5.h b/include/linux/tcp_md5.h
index 8dee4fc3dc7f..509fc36335e7 100644
--- a/include/linux/tcp_md5.h
+++ b/include/linux/tcp_md5.h
@@ -26,28 +26,14 @@ struct tcp_md5sig_key {
struct rcu_head rcu;
};
-extern const struct tcp_sock_af_ops tcp_sock_ipv4_specific;
-extern const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
-extern const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
-
/* - functions */
-int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct sock *sk, const struct sk_buff *skb);
-struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk);
+int tcp_md5_parse_keys(struct sock *sk, int optname, char __user *optval,
+ int optlen);
bool tcp_v4_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb);
-struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk);
-
-int tcp_v6_md5_hash_skb(char *md5_hash,
- const struct tcp_md5sig_key *key,
- const struct sock *sk,
- const struct sk_buff *skb);
-
bool tcp_v6_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e955c5f0997f..baf0a6989a79 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1729,32 +1729,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb);
-/* TCP af-specific functions */
-struct tcp_sock_af_ops {
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *(*md5_lookup) (const struct sock *sk,
- const struct sock *addr_sk);
- int (*calc_md5_hash)(char *location,
- const struct tcp_md5sig_key *md5,
- const struct sock *sk,
- const struct sk_buff *skb);
- int (*md5_parse)(struct sock *sk,
- int optname,
- char __user *optval,
- int optlen);
-#endif
-};
-
struct tcp_request_sock_ops {
u16 mss_clamp;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *(*req_md5_lookup)(const struct sock *sk,
- const struct sock *addr_sk);
- int (*calc_md5_hash) (char *location,
- const struct tcp_md5sig_key *md5,
- const struct sock *sk,
- const struct sk_buff *skb);
-#endif
void (*init_req)(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 29f3ce8a0b54..8b6f5efe9509 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2795,7 +2795,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_MD5SIG:
case TCP_MD5SIG_EXT:
/* Read the IP->Key mappings from userspace */
- err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
+ err = tcp_md5_parse_keys(sk, optname, optval, optlen);
break;
#endif
case TCP_USER_TIMEOUT:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 356bf41ec73a..670d7751f814 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -889,10 +889,6 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.mss_clamp = TCP_MSS_DEFAULT,
-#ifdef CONFIG_TCP_MD5SIG
- .req_md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_md5_hash_skb,
-#endif
.init_req = tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v4_init_sequence,
@@ -1450,10 +1446,6 @@ static int tcp_v4_init_sock(struct sock *sk)
icsk->icsk_af_ops = &ipv4_specific;
-#ifdef CONFIG_TCP_MD5SIG
- tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
-#endif
-
return 0;
}
diff --git a/net/ipv4/tcp_md5.c b/net/ipv4/tcp_md5.c
index 64e5b4420ce9..052f5a587783 100644
--- a/net/ipv4/tcp_md5.c
+++ b/net/ipv4/tcp_md5.c
@@ -336,12 +336,13 @@ static int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
return crypto_ahash_update(hp->md5_req);
}
-static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
+int tcp_md5_parse_keys(struct sock *sk, int optname, char __user *optval,
+ int optlen)
{
+ u8 prefixlen = 32, maxprefixlen;
+ union tcp_md5_addr *tcpmd5addr;
struct tcp_md5sig cmd;
- struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
- u8 prefixlen = 32;
+ unsigned short family;
if (optlen < sizeof(cmd))
return -EINVAL;
@@ -349,76 +350,48 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
if (copy_from_user(&cmd, optval, sizeof(cmd)))
return -EFAULT;
- if (sin->sin_family != AF_INET)
- return -EINVAL;
-
- if (optname == TCP_MD5SIG_EXT &&
- cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
- prefixlen = cmd.tcpm_prefixlen;
- if (prefixlen > 32)
- return -EINVAL;
- }
+ family = cmd.tcpm_addr.ss_family;
- if (!cmd.tcpm_keylen)
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen);
-
- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+ if (family != AF_INET && family != AF_INET6)
return -EINVAL;
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
- GFP_KERNEL);
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
-{
- struct tcp_md5sig cmd;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
- u8 prefixlen;
-
- if (optlen < sizeof(cmd))
+ if (sk->sk_family != family)
return -EINVAL;
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
- return -EFAULT;
+ if (family == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
- if (sin6->sin6_family != AF_INET6)
- return -EINVAL;
+ if (!ipv6_addr_v4mapped(&sin6->sin6_addr)) {
+ tcpmd5addr = (union tcp_md5_addr *)&sin6->sin6_addr;
+ maxprefixlen = 128;
+ } else {
+ tcpmd5addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
+ family = AF_INET;
+ maxprefixlen = 32;
+ }
+ } else {
+ struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
+
+ tcpmd5addr = (union tcp_md5_addr *)&sin->sin_addr;
+ maxprefixlen = 32;
+ }
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
prefixlen = cmd.tcpm_prefixlen;
- if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
- prefixlen > 32))
+ if (prefixlen > maxprefixlen)
return -EINVAL;
- } else {
- prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
}
- if (!cmd.tcpm_keylen) {
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen);
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen);
- }
+ if (!cmd.tcpm_keylen)
+ return tcp_md5_do_del(sk, tcpmd5addr, family, prefixlen);
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen, cmd.tcpm_key,
- cmd.tcpm_keylen, GFP_KERNEL);
-
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, cmd.tcpm_key,
+ return tcp_md5_do_add(sk, tcpmd5addr, family, prefixlen, cmd.tcpm_key,
cmd.tcpm_keylen, GFP_KERNEL);
}
-#endif
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
__be32 daddr, __be32 saddr,
@@ -670,6 +643,102 @@ static int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
return 0;
}
+static int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
+ const struct sock *sk, const struct sk_buff *skb)
+{
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+ const struct tcphdr *th = tcp_hdr(skb);
+ __be32 saddr, daddr;
+
+ if (sk) { /* valid for establish/request sockets */
+ saddr = sk->sk_rcv_saddr;
+ daddr = sk->sk_daddr;
+ } else {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ }
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+
+ if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int tcp_v6_md5_hash_skb(char *md5_hash,
+ const struct tcp_md5sig_key *key,
+ const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct in6_addr *saddr, *daddr;
+ struct tcp_md5sig_pool *hp;
+ struct ahash_request *req;
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ if (sk) { /* valid for establish/request sockets */
+ saddr = &sk->sk_v6_rcv_saddr;
+ daddr = &sk->sk_v6_daddr;
+ } else {
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+
+ saddr = &ip6h->saddr;
+ daddr = &ip6h->daddr;
+ }
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ req = hp->md5_req;
+
+ if (crypto_ahash_init(req))
+ goto clear_hash;
+
+ if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ ahash_request_set_crypt(req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(req))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
+}
+#endif
+
static int tcp_v4_md5_send_response_prepare(struct sk_buff *skb, u8 flags,
unsigned int remaining,
struct tcp_out_options *opts,
@@ -784,114 +853,14 @@ static __be32 *tcp_md5_send_response_write(__be32 *ptr, struct sk_buff *orig,
return tcp_v4_md5_send_response_write(ptr, orig, th, opts, sk);
}
-struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk)
+static struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk)
{
const union tcp_md5_addr *addr;
addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
return tcp_md5_do_lookup(sk, addr, AF_INET);
}
-EXPORT_SYMBOL(tcp_v4_md5_lookup);
-
-int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct sock *sk,
- const struct sk_buff *skb)
-{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
- const struct tcphdr *th = tcp_hdr(skb);
- __be32 saddr, daddr;
-
- if (sk) { /* valid for establish/request sockets */
- saddr = sk->sk_rcv_saddr;
- daddr = sk->sk_daddr;
- } else {
- const struct iphdr *iph = ip_hdr(skb);
-
- saddr = iph->saddr;
- daddr = iph->daddr;
- }
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
-
- if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
-
-#if IS_ENABLED(CONFIG_IPV6)
-int tcp_v6_md5_hash_skb(char *md5_hash,
- const struct tcp_md5sig_key *key,
- const struct sock *sk,
- const struct sk_buff *skb)
-{
- const struct in6_addr *saddr, *daddr;
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
- const struct tcphdr *th = tcp_hdr(skb);
-
- if (sk) { /* valid for establish/request sockets */
- saddr = &sk->sk_v6_rcv_saddr;
- daddr = &sk->sk_v6_daddr;
- } else {
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-
- saddr = &ip6h->saddr;
- daddr = &ip6h->daddr;
- }
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
-
- if (crypto_ahash_init(req))
- goto clear_hash;
-
- if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
- goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
- goto clear_hash;
-
- tcp_put_md5sig_pool();
- return 0;
-
-clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
- memset(md5_hash, 0, 16);
- return 1;
-}
-EXPORT_SYMBOL_GPL(tcp_v6_md5_hash_skb);
-#endif
/* Called with rcu_read_lock() */
bool tcp_v4_inbound_md5_hash(const struct sock *sk,
@@ -994,8 +963,8 @@ bool tcp_v6_inbound_md5_hash(const struct sock *sk,
}
EXPORT_SYMBOL_GPL(tcp_v6_inbound_md5_hash);
-struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
- const struct sock *addr_sk)
+static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
+ const struct sock *addr_sk)
{
return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}
@@ -1103,10 +1072,17 @@ static int tcp_md5_extopt_add_header_len(const struct sock *orig,
const struct sock *sk,
struct tcp_extopt_store *store)
{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (tp->af_specific->md5_lookup(orig, sk))
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
+ if (tcp_v6_md5_lookup(orig, sk))
+ return TCPOLEN_MD5SIG_ALIGNED;
+ } else
+#endif
+{
+ if (tcp_v4_md5_lookup(orig, sk))
return TCPOLEN_MD5SIG_ALIGNED;
+}
return 0;
}
@@ -1120,19 +1096,29 @@ static unsigned int tcp_md5_extopt_prepare(struct sk_buff *skb, u8 flags,
int ret = 0;
if (sk_fullsock(sk)) {
- struct tcp_sock *tp = tcp_sk(sk);
-
- opts->md5 = tp->af_specific->md5_lookup(sk, sk);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
+ opts->md5 = tcp_v6_md5_lookup(sk, sk);
+ else
+#endif
+ opts->md5 = tcp_v4_md5_lookup(sk, sk);
} else {
struct request_sock *req = inet_reqsk(sk);
struct sock *listener = req->rsk_listener;
+ struct inet_request_sock *ireq = inet_rsk(req);
/* Coming from tcp_make_synack, unlock is in
* tcp_md5_extopt_write
*/
rcu_read_lock();
- opts->md5 = tcp_rsk(req)->af_specific->req_md5_lookup(listener, sk);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ireq->ireq_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&ireq->ir_v6_rmt_addr))
+ opts->md5 = tcp_v6_md5_lookup(listener, sk);
+ else
+#endif
+ opts->md5 = tcp_v4_md5_lookup(listener, sk);
if (!opts->md5)
rcu_read_unlock();
@@ -1352,25 +1338,3 @@ static void tcp_md5_extopt_destroy(struct tcp_extopt_store *store)
kfree_rcu(md5_opt, rcu);
}
}
-
-const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
- .md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_parse = tcp_v4_parse_md5_keys,
-};
-
-#if IS_ENABLED(CONFIG_IPV6)
-const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
- .md5_lookup = tcp_v6_md5_lookup,
- .calc_md5_hash = tcp_v6_md5_hash_skb,
- .md5_parse = tcp_v6_parse_md5_keys,
-};
-EXPORT_SYMBOL_GPL(tcp_sock_ipv6_specific);
-
-const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
- .md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_md5_hash_skb,
- .md5_parse = tcp_v6_parse_md5_keys,
-};
-EXPORT_SYMBOL_GPL(tcp_sock_ipv6_mapped_specific);
-#endif
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 16cbd6ec2063..890616fc5591 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -208,9 +208,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
icsk->icsk_af_ops = &ipv6_mapped;
sk->sk_backlog_rcv = tcp_v4_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
- tp->af_specific = &tcp_sock_ipv6_mapped_specific;
-#endif
err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
@@ -218,9 +215,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
icsk->icsk_ext_hdr_len = exthdrlen;
icsk->icsk_af_ops = &ipv6_specific;
sk->sk_backlog_rcv = tcp_v6_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
- tp->af_specific = &tcp_sock_ipv6_specific;
-#endif
goto failure;
}
np->saddr = sk->sk_v6_rcv_saddr;
@@ -543,10 +537,6 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
sizeof(struct ipv6hdr),
-#ifdef CONFIG_TCP_MD5SIG
- .req_md5_lookup = tcp_v6_md5_lookup,
- .calc_md5_hash = tcp_v6_md5_hash_skb,
-#endif
.init_req = tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v6_init_sequence,
@@ -821,9 +811,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
- newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
-#endif
newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
@@ -1430,10 +1417,6 @@ static int tcp_v6_init_sock(struct sock *sk)
icsk->icsk_af_ops = &ipv6_specific;
-#ifdef CONFIG_TCP_MD5SIG
- tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
-#endif
-
return 0;
}
--
2.15.0
^ permalink raw reply related
* [RFC 10/14] tcp_md5: Check for TCP_MD5 after TCP Timestamps in tcp_established_options
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov
In-Reply-To: <20171218215109.38700-1-cpaasch@apple.com>
It really does not matter, because we never use TCP timestamps when
TCP_MD5 is enabled (see tcp_syn_options).
Moving TCP_MD5 a bit lower allows for easier adoption of the
tcp_extra_option framework.
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv4/tcp_output.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 43849ed73b03..7ea65f70e5ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -662,6 +662,13 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->options = 0;
+ if (likely(tp->rx_opt.tstamp_ok)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
+ opts->tsecr = tp->rx_opt.ts_recent;
+ size += TCPOLEN_TSTAMP_ALIGNED;
+ }
+
#ifdef CONFIG_TCP_MD5SIG
opts->md5 = tp->af_specific->md5_lookup(sk, sk);
if (unlikely(opts->md5)) {
@@ -672,13 +679,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->md5 = NULL;
#endif
- if (likely(tp->rx_opt.tstamp_ok)) {
- opts->options |= OPTION_TS;
- opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
- opts->tsecr = tp->rx_opt.ts_recent;
- size += TCPOLEN_TSTAMP_ALIGNED;
- }
-
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
size += tcp_extopt_prepare(skb, 0, MAX_TCP_OPTION_SPACE - size,
opts, tcp_to_sk(tp));
--
2.15.0
^ permalink raw reply related
* [RFC 06/14] tcp_smc: Make SMC use TCP extra-option framework
From: Christoph Paasch @ 2017-12-18 21:51 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov, Ursula Braun
In-Reply-To: <20171218215109.38700-1-cpaasch@apple.com>
Adopt the extra-option framework for SMC.
It allows us to entirely remove SMC-code out of the TCP-stack.
The static key is gone, as this is now covered by the static key of the
extra-option framework.
We allocate state (struct tcp_smc_opt) that indicates whether SMC was
successfully negotiated or not and check this state in the relevant
functions.
Cc: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/linux/tcp.h | 3 +-
include/net/inet_sock.h | 3 +-
include/net/tcp.h | 4 -
net/ipv4/tcp.c | 5 --
net/ipv4/tcp_input.c | 36 ---------
net/ipv4/tcp_minisocks.c | 18 -----
net/ipv4/tcp_output.c | 54 --------------
net/smc/af_smc.c | 190 +++++++++++++++++++++++++++++++++++++++++++++--
8 files changed, 186 insertions(+), 127 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4756bd2c4b54..231b352f587f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -257,8 +257,7 @@ struct tcp_sock {
syn_fastopen_ch:1, /* Active TFO re-enabling probe */
syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
save_syn:1, /* Save headers of SYN packet */
- is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
- syn_smc:1; /* SYN includes SMC */
+ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
/* RTT measurement */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 39efb968b7a4..8e51b4a69088 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -90,8 +90,7 @@ struct inet_request_sock {
wscale_ok : 1,
ecn_ok : 1,
acked : 1,
- no_srccheck: 1,
- smc_ok : 1;
+ no_srccheck: 1;
u32 ir_mark;
union {
struct ip_options_rcu __rcu *ireq_opt;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ac62ceff9815..a5c4856e25c7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2062,10 +2062,6 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
}
-#if IS_ENABLED(CONFIG_SMC)
-extern struct static_key_false tcp_have_smc;
-#endif
-
struct tcp_extopt_store;
struct tcp_extopt_ops {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 17f38afb4212..0a1cabee6d5e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -294,11 +294,6 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
-#if IS_ENABLED(CONFIG_SMC)
-DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
-EXPORT_SYMBOL(tcp_have_smc);
-#endif
-
/*
* Current number of TCP sockets.
*/
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1950ff80fb3f..af8f4f9fd098 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3671,24 +3671,6 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
foc->exp = exp_opt;
}
-static int smc_parse_options(const struct tcphdr *th,
- struct tcp_options_received *opt_rx,
- const unsigned char *ptr,
- int opsize)
-{
-#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (th->syn && !(opsize & 1) &&
- opsize >= TCPOLEN_EXP_SMC_BASE &&
- get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
- opt_rx->smc_ok = 1;
- return 1;
- }
- }
-#endif
- return 0;
-}
-
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
@@ -3796,9 +3778,6 @@ void tcp_parse_options(const struct net *net,
tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true);
- else if (smc_parse_options(th, opt_rx, ptr,
- opsize))
- break;
else if (opsize >= TCPOLEN_EXP_BASE)
tcp_extopt_parse(get_unaligned_be32(ptr),
opsize, ptr, skb,
@@ -5572,16 +5551,6 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
return false;
}
-static void smc_check_reset_syn(struct tcp_sock *tp)
-{
-#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (tp->syn_smc && !tp->rx_opt.smc_ok)
- tp->syn_smc = 0;
- }
-#endif
-}
-
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th)
{
@@ -5692,8 +5661,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
- smc_check_reset_syn(tp);
-
smp_mb();
tcp_finish_connect(sk, skb);
@@ -6150,9 +6117,6 @@ static void tcp_openreq_init(struct request_sock *req,
ireq->ir_rmt_port = tcp_hdr(skb)->source;
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
ireq->ir_mark = inet_request_mark(sk, skb);
-#if IS_ENABLED(CONFIG_SMC)
- ireq->smc_ok = rx_opt->smc_ok;
-#endif
}
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 676ad7ca13ad..aa2ff9aadad0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -435,21 +435,6 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
}
EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
-static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
- struct request_sock *req,
- struct tcp_sock *newtp)
-{
-#if IS_ENABLED(CONFIG_SMC)
- struct inet_request_sock *ireq;
-
- if (static_branch_unlikely(&tcp_have_smc)) {
- ireq = inet_rsk(req);
- if (oldtp->syn_smc && !ireq->smc_ok)
- newtp->syn_smc = 0;
- }
-#endif
-}
-
/* This is not only more efficient than what we used to do, it eliminates
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
*
@@ -467,9 +452,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
struct tcp_request_sock *treq = tcp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(newsk);
struct tcp_sock *newtp = tcp_sk(newsk);
- struct tcp_sock *oldtp = tcp_sk(sk);
-
- smc_check_reset_syn_req(oldtp, req, newtp);
/* Now setup tcp_sock */
newtp->pred_flags = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6804a9325107..baf1c913ca7f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -398,21 +398,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
return tp->snd_una != tp->snd_up;
}
-static void smc_options_write(__be32 *ptr, u16 *options)
-{
-#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (unlikely(OPTION_SMC & *options)) {
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_EXP << 8) |
- (TCPOLEN_EXP_SMC_BASE));
- *ptr++ = htonl(TCPOPT_SMC_MAGIC);
- }
- }
-#endif
-}
-
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
@@ -527,45 +512,10 @@ static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
ptr += (len + 3) >> 2;
}
- smc_options_write(ptr, &options);
-
if (unlikely(!hlist_empty(extopt_list)))
tcp_extopt_write(ptr, skb, opts, sk);
}
-static void smc_set_option(const struct tcp_sock *tp,
- struct tcp_out_options *opts,
- unsigned int *remaining)
-{
-#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (tp->syn_smc) {
- if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
- opts->options |= OPTION_SMC;
- *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
- }
- }
- }
-#endif
-}
-
-static void smc_set_option_cond(const struct tcp_sock *tp,
- const struct inet_request_sock *ireq,
- struct tcp_out_options *opts,
- unsigned int *remaining)
-{
-#if IS_ENABLED(CONFIG_SMC)
- if (static_branch_unlikely(&tcp_have_smc)) {
- if (tp->syn_smc && ireq->smc_ok) {
- if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
- opts->options |= OPTION_SMC;
- *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
- }
- }
- }
-#endif
-}
-
/* Compute TCP options for SYN packets. This is not the final
* network wire format yet.
*/
@@ -631,8 +581,6 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
}
}
- smc_set_option(tp, opts, &remaining);
-
if (unlikely(!hlist_empty(&tp->tcp_option_list)))
remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN, remaining,
opts, tcp_to_sk(tp));
@@ -698,8 +646,6 @@ static unsigned int tcp_synack_options(const struct sock *sk,
}
}
- smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
-
if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN | TCPHDR_ACK,
remaining, opts,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index daf8075f5a4c..14bb84f81a50 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -44,6 +44,149 @@
#include "smc_rx.h"
#include "smc_close.h"
+static unsigned int tcp_smc_opt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store);
+static __be32 *tcp_smc_opt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct sock *sk,
+ struct tcp_extopt_store *store);
+static void tcp_smc_opt_parse(int opsize, const unsigned char *opptr,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct sock *sk,
+ struct tcp_extopt_store *store);
+static void tcp_smc_opt_post_process(struct sock *sk,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store);
+static struct tcp_extopt_store *tcp_smc_opt_copy(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store);
+static void tcp_smc_opt_destroy(struct tcp_extopt_store *store);
+
+struct tcp_smc_opt {
+ struct tcp_extopt_store store;
+ int smc_ok:1; /* SMC supported on this connection */
+ struct rcu_head rcu;
+};
+
+static const struct tcp_extopt_ops tcp_smc_extra_ops = {
+ .option_kind = TCPOPT_SMC_MAGIC,
+ .parse = tcp_smc_opt_parse,
+ .post_process = tcp_smc_opt_post_process,
+ .prepare = tcp_smc_opt_prepare,
+ .write = tcp_smc_opt_write,
+ .copy = tcp_smc_opt_copy,
+ .destroy = tcp_smc_opt_destroy,
+ .owner = THIS_MODULE,
+};
+
+static struct tcp_smc_opt *tcp_extopt_to_smc(struct tcp_extopt_store *store)
+{
+ return container_of(store, struct tcp_smc_opt, store);
+}
+
+static struct tcp_smc_opt *tcp_smc_opt_find(struct sock *sk)
+{
+ struct tcp_extopt_store *ext_opt;
+
+ ext_opt = tcp_extopt_find_kind(TCPOPT_SMC_MAGIC, sk);
+
+ return tcp_extopt_to_smc(ext_opt);
+}
+
+static unsigned int tcp_smc_opt_prepare(struct sk_buff *skb, u8 flags,
+ unsigned int remaining,
+ struct tcp_out_options *opts,
+ const struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ if (!(flags & TCPHDR_SYN))
+ return 0;
+
+ if (remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+ opts->options |= OPTION_SMC;
+ return TCPOLEN_EXP_SMC_BASE_ALIGNED;
+ }
+
+ return 0;
+}
+
+static __be32 *tcp_smc_opt_write(__be32 *ptr, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ if (unlikely(OPTION_SMC & opts->options)) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_EXP << 8) |
+ (TCPOLEN_EXP_SMC_BASE));
+ *ptr++ = htonl(TCPOPT_SMC_MAGIC);
+ }
+
+ return ptr;
+}
+
+static void tcp_smc_opt_parse(int opsize, const unsigned char *opptr,
+ const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx,
+ struct sock *sk,
+ struct tcp_extopt_store *store)
+{
+ struct tcphdr *th = tcp_hdr(skb);
+
+ if (th->syn && !(opsize & 1) && opsize >= TCPOLEN_EXP_SMC_BASE)
+ opt_rx->smc_ok = 1;
+}
+
+static void tcp_smc_opt_post_process(struct sock *sk,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store)
+{
+ struct tcp_smc_opt *smc_opt = tcp_extopt_to_smc(store);
+
+ if (sk->sk_state != TCP_SYN_SENT)
+ return;
+
+ if (opt->smc_ok)
+ smc_opt->smc_ok = 1;
+ else
+ smc_opt->smc_ok = 0;
+}
+
+static struct tcp_extopt_store *tcp_smc_opt_copy(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extopt_store *store)
+{
+ struct tcp_smc_opt *smc_opt;
+
+ /* First, check if the peer sent us the smc-opt */
+ if (!opt->smc_ok)
+ return NULL;
+
+ smc_opt = kzalloc(sizeof(*smc_opt), GFP_ATOMIC);
+ if (!smc_opt)
+ return NULL;
+
+ smc_opt->store.ops = &tcp_smc_extra_ops;
+
+ smc_opt->smc_ok = 1;
+
+ return (struct tcp_extopt_store *)smc_opt;
+}
+
+static void tcp_smc_opt_destroy(struct tcp_extopt_store *store)
+{
+ struct tcp_smc_opt *smc_opt = tcp_extopt_to_smc(store);
+
+ kfree_rcu(smc_opt, rcu);
+}
+
static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
* creation
*/
@@ -384,13 +527,15 @@ static int smc_connect_rdma(struct smc_sock *smc)
struct smc_clc_msg_accept_confirm aclc;
int local_contact = SMC_FIRST_CONTACT;
struct smc_ib_device *smcibdev;
+ struct tcp_smc_opt *smc_opt;
struct smc_link *link;
u8 srv_first_contact;
int reason_code = 0;
int rc = 0;
u8 ibport;
- if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
+ smc_opt = tcp_smc_opt_find(smc->clcsock->sk);
+ if (!smc_opt || !smc_opt->smc_ok) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
goto out_connected;
@@ -535,6 +680,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
static int smc_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
+ struct tcp_smc_opt *smc_opt;
struct sock *sk = sock->sk;
struct smc_sock *smc;
int rc = -EINVAL;
@@ -548,9 +694,17 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out_err;
smc->addr = addr; /* needed for nonblocking connect */
+ smc_opt = kzalloc(sizeof(*smc_opt), GFP_KERNEL);
+ if (!smc_opt) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ smc_opt->store.ops = &tcp_smc_extra_ops;
+
lock_sock(sk);
switch (sk->sk_state) {
default:
+ rc = -EINVAL;
goto out;
case SMC_ACTIVE:
rc = -EISCONN;
@@ -560,8 +714,15 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
break;
}
+ /* We are the only owner of smc->clcsock->sk, so we can be lockless */
+ rc = tcp_register_extopt(&smc_opt->store, smc->clcsock->sk);
+ if (rc) {
+ release_sock(smc->clcsock->sk);
+ kfree(smc_opt);
+ goto out_err;
+ }
+
smc_copy_sock_settings_to_clc(smc);
- tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_connect(smc->clcsock, addr, alen, flags);
if (rc)
goto out;
@@ -760,6 +921,7 @@ static void smc_listen_work(struct work_struct *work)
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
struct sockaddr_in peeraddr;
+ struct tcp_smc_opt *smc_opt;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
@@ -769,7 +931,8 @@ static void smc_listen_work(struct work_struct *work)
u8 ibport;
/* check if peer is smc capable */
- if (!tcp_sk(newclcsock->sk)->syn_smc) {
+ smc_opt = tcp_smc_opt_find(newclcsock->sk);
+ if (!smc_opt || !smc_opt->smc_ok) {
new_smc->use_fallback = true;
goto out_connected;
}
@@ -962,10 +1125,18 @@ static void smc_tcp_listen_work(struct work_struct *work)
static int smc_listen(struct socket *sock, int backlog)
{
+ struct tcp_smc_opt *smc_opt;
struct sock *sk = sock->sk;
struct smc_sock *smc;
int rc;
+ smc_opt = kzalloc(sizeof(*smc_opt), GFP_KERNEL);
+ if (!smc_opt) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ smc_opt->store.ops = &tcp_smc_extra_ops;
+
smc = smc_sk(sk);
lock_sock(sk);
@@ -978,11 +1149,19 @@ static int smc_listen(struct socket *sock, int backlog)
sk->sk_max_ack_backlog = backlog;
goto out;
}
+
+ /* We are the only owner of smc->clcsock->sk, so we can be lockless */
+ rc = tcp_register_extopt(&smc_opt->store, smc->clcsock->sk);
+ if (rc) {
+ release_sock(smc->clcsock->sk);
+ kfree(smc_opt);
+ goto out_err;
+ }
+
/* some socket options are handled in core, so we could not apply
* them to the clc socket -- copy smc socket options to clc socket
*/
smc_copy_sock_settings_to_clc(smc);
- tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_listen(smc->clcsock, backlog);
if (rc)
@@ -995,6 +1174,7 @@ static int smc_listen(struct socket *sock, int backlog)
out:
release_sock(sk);
+out_err:
return rc;
}
@@ -1425,7 +1605,6 @@ static int __init smc_init(void)
goto out_sock;
}
- static_branch_enable(&tcp_have_smc);
return 0;
out_sock:
@@ -1450,7 +1629,6 @@ static void __exit smc_exit(void)
list_del_init(&lgr->list);
smc_lgr_free(lgr); /* free link group */
}
- static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client();
sock_unregister(PF_SMC);
proto_unregister(&smc_proto);
--
2.15.0
^ permalink raw reply related
* [RFC 04/14] tcp_smc: Make smc_parse_options return 1 on success
From: Christoph Paasch @ 2017-12-18 21:50 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, Mat Martineau, Alexei Starovoitov, Ursula Braun
In-Reply-To: <20171218215109.38700-1-cpaasch@apple.com>
As we allow a generic TCP-option parser that also parses experimental
TCP options, we need to add a return-value to smc_parse_options() that
indicates whether the option actually matched or not.
Cc: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
net/ipv4/tcp_input.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb97ee24c601..5c35fd568b13 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3671,19 +3671,22 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
foc->exp = exp_opt;
}
-static void smc_parse_options(const struct tcphdr *th,
- struct tcp_options_received *opt_rx,
- const unsigned char *ptr,
- int opsize)
+static int smc_parse_options(const struct tcphdr *th,
+ struct tcp_options_received *opt_rx,
+ const unsigned char *ptr,
+ int opsize)
{
#if IS_ENABLED(CONFIG_SMC)
if (static_branch_unlikely(&tcp_have_smc)) {
if (th->syn && !(opsize & 1) &&
opsize >= TCPOLEN_EXP_SMC_BASE &&
- get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+ get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
opt_rx->smc_ok = 1;
+ return 1;
+ }
}
#endif
+ return 0;
}
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
--
2.15.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox