* [PATCH stable 4.9 v2 18/29] inet: frags: fix ip6frag_low_thresh boundary
From: Florian Fainelli @ 2018-10-10 19:30 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
Giving an integer to proc_doulongvec_minmax() is dangerous on 64bit arches,
since linker might place next to it a non zero value preventing a change
to ip6frag_low_thresh.
ip6frag_low_thresh is not used anymore in the kernel, but we do not
want to prematuraly break user scripts wanting to change it.
Since specifying a minimal value of 0 for proc_doulongvec_minmax()
is moot, let's remove these zero values in all defrag units.
Fixes: 6e00f7dd5e4e ("ipv6: frags: fix /proc/sys/net/ipv6/ip6frag_low_thresh")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 3d23401283e80ceb03f765842787e0e79ff598b7)
---
net/ieee802154/6lowpan/reassembly.c | 2 --
net/ipv4/ip_fragment.c | 40 ++++++++++---------------
net/ipv6/netfilter/nf_conntrack_reasm.c | 2 --
net/ipv6/reassembly.c | 4 +--
4 files changed, 17 insertions(+), 31 deletions(-)
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 122a625d9a66..6fca75581e13 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -410,7 +410,6 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
}
#ifdef CONFIG_SYSCTL
-static long zero;
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
@@ -427,7 +426,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &zero,
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
{
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e235f62dab58..73c0adc61a65 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -56,14 +56,6 @@
*/
static const char ip_frag_cache_name[] = "ip4-frags";
-struct ipfrag_skb_cb
-{
- struct inet_skb_parm h;
- int offset;
-};
-
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
-
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
@@ -351,13 +343,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* this fragment, right?
*/
prev = qp->q.fragments_tail;
- if (!prev || FRAG_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = qp->q.fragments; next != NULL; next = next->next) {
- if (FRAG_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -368,7 +360,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* any overlaps are eliminated.
*/
if (prev) {
- int i = (FRAG_CB(prev)->offset + prev->len) - offset;
+ int i = (prev->ip_defrag_offset + prev->len) - offset;
if (i > 0) {
offset += i;
@@ -385,8 +377,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
err = -ENOMEM;
- while (next && FRAG_CB(next)->offset < end) {
- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
+ while (next && next->ip_defrag_offset < end) {
+ int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
if (i < next->len) {
int delta = -next->truesize;
@@ -399,7 +391,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
delta += next->truesize;
if (delta)
add_frag_mem_limit(qp->q.net, delta);
- FRAG_CB(next)->offset += i;
+ next->ip_defrag_offset += i;
qp->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
@@ -423,7 +415,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}
}
- FRAG_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ dev = skb->dev;
+ if (dev)
+ qp->iif = dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -434,11 +432,6 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
else
qp->q.fragments = skb;
- dev = skb->dev;
- if (dev) {
- qp->iif = dev->ifindex;
- skb->dev = NULL;
- }
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
@@ -514,7 +507,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
WARN_ON(!head);
- WARN_ON(FRAG_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
/* Allocate a new buffer for the datagram. */
ihlen = ip_hdrlen(head);
@@ -677,7 +670,7 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
EXPORT_SYMBOL(ip_check_defrag);
#ifdef CONFIG_SYSCTL
-static long zero;
+static int dist_min;
static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
@@ -694,7 +687,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &zero,
.extra2 = &init_net.ipv4.frags.high_thresh
},
{
@@ -710,7 +702,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .extra1 = &dist_min,
},
{ }
};
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 8bc0df9ad2ab..ff49d1f2c8cb 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -63,7 +63,6 @@ struct nf_ct_frag6_skb_cb
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
-static long zero;
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
@@ -79,7 +78,6 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &zero,
.extra2 = &init_net.nf_frag.frags.high_thresh
},
{
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1cb45a0d1a0e..dbe726c9a2ae 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -548,7 +548,6 @@ static const struct inet6_protocol frag_protocol = {
};
#ifdef CONFIG_SYSCTL
-static int zero;
static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
@@ -564,8 +563,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
.data = &init_net.ipv6.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ipv6.frags.high_thresh
},
{
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 15/29] rhashtable: reorganize struct rhashtable layout
From: Florian Fainelli @ 2018-10-10 19:30 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
While under frags DDOS I noticed unfortunate false sharing between
@nelems and @params.automatic_shrinking
Move @nelems at the end of struct rhashtable so that first cache line
is shared between all cpus, because almost never dirtied.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit e5d672a0780d9e7118caad4c171ec88b8299398d)
---
include/linux/rhashtable.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 85d1ffc90285..4421e5ccb092 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -138,7 +138,6 @@ struct rhashtable_params {
/**
* struct rhashtable - Hash table handle
* @tbl: Bucket table
- * @nelems: Number of elements in table
* @key_len: Key length for hashfn
* @elasticity: Maximum chain length before rehash
* @p: Configuration parameters
@@ -146,10 +145,10 @@ struct rhashtable_params {
* @run_work: Deferred worker to expand/shrink asynchronously
* @mutex: Mutex to protect current/future table swapping
* @lock: Spin lock to protect walker list
+ * @nelems: Number of elements in table
*/
struct rhashtable {
struct bucket_table __rcu *tbl;
- atomic_t nelems;
unsigned int key_len;
unsigned int elasticity;
struct rhashtable_params p;
@@ -157,6 +156,7 @@ struct rhashtable {
struct work_struct run_work;
struct mutex mutex;
spinlock_t lock;
+ atomic_t nelems;
};
/**
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 14/29] ipv6: frags: rewrite ip6_expire_frag_queue()
From: Florian Fainelli @ 2018-10-10 19:30 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
Make it similar to IPv4 ip_expire(), and release the lock
before calling icmp functions.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 05c0b86b9696802fd0ce5676a92a63f1b455bdf3)
---
net/ipv6/reassembly.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 8a4ece339c19..1cb45a0d1a0e 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -92,7 +92,9 @@ EXPORT_SYMBOL(ip6_frag_init);
void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
+ struct sk_buff *head;
+ rcu_read_lock();
spin_lock(&fq->q.lock);
if (fq->q.flags & INET_FRAG_COMPLETE)
@@ -100,28 +102,34 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
inet_frag_kill(&fq->q);
- rcu_read_lock();
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
- goto out_rcu_unlock;
+ goto out;
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
- goto out_rcu_unlock;
+ head = fq->q.fragments;
+ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+ goto out;
/* But use as source device on which LAST ARRIVED
* segment was received. And do not use fq->dev
* pointer directly, device might already disappeared.
*/
- fq->q.fragments->dev = dev;
- icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
+ head->dev = dev;
+ skb_get(head);
+ spin_unlock(&fq->q.lock);
+
+ icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+ kfree_skb(head);
+ goto out_rcu_unlock;
+
out:
spin_unlock(&fq->q.lock);
+out_rcu_unlock:
+ rcu_read_unlock();
inet_frag_put(&fq->q);
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 13/29] inet: frags: do not clone skb in ip_expire()
From: Florian Fainelli @ 2018-10-10 19:30 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
An skb_clone() was added in commit ec4fbd64751d ("inet: frag: release
spinlock before calling icmp_send()")
While fixing the bug at that time, it also added a very high cost
for DDOS frags, as the ICMP rate limit is applied after this
expensive operation (skb_clone() + consume_skb(), implying memory
allocations, copy, and freeing)
We can use skb_get(head) here, all we want is to make sure skb wont
be freed by another cpu.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 1eec5d5670084ee644597bd26c25e22c69b9f748)
---
net/ipv4/ip_fragment.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3dd19bebeb55..e235f62dab58 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -141,8 +141,8 @@ static bool frag_expire_skip_icmp(u32 user)
*/
static void ip_expire(unsigned long arg)
{
- struct sk_buff *clone, *head;
const struct iphdr *iph;
+ struct sk_buff *head;
struct net *net;
struct ipq *qp;
int err;
@@ -185,16 +185,12 @@ static void ip_expire(unsigned long arg)
(skb_rtable(head)->rt_type != RTN_LOCAL))
goto out;
- clone = skb_clone(head, GFP_ATOMIC);
+ skb_get(head);
+ spin_unlock(&qp->q.lock);
+ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+ kfree_skb(head);
+ goto out_rcu_unlock;
- /* Send an ICMP "Fragment Reassembly Timeout" message. */
- if (clone) {
- spin_unlock(&qp->q.lock);
- icmp_send(clone, ICMP_TIME_EXCEEDED,
- ICMP_EXC_FRAGTIME, 0);
- consume_skb(clone);
- goto out_rcu_unlock;
- }
out:
spin_unlock(&qp->q.lock);
out_rcu_unlock:
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 12/29] inet: frags: break the 2GB limit for frags storage
From: Florian Fainelli @ 2018-10-10 19:30 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
Some users are willing to provision huge amounts of memory to be able
to perform reassembly reasonnably well under pressure.
Current memory tracking is using one atomic_t and integers.
Switch to atomic_long_t so that 64bit arches can use more than 2GB,
without any cost for 32bit arches.
Note that this patch avoids an overflow error, if high_thresh was set
to ~2GB, since this test in inet_frag_alloc() was never true :
if (... || frag_mem_limit(nf) > nf->high_thresh)
Tested:
$ echo 16000000000 >/proc/sys/net/ipv4/ipfrag_high_thresh
<frag DDOS>
$ grep FRAG /proc/net/sockstat
FRAG: inuse 14705885 memory 16000002880
$ nstat -n ; sleep 1 ; nstat | grep Reas
IpReasmReqds 3317150 0.0
IpReasmFails 3317112 0.0
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 3e67f106f619dcfaf6f4e2039599bdb69848c714)
---
Documentation/networking/ip-sysctl.txt | 4 ++--
include/net/inet_frag.h | 20 ++++++++++----------
net/ieee802154/6lowpan/reassembly.c | 10 +++++-----
net/ipv4/ip_fragment.c | 10 +++++-----
net/ipv4/proc.c | 2 +-
net/ipv6/netfilter/nf_conntrack_reasm.c | 10 +++++-----
net/ipv6/proc.c | 2 +-
net/ipv6/reassembly.c | 6 +++---
8 files changed, 32 insertions(+), 32 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 6cd632578ce8..dbdc4130e149 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -122,10 +122,10 @@ min_adv_mss - INTEGER
IP Fragmentation:
-ipfrag_high_thresh - INTEGER
+ipfrag_high_thresh - LONG INTEGER
Maximum memory used to reassemble IP fragments.
-ipfrag_low_thresh - INTEGER
+ipfrag_low_thresh - LONG INTEGER
(Obsolete since linux-4.17)
Maximum memory used to reassemble IP fragments before the kernel
begins to remove incomplete fragment queues to free up resources.
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 23161bf5d899..dea175f3418a 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -7,11 +7,11 @@ struct netns_frags {
struct rhashtable rhashtable ____cacheline_aligned_in_smp;
/* Keep atomic mem on separate cachelines in structs that include it */
- atomic_t mem ____cacheline_aligned_in_smp;
+ atomic_long_t mem ____cacheline_aligned_in_smp;
/* sysctls */
+ long high_thresh;
+ long low_thresh;
int timeout;
- int high_thresh;
- int low_thresh;
int max_dist;
struct inet_frags *f;
};
@@ -101,7 +101,7 @@ void inet_frags_fini(struct inet_frags *);
static inline int inet_frags_init_net(struct netns_frags *nf)
{
- atomic_set(&nf->mem, 0);
+ atomic_long_set(&nf->mem, 0);
return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
}
void inet_frags_exit_net(struct netns_frags *nf);
@@ -118,19 +118,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
/* Memory Tracking Functions. */
-static inline int frag_mem_limit(struct netns_frags *nf)
+static inline long frag_mem_limit(const struct netns_frags *nf)
{
- return atomic_read(&nf->mem);
+ return atomic_long_read(&nf->mem);
}
-static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
{
- atomic_sub(i, &nf->mem);
+ atomic_long_sub(val, &nf->mem);
}
-static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
{
- atomic_add(i, &nf->mem);
+ atomic_long_add(val, &nf->mem);
}
/* RFC 3168 support :
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index b54015981af9..122a625d9a66 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -410,23 +410,23 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
}
#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 696bfef06caa..3dd19bebeb55 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -681,23 +681,23 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
EXPORT_SYMBOL(ip_check_defrag);
#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.procname = "ipfrag_high_thresh",
.data = &init_net.ipv4.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
.data = &init_net.ipv4.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.ipv4.frags.high_thresh
},
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b7a2d002cb27..aa1e52587bf5 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -73,7 +73,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
- seq_printf(seq, "FRAG: inuse %u memory %u\n",
+ seq_printf(seq, "FRAG: inuse %u memory %lu\n",
atomic_read(&net->ipv4.frags.rhashtable.nelems),
frag_mem_limit(&net->ipv4.frags));
return 0;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 267f2ae2d05c..8bc0df9ad2ab 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -63,7 +63,7 @@ struct nf_ct_frag6_skb_cb
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
@@ -76,18 +76,18 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_low_thresh",
.data = &init_net.nf_frag.frags.low_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 5704ec3d3178..dc04c024986c 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -47,7 +47,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %u memory %u\n",
+ seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
atomic_read(&net->ipv6.frags.rhashtable.nelems),
frag_mem_limit(&net->ipv6.frags));
return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6de4cec69054..8a4ece339c19 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -546,15 +546,15 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 11/29] inet: frags: remove inet_frag_maybe_warn_overflow()
From: Florian Fainelli @ 2018-10-10 19:29 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
This function is obsolete, after rhashtable addition to inet defrag.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 2d44ed22e607f9a285b049de2263e3840673a260)
---
include/net/inet_frag.h | 2 --
net/ieee802154/6lowpan/reassembly.c | 5 ++---
net/ipv4/inet_fragment.c | 11 -----------
net/ipv4/ip_fragment.c | 5 ++---
net/ipv6/netfilter/nf_conntrack_reasm.c | 5 ++---
net/ipv6/reassembly.c | 5 ++---
6 files changed, 8 insertions(+), 25 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 7e984045b2b7..23161bf5d899 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -109,8 +109,6 @@ void inet_frags_exit_net(struct netns_frags *nf);
void inet_frag_kill(struct inet_frag_queue *q);
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
- const char *prefix);
static inline void inet_frag_put(struct inet_frag_queue *q)
{
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index a63360a05108..b54015981af9 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -83,10 +83,9 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
struct inet_frag_queue *q;
q = inet_frag_find(&ieee802154_lowpan->frags, &key);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ if (!q)
return NULL;
- }
+
return container_of(q, struct lowpan_frag_queue, q);
}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index a50ac25878aa..47c240f50b99 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -217,14 +217,3 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
return inet_frag_create(nf, key);
}
EXPORT_SYMBOL(inet_frag_find);
-
-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
- const char *prefix)
-{
- static const char msg[] = "inet_frag_find: Fragment hash bucket"
- " list length grew over limit. Dropping fragment.\n";
-
- if (PTR_ERR(q) == -ENOBUFS)
- net_dbg_ratelimited("%s%s", prefix, msg);
-}
-EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 308592a8ba97..696bfef06caa 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -219,10 +219,9 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
struct inet_frag_queue *q;
q = inet_frag_find(&net->ipv4.frags, &key);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ if (!q)
return NULL;
- }
+
return container_of(q, struct ipq, q);
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 314568d8b84a..267f2ae2d05c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -177,10 +177,9 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
struct inet_frag_queue *q;
q = inet_frag_find(&net->nf_frag.frags, &key);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 629a45a4c79f..6de4cec69054 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -154,10 +154,9 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
key.iif = 0;
q = inet_frag_find(&net->ipv6.frags, &key);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 10/29] inet: frags: get rif of inet_frag_evicting()
From: Florian Fainelli @ 2018-10-10 19:29 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
This refactors ip_expire() since one indentation level is removed.
Note: in the future, we should try hard to avoid the skb_clone()
since this is a serious performance cost.
Under DDOS, the ICMP message wont be sent because of rate limits.
Fact that ip6_expire_frag_queue() does not use skb_clone() is
disturbing too. Presumably IPv6 should have the same
issue than the one we fixed in commit ec4fbd64751d
("inet: frag: release spinlock before calling icmp_send()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 399d1404be660d355192ff4df5ccc3f4159ec1e4)
---
include/net/inet_frag.h | 5 ----
net/ipv4/ip_fragment.c | 65 ++++++++++++++++++++---------------------
net/ipv6/reassembly.c | 4 ---
3 files changed, 32 insertions(+), 42 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index a49cdf25cef0..7e984045b2b7 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -118,11 +118,6 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
inet_frag_destroy(q);
}
-static inline bool inet_frag_evicting(struct inet_frag_queue *q)
-{
- return false;
-}
-
/* Memory Tracking Functions. */
static inline int frag_mem_limit(struct netns_frags *nf)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8c4072b19296..308592a8ba97 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -141,8 +141,11 @@ static bool frag_expire_skip_icmp(u32 user)
*/
static void ip_expire(unsigned long arg)
{
- struct ipq *qp;
+ struct sk_buff *clone, *head;
+ const struct iphdr *iph;
struct net *net;
+ struct ipq *qp;
+ int err;
qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
@@ -156,45 +159,41 @@ static void ip_expire(unsigned long arg)
ipq_kill(qp);
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
- if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *clone, *head = qp->q.fragments;
- const struct iphdr *iph;
- int err;
+ head = qp->q.fragments;
- __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
- goto out;
+ if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head)
+ goto out;
- head->dev = dev_get_by_index_rcu(net, qp->iif);
- if (!head->dev)
- goto out;
+ head->dev = dev_get_by_index_rcu(net, qp->iif);
+ if (!head->dev)
+ goto out;
- /* skb has no dst, perform route lookup again */
- iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ /* skb has no dst, perform route lookup again */
+ iph = ip_hdr(head);
+ err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
- if (err)
- goto out;
+ if (err)
+ goto out;
- /* Only an end host needs to send an ICMP
- * "Fragment Reassembly Timeout" message, per RFC792.
- */
- if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
- (skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out;
-
- clone = skb_clone(head, GFP_ATOMIC);
-
- /* Send an ICMP "Fragment Reassembly Timeout" message. */
- if (clone) {
- spin_unlock(&qp->q.lock);
- icmp_send(clone, ICMP_TIME_EXCEEDED,
- ICMP_EXC_FRAGTIME, 0);
- consume_skb(clone);
- goto out_rcu_unlock;
- }
+ /* Only an end host needs to send an ICMP
+ * "Fragment Reassembly Timeout" message, per RFC792.
+ */
+ if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
+ goto out;
+
+ clone = skb_clone(head, GFP_ATOMIC);
+
+ /* Send an ICMP "Fragment Reassembly Timeout" message. */
+ if (clone) {
+ spin_unlock(&qp->q.lock);
+ icmp_send(clone, ICMP_TIME_EXCEEDED,
+ ICMP_EXC_FRAGTIME, 0);
+ consume_skb(clone);
+ goto out_rcu_unlock;
}
out:
spin_unlock(&qp->q.lock);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 8fe3836d1410..629a45a4c79f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -106,10 +106,6 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
goto out_rcu_unlock;
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
-
- if (inet_frag_evicting(&fq->q))
- goto out_rcu_unlock;
-
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 09/29] inet: frags: remove some helpers
From: Florian Fainelli @ 2018-10-10 19:29 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
Remove sum_frag_mem_limit(), ip_frag_mem() & ip6_frag_mem()
Also since we use rhashtable we can bring back the number of fragments
in "grep FRAG /proc/net/sockstat /proc/net/sockstat6" that was
removed in commit 434d305405ab ("inet: frag: don't account number
of fragment queues")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 6befe4a78b1553edb6eed3a78b4bcd9748526672)
---
include/net/inet_frag.h | 5 -----
include/net/ip.h | 1 -
include/net/ipv6.h | 7 -------
net/ipv4/ip_fragment.c | 5 -----
net/ipv4/proc.c | 6 +++---
net/ipv6/proc.c | 5 +++--
6 files changed, 6 insertions(+), 23 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index b146f1e456a5..a49cdf25cef0 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -140,11 +140,6 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
atomic_add(i, &nf->mem);
}
-static inline int sum_frag_mem_limit(struct netns_frags *nf)
-{
- return atomic_read(&nf->mem);
-}
-
/* RFC 3168 support :
* We want to check ECN values of all fragments, do detect invalid combinations.
* In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
diff --git a/include/net/ip.h b/include/net/ip.h
index bc9b4deeb60e..8646da034851 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -548,7 +548,6 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s
return skb;
}
#endif
-int ip_frag_mem(struct net *net);
/*
* Functions provided by ip_forward.c
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 6b9b551f653b..7cb100d25bb5 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -330,13 +330,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev)
idev->cnf.accept_ra;
}
-#if IS_ENABLED(CONFIG_IPV6)
-static inline int ip6_frag_mem(struct net *net)
-{
- return sum_frag_mem_limit(&net->ipv6.frags);
-}
-#endif
-
#define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */
#define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */
#define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 29038513f3ca..8c4072b19296 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -82,11 +82,6 @@ static u8 ip4_frag_ecn(u8 tos)
static struct inet_frags ip4_frags;
-int ip_frag_mem(struct net *net)
-{
- return sum_frag_mem_limit(&net->ipv4.frags);
-}
-
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
struct net_device *dev);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 7143ca1a6af9..b7a2d002cb27 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,6 @@
static int sockstat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem;
int orphans, sockets;
local_bh_disable();
@@ -74,8 +73,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
- frag_mem = ip_frag_mem(net);
- seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG: inuse %u memory %u\n",
+ atomic_read(&net->ipv4.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv4.frags));
return 0;
}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index e88bcb8ff0fd..5704ec3d3178 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -38,7 +38,6 @@
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem = ip6_frag_mem(net);
seq_printf(seq, "TCP6: inuse %d\n",
sock_prot_inuse_get(net, &tcpv6_prot));
@@ -48,7 +47,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG6: inuse %u memory %u\n",
+ atomic_read(&net->ipv6.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv6.frags));
return 0;
}
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 08/29] inet: frags: use rhashtables for reassembly units
From: Florian Fainelli @ 2018-10-10 19:29 UTC (permalink / raw)
To: netdev
Cc: davem, gregkh, stable, edumazet, sthemmin, Kirill Tkhai,
Herbert Xu, Florian Westphal, Jesper Dangaard Brouer,
Alexander Aring, Stefan Schmidt
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
Some applications still rely on IP fragmentation, and to be fair linux
reassembly unit is not working under any serious load.
It uses static hash tables of 1024 buckets, and up to 128 items per bucket (!!!)
A work queue is supposed to garbage collect items when host is under memory
pressure, and doing a hash rebuild, changing seed used in hash computations.
This work queue blocks softirqs for up to 25 ms when doing a hash rebuild,
occurring every 5 seconds if host is under fire.
Then there is the problem of sharing this hash table for all netns.
It is time to switch to rhashtables, and allocate one of them per netns
to speedup netns dismantle, since this is a critical metric these days.
Lookup is now using RCU. A followup patch will even remove
the refcount hold/release left from prior implementation and save
a couple of atomic operations.
Before this patch, 16 cpus (16 RX queue NIC) could not handle more
than 1 Mpps frags DDOS.
After the patch, I reach 9 Mpps without any tuning, and can use up to 2GB
of storage for the fragments (exact number depends on frags being evicted
after timeout)
$ grep FRAG /proc/net/sockstat
FRAG: inuse 1966916 memory 2140004608
A followup patch will change the limits for 64bit arches.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Florian Westphal <fw@strlen.de>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Alexander Aring <alex.aring@gmail.com>
Cc: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 648700f76b03b7e8149d13cc2bdb3355035258a9)
---
Documentation/networking/ip-sysctl.txt | 7 +-
include/net/inet_frag.h | 81 +++---
include/net/ipv6.h | 16 +-
net/ieee802154/6lowpan/6lowpan_i.h | 26 +-
net/ieee802154/6lowpan/reassembly.c | 91 +++---
net/ipv4/inet_fragment.c | 349 +++++-------------------
net/ipv4/ip_fragment.c | 112 ++++----
net/ipv6/netfilter/nf_conntrack_reasm.c | 51 +---
net/ipv6/reassembly.c | 110 ++++----
9 files changed, 267 insertions(+), 576 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 3db8c67d2c8d..6cd632578ce8 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -123,13 +123,10 @@ min_adv_mss - INTEGER
IP Fragmentation:
ipfrag_high_thresh - INTEGER
- Maximum memory used to reassemble IP fragments. When
- ipfrag_high_thresh bytes of memory is allocated for this purpose,
- the fragment handler will toss packets until ipfrag_low_thresh
- is reached. This also serves as a maximum limit to namespaces
- different from the initial one.
+ Maximum memory used to reassemble IP fragments.
ipfrag_low_thresh - INTEGER
+ (Obsolete since linux-4.17)
Maximum memory used to reassemble IP fragments before the kernel
begins to remove incomplete fragment queues to free up resources.
The kernel still accepts new fragments for defragmentation.
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 19a2ccda0300..b146f1e456a5 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -1,7 +1,11 @@
#ifndef __NET_FRAG_H__
#define __NET_FRAG_H__
+#include <linux/rhashtable.h>
+
struct netns_frags {
+ struct rhashtable rhashtable ____cacheline_aligned_in_smp;
+
/* Keep atomic mem on separate cachelines in structs that include it */
atomic_t mem ____cacheline_aligned_in_smp;
/* sysctls */
@@ -25,12 +29,30 @@ enum {
INET_FRAG_COMPLETE = BIT(2),
};
+struct frag_v4_compare_key {
+ __be32 saddr;
+ __be32 daddr;
+ u32 user;
+ u32 vif;
+ __be16 id;
+ u16 protocol;
+};
+
+struct frag_v6_compare_key {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ u32 user;
+ __be32 id;
+ u32 iif;
+};
+
/**
* struct inet_frag_queue - fragment queue
*
- * @lock: spinlock protecting the queue
+ * @node: rhash node
+ * @key: keys identifying this frag.
* @timer: queue expiration timer
- * @list: hash bucket list
+ * @lock: spinlock protecting this frag
* @refcnt: reference count of the queue
* @fragments: received fragments head
* @fragments_tail: received fragments tail
@@ -40,12 +62,16 @@ enum {
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @net: namespace that this frag belongs to
- * @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
+ * @rcu: rcu head for freeing deferall
*/
struct inet_frag_queue {
- spinlock_t lock;
+ struct rhash_head node;
+ union {
+ struct frag_v4_compare_key v4;
+ struct frag_v6_compare_key v6;
+ } key;
struct timer_list timer;
- struct hlist_node list;
+ spinlock_t lock;
atomic_t refcnt;
struct sk_buff *fragments;
struct sk_buff *fragments_tail;
@@ -54,51 +80,20 @@ struct inet_frag_queue {
int meat;
__u8 flags;
u16 max_size;
- struct netns_frags *net;
- struct hlist_node list_evictor;
-};
-
-#define INETFRAGS_HASHSZ 1024
-
-/* averaged:
- * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
- * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or
- * struct frag_queue))
- */
-#define INETFRAGS_MAXDEPTH 128
-
-struct inet_frag_bucket {
- struct hlist_head chain;
- spinlock_t chain_lock;
+ struct netns_frags *net;
+ struct rcu_head rcu;
};
struct inet_frags {
- struct inet_frag_bucket hash[INETFRAGS_HASHSZ];
-
- struct work_struct frags_work;
- unsigned int next_bucket;
- unsigned long last_rebuild_jiffies;
- bool rebuild;
-
- /* The first call to hashfn is responsible to initialize
- * rnd. This is best done with net_get_random_once.
- *
- * rnd_seqlock is used to let hash insertion detect
- * when it needs to re-lookup the hash chain to use.
- */
- u32 rnd;
- seqlock_t rnd_seqlock;
int qsize;
- unsigned int (*hashfn)(const struct inet_frag_queue *);
- bool (*match)(const struct inet_frag_queue *q,
- const void *arg);
void (*constructor)(struct inet_frag_queue *q,
const void *arg);
void (*destructor)(struct inet_frag_queue *);
void (*frag_expire)(unsigned long data);
struct kmem_cache *frags_cachep;
const char *frags_cache_name;
+ struct rhashtable_params rhash_params;
};
int inet_frags_init(struct inet_frags *);
@@ -107,15 +102,13 @@ void inet_frags_fini(struct inet_frags *);
static inline int inet_frags_init_net(struct netns_frags *nf)
{
atomic_set(&nf->mem, 0);
- return 0;
+ return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
}
void inet_frags_exit_net(struct netns_frags *nf);
void inet_frag_kill(struct inet_frag_queue *q);
void inet_frag_destroy(struct inet_frag_queue *q);
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key, unsigned int hash);
-
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
const char *prefix);
@@ -127,7 +120,7 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
static inline bool inet_frag_evicting(struct inet_frag_queue *q)
{
- return !hlist_unhashed(&q->list_evictor);
+ return false;
}
/* Memory Tracking Functions. */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 903a10a5f259..6b9b551f653b 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -530,17 +530,8 @@ enum ip6_defrag_users {
__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
};
-struct ip6_create_arg {
- __be32 id;
- u32 user;
- const struct in6_addr *src;
- const struct in6_addr *dst;
- int iif;
- u8 ecn;
-};
-
void ip6_frag_init(struct inet_frag_queue *q, const void *a);
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
+extern const struct rhashtable_params ip6_rhash_params;
/*
* Equivalent of ipv4 struct ip
@@ -548,11 +539,6 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
struct frag_queue {
struct inet_frag_queue q;
- __be32 id; /* fragment id */
- u32 user;
- struct in6_addr saddr;
- struct in6_addr daddr;
-
int iif;
unsigned int csum;
__u16 nhoffset;
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index 5ac778962e4e..3bfec472734a 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -16,37 +16,19 @@ typedef unsigned __bitwise__ lowpan_rx_result;
#define LOWPAN_DISPATCH_FRAG1 0xc0
#define LOWPAN_DISPATCH_FRAGN 0xe0
-struct lowpan_create_arg {
+struct frag_lowpan_compare_key {
u16 tag;
u16 d_size;
- const struct ieee802154_addr *src;
- const struct ieee802154_addr *dst;
+ const struct ieee802154_addr src;
+ const struct ieee802154_addr dst;
};
-/* Equivalent of ipv4 struct ip
+/* Equivalent of ipv4 struct ipq
*/
struct lowpan_frag_queue {
struct inet_frag_queue q;
-
- u16 tag;
- u16 d_size;
- struct ieee802154_addr saddr;
- struct ieee802154_addr daddr;
};
-static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
-{
- switch (a->mode) {
- case IEEE802154_ADDR_LONG:
- return (((__force u64)a->extended_addr) >> 32) ^
- (((__force u64)a->extended_addr) & 0xffffffff);
- case IEEE802154_ADDR_SHORT:
- return (__force u32)(a->short_addr + (a->pan_id << 16));
- default:
- return 0;
- }
-}
-
int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
void lowpan_net_frag_exit(void);
int lowpan_net_frag_init(void);
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 977b4ed58112..a63360a05108 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags;
static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
struct sk_buff *prev, struct net_device *ldev);
-static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
- const struct ieee802154_addr *saddr,
- const struct ieee802154_addr *daddr)
-{
- net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
- return jhash_3words(ieee802154_addr_hash(saddr),
- ieee802154_addr_hash(daddr),
- (__force u32)(tag + (d_size << 16)),
- lowpan_frags.rnd);
-}
-
-static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
-{
- const struct lowpan_frag_queue *fq;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
-}
-
-static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct lowpan_frag_queue *fq;
- const struct lowpan_create_arg *arg = a;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return fq->tag == arg->tag && fq->d_size == arg->d_size &&
- ieee802154_addr_equal(&fq->saddr, arg->src) &&
- ieee802154_addr_equal(&fq->daddr, arg->dst);
-}
-
static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
{
- const struct lowpan_create_arg *arg = a;
+ const struct frag_lowpan_compare_key *key = a;
struct lowpan_frag_queue *fq;
fq = container_of(q, struct lowpan_frag_queue, q);
- fq->tag = arg->tag;
- fq->d_size = arg->d_size;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
+ BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
+ memcpy(&q->key, key, sizeof(*key));
}
static void lowpan_frag_expire(unsigned long data)
@@ -104,21 +72,17 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
const struct ieee802154_addr *src,
const struct ieee802154_addr *dst)
{
- struct inet_frag_queue *q;
- struct lowpan_create_arg arg;
- unsigned int hash;
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ struct frag_lowpan_compare_key key = {
+ .tag = cb->d_tag,
+ .d_size = cb->d_size,
+ .src = *src,
+ .dst = *dst,
+ };
+ struct inet_frag_queue *q;
- arg.tag = cb->d_tag;
- arg.d_size = cb->d_size;
- arg.src = src;
- arg.dst = dst;
-
- hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
-
- q = inet_frag_find(&ieee802154_lowpan->frags,
- &lowpan_frags, &arg, hash);
+ q = inet_frag_find(&ieee802154_lowpan->frags, &key);
if (IS_ERR_OR_NULL(q)) {
inet_frag_maybe_warn_overflow(q, pr_fmt());
return NULL;
@@ -610,17 +574,46 @@ static struct pernet_operations lowpan_frags_ops = {
.exit = lowpan_frags_exit_net,
};
+static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
+
+static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
+
+static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_lowpan_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params lowpan_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = lowpan_key_hashfn,
+ .obj_hashfn = lowpan_obj_hashfn,
+ .obj_cmpfn = lowpan_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
int __init lowpan_net_frag_init(void)
{
int ret;
- lowpan_frags.hashfn = lowpan_hashfn;
lowpan_frags.constructor = lowpan_frag_init;
lowpan_frags.destructor = NULL;
lowpan_frags.qsize = sizeof(struct frag_queue);
- lowpan_frags.match = lowpan_frag_match;
lowpan_frags.frag_expire = lowpan_frag_expire;
lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+ lowpan_frags.rhash_params = lowpan_rhash_params;
ret = inet_frags_init(&lowpan_frags);
if (ret)
goto out;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 89cfe0df8e56..a50ac25878aa 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,12 +25,6 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
-#define INETFRAGS_EVICT_BUCKETS 128
-#define INETFRAGS_EVICT_MAX 512
-
-/* don't rebuild inetfrag table with new secret more often than this */
-#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
-
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
* Value : 0xff if frame should be dropped.
* 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = {
};
EXPORT_SYMBOL(ip_frag_ecn_table);
-static unsigned int
-inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
-{
- return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
-}
-
-static bool inet_frag_may_rebuild(struct inet_frags *f)
-{
- return time_after(jiffies,
- f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
-}
-
-static void inet_frag_secret_rebuild(struct inet_frags *f)
-{
- int i;
-
- write_seqlock_bh(&f->rnd_seqlock);
-
- if (!inet_frag_may_rebuild(f))
- goto out;
-
- get_random_bytes(&f->rnd, sizeof(u32));
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- struct hlist_node *n;
-
- hb = &f->hash[i];
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(q, n, &hb->chain, list) {
- unsigned int hval = inet_frag_hashfn(f, q);
-
- if (hval != i) {
- struct inet_frag_bucket *hb_dest;
-
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
- hb_dest = &f->hash[hval];
-
- /* This is the only place where we take
- * another chain_lock while already holding
- * one. As this will not run concurrently,
- * we cannot deadlock on hb_dest lock below, if its
- * already locked it will be released soon since
- * other caller cannot be waiting for hb lock
- * that we've taken above.
- */
- spin_lock_nested(&hb_dest->chain_lock,
- SINGLE_DEPTH_NESTING);
- hlist_add_head(&q->list, &hb_dest->chain);
- spin_unlock(&hb_dest->chain_lock);
- }
- }
- spin_unlock(&hb->chain_lock);
- }
-
- f->rebuild = false;
- f->last_rebuild_jiffies = jiffies;
-out:
- write_sequnlock_bh(&f->rnd_seqlock);
-}
-
-static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
-{
- if (!hlist_unhashed(&q->list_evictor))
- return false;
-
- return q->net->low_thresh == 0 ||
- frag_mem_limit(q->net) >= q->net->low_thresh;
-}
-
-static unsigned int
-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
-{
- struct inet_frag_queue *fq;
- struct hlist_node *n;
- unsigned int evicted = 0;
- HLIST_HEAD(expired);
-
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
- if (!inet_fragq_should_evict(fq))
- continue;
-
- if (!del_timer(&fq->timer))
- continue;
-
- hlist_add_head(&fq->list_evictor, &expired);
- ++evicted;
- }
-
- spin_unlock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
- f->frag_expire((unsigned long) fq);
-
- return evicted;
-}
-
-static void inet_frag_worker(struct work_struct *work)
-{
- unsigned int budget = INETFRAGS_EVICT_BUCKETS;
- unsigned int i, evicted = 0;
- struct inet_frags *f;
-
- f = container_of(work, struct inet_frags, frags_work);
-
- BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
-
- local_bh_disable();
-
- for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
- evicted += inet_evict_bucket(f, &f->hash[i]);
- i = (i + 1) & (INETFRAGS_HASHSZ - 1);
- if (evicted > INETFRAGS_EVICT_MAX)
- break;
- }
-
- f->next_bucket = i;
-
- local_bh_enable();
-
- if (f->rebuild && inet_frag_may_rebuild(f))
- inet_frag_secret_rebuild(f);
-}
-
-static void inet_frag_schedule_worker(struct inet_frags *f)
-{
- if (unlikely(!work_pending(&f->frags_work)))
- schedule_work(&f->frags_work);
-}
-
int inet_frags_init(struct inet_frags *f)
{
- int i;
-
- INIT_WORK(&f->frags_work, inet_frag_worker);
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb = &f->hash[i];
-
- spin_lock_init(&hb->chain_lock);
- INIT_HLIST_HEAD(&hb->chain);
- }
-
- seqlock_init(&f->rnd_seqlock);
- f->last_rebuild_jiffies = 0;
f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
NULL);
if (!f->frags_cachep)
@@ -214,66 +59,42 @@ EXPORT_SYMBOL(inet_frags_init);
void inet_frags_fini(struct inet_frags *f)
{
- cancel_work_sync(&f->frags_work);
+ /* We must wait that all inet_frag_destroy_rcu() have completed. */
+ rcu_barrier();
+
kmem_cache_destroy(f->frags_cachep);
+ f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);
-void inet_frags_exit_net(struct netns_frags *nf)
-{
- struct inet_frags *f =nf->f;
- unsigned int seq;
- int i;
-
- nf->low_thresh = 0;
-
-evict_again:
- local_bh_disable();
- seq = read_seqbegin(&f->rnd_seqlock);
-
- for (i = 0; i < INETFRAGS_HASHSZ ; i++)
- inet_evict_bucket(f, &f->hash[i]);
-
- local_bh_enable();
- cond_resched();
-
- if (read_seqretry(&f->rnd_seqlock, seq) ||
- sum_frag_mem_limit(nf))
- goto evict_again;
-}
-EXPORT_SYMBOL(inet_frags_exit_net);
-
-static struct inet_frag_bucket *
-get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
-__acquires(hb->chain_lock)
+static void inet_frags_free_cb(void *ptr, void *arg)
{
- struct inet_frag_bucket *hb;
- unsigned int seq, hash;
+ struct inet_frag_queue *fq = ptr;
- restart:
- seq = read_seqbegin(&f->rnd_seqlock);
-
- hash = inet_frag_hashfn(f, fq);
- hb = &f->hash[hash];
+ /* If we can not cancel the timer, it means this frag_queue
+ * is already disappearing, we have nothing to do.
+ * Otherwise, we own a refcount until the end of this function.
+ */
+ if (!del_timer(&fq->timer))
+ return;
- spin_lock(&hb->chain_lock);
- if (read_seqretry(&f->rnd_seqlock, seq)) {
- spin_unlock(&hb->chain_lock);
- goto restart;
+ spin_lock_bh(&fq->lock);
+ if (!(fq->flags & INET_FRAG_COMPLETE)) {
+ fq->flags |= INET_FRAG_COMPLETE;
+ atomic_dec(&fq->refcnt);
}
+ spin_unlock_bh(&fq->lock);
- return hb;
+ inet_frag_put(fq);
}
-static inline void fq_unlink(struct inet_frag_queue *fq)
+void inet_frags_exit_net(struct netns_frags *nf)
{
- struct inet_frag_bucket *hb;
+ nf->low_thresh = 0; /* prevent creation of new frags */
- hb = get_frag_bucket_locked(fq, fq->net->f);
- hlist_del(&fq->list);
- fq->flags |= INET_FRAG_COMPLETE;
- spin_unlock(&hb->chain_lock);
+ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
+EXPORT_SYMBOL(inet_frags_exit_net);
void inet_frag_kill(struct inet_frag_queue *fq)
{
@@ -281,12 +102,26 @@ void inet_frag_kill(struct inet_frag_queue *fq)
atomic_dec(&fq->refcnt);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
- fq_unlink(fq);
+ struct netns_frags *nf = fq->net;
+
+ fq->flags |= INET_FRAG_COMPLETE;
+ rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
atomic_dec(&fq->refcnt);
}
}
EXPORT_SYMBOL(inet_frag_kill);
+static void inet_frag_destroy_rcu(struct rcu_head *head)
+{
+ struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
+ rcu);
+ struct inet_frags *f = q->net->f;
+
+ if (f->destructor)
+ f->destructor(q);
+ kmem_cache_free(f->frags_cachep, q);
+}
+
void inet_frag_destroy(struct inet_frag_queue *q)
{
struct sk_buff *fp;
@@ -310,55 +145,21 @@ void inet_frag_destroy(struct inet_frag_queue *q)
}
sum = sum_truesize + f->qsize;
- if (f->destructor)
- f->destructor(q);
- kmem_cache_free(f->frags_cachep, q);
+ call_rcu(&q->rcu, inet_frag_destroy_rcu);
sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);
-static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
- struct inet_frag_queue *qp_in,
- struct inet_frags *f,
- void *arg)
-{
- struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
- struct inet_frag_queue *qp;
-
-#ifdef CONFIG_SMP
- /* With SMP race we have to recheck hash table, because
- * such entry could have been created on other cpu before
- * we acquired hash bucket lock.
- */
- hlist_for_each_entry(qp, &hb->chain, list) {
- if (qp->net == nf && f->match(qp, arg)) {
- atomic_inc(&qp->refcnt);
- spin_unlock(&hb->chain_lock);
- qp_in->flags |= INET_FRAG_COMPLETE;
- inet_frag_put(qp_in);
- return qp;
- }
- }
-#endif
- qp = qp_in;
- if (!mod_timer(&qp->timer, jiffies + nf->timeout))
- atomic_inc(&qp->refcnt);
-
- atomic_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &hb->chain);
-
- spin_unlock(&hb->chain_lock);
-
- return qp;
-}
-
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
struct inet_frags *f,
void *arg)
{
struct inet_frag_queue *q;
+ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
+ return NULL;
+
q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
if (!q)
return NULL;
@@ -369,64 +170,51 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
- atomic_set(&q->refcnt, 1);
+ atomic_set(&q->refcnt, 3);
return q;
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- struct inet_frags *f,
void *arg)
{
+ struct inet_frags *f = nf->f;
struct inet_frag_queue *q;
+ int err;
q = inet_frag_alloc(nf, f, arg);
if (!q)
return NULL;
- return inet_frag_intern(nf, q, f, arg);
-}
+ mod_timer(&q->timer, jiffies + nf->timeout);
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key,
- unsigned int hash)
-{
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- int depth = 0;
-
- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
- inet_frag_schedule_worker(f);
+ err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
+ f->rhash_params);
+ if (err < 0) {
+ q->flags |= INET_FRAG_COMPLETE;
+ inet_frag_kill(q);
+ inet_frag_destroy(q);
return NULL;
}
+ return q;
+}
+EXPORT_SYMBOL(inet_frag_create);
- if (frag_mem_limit(nf) > nf->low_thresh)
- inet_frag_schedule_worker(f);
-
- hash &= (INETFRAGS_HASHSZ - 1);
- hb = &f->hash[hash];
-
- spin_lock(&hb->chain_lock);
- hlist_for_each_entry(q, &hb->chain, list) {
- if (q->net == nf && f->match(q, key)) {
- atomic_inc(&q->refcnt);
- spin_unlock(&hb->chain_lock);
- return q;
- }
- depth++;
- }
- spin_unlock(&hb->chain_lock);
-
- if (depth <= INETFRAGS_MAXDEPTH)
- return inet_frag_create(nf, f, key);
+/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
+{
+ struct inet_frag_queue *fq;
- if (inet_frag_may_rebuild(f)) {
- if (!f->rebuild)
- f->rebuild = true;
- inet_frag_schedule_worker(f);
+ rcu_read_lock();
+ fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ if (fq) {
+ if (!atomic_inc_not_zero(&fq->refcnt))
+ fq = NULL;
+ rcu_read_unlock();
+ return fq;
}
-
- return ERR_PTR(-ENOBUFS);
+ rcu_read_unlock();
+ return inet_frag_create(nf, key);
}
EXPORT_SYMBOL(inet_frag_find);
@@ -434,8 +222,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
const char *prefix)
{
static const char msg[] = "inet_frag_find: Fragment hash bucket"
- " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
- ". Dropping fragment.\n";
+ " list length grew over limit. Dropping fragment.\n";
if (PTR_ERR(q) == -ENOBUFS)
net_dbg_ratelimited("%s%s", prefix, msg);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b6fe05e8a431..29038513f3ca 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -68,15 +68,9 @@ struct ipfrag_skb_cb
struct ipq {
struct inet_frag_queue q;
- u32 user;
- __be32 saddr;
- __be32 daddr;
- __be16 id;
- u8 protocol;
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
- int vif; /* L3 master device index */
unsigned int rid;
struct inet_peer *peer;
};
@@ -96,41 +90,6 @@ int ip_frag_mem(struct net *net)
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
struct net_device *dev);
-struct ip4_create_arg {
- struct iphdr *iph;
- u32 user;
- int vif;
-};
-
-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
-{
- net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
- return jhash_3words((__force u32)id << 16 | prot,
- (__force u32)saddr, (__force u32)daddr,
- ip4_frags.rnd);
-}
-
-static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
-{
- const struct ipq *ipq;
-
- ipq = container_of(q, struct ipq, q);
- return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
-}
-
-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct ipq *qp;
- const struct ip4_create_arg *arg = a;
-
- qp = container_of(q, struct ipq, q);
- return qp->id == arg->iph->id &&
- qp->saddr == arg->iph->saddr &&
- qp->daddr == arg->iph->daddr &&
- qp->protocol == arg->iph->protocol &&
- qp->user == arg->user &&
- qp->vif == arg->vif;
-}
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
@@ -139,17 +98,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
frags);
struct net *net = container_of(ipv4, struct net, ipv4);
- const struct ip4_create_arg *arg = a;
+ const struct frag_v4_compare_key *key = a;
- qp->protocol = arg->iph->protocol;
- qp->id = arg->iph->id;
- qp->ecn = ip4_frag_ecn(arg->iph->tos);
- qp->saddr = arg->iph->saddr;
- qp->daddr = arg->iph->daddr;
- qp->vif = arg->vif;
- qp->user = arg->user;
+ q->key.v4 = *key;
+ qp->ecn = 0;
qp->peer = q->net->max_dist ?
- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
+ inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
NULL;
}
@@ -232,7 +186,7 @@ static void ip_expire(unsigned long arg)
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
- if (frag_expire_skip_icmp(qp->user) &&
+ if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
(skb_rtable(head)->rt_type != RTN_LOCAL))
goto out;
@@ -260,17 +214,17 @@ static void ip_expire(unsigned long arg)
static struct ipq *ip_find(struct net *net, struct iphdr *iph,
u32 user, int vif)
{
+ struct frag_v4_compare_key key = {
+ .saddr = iph->saddr,
+ .daddr = iph->daddr,
+ .user = user,
+ .vif = vif,
+ .id = iph->id,
+ .protocol = iph->protocol,
+ };
struct inet_frag_queue *q;
- struct ip4_create_arg arg;
- unsigned int hash;
-
- arg.iph = iph;
- arg.user = user;
- arg.vif = vif;
- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
-
- q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
+ q = inet_frag_find(&net->ipv4.frags, &key);
if (IS_ERR_OR_NULL(q)) {
inet_frag_maybe_warn_overflow(q, pr_fmt());
return NULL;
@@ -659,7 +613,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
err = -ENOMEM;
goto out_fail;
out_oversize:
- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
+ net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
out_fail:
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
return err;
@@ -897,15 +851,47 @@ static struct pernet_operations ip4_frags_ops = {
.exit = ipv4_frags_exit_net,
};
+
+static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v4,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v4_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params ip4_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .key_offset = offsetof(struct inet_frag_queue, key),
+ .key_len = sizeof(struct frag_v4_compare_key),
+ .hashfn = ip4_key_hashfn,
+ .obj_hashfn = ip4_obj_hashfn,
+ .obj_cmpfn = ip4_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
void __init ipfrag_init(void)
{
- ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
ip4_frags.destructor = ip4_frag_free;
ip4_frags.qsize = sizeof(struct ipq);
- ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire;
ip4_frags.frags_cache_name = ip_frag_cache_name;
+ ip4_frags.rhash_params = ip4_rhash_params;
if (inet_frags_init(&ip4_frags))
panic("IP: failed to allocate ip4_frags cache\n");
ip4_frags_ctl_register();
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 7ea2b4490672..314568d8b84a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -152,23 +152,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
-}
-
-
-static unsigned int nf_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *nq;
-
- nq = container_of(q, struct frag_queue, q);
- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
-}
-
static void nf_ct_frag6_expire(unsigned long data)
{
struct frag_queue *fq;
@@ -181,26 +164,19 @@ static void nf_ct_frag6_expire(unsigned long data)
}
/* Creation primitives. */
-static inline struct frag_queue *fq_find(struct net *net, __be32 id,
- u32 user, struct in6_addr *src,
- struct in6_addr *dst, int iif, u8 ecn)
+static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+ const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = user,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = user;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
-
- local_bh_disable();
- hash = nf_hash_frag(id, src, dst);
- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
- local_bh_enable();
+ q = inet_frag_find(&net->nf_frag.frags, &key);
if (IS_ERR_OR_NULL(q)) {
inet_frag_maybe_warn_overflow(q, pr_fmt());
return NULL;
@@ -592,8 +568,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
fhdr = (struct frag_hdr *)skb_transport_header(skb);
skb_orphan(skb);
- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ fq = fq_find(net, fhdr->identification, user, hdr,
+ skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
return -ENOMEM;
@@ -661,13 +637,12 @@ int nf_ct_frag6_init(void)
{
int ret = 0;
- nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
- nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.frags_cache_name = nf_frags_cache_name;
+ nf_frags.rhash_params = ip6_rhash_params;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 9fe99475bddf..8fe3836d1410 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -79,52 +79,13 @@ static struct inet_frags ip6_frags;
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev);
-/*
- * callers should be careful not to use the hash value outside the ipfrag_lock
- * as doing so could race with ipfrag_hash_rnd being recalculated.
- */
-static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, ip6_frags.rnd);
-}
-
-static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *fq;
-
- fq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
-}
-
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct frag_queue *fq;
- const struct ip6_create_arg *arg = a;
-
- fq = container_of(q, struct frag_queue, q);
- return fq->id == arg->id &&
- fq->user == arg->user &&
- ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst) &&
- (arg->iif == fq->iif ||
- !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
- IPV6_ADDR_LINKLOCAL)));
-}
-EXPORT_SYMBOL(ip6_frag_match);
-
void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
- const struct ip6_create_arg *arg = a;
+ const struct frag_v6_compare_key *key = a;
- fq->id = arg->id;
- fq->user = arg->user;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
- fq->ecn = arg->ecn;
+ q->key.v6 = *key;
+ fq->ecn = 0;
}
EXPORT_SYMBOL(ip6_frag_init);
@@ -181,23 +142,22 @@ static void ip6_frag_expire(unsigned long data)
}
static struct frag_queue *
-fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, int iif, u8 ecn)
+fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = IP6_DEFRAG_LOCAL_DELIVER,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = IP6_DEFRAG_LOCAL_DELIVER;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
- hash = inet6_hash_frag(id, src, dst);
+ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)))
+ key.iif = 0;
- q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
+ q = inet_frag_find(&net->ipv6.frags, &key);
if (IS_ERR_OR_NULL(q)) {
inet_frag_maybe_warn_overflow(q, pr_fmt());
return NULL;
@@ -524,6 +484,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_queue *fq;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
+ int iif;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
@@ -552,13 +513,14 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ iif = skb->dev ? skb->dev->ifindex : 0;
+ fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
int ret;
spin_lock(&fq->q.lock);
+ fq->iif = iif;
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
spin_unlock(&fq->q.lock);
@@ -732,17 +694,47 @@ static struct pernet_operations ip6_frags_ops = {
.exit = ipv6_frags_exit_net,
};
+static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v6,
+ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v6_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+const struct rhashtable_params ip6_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = ip6_key_hashfn,
+ .obj_hashfn = ip6_obj_hashfn,
+ .obj_cmpfn = ip6_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+EXPORT_SYMBOL(ip6_rhash_params);
+
int __init ipv6_frag_init(void)
{
int ret;
- ip6_frags.hashfn = ip6_hashfn;
ip6_frags.constructor = ip6_frag_init;
ip6_frags.destructor = NULL;
ip6_frags.qsize = sizeof(struct frag_queue);
- ip6_frags.match = ip6_frag_match;
ip6_frags.frag_expire = ip6_frag_expire;
ip6_frags.frags_cache_name = ip6_frag_cache_name;
+ ip6_frags.rhash_params = ip6_rhash_params;
ret = inet_frags_init(&ip6_frags);
if (ret)
goto out;
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 05/29] inet: frags: refactor lowpan_net_frag_init()
From: Florian Fainelli @ 2018-10-10 19:29 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
We want to call lowpan_net_frag_init() earlier.
Similar to commit "inet: frags: refactor ipv6_frag_init()"
This is a prereq to "inet: frags: use rhashtables for reassembly units"
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 807f1844df4ac23594268fa9f41902d0549e92aa)
---
net/ieee802154/6lowpan/reassembly.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 9ccb8458b5c3..977b4ed58112 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -614,14 +614,6 @@ int __init lowpan_net_frag_init(void)
{
int ret;
- ret = lowpan_frags_sysctl_register();
- if (ret)
- return ret;
-
- ret = register_pernet_subsys(&lowpan_frags_ops);
- if (ret)
- goto err_pernet;
-
lowpan_frags.hashfn = lowpan_hashfn;
lowpan_frags.constructor = lowpan_frag_init;
lowpan_frags.destructor = NULL;
@@ -631,11 +623,21 @@ int __init lowpan_net_frag_init(void)
lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
ret = inet_frags_init(&lowpan_frags);
if (ret)
- goto err_pernet;
+ goto out;
+ ret = lowpan_frags_sysctl_register();
+ if (ret)
+ goto err_sysctl;
+
+ ret = register_pernet_subsys(&lowpan_frags_ops);
+ if (ret)
+ goto err_pernet;
+out:
return ret;
err_pernet:
lowpan_frags_sysctl_unregister();
+err_sysctl:
+ inet_frags_fini(&lowpan_frags);
return ret;
}
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 04/29] inet: frags: refactor ipv6_frag_init()
From: Florian Fainelli @ 2018-10-10 19:29 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
We want to call inet_frags_init() earlier.
This is a prereq to "inet: frags: use rhashtables for reassembly units"
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 5b975bab23615cd0fdf67af6c9298eb01c4b9f61)
---
net/ipv6/reassembly.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 436e6d594f25..9440bb9bdab7 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -740,10 +740,21 @@ int __init ipv6_frag_init(void)
{
int ret;
- ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ ip6_frags.hashfn = ip6_hashfn;
+ ip6_frags.constructor = ip6_frag_init;
+ ip6_frags.destructor = NULL;
+ ip6_frags.qsize = sizeof(struct frag_queue);
+ ip6_frags.match = ip6_frag_match;
+ ip6_frags.frag_expire = ip6_frag_expire;
+ ip6_frags.frags_cache_name = ip6_frag_cache_name;
+ ret = inet_frags_init(&ip6_frags);
if (ret)
goto out;
+ ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ if (ret)
+ goto err_protocol;
+
ret = ip6_frags_sysctl_register();
if (ret)
goto err_sysctl;
@@ -752,16 +763,6 @@ int __init ipv6_frag_init(void)
if (ret)
goto err_pernet;
- ip6_frags.hashfn = ip6_hashfn;
- ip6_frags.constructor = ip6_frag_init;
- ip6_frags.destructor = NULL;
- ip6_frags.qsize = sizeof(struct frag_queue);
- ip6_frags.match = ip6_frag_match;
- ip6_frags.frag_expire = ip6_frag_expire;
- ip6_frags.frags_cache_name = ip6_frag_cache_name;
- ret = inet_frags_init(&ip6_frags);
- if (ret)
- goto err_pernet;
out:
return ret;
@@ -769,6 +770,8 @@ int __init ipv6_frag_init(void)
ip6_frags_sysctl_unregister();
err_sysctl:
inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+err_protocol:
+ inet_frags_fini(&ip6_frags);
goto out;
}
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 03/29] inet: frags: refactor ipfrag_init()
From: Florian Fainelli @ 2018-10-10 19:29 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
We need to call inet_frags_init() before register_pernet_subsys(),
as a prereq for following patch ("inet: frags: use rhashtables for reassembly units")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 483a6e4fa055123142d8956866fe2aa9c98d546d)
---
net/ipv4/ip_fragment.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 0fd3b475b929..b6fe05e8a431 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -899,8 +899,6 @@ static struct pernet_operations ip4_frags_ops = {
void __init ipfrag_init(void)
{
- ip4_frags_ctl_register();
- register_pernet_subsys(&ip4_frags_ops);
ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
ip4_frags.destructor = ip4_frag_free;
@@ -910,4 +908,6 @@ void __init ipfrag_init(void)
ip4_frags.frags_cache_name = ip_frag_cache_name;
if (inet_frags_init(&ip4_frags))
panic("IP: failed to allocate ip4_frags cache\n");
+ ip4_frags_ctl_register();
+ register_pernet_subsys(&ip4_frags_ops);
}
--
2.17.1
^ permalink raw reply related
* [PATCH stable 4.9 v2 02/29] inet: frags: add a pointer to struct netns_frags
From: Florian Fainelli @ 2018-10-10 19:29 UTC (permalink / raw)
To: netdev; +Cc: davem, gregkh, stable, edumazet, sthemmin
In-Reply-To: <20181010193017.25221-1-f.fainelli@gmail.com>
From: Eric Dumazet <edumazet@google.com>
In order to simplify the API, add a pointer to struct inet_frags.
This will allow us to make things less complex.
These functions no longer have a struct inet_frags parameter :
inet_frag_destroy(struct inet_frag_queue *q /*, struct inet_frags *f */)
inet_frag_put(struct inet_frag_queue *q /*, struct inet_frags *f */)
inet_frag_kill(struct inet_frag_queue *q /*, struct inet_frags *f */)
inet_frags_exit_net(struct netns_frags *nf /*, struct inet_frags *f */)
ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 093ba72914b696521e4885756a68a3332782c8de)
---
include/net/inet_frag.h | 11 ++++++-----
include/net/ipv6.h | 3 +--
net/ieee802154/6lowpan/reassembly.c | 13 +++++++------
net/ipv4/inet_fragment.c | 17 ++++++++++-------
net/ipv4/ip_fragment.c | 9 +++++----
net/ipv6/netfilter/nf_conntrack_reasm.c | 16 +++++++++-------
net/ipv6/reassembly.c | 20 ++++++++++----------
7 files changed, 48 insertions(+), 41 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 3fb9c48dedfe..19a2ccda0300 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -9,6 +9,7 @@ struct netns_frags {
int high_thresh;
int low_thresh;
int max_dist;
+ struct inet_frags *f;
};
/**
@@ -108,20 +109,20 @@ static inline int inet_frags_init_net(struct netns_frags *nf)
atomic_set(&nf->mem, 0);
return 0;
}
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+void inet_frags_exit_net(struct netns_frags *nf);
-void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
+void inet_frag_kill(struct inet_frag_queue *q);
+void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
struct inet_frags *f, void *key, unsigned int hash);
void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
const char *prefix);
-static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
+static inline void inet_frag_put(struct inet_frag_queue *q)
{
if (atomic_dec_and_test(&q->refcnt))
- inet_frag_destroy(q, f);
+ inet_frag_destroy(q);
}
static inline bool inet_frag_evicting(struct inet_frag_queue *q)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 64b0e9df31c7..903a10a5f259 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -559,8 +559,7 @@ struct frag_queue {
u8 ecn;
};
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
- struct inet_frags *frags);
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
static inline bool ipv6_addr_any(const struct in6_addr *a)
{
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 9757ce6c077a..9ccb8458b5c3 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -93,10 +93,10 @@ static void lowpan_frag_expire(unsigned long data)
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
}
static inline struct lowpan_frag_queue *
@@ -229,7 +229,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
struct sk_buff *fp, *head = fq->q.fragments;
int sum_truesize;
- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
/* Make the one we just received the head. */
if (prev) {
@@ -437,7 +437,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
ret = lowpan_frag_queue(fq, skb, frag_type);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
return ret;
}
@@ -585,13 +585,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+ ieee802154_lowpan->frags.f = &lowpan_frags;
res = inet_frags_init_net(&ieee802154_lowpan->frags);
if (res < 0)
return res;
res = lowpan_frags_ns_sysctl_register(net);
if (res < 0)
- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
return res;
}
@@ -601,7 +602,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
net_ieee802154_lowpan(net);
lowpan_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
}
static struct pernet_operations lowpan_frags_ops = {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index f8b41aaac76f..89cfe0df8e56 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -219,8 +219,9 @@ void inet_frags_fini(struct inet_frags *f)
}
EXPORT_SYMBOL(inet_frags_fini);
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
+void inet_frags_exit_net(struct netns_frags *nf)
{
+ struct inet_frags *f =nf->f;
unsigned int seq;
int i;
@@ -264,33 +265,34 @@ __acquires(hb->chain_lock)
return hb;
}
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+static inline void fq_unlink(struct inet_frag_queue *fq)
{
struct inet_frag_bucket *hb;
- hb = get_frag_bucket_locked(fq, f);
+ hb = get_frag_bucket_locked(fq, fq->net->f);
hlist_del(&fq->list);
fq->flags |= INET_FRAG_COMPLETE;
spin_unlock(&hb->chain_lock);
}
-void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frag_kill(struct inet_frag_queue *fq)
{
if (del_timer(&fq->timer))
atomic_dec(&fq->refcnt);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
- fq_unlink(fq, f);
+ fq_unlink(fq);
atomic_dec(&fq->refcnt);
}
}
EXPORT_SYMBOL(inet_frag_kill);
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
+void inet_frag_destroy(struct inet_frag_queue *q)
{
struct sk_buff *fp;
struct netns_frags *nf;
unsigned int sum, sum_truesize = 0;
+ struct inet_frags *f;
WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
WARN_ON(del_timer(&q->timer) != 0);
@@ -298,6 +300,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
/* Release all fragment data. */
fp = q->fragments;
nf = q->net;
+ f = nf->f;
while (fp) {
struct sk_buff *xp = fp->next;
@@ -333,7 +336,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
spin_unlock(&hb->chain_lock);
qp_in->flags |= INET_FRAG_COMPLETE;
- inet_frag_put(qp_in, f);
+ inet_frag_put(qp_in);
return qp;
}
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 803914569858..0fd3b475b929 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -167,7 +167,7 @@ static void ip4_frag_free(struct inet_frag_queue *q)
static void ipq_put(struct ipq *ipq)
{
- inet_frag_put(&ipq->q, &ip4_frags);
+ inet_frag_put(&ipq->q);
}
/* Kill ipq entry. It is not destroyed immediately,
@@ -175,7 +175,7 @@ static void ipq_put(struct ipq *ipq)
*/
static void ipq_kill(struct ipq *ipq)
{
- inet_frag_kill(&ipq->q, &ip4_frags);
+ inet_frag_kill(&ipq->q);
}
static bool frag_expire_skip_icmp(u32 user)
@@ -875,20 +875,21 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.timeout = IP_FRAG_TIME;
net->ipv4.frags.max_dist = 64;
+ net->ipv4.frags.f = &ip4_frags;
res = inet_frags_init_net(&net->ipv4.frags);
if (res < 0)
return res;
res = ip4_frags_ns_ctl_register(net);
if (res < 0)
- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+ inet_frags_exit_net(&net->ipv4.frags);
return res;
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
{
ip4_frags_ns_ctl_unregister(net);
- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+ inet_frags_exit_net(&net->ipv4.frags);
}
static struct pernet_operations ip4_frags_ops = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index afa9ea76155d..7ea2b4490672 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -177,7 +177,7 @@ static void nf_ct_frag6_expire(unsigned long data)
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
- ip6_expire_frag_queue(net, fq, &nf_frags);
+ ip6_expire_frag_queue(net, fq);
}
/* Creation primitives. */
@@ -263,7 +263,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
return -EPROTO;
}
if (end > fq->q.len) {
@@ -356,7 +356,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
return 0;
discard_fq:
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
err:
return -EINVAL;
}
@@ -378,7 +378,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
int payload_len;
u8 ecn;
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
WARN_ON(head == NULL);
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
@@ -623,7 +623,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
out_unlock:
spin_unlock_bh(&fq->q.lock);
- inet_frag_put(&fq->q, &nf_frags);
+ inet_frag_put(&fq->q);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
@@ -635,19 +635,21 @@ static int nf_ct_net_init(struct net *net)
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+ net->nf_frag.frags.f = &nf_frags;
+
res = inet_frags_init_net(&net->nf_frag.frags);
if (res < 0)
return res;
res = nf_ct_frag6_sysctl_register(net);
if (res < 0)
- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+ inet_frags_exit_net(&net->nf_frag.frags);
return res;
}
static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+ inet_frags_exit_net(&net->nf_frag.frags);
}
static struct pernet_operations nf_ct_net_ops = {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ee8095eaca5d..436e6d594f25 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -128,8 +128,7 @@ void ip6_frag_init(struct inet_frag_queue *q, const void *a)
}
EXPORT_SYMBOL(ip6_frag_init);
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
- struct inet_frags *frags)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
@@ -138,7 +137,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
- inet_frag_kill(&fq->q, frags);
+ inet_frag_kill(&fq->q);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, fq->iif);
@@ -166,7 +165,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
rcu_read_unlock();
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, frags);
+ inet_frag_put(&fq->q);
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
@@ -178,7 +177,7 @@ static void ip6_frag_expire(unsigned long data)
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
- ip6_expire_frag_queue(net, fq, &ip6_frags);
+ ip6_expire_frag_queue(net, fq);
}
static struct frag_queue *
@@ -359,7 +358,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
return -1;
discard_fq:
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
err:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
@@ -386,7 +385,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
int sum_truesize;
u8 ecn;
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -563,7 +562,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &ip6_frags);
+ inet_frag_put(&fq->q);
return ret;
}
@@ -714,6 +713,7 @@ static int __net_init ipv6_frags_init_net(struct net *net)
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+ net->ipv6.frags.f = &ip6_frags;
res = inet_frags_init_net(&net->ipv6.frags);
if (res < 0)
@@ -721,14 +721,14 @@ static int __net_init ipv6_frags_init_net(struct net *net)
res = ip6_frags_ns_sysctl_register(net);
if (res < 0)
- inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+ inet_frags_exit_net(&net->ipv6.frags);
return res;
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
{
ip6_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+ inet_frags_exit_net(&net->ipv6.frags);
}
static struct pernet_operations ip6_frags_ops = {
--
2.17.1
^ permalink raw reply related
* Re: [PATCH] qmi_wwan: Added support for Gemalto's Cinterion ALASxx WWAN interface
From: Bjørn Mork @ 2018-10-10 19:23 UTC (permalink / raw)
To: Giacinto Cifelli; +Cc: netdev
In-Reply-To: <20181010180553.3986-1-gciofono@gmail.com>
Giacinto Cifelli <gciofono@gmail.com> writes:
> Added support for Gemalto's Cinterion ALASxx WWAN interfaces
> by adding QMI_FIXED_INTF with Cinterion's VID and PID.
>
> Signed-off-by: Giacinto Cifelli <gciofono@gmail.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
^ permalink raw reply
* Re: [PATCH iproute 2/2] utils: fix get_rtnl_link_stats_rta stats parsing
From: Stephen Hemminger @ 2018-10-10 19:11 UTC (permalink / raw)
To: Lorenzo Bianconi; +Cc: netdev
In-Reply-To: <8b8bb899db6c8ce78d91aa8b192e6c697daf4e27.1539182623.git.lorenzo.bianconi@redhat.com>
On Wed, 10 Oct 2018 17:00:58 +0200
Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote:
> iproute2 walks through the list of available tunnels using netlink
> protocol in order to get device info instead of reading
> them from proc filesystem. However the kernel reports device statistics
> using IFLA_INET6_STATS/IFLA_INET6_ICMP6STATS attributes nested in
> IFLA_PROTINFO one but iproutes expects these info in
> IFLA_STATS64/IFLA_STATS attributes.
> The issue can be triggered with the following reproducer:
>
> $ip link add ip6d0 type ip6tnl mode ip6ip6 local 1111::1 remote 2222::1
> $ip -6 -d -s tunnel show ip6d0
> ip6d0: ipv6/ipv6 remote 2222::1 local 1111::1 encaplimit 4 hoplimit 64
> tclass 0x00 flowlabel 0x00000 (flowinfo 0x00000000)
> Dump terminated
>
> Fix the issue introducing IFLA_INET6_STATS attribute parsing
>
> Fixes: 3e953938717f ("iptunnel/ip6tunnel: Use netlink to walk through
> tunnels list")
>
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Can't we fix the kernel to report statistics properly, rather than
starting iproute2 doing more /proc interfaces.
^ permalink raw reply
* Re: [sky2 driver] 88E8056 PCI-E Gigabit Ethernet Controller not working after suspend
From: Stephen Hemminger @ 2018-10-10 19:09 UTC (permalink / raw)
To: Laurent Bigonville; +Cc: netdev
In-Reply-To: <aa7dd2f9-fc1a-6b59-3f9a-716dfbc4de9b@bigon.be>
On Wed, 10 Oct 2018 03:16:40 +0200
Laurent Bigonville <bigon@bigon.be> wrote:
> Le 9/10/18 à 22:09, Stephen Hemminger a écrit :
> > On Tue, 9 Oct 2018 19:30:30 +0200
> > Laurent Bigonville <bigon@bigon.be> wrote:
> >
> >> Hello,
> >>
> >> On my desktop (Asus MB with dual Ethernet port), when waking up after
> >> suspend, the network card is not detecting the link.
> >>
> >> I have to rmmod the sky2 driver and then modprobing it again.
> >>
> >> lspci shows me:
> >>
> >> 04:00.0 Ethernet controller: Marvell Technology Group Ltd. 88E8056 PCI-E
> >> Gigabit Ethernet Controller (rev 12)
> >> 05:00.0 Ethernet controller: Marvell Technology Group Ltd. 88E8056 PCI-E
> >> Gigabit Ethernet Controller (rev 12)
> >>
> >> An idea what's wrong here?
> >>
> >> Kind regards,
> >>
> >> Laurent Bigonville
> >>
> > I used to have that motherboard (about 8 years ago). Long dead by now.
> >
> > There was some issue with how the power management worked. Forgot the workaround,
> > you might have to dig in the mailing list archive.
>
> I've made some test and it seems that this was working in 4.14 and then
> broken in 4.15 (using the debian kernel pkg), so it was working not that
> long ago:
>
> The only commit I see to the sky2 driver is the following:
>
> commit e99e88a9d2b067465adaa9c111ada99a041bef9a
> Author: Kees Cook <keescook@chromium.org>
> Date: Mon Oct 16 14:43:17 2017 -0700
>
> treewide: setup_timer() -> timer_setup()
>
> This converts all remaining cases of the old setup_timer() API into
> using
> timer_setup(), where the callback argument is the structure already
> holding the struct timer_list. These should have no behavioral changes,
> since they just change which pointer is passed into the callback with
> the same available pointers after conversion. It handles the following
> examples, in addition to some other variations.
>
>
Probably something in PCI power management could.
^ permalink raw reply
* Re: [PATCH v3 2/2] treewide: use bus_find_device_by_fwnode
From: Rob Herring @ 2018-10-10 19:05 UTC (permalink / raw)
To: svellattu
Cc: Mathieu Poirier, Greg Kroah-Hartman, Rafael J. Wysocki,
linux-kernel@vger.kernel.org,
moderated list:ARM/FREESCALE IMX / MXC ARM ARCHITECTURE,
Linux I2C, linux-rdma, netdev, devicetree, linux-spi,
Wolfram Sang, oulijun, xavier.huwei, Yisen Zhuang, Salil Mehta,
Srinivas Kandagatla, Andrew Lunn, Florian Fainelli, Frank Rowand
In-Reply-To: <CALabi0FgQWGL8KMpCKt9N6Kkoig77LxOsea3Oe47tk5nOH10-Q@mail.gmail.com>
On Tue, Oct 9, 2018 at 9:59 PM Silesh C V <svellattu@mvista.com> wrote:
>
> Hello Mathieu,
>
> On Tue, Oct 9, 2018 at 11:14 PM Mathieu Poirier
> <mathieu.poirier@linaro.org> wrote:
> >
> >
> > This doesn't apply on linux-next. This late in the cycle you may want to rebase
> > your work on that tree.
>
> OK. Will send v4 after rebasing onto linux-next.
That doesn't help anything unless you are targeting 4.21. If you are
it would be easiest to apply patch 1 in 4.20 and split this patch by
subsystem for 4.21. That avoids any cross subsystem dependencies.
Rob
^ permalink raw reply
* Re: [PATCH net-next] net: enable RPS on vlan devices
From: Eric Dumazet @ 2018-10-10 18:32 UTC (permalink / raw)
To: Shannon Nelson, davem, netdev; +Cc: silviu.smarandache
In-Reply-To: <13b1a780-ebde-3f0c-65f3-72157bc23690@oracle.com>
On 10/10/2018 11:23 AM, Shannon Nelson wrote:
> For that matter, lots of features are "default off" until someone configures them, and there are lots of features that are only used by a subset of users. In this case, we're trying to use something that already exists and arguably is broken for the vlan case. Are there some technical reasons why this is not a good solution?
Simply code maintenance, and added cost for non vlan users.
Adding extra tests might help your use-case, but is slowing down other cases.
Since the need for new stuff will never end, really eBPF allow for arbitrary usage,
without the need for review kernel patches and maintaining it for next 10 years.
^ permalink raw reply
* Re: [PATCH net-next] net: enable RPS on vlan devices
From: Shannon Nelson @ 2018-10-10 18:25 UTC (permalink / raw)
To: John Fastabend, Eric Dumazet, davem, netdev; +Cc: silviu.smarandache
In-Reply-To: <ed80d9bb-fad3-a0b7-9473-ad2cdf0c1e2e@gmail.com>
On 10/10/2018 10:37 AM, John Fastabend wrote:
> Latest tree has a sk_lookup() helper supported in 'tc' layer now
> to lookup the socket. And XDP has support for a "cpumap" object
> that allows redirect to remote CPUs. Neither was specifically
> designed for this but I suspect with some extra work these might
> be what is needed.
>
> I would start by looking at bpf_sk_lookup() in filter.c and the
> cpumap type in ./kernel/bpf/cpumap.c, also in general sk_lookup
> from XDP layer will likely be needed shortly anyways.
Thanks, John, for pointing to something I can start looking at. My
customer still wants to use the RPS that they already know, but I'll
start looking into how this also might work for us.
sln
^ permalink raw reply
* Please Reply / Can i have a bit of your attention?
From: Bauer McDonalds @ 2018-10-10 16:30 UTC (permalink / raw)
--
Hello dear! I sincerely hope i do not bother you with my message.I know
this might look strange because we don't know each other but i believe
anything is possible if we try. I have actually lost confidence on
dating sites and due to the nature of my work its not easy to find
someone so i decided to give this a shot. Hey relax haha! I know you're
still in shock Let me quickly introduce myself I'm Bauer McDonalds I'm a
down to earth person Hardworking and Ambitious, I would love to meet a
good person to be friends with and see where it leads to. Finding
someone is very important for me right now because i have less busy days
and i'm ready to give it my 100% to make this work when i find that
special one, You could be the one :).. Feel free to write back and ask
any questions about me and i will answer and i also promise to send
pictures of myself when i get your reply.. Hope to hear back from you
kisses & hugs
Bauer
^ permalink raw reply
* Re: [PATCH net-next] net: enable RPS on vlan devices
From: Shannon Nelson @ 2018-10-10 18:23 UTC (permalink / raw)
To: Eric Dumazet, davem, netdev; +Cc: silviu.smarandache
In-Reply-To: <ff43dd14-37e8-115a-15b2-27fa4bbbfd28@gmail.com>
On 10/10/2018 10:14 AM, Eric Dumazet wrote:
>
> On 10/10/2018 09:18 AM, Shannon Nelson wrote:
>> On 10/9/2018 7:17 PM, Eric Dumazet wrote:
>>>
>>>
>>> On 10/09/2018 07:11 PM, Shannon Nelson wrote:
>>>>
>>>> Hence the reason we sent this as an RFC a couple of weeks ago. We got no response, so followed up with this patch in order to get some input. Do you have any suggestions for how we might accomplish this in a less ugly way?
>>>
>>> I dunno, maybe a modern way for all these very specific needs would be to use an eBPF
>>> hook to implement whatever combination of RPS/RFS/what_have_you
>>>
>>> Then, we no longer have to review what various strategies are used by users.
>>
>> We're trying to make use of an existing useful feature that was designed for exactly this kind of problem. It is already there and no new user training is needed. We're actually fixing what could arguably be called a bug since the /sys/class/net/<dev>/queues/rx-0/rps_cpus entry exists for vlan devices but currently doesn't do anything. We're also addressing a security concern related to the recent L1TF excitement.
>>
>> For this case, we want to target the network stack processing to happen on a certain subset of CPUs. With admittedly only a cursory look through eBPF, I don't see an obvious way to target the packet processing to an alternative CPU, unless we add yet another field to the skb that eBPF/XDP could fill and then query that field in the same time as we currently check get_rps_cpu(). But adding to the skb is usually frowned upon unless absolutely necessary, and this seems like a duplication of what we already have with RPS, so why add a competing feature?
>>
>> Back to my earlier question: are there any suggestions for how we might accomplish this in a less ugly way?
>
>
> What if you want to have efficient multi queue processing ?
> The Vlan device could have multiple RX queues, but you forced queue_mapping=0
This would be easy enough to change to a simple modulus of a recorded
queue mapping with the number of queues in the vlan device. We can fix
that.
>
> Honestly, RPS & RFS show their age and complexity (look at net/core/net-sysfs.c ...)
>
> We should not expand it, we should put in place a new infrastructure, fully expandable.
> With socket lookups, we even can avoid having a hashtable for flow information, removing
> one cache miss, and removing flow collisions.
>
> eBPF seems perfect to me.
And yet specifying CPUs for processing is exactly what RPS was designed for.
>
> It is time that we stop adding core infra that most users do not need/use.
> (RPS and RFS are default off)
For that matter, lots of features are "default off" until someone
configures them, and there are lots of features that are only used by a
subset of users. In this case, we're trying to use something that
already exists and arguably is broken for the vlan case. Are there some
technical reasons why this is not a good solution?
sln
^ permalink raw reply
* [net-next] tipc: support binding to specific ip address when activating UDP bearer
From: Hoang Le @ 2018-10-11 1:43 UTC (permalink / raw)
To: jon.maloy, maloy, ying.xue, netdev, tipc-discussion
INADDR_ANY is hard-coded when activating UDP bearer. So, we could not
bind to a specific IP address even with replicast mode using - given
remote ip address instead of using multicast ip address.
In this commit, we fixed it by checking and switch to use appropriate
local ip address.
before:
$netstat -plu
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address
udp 0 0 **0.0.0.0:6118** 0.0.0.0:*
after:
$netstat -plu
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address
udp 0 0 **10.0.0.2:6118** 0.0.0.0:*
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
---
net/tipc/udp_media.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 9783101bc4a9..10dc59ce9c82 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -650,6 +650,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
u8 node_id[NODE_ID_LEN] = {0,};
+ int rmcast = 0;
ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
if (!ub)
@@ -680,6 +681,9 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
if (err)
goto err;
+ /* Checking remote ip address */
+ rmcast = tipc_udp_is_mcast_addr(&remote);
+
/* Autoconfigure own node identity if needed */
if (!tipc_own_id(net)) {
memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16);
@@ -705,7 +709,12 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
goto err;
}
udp_conf.family = AF_INET;
- udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+
+ /* Switch to use ANY to receive packets from group */
+ if (rmcast)
+ udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+ else
+ udp_conf.local_ip.s_addr = local.ipv4.s_addr;
udp_conf.use_udp_checksums = false;
ub->ifindex = dev->ifindex;
if (tipc_mtu_bad(dev, sizeof(struct iphdr) +
@@ -719,7 +728,10 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
udp_conf.family = AF_INET6;
udp_conf.use_udp6_tx_checksums = true;
udp_conf.use_udp6_rx_checksums = true;
- udp_conf.local_ip6 = in6addr_any;
+ if (rmcast)
+ udp_conf.local_ip6 = in6addr_any;
+ else
+ udp_conf.local_ip6 = local.ipv6;
b->mtu = 1280;
#endif
} else {
@@ -741,7 +753,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
* is used if it's a multicast address.
*/
memcpy(&b->bcast_addr.value, &remote, sizeof(remote));
- if (tipc_udp_is_mcast_addr(&remote))
+ if (rmcast)
err = enable_mcast(ub, &remote);
else
err = tipc_udp_rcast_add(b, &remote);
--
2.17.1
^ permalink raw reply related
* [PATCH net-next v5] net/ncsi: Extend NC-SI Netlink interface to allow user space to send NC-SI command
From: Justin.Lee1 @ 2018-10-10 18:11 UTC (permalink / raw)
To: sam, joel; +Cc: linux-aspeed, netdev, openbmc, amithash, christian, vijaykhemka
The new command (NCSI_CMD_SEND_CMD) is added to allow user space application
to send NC-SI command to the network card.
Also, add a new attribute (NCSI_ATTR_DATA) for transferring request and response.
The work flow is as below.
Request:
User space application
-> Netlink interface (msg)
-> new Netlink handler - ncsi_send_cmd_nl()
-> ncsi_xmit_cmd()
Response:
Response received - ncsi_rcv_rsp()
-> internal response handler - ncsi_rsp_handler_xxx()
-> ncsi_rsp_handler_netlink()
-> ncsi_send_netlink_rsp ()
-> Netlink interface (msg)
-> user space application
Command timeout - ncsi_request_timeout()
-> ncsi_send_netlink_timeout ()
-> Netlink interface (msg with zero data length)
-> user space application
Error:
Error detected
-> ncsi_send_netlink_err ()
-> Netlink interface (err msg)
-> user space application
Signed-off-by: Justin Lee <justin.lee1@dell.com>
---
V5: Update comments and debug message.
V4: Update comments and remove some debug message.
V3: Based on http://patchwork.ozlabs.org/patch/979688/ to remove the duplicated code.
V2: Remove non-related debug message and clean up the code.
include/uapi/linux/ncsi.h | 6 ++
net/ncsi/internal.h | 7 ++
net/ncsi/ncsi-cmd.c | 8 ++
net/ncsi/ncsi-manage.c | 16 ++++
net/ncsi/ncsi-netlink.c | 200 ++++++++++++++++++++++++++++++++++++++++++++++
net/ncsi/ncsi-netlink.h | 12 +++
net/ncsi/ncsi-rsp.c | 67 ++++++++++++++--
7 files changed, 311 insertions(+), 5 deletions(-)
diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h
index 4c292ec..0a26a55 100644
--- a/include/uapi/linux/ncsi.h
+++ b/include/uapi/linux/ncsi.h
@@ -23,6 +23,9 @@
* optionally the preferred NCSI_ATTR_CHANNEL_ID.
* @NCSI_CMD_CLEAR_INTERFACE: clear any preferred package/channel combination.
* Requires NCSI_ATTR_IFINDEX.
+ * @NCSI_CMD_SEND_CMD: send NC-SI command to network card.
+ * Requires NCSI_ATTR_IFINDEX, NCSI_ATTR_PACKAGE_ID
+ * and NCSI_ATTR_CHANNEL_ID.
* @NCSI_CMD_MAX: highest command number
*/
enum ncsi_nl_commands {
@@ -30,6 +33,7 @@ enum ncsi_nl_commands {
NCSI_CMD_PKG_INFO,
NCSI_CMD_SET_INTERFACE,
NCSI_CMD_CLEAR_INTERFACE,
+ NCSI_CMD_SEND_CMD,
__NCSI_CMD_AFTER_LAST,
NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1
@@ -43,6 +47,7 @@ enum ncsi_nl_commands {
* @NCSI_ATTR_PACKAGE_LIST: nested array of NCSI_PKG_ATTR attributes
* @NCSI_ATTR_PACKAGE_ID: package ID
* @NCSI_ATTR_CHANNEL_ID: channel ID
+ * @NCSI_ATTR_DATA: command payload
* @NCSI_ATTR_MAX: highest attribute number
*/
enum ncsi_nl_attrs {
@@ -51,6 +56,7 @@ enum ncsi_nl_attrs {
NCSI_ATTR_PACKAGE_LIST,
NCSI_ATTR_PACKAGE_ID,
NCSI_ATTR_CHANNEL_ID,
+ NCSI_ATTR_DATA,
__NCSI_ATTR_AFTER_LAST,
NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 3d0a33b..13c9b5e 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -175,6 +175,8 @@ struct ncsi_package;
#define NCSI_RESERVED_CHANNEL 0x1f
#define NCSI_CHANNEL_INDEX(c) ((c) & ((1 << NCSI_PACKAGE_SHIFT) - 1))
#define NCSI_TO_CHANNEL(p, c) (((p) << NCSI_PACKAGE_SHIFT) | (c))
+#define NCSI_MAX_PACKAGE 8
+#define NCSI_MAX_CHANNEL 32
struct ncsi_channel {
unsigned char id;
@@ -220,11 +222,15 @@ struct ncsi_request {
bool used; /* Request that has been assigned */
unsigned int flags; /* NCSI request property */
#define NCSI_REQ_FLAG_EVENT_DRIVEN 1
+#define NCSI_REQ_FLAG_NETLINK_DRIVEN 2
struct ncsi_dev_priv *ndp; /* Associated NCSI device */
struct sk_buff *cmd; /* Associated NCSI command packet */
struct sk_buff *rsp; /* Associated NCSI response packet */
struct timer_list timer; /* Timer on waiting for response */
bool enabled; /* Time has been enabled or not */
+ u32 snd_seq; /* netlink sending sequence number */
+ u32 snd_portid; /* netlink portid of sender */
+ struct nlmsghdr nlhdr; /* netlink message header */
};
enum {
@@ -310,6 +316,7 @@ struct ncsi_cmd_arg {
unsigned int dwords[4];
};
unsigned char *data; /* NCSI OEM data */
+ struct genl_info *info; /* Netlink information */
};
extern struct list_head ncsi_dev_list;
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 82b7d92..356af47 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -17,6 +17,7 @@
#include <net/ncsi.h>
#include <net/net_namespace.h>
#include <net/sock.h>
+#include <net/genetlink.h>
#include "internal.h"
#include "ncsi-pkt.h"
@@ -346,6 +347,13 @@ int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca)
if (!nr)
return -ENOMEM;
+ /* track netlink information */
+ if (nca->req_flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) {
+ nr->snd_seq = nca->info->snd_seq;
+ nr->snd_portid = nca->info->snd_portid;
+ nr->nlhdr = *nca->info->nlhdr;
+ }
+
/* Prepare the packet */
nca->id = nr->id;
ret = nch->handler(nr->cmd, nca);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 0912847..76a4bcb 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -19,6 +19,7 @@
#include <net/addrconf.h>
#include <net/ipv6.h>
#include <net/if_inet6.h>
+#include <net/genetlink.h>
#include "internal.h"
#include "ncsi-pkt.h"
@@ -406,6 +407,9 @@ static void ncsi_request_timeout(struct timer_list *t)
{
struct ncsi_request *nr = from_timer(nr, t, timer);
struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ struct ncsi_cmd_pkt *cmd;
unsigned long flags;
/* If the request already had associated response,
@@ -419,6 +423,18 @@ static void ncsi_request_timeout(struct timer_list *t)
}
spin_unlock_irqrestore(&ndp->lock, flags);
+ if (nr->flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) {
+ if (nr->cmd) {
+ /* Find the package */
+ cmd = (struct ncsi_cmd_pkt *)
+ skb_network_header(nr->cmd);
+ ncsi_find_package_and_channel(ndp,
+ cmd->cmd.common.channel,
+ &np, &nc);
+ ncsi_send_netlink_timeout(nr, np, nc);
+ }
+ }
+
/* Release the request */
ncsi_free_request(nr);
}
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 45f33d6..eaee570 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -20,6 +20,7 @@
#include <uapi/linux/ncsi.h>
#include "internal.h"
+#include "ncsi-pkt.h"
#include "ncsi-netlink.h"
static struct genl_family ncsi_genl_family;
@@ -29,6 +30,7 @@ static const struct nla_policy ncsi_genl_policy[NCSI_ATTR_MAX + 1] = {
[NCSI_ATTR_PACKAGE_LIST] = { .type = NLA_NESTED },
[NCSI_ATTR_PACKAGE_ID] = { .type = NLA_U32 },
[NCSI_ATTR_CHANNEL_ID] = { .type = NLA_U32 },
+ [NCSI_ATTR_DATA] = { .type = NLA_BINARY, .len = 2048 },
};
static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex)
@@ -366,6 +368,198 @@ static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
return 0;
}
+static int ncsi_send_cmd_nl(struct sk_buff *msg, struct genl_info *info)
+{
+ struct ncsi_dev_priv *ndp;
+
+ struct ncsi_cmd_arg nca;
+ struct ncsi_pkt_hdr *hdr;
+
+ u32 package_id, channel_id;
+ unsigned char *data;
+ int len, ret;
+
+ if (!info || !info->attrs) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX]) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+ channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+
+ if (package_id >= NCSI_MAX_PACKAGE || channel_id >= NCSI_MAX_CHANNEL) {
+ ret = -ERANGE;
+ goto out_netlink;
+ }
+
+ len = nla_len(info->attrs[NCSI_ATTR_DATA]);
+ if (len < sizeof(struct ncsi_pkt_hdr)) {
+ netdev_info(ndp->ndev.dev, "NCSI: no command to send %u\n",
+ package_id);
+ ret = -EINVAL;
+ goto out_netlink;
+ } else {
+ data = (unsigned char *)nla_data(info->attrs[NCSI_ATTR_DATA]);
+ }
+
+ hdr = (struct ncsi_pkt_hdr *)data;
+
+ nca.ndp = ndp;
+ nca.package = (unsigned char)package_id;
+ nca.channel = (unsigned char)channel_id;
+ nca.type = hdr->type;
+ nca.req_flags = NCSI_REQ_FLAG_NETLINK_DRIVEN;
+ nca.info = info;
+ nca.payload = ntohs(hdr->length);
+ nca.data = data + sizeof(*hdr);
+
+ ret = ncsi_xmit_cmd(&nca);
+out_netlink:
+ if (ret != 0) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Error %d sending command\n",
+ ret);
+ ncsi_send_netlink_err(ndp->ndev.dev,
+ info->snd_seq,
+ info->snd_portid,
+ info->nlhdr,
+ ret);
+ }
+out:
+ return ret;
+}
+
+int ncsi_send_netlink_rsp(struct ncsi_request *nr,
+ struct ncsi_package *np,
+ struct ncsi_channel *nc)
+{
+ struct sk_buff *skb;
+ struct net *net;
+ void *hdr;
+ int rc;
+
+ net = dev_net(nr->rsp->dev);
+
+ skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(skb, nr->snd_portid, nr->snd_seq,
+ &ncsi_genl_family, 0, NCSI_CMD_SEND_CMD);
+ if (!hdr) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ nla_put_u32(skb, NCSI_ATTR_IFINDEX, nr->rsp->dev->ifindex);
+ if (np)
+ nla_put_u32(skb, NCSI_ATTR_PACKAGE_ID, np->id);
+ if (nc)
+ nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, nc->id);
+ else
+ nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, NCSI_RESERVED_CHANNEL);
+
+ rc = nla_put(skb, NCSI_ATTR_DATA, nr->rsp->len, (void *)nr->rsp->data);
+ if (rc)
+ goto err;
+
+ genlmsg_end(skb, hdr);
+ return genlmsg_unicast(net, skb, nr->snd_portid);
+
+err:
+ kfree_skb(skb);
+ return rc;
+}
+
+int ncsi_send_netlink_timeout(struct ncsi_request *nr,
+ struct ncsi_package *np,
+ struct ncsi_channel *nc)
+{
+ struct sk_buff *skb;
+ struct net *net;
+ void *hdr;
+
+ skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(skb, nr->snd_portid, nr->snd_seq,
+ &ncsi_genl_family, 0, NCSI_CMD_SEND_CMD);
+ if (!hdr) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ net = dev_net(nr->cmd->dev);
+
+ nla_put_u32(skb, NCSI_ATTR_IFINDEX, nr->cmd->dev->ifindex);
+
+ if (np)
+ nla_put_u32(skb, NCSI_ATTR_PACKAGE_ID, np->id);
+ else
+ nla_put_u32(skb, NCSI_ATTR_PACKAGE_ID,
+ NCSI_PACKAGE_INDEX((((struct ncsi_pkt_hdr *)
+ nr->cmd->data)->channel)));
+
+ if (nc)
+ nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, nc->id);
+ else
+ nla_put_u32(skb, NCSI_ATTR_CHANNEL_ID, NCSI_RESERVED_CHANNEL);
+
+ genlmsg_end(skb, hdr);
+ return genlmsg_unicast(net, skb, nr->snd_portid);
+}
+
+int ncsi_send_netlink_err(struct net_device *dev,
+ u32 snd_seq,
+ u32 snd_portid,
+ struct nlmsghdr *nlhdr,
+ int err)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ struct nlmsgerr *nle;
+ struct net *net;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+
+ net = dev_net(dev);
+
+ nlh = nlmsg_put(skb, snd_portid, snd_seq,
+ NLMSG_ERROR, sizeof(*nle), 0);
+ nle = (struct nlmsgerr *)nlmsg_data(nlh);
+ nle->error = err;
+ memcpy(&nle->msg, nlhdr, sizeof(*nlh));
+
+ nlmsg_end(skb, nlh);
+
+ return nlmsg_unicast(net->genl_sock, skb, snd_portid);
+}
+
static const struct genl_ops ncsi_ops[] = {
{
.cmd = NCSI_CMD_PKG_INFO,
@@ -386,6 +580,12 @@ static const struct genl_ops ncsi_ops[] = {
.doit = ncsi_clear_interface_nl,
.flags = GENL_ADMIN_PERM,
},
+ {
+ .cmd = NCSI_CMD_SEND_CMD,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_send_cmd_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
};
static struct genl_family ncsi_genl_family __ro_after_init = {
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
index 91a5c25..c4a4688 100644
--- a/net/ncsi/ncsi-netlink.h
+++ b/net/ncsi/ncsi-netlink.h
@@ -14,6 +14,18 @@
#include "internal.h"
+int ncsi_send_netlink_rsp(struct ncsi_request *nr,
+ struct ncsi_package *np,
+ struct ncsi_channel *nc);
+int ncsi_send_netlink_timeout(struct ncsi_request *nr,
+ struct ncsi_package *np,
+ struct ncsi_channel *nc);
+int ncsi_send_netlink_err(struct net_device *dev,
+ u32 snd_seq,
+ u32 snd_portid,
+ struct nlmsghdr *nlhdr,
+ int err);
+
int ncsi_init_netlink(struct net_device *dev);
int ncsi_unregister_netlink(struct net_device *dev);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index d66b347..dd931d2 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -16,9 +16,11 @@
#include <net/ncsi.h>
#include <net/net_namespace.h>
#include <net/sock.h>
+#include <net/genetlink.h>
#include "internal.h"
#include "ncsi-pkt.h"
+#include "ncsi-netlink.h"
static int ncsi_validate_rsp_pkt(struct ncsi_request *nr,
unsigned short payload)
@@ -32,15 +34,25 @@ static int ncsi_validate_rsp_pkt(struct ncsi_request *nr,
* before calling this function.
*/
h = (struct ncsi_rsp_pkt_hdr *)skb_network_header(nr->rsp);
- if (h->common.revision != NCSI_PKT_REVISION)
+
+ if (h->common.revision != NCSI_PKT_REVISION) {
+ netdev_dbg(nr->ndp->ndev.dev,
+ "NCSI: unsupported header revision\n");
return -EINVAL;
- if (ntohs(h->common.length) != payload)
+ }
+ if (ntohs(h->common.length) != payload) {
+ netdev_dbg(nr->ndp->ndev.dev,
+ "NCSI: payload length mismatched\n");
return -EINVAL;
+ }
/* Check on code and reason */
if (ntohs(h->code) != NCSI_PKT_RSP_C_COMPLETED ||
- ntohs(h->reason) != NCSI_PKT_RSP_R_NO_ERROR)
- return -EINVAL;
+ ntohs(h->reason) != NCSI_PKT_RSP_R_NO_ERROR) {
+ netdev_dbg(nr->ndp->ndev.dev,
+ "NCSI: non zero response/reason code\n");
+ return -EPERM;
+ }
/* Validate checksum, which might be zeroes if the
* sender doesn't support checksum according to NCSI
@@ -52,8 +64,11 @@ static int ncsi_validate_rsp_pkt(struct ncsi_request *nr,
checksum = ncsi_calculate_checksum((unsigned char *)h,
sizeof(*h) + payload - 4);
- if (*pchecksum != htonl(checksum))
+
+ if (*pchecksum != htonl(checksum)) {
+ netdev_dbg(nr->ndp->ndev.dev, "NCSI: checksum mismatched\n");
return -EINVAL;
+ }
return 0;
}
@@ -941,6 +956,26 @@ static int ncsi_rsp_handler_gpuuid(struct ncsi_request *nr)
return 0;
}
+static int ncsi_rsp_handler_netlink(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ int ret;
+
+ /* Find the package */
+ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+ &np, &nc);
+ if (!np)
+ return -ENODEV;
+
+ ret = ncsi_send_netlink_rsp(nr, np, nc);
+
+ return ret;
+}
+
static struct ncsi_rsp_handler {
unsigned char type;
int payload;
@@ -1043,6 +1078,17 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
netdev_warn(ndp->ndev.dev,
"NCSI: 'bad' packet ignored for type 0x%x\n",
hdr->type);
+
+ if (nr->flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) {
+ if (ret == -EPERM)
+ goto out_netlink;
+ else
+ ncsi_send_netlink_err(ndp->ndev.dev,
+ nr->snd_seq,
+ nr->snd_portid,
+ &nr->nlhdr,
+ ret);
+ }
goto out;
}
@@ -1052,6 +1098,17 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
netdev_err(ndp->ndev.dev,
"NCSI: Handler for packet type 0x%x returned %d\n",
hdr->type, ret);
+
+out_netlink:
+ if (nr->flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) {
+ ret = ncsi_rsp_handler_netlink(nr);
+ if (ret) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Netlink handler for packet type 0x%x returned %d\n",
+ hdr->type, ret);
+ }
+ }
+
out:
ncsi_free_request(nr);
return ret;
--
2.9.3
^ permalink raw reply related
* [PATCH] qmi_wwan: Added support for Gemalto's Cinterion ALASxx WWAN interface
From: Giacinto Cifelli @ 2018-10-10 18:05 UTC (permalink / raw)
To: netdev; +Cc: Giacinto Cifelli
Added support for Gemalto's Cinterion ALASxx WWAN interfaces
by adding QMI_FIXED_INTF with Cinterion's VID and PID.
Signed-off-by: Giacinto Cifelli <gciofono@gmail.com>
---
drivers/net/usb/qmi_wwan.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 533b6fb8d923..72a55b6b4211 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1241,6 +1241,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */
{QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */
{QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */
+ {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */
{QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */
{QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */
{QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/
--
2.17.1
^ permalink raw reply related
* Re: [PATCH] bpf: bpftool, add support for attaching programs to maps
From: John Fastabend @ 2018-10-10 17:53 UTC (permalink / raw)
To: Jakub Kicinski; +Cc: ast, daniel, netdev
In-Reply-To: <20181010101146.18e7d3ff@cakuba.netronome.com>
On 10/10/2018 10:11 AM, Jakub Kicinski wrote:
> On Wed, 10 Oct 2018 09:44:26 -0700, John Fastabend wrote:
>> Sock map/hash introduce support for attaching programs to maps. To
>> date I have been doing this with custom tooling but this is less than
>> ideal as we shift to using bpftool as the single CLI for our BPF uses.
>> This patch adds new sub commands 'attach' and 'detach' to the 'prog'
>> command to attach programs to maps and then detach them.
>>
>> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
>> ---
>> tools/bpf/bpftool/main.h | 1 +
>> tools/bpf/bpftool/prog.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 92 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
>> index 40492cd..9ceb2b6 100644
>> --- a/tools/bpf/bpftool/main.h
>> +++ b/tools/bpf/bpftool/main.h
>> @@ -137,6 +137,7 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
>> int do_cgroup(int argc, char **arg);
>> int do_perf(int argc, char **arg);
>> int do_net(int argc, char **arg);
>> +int do_attach_cmd(int argc, char **arg);
>
> Looks like a leftover?
>
Yeah original I made attach/detach its own top level command
but it seems better fit under prog.
[..]
>> + if (!REQ_ARGS(4)) {
>
> Hm, 4 or 5? id $prog $type id $map ?
>
Yep thanks.
[...]
>> +
>> + NEXT_ARG();
>
> nit: maybe NEXT_ARG() should be grouped with the code that consumes the
> parameter, i.e. new line after not before?
sure.
>
>> + mapfd = map_parse_fd(&argc, &argv);
>> + if (mapfd < 0)
>> + return mapfd;
>> +
>> + err = bpf_prog_attach(progfd, mapfd, attach_type, 0);
>> + if (err) {
>> + p_err("failed prog attach to map");
>> + return -EINVAL;
>> + }
>
> Could you plunk a
>
> if (json_output)
> jsonw_null(json_wtr);
>
> here to always produce valid JSON even for commands with no output
> today?
>
Makes sense.
> Same comments for detach.
[...]
> Would you mind updating the man page and the bash completions?
>
Will do this. Thanks.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox