* [PATCH net-2.6.25 6/10][NETNS][FRAGS]: Make the net.ipv4.ipfrag_timeout work in namespaces.
From: Pavel Emelyanov @ 2008-01-22 14:02 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List, devel
In-Reply-To: <4795F524.8060204@openvz.org>
Move it to the netns_frags, adjust the usage and
make the appropriate ctl table writable.
Now fragments that live in different namespaces can
live for different times.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/net/inet_frag.h | 4 +++-
net/ipv4/inet_fragment.c | 2 +-
net/ipv4/ip_fragment.c | 20 ++++++++++----------
net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++--
net/ipv6/reassembly.c | 6 +++---
5 files changed, 19 insertions(+), 17 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 6edce7b..f56e296 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -4,6 +4,9 @@
struct netns_frags {
int nqueues;
atomic_t mem;
+
+ /* sysctls */
+ int timeout;
};
struct inet_frag_queue {
@@ -29,7 +32,6 @@ struct inet_frag_queue {
struct inet_frags_ctl {
int high_thresh;
int low_thresh;
- int timeout;
int secret_interval;
};
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index ad79ae0..9da9679 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -206,7 +206,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
}
#endif
qp = qp_in;
- if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout))
+ if (!mod_timer(&qp->timer, jiffies + nf->timeout))
atomic_inc(&qp->refcnt);
atomic_inc(&qp->refcnt);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index c51e1a1..70d241c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -83,13 +83,6 @@ static struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
*/
.high_thresh = 256 * 1024,
.low_thresh = 192 * 1024,
-
- /*
- * Important NOTE! Fragment queue must be destroyed before MSL expires.
- * RFC791 is wrong proposing to prolongate timer each fragment arrival
- * by TTL.
- */
- .timeout = IP_FRAG_TIME,
.secret_interval = 10 * 60 * HZ,
};
@@ -287,7 +280,7 @@ static int ip_frag_reinit(struct ipq *qp)
{
struct sk_buff *fp;
- if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
+ if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
atomic_inc(&qp->q.refcnt);
return -ETIMEDOUT;
}
@@ -633,7 +626,7 @@ static struct ctl_table ip4_frags_ctl_table[] = {
{
.ctl_name = NET_IPV4_IPFRAG_TIME,
.procname = "ipfrag_time",
- .data = &ip4_frags_ctl.timeout,
+ .data = &init_net.ipv4.frags.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -672,7 +665,7 @@ static int ip4_frags_ctl_register(struct net *net)
table[0].mode &= ~0222;
table[1].mode &= ~0222;
- table[2].mode &= ~0222;
+ table[2].data = &net->ipv4.frags.timeout;
table[3].mode &= ~0222;
table[4].mode &= ~0222;
}
@@ -712,6 +705,13 @@ static inline void ip4_frags_ctl_unregister(struct net *net)
static int ipv4_frags_init_net(struct net *net)
{
+ /*
+ * Important NOTE! Fragment queue must be destroyed before MSL expires.
+ * RFC791 is wrong proposing to prolongate timer each fragment arrival
+ * by TTL.
+ */
+ net->ipv4.frags.timeout = IP_FRAG_TIME;
+
inet_frags_init_net(&net->ipv4.frags);
return ip4_frags_ctl_register(net);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index cb826be..92a311f 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -73,7 +73,6 @@ struct nf_ct_frag6_queue
static struct inet_frags_ctl nf_frags_ctl __read_mostly = {
.high_thresh = 256 * 1024,
.low_thresh = 192 * 1024,
- .timeout = IPV6_FRAG_TIMEOUT,
.secret_interval = 10 * 60 * HZ,
};
@@ -84,7 +83,7 @@ static struct netns_frags nf_init_frags;
struct ctl_table nf_ct_ipv6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
- .data = &nf_frags_ctl.timeout,
+ .data = &nf_init_frags.timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -712,6 +711,7 @@ int nf_ct_frag6_init(void)
nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
+ nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
inet_frags_init_net(&nf_init_frags);
inet_frags_init(&nf_frags);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0300dcb..9176136 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -650,7 +650,7 @@ static struct ctl_table ip6_frags_ctl_table[] = {
{
.ctl_name = NET_IPV6_IP6FRAG_TIME,
.procname = "ip6frag_time",
- .data = &init_net.ipv6.sysctl.frags.timeout,
+ .data = &init_net.ipv6.frags.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -681,7 +681,7 @@ static int ip6_frags_sysctl_register(struct net *net)
table[0].mode &= ~0222;
table[1].mode &= ~0222;
- table[2].mode &= ~0222;
+ table[2].data = &net->ipv6.frags.timeout;
table[3].mode &= ~0222;
}
@@ -724,7 +724,7 @@ static int ipv6_frags_init_net(struct net *net)
net->ipv6.sysctl.frags.high_thresh = 256 * 1024;
net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
- net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
+ net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
inet_frags_init_net(&net->ipv6.frags);
--
1.5.3.4
^ permalink raw reply related
* [PATCH net-2.6.25 5/10][NETNS][FRAGS]: Duplicate sysctl tables for new namespaces.
From: Pavel Emelyanov @ 2008-01-22 14:01 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List, devel
In-Reply-To: <4795F524.8060204@openvz.org>
Each namespace has to have its own tables to tune its
own parameters, so duplicate the tables and
register them.
All the tables in sub-namespaces are temporarily made
read-only.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/net/netns/ipv4.h | 1 +
include/net/netns/ipv6.h | 1 +
net/ipv4/ip_fragment.c | 42 +++++++++++++++++++++++++++++++++++++++---
net/ipv6/reassembly.c | 41 ++++++++++++++++++++++++++++++++++++++---
4 files changed, 79 insertions(+), 6 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 80680e0..15a0b05 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -16,6 +16,7 @@ struct sock;
struct netns_ipv4 {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
+ struct ctl_table_header *frags_hdr;
#endif
struct ipv4_devconf *devconf_all;
struct ipv4_devconf *devconf_dflt;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 057c8e4..87ab56a 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -12,6 +12,7 @@ struct ctl_table_header;
struct netns_sysctl_ipv6 {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *table;
+ struct ctl_table_header *frags_hdr;
#endif
struct inet_frags_ctl frags;
int bindv6only;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 4f01334..c51e1a1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -661,17 +661,53 @@ static struct ctl_table ip4_frags_ctl_table[] = {
static int ip4_frags_ctl_register(struct net *net)
{
+ struct ctl_table *table;
struct ctl_table_header *hdr;
- hdr = register_net_sysctl_table(net, net_ipv4_ctl_path,
- ip4_frags_ctl_table);
- return hdr == NULL ? -ENOMEM : 0;
+ table = ip4_frags_ctl_table;
+ if (net != &init_net) {
+ table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL);
+ if (table == NULL)
+ goto err_alloc;
+
+ table[0].mode &= ~0222;
+ table[1].mode &= ~0222;
+ table[2].mode &= ~0222;
+ table[3].mode &= ~0222;
+ table[4].mode &= ~0222;
+ }
+
+ hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
+ if (hdr == NULL)
+ goto err_reg;
+
+ net->ipv4.frags_hdr = hdr;
+ return 0;
+
+err_reg:
+ if (net != &init_net)
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void ip4_frags_ctl_unregister(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->ipv4.frags_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ipv4.frags_hdr);
+ kfree(table);
}
#else
static inline int ip4_frags_ctl_register(struct net *net)
{
return 0;
}
+
+static inline void ip4_frags_ctl_unregister(struct net *net)
+{
+}
#endif
static int ipv4_frags_init_net(struct net *net)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 241b2cc..0300dcb 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -670,17 +670,52 @@ static struct ctl_table ip6_frags_ctl_table[] = {
static int ip6_frags_sysctl_register(struct net *net)
{
+ struct ctl_table *table;
struct ctl_table_header *hdr;
- hdr = register_net_sysctl_table(net, net_ipv6_ctl_path,
- ip6_frags_ctl_table);
- return hdr == NULL ? -ENOMEM : 0;
+ table = ip6_frags_ctl_table;
+ if (net != &init_net) {
+ table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL);
+ if (table == NULL)
+ goto err_alloc;
+
+ table[0].mode &= ~0222;
+ table[1].mode &= ~0222;
+ table[2].mode &= ~0222;
+ table[3].mode &= ~0222;
+ }
+
+ hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table);
+ if (hdr == NULL)
+ goto err_reg;
+
+ net->ipv6.sysctl.frags_hdr = hdr;
+ return 0;
+
+err_reg:
+ if (net != &init_net)
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void ip6_frags_sysctl_unregister(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
+ kfree(table);
}
#else
static inline int ip6_frags_sysctl_register(struct net *net)
{
return 0;
}
+
+static inline void ip6_frags_sysctl_unregister(struct net *net)
+{
+}
#endif
static int ipv6_frags_init_net(struct net *net)
--
1.5.3.4
^ permalink raw reply related
* Re: [PATCH net-2.6.25 1/10][NETNS][FRAGS]: Move ctl tables around.
From: David Miller @ 2008-01-22 14:00 UTC (permalink / raw)
To: xemul; +Cc: netdev, devel
In-Reply-To: <4795F5CB.5080905@openvz.org>
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 22 Jan 2008 16:55:23 +0300
> This is a preparation for sysctl netns-ization.
> Move the ctl tables to the files, where the tuning
> variables reside. Plus make the helpers to register
> the tables.
>
> This will simplify the later patches and will keep
> similar things closer to each other.
>
> ipv4, ipv6 and conntrack_reasm are patched differently,
> but the result is that all the tables are in the appropriate files.
>
> Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Applied.
^ permalink raw reply
* [PATCH net-2.6.25 4/10][NETNS][FRAGS]: Make the mem counter per-namespace.
From: Pavel Emelyanov @ 2008-01-22 13:59 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List, devel
In-Reply-To: <4795F524.8060204@openvz.org>
This is also simple, but introduces more changes, since
the mem counter is altered in more places.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/net/inet_frag.h | 4 ++--
include/net/ip.h | 2 +-
include/net/ipv6.h | 2 +-
net/ipv4/inet_fragment.c | 21 +++++++++++----------
net/ipv4/ip_fragment.c | 29 +++++++++++++++--------------
net/ipv4/proc.c | 2 +-
net/ipv6/netfilter/nf_conntrack_reasm.c | 14 +++++++-------
net/ipv6/proc.c | 2 +-
net/ipv6/reassembly.c | 28 +++++++++++++++-------------
9 files changed, 54 insertions(+), 50 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index d36f3a6..6edce7b 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -3,6 +3,7 @@
struct netns_frags {
int nqueues;
+ atomic_t mem;
};
struct inet_frag_queue {
@@ -38,7 +39,6 @@ struct inet_frags {
rwlock_t lock;
u32 rnd;
int qsize;
- atomic_t mem;
struct timer_list secret_timer;
struct inet_frags_ctl *ctl;
@@ -60,7 +60,7 @@ void inet_frags_init_net(struct netns_frags *nf);
void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
void inet_frag_destroy(struct inet_frag_queue *q,
struct inet_frags *f, int *work);
-int inet_frag_evictor(struct inet_frags *f);
+int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
struct inet_frags *f, void *key, unsigned int hash);
diff --git a/include/net/ip.h b/include/net/ip.h
index 9ea1bc5..d41ff83 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -329,7 +329,7 @@ enum ip_defrag_users
};
int ip_defrag(struct sk_buff *skb, u32 user);
-int ip_frag_mem(void);
+int ip_frag_mem(struct net *net);
int ip_frag_nqueues(struct net *net);
/*
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index da1c089..fa80ea4 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -246,7 +246,7 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb);
int ip6_frag_nqueues(struct net *net);
-int ip6_frag_mem(void);
+int ip6_frag_mem(struct net *net);
#define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4fec0b9..ad79ae0 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -63,8 +63,6 @@ void inet_frags_init(struct inet_frags *f)
f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));
- atomic_set(&f->mem, 0);
-
setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
(unsigned long)f);
f->secret_timer.expires = jiffies + f->ctl->secret_interval;
@@ -75,6 +73,7 @@ EXPORT_SYMBOL(inet_frags_init);
void inet_frags_init_net(struct netns_frags *nf)
{
nf->nqueues = 0;
+ atomic_set(&nf->mem, 0);
}
EXPORT_SYMBOL(inet_frags_init_net);
@@ -107,13 +106,13 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
EXPORT_SYMBOL(inet_frag_kill);
-static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb,
- int *work)
+static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
+ struct sk_buff *skb, int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &f->mem);
+ atomic_sub(skb->truesize, &nf->mem);
if (f->skb_free)
f->skb_free(skb);
kfree_skb(skb);
@@ -123,22 +122,24 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
int *work)
{
struct sk_buff *fp;
+ struct netns_frags *nf;
BUG_TRAP(q->last_in & COMPLETE);
BUG_TRAP(del_timer(&q->timer) == 0);
/* Release all fragment data. */
fp = q->fragments;
+ nf = q->net;
while (fp) {
struct sk_buff *xp = fp->next;
- frag_kfree_skb(f, fp, work);
+ frag_kfree_skb(nf, f, fp, work);
fp = xp;
}
if (work)
*work -= f->qsize;
- atomic_sub(f->qsize, &f->mem);
+ atomic_sub(f->qsize, &nf->mem);
if (f->destructor)
f->destructor(q);
@@ -147,12 +148,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
}
EXPORT_SYMBOL(inet_frag_destroy);
-int inet_frag_evictor(struct inet_frags *f)
+int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
{
struct inet_frag_queue *q;
int work, evicted = 0;
- work = atomic_read(&f->mem) - f->ctl->low_thresh;
+ work = atomic_read(&nf->mem) - f->ctl->low_thresh;
while (work > 0) {
read_lock(&f->lock);
if (list_empty(&f->lru_list)) {
@@ -226,7 +227,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
return NULL;
f->constructor(q, arg);
- atomic_add(f->qsize, &f->mem);
+ atomic_add(f->qsize, &nf->mem);
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
atomic_set(&q->refcnt, 1);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cd8c830..4f01334 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -100,9 +100,9 @@ int ip_frag_nqueues(struct net *net)
return net->ipv4.frags.nqueues;
}
-int ip_frag_mem(void)
+int ip_frag_mem(struct net *net)
{
- return atomic_read(&ip4_frags.mem);
+ return atomic_read(&net->ipv4.frags.mem);
}
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
@@ -142,11 +142,12 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
}
/* Memory Tracking Functions. */
-static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
+static __inline__ void frag_kfree_skb(struct netns_frags *nf,
+ struct sk_buff *skb, int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &ip4_frags.mem);
+ atomic_sub(skb->truesize, &nf->mem);
kfree_skb(skb);
}
@@ -192,11 +193,11 @@ static void ipq_kill(struct ipq *ipq)
/* Memory limiting on fragments. Evictor trashes the oldest
* fragment queue until we are back under the threshold.
*/
-static void ip_evictor(void)
+static void ip_evictor(struct net *net)
{
int evicted;
- evicted = inet_frag_evictor(&ip4_frags);
+ evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
if (evicted)
IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
}
@@ -294,7 +295,7 @@ static int ip_frag_reinit(struct ipq *qp)
fp = qp->q.fragments;
do {
struct sk_buff *xp = fp->next;
- frag_kfree_skb(fp, NULL);
+ frag_kfree_skb(qp->q.net, fp, NULL);
fp = xp;
} while (fp);
@@ -431,7 +432,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->q.fragments = next;
qp->q.meat -= free_it->len;
- frag_kfree_skb(free_it, NULL);
+ frag_kfree_skb(qp->q.net, free_it, NULL);
}
}
@@ -451,7 +452,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
- atomic_add(skb->truesize, &ip4_frags.mem);
+ atomic_add(skb->truesize, &qp->q.net->mem);
if (offset == 0)
qp->q.last_in |= FIRST_IN;
@@ -534,12 +535,12 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &ip4_frags.mem);
+ atomic_add(clone->truesize, &qp->q.net->mem);
}
skb_shinfo(head)->frag_list = head->next;
skb_push(head, head->data - skb_network_header(head));
- atomic_sub(head->truesize, &ip4_frags.mem);
+ atomic_sub(head->truesize, &qp->q.net->mem);
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
@@ -549,7 +550,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
- atomic_sub(fp->truesize, &ip4_frags.mem);
+ atomic_sub(fp->truesize, &qp->q.net->mem);
}
head->next = NULL;
@@ -588,8 +589,8 @@ int ip_defrag(struct sk_buff *skb, u32 user)
net = skb->dev->nd_net;
/* Start by cleaning up the memory. */
- if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
- ip_evictor();
+ if (atomic_read(&net->ipv4.frags.mem) > ip4_frags_ctl.high_thresh)
+ ip_evictor(net);
/* Lookup (or create) queue header */
if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index bae3280..d63474c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -62,7 +62,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n",
- ip_frag_nqueues(&init_net), ip_frag_mem());
+ ip_frag_nqueues(&init_net), ip_frag_mem(&init_net));
return 0;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 0b9d009..cb826be 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -155,7 +155,7 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &nf_frags.mem);
+ atomic_sub(skb->truesize, &nf_init_frags.mem);
nf_skb_free(skb);
kfree_skb(skb);
}
@@ -177,7 +177,7 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
static void nf_ct_frag6_evictor(void)
{
- inet_frag_evictor(&nf_frags);
+ inet_frag_evictor(&nf_init_frags, &nf_frags);
}
static void nf_ct_frag6_expire(unsigned long data)
@@ -382,7 +382,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
skb->dev = NULL;
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
- atomic_add(skb->truesize, &nf_frags.mem);
+ atomic_add(skb->truesize, &nf_init_frags.mem);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -459,7 +459,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
- atomic_add(clone->truesize, &nf_frags.mem);
+ atomic_add(clone->truesize, &nf_init_frags.mem);
}
/* We have to remove fragment header from datagram and to relocate
@@ -473,7 +473,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
skb_shinfo(head)->frag_list = head->next;
skb_reset_transport_header(head);
skb_push(head, head->data - skb_network_header(head));
- atomic_sub(head->truesize, &nf_frags.mem);
+ atomic_sub(head->truesize, &nf_init_frags.mem);
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
@@ -483,7 +483,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
- atomic_sub(fp->truesize, &nf_frags.mem);
+ atomic_sub(fp->truesize, &nf_init_frags.mem);
}
head->next = NULL;
@@ -633,7 +633,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
goto ret_orig;
}
- if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh)
+ if (atomic_read(&nf_init_frags.mem) > nf_frags_ctl.high_thresh)
nf_ct_frag6_evictor();
fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 0b55785..c51cf34 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -44,7 +44,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(&rawv6_prot));
seq_printf(seq, "FRAG6: inuse %d memory %d\n",
- ip6_frag_nqueues(&init_net), ip6_frag_mem());
+ ip6_frag_nqueues(&init_net), ip6_frag_mem(&init_net));
return 0;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 77a8740..241b2cc 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -89,9 +89,9 @@ int ip6_frag_nqueues(struct net *net)
return net->ipv6.frags.nqueues;
}
-int ip6_frag_mem(void)
+int ip6_frag_mem(struct net *net)
{
- return atomic_read(&ip6_frags.mem);
+ return atomic_read(&net->ipv6.frags.mem);
}
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
@@ -149,11 +149,12 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a)
EXPORT_SYMBOL(ip6_frag_match);
/* Memory Tracking Functions. */
-static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
+static inline void frag_kfree_skb(struct netns_frags *nf,
+ struct sk_buff *skb, int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &ip6_frags.mem);
+ atomic_sub(skb->truesize, &nf->mem);
kfree_skb(skb);
}
@@ -183,11 +184,11 @@ static __inline__ void fq_kill(struct frag_queue *fq)
inet_frag_kill(&fq->q, &ip6_frags);
}
-static void ip6_evictor(struct inet6_dev *idev)
+static void ip6_evictor(struct net *net, struct inet6_dev *idev)
{
int evicted;
- evicted = inet_frag_evictor(&ip6_frags);
+ evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
if (evicted)
IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted);
}
@@ -389,7 +390,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->q.fragments = next;
fq->q.meat -= free_it->len;
- frag_kfree_skb(free_it, NULL);
+ frag_kfree_skb(fq->q.net, free_it, NULL);
}
}
@@ -409,7 +410,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
}
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
- atomic_add(skb->truesize, &ip6_frags.mem);
+ atomic_add(skb->truesize, &fq->q.net->mem);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -503,7 +504,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &ip6_frags.mem);
+ atomic_add(clone->truesize, &fq->q.net->mem);
}
/* We have to remove fragment header from datagram and to relocate
@@ -518,7 +519,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
skb_shinfo(head)->frag_list = head->next;
skb_reset_transport_header(head);
skb_push(head, head->data - skb_network_header(head));
- atomic_sub(head->truesize, &ip6_frags.mem);
+ atomic_sub(head->truesize, &fq->q.net->mem);
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
@@ -528,7 +529,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
- atomic_sub(fp->truesize, &ip6_frags.mem);
+ atomic_sub(fp->truesize, &fq->q.net->mem);
}
head->next = NULL;
@@ -600,8 +601,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
}
net = skb->dev->nd_net;
- if (atomic_read(&ip6_frags.mem) > init_net.ipv6.sysctl.frags.high_thresh)
- ip6_evictor(ip6_dst_idev(skb->dst));
+ if (atomic_read(&net->ipv6.frags.mem) >
+ init_net.ipv6.sysctl.frags.high_thresh)
+ ip6_evictor(net, ip6_dst_idev(skb->dst));
if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
ip6_dst_idev(skb->dst))) != NULL) {
--
1.5.3.4
^ permalink raw reply related
* [PATCH net-2.6.25 3/10][NETNS][FRAGS]: Make the nqueues counter per-namespace.
From: Pavel Emelyanov @ 2008-01-22 13:58 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List, devel
In-Reply-To: <4795F524.8060204@openvz.org>
This is simple - just move the variable from struct inet_frags
to struct netns_frags and adjust the usage appropriately.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/net/inet_frag.h | 4 +++-
include/net/ip.h | 2 +-
include/net/ipv6.h | 2 +-
net/ipv4/inet_fragment.c | 11 ++++++++---
net/ipv4/ip_fragment.c | 6 ++++--
net/ipv4/proc.c | 2 +-
net/ipv6/netfilter/nf_conntrack_reasm.c | 1 +
net/ipv6/proc.c | 2 +-
net/ipv6/reassembly.c | 6 ++++--
9 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 8ab6df6..d36f3a6 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -2,6 +2,7 @@
#define __NET_FRAG_H__
struct netns_frags {
+ int nqueues;
};
struct inet_frag_queue {
@@ -36,7 +37,6 @@ struct inet_frags {
struct hlist_head hash[INETFRAGS_HASHSZ];
rwlock_t lock;
u32 rnd;
- int nqueues;
int qsize;
atomic_t mem;
struct timer_list secret_timer;
@@ -55,6 +55,8 @@ struct inet_frags {
void inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);
+void inet_frags_init_net(struct netns_frags *nf);
+
void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
void inet_frag_destroy(struct inet_frag_queue *q,
struct inet_frags *f, int *work);
diff --git a/include/net/ip.h b/include/net/ip.h
index ff14fc8..9ea1bc5 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -330,7 +330,7 @@ enum ip_defrag_users
int ip_defrag(struct sk_buff *skb, u32 user);
int ip_frag_mem(void);
-int ip_frag_nqueues(void);
+int ip_frag_nqueues(struct net *net);
/*
* Functions provided by ip_forward.c
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 87ca1bf..da1c089 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -245,7 +245,7 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb);
-int ip6_frag_nqueues(void);
+int ip6_frag_nqueues(struct net *net);
int ip6_frag_mem(void);
#define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 158c5f6..4fec0b9 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -63,7 +63,6 @@ void inet_frags_init(struct inet_frags *f)
f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));
- f->nqueues = 0;
atomic_set(&f->mem, 0);
setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
@@ -73,6 +72,12 @@ void inet_frags_init(struct inet_frags *f)
}
EXPORT_SYMBOL(inet_frags_init);
+void inet_frags_init_net(struct netns_frags *nf)
+{
+ nf->nqueues = 0;
+}
+EXPORT_SYMBOL(inet_frags_init_net);
+
void inet_frags_fini(struct inet_frags *f)
{
del_timer(&f->secret_timer);
@@ -84,7 +89,7 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
write_lock(&f->lock);
hlist_del(&fq->list);
list_del(&fq->lru_list);
- f->nqueues--;
+ fq->net->nqueues--;
write_unlock(&f->lock);
}
@@ -206,7 +211,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &f->hash[hash]);
list_add_tail(&qp->lru_list, &f->lru_list);
- f->nqueues++;
+ nf->nqueues++;
write_unlock(&f->lock);
return qp;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 56211ef..cd8c830 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -95,9 +95,9 @@ static struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
static struct inet_frags ip4_frags;
-int ip_frag_nqueues(void)
+int ip_frag_nqueues(struct net *net)
{
- return ip4_frags.nqueues;
+ return net->ipv4.frags.nqueues;
}
int ip_frag_mem(void)
@@ -675,6 +675,8 @@ static inline int ip4_frags_ctl_register(struct net *net)
static int ipv4_frags_init_net(struct net *net)
{
+ inet_frags_init_net(&net->ipv4.frags);
+
return ip4_frags_ctl_register(net);
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index cb3787f..bae3280 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -62,7 +62,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n",
- ip_frag_nqueues(), ip_frag_mem());
+ ip_frag_nqueues(&init_net), ip_frag_mem());
return 0;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 18accd4..0b9d009 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -712,6 +712,7 @@ int nf_ct_frag6_init(void)
nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
+ inet_frags_init_net(&nf_init_frags);
inet_frags_init(&nf_frags);
return 0;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 6b0314e..0b55785 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -44,7 +44,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(&rawv6_prot));
seq_printf(seq, "FRAG6: inuse %d memory %d\n",
- ip6_frag_nqueues(), ip6_frag_mem());
+ ip6_frag_nqueues(&init_net), ip6_frag_mem());
return 0;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ab2d53b..77a8740 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -84,9 +84,9 @@ struct frag_queue
static struct inet_frags ip6_frags;
-int ip6_frag_nqueues(void)
+int ip6_frag_nqueues(struct net *net)
{
- return ip6_frags.nqueues;
+ return net->ipv6.frags.nqueues;
}
int ip6_frag_mem(void)
@@ -690,6 +690,8 @@ static int ipv6_frags_init_net(struct net *net)
net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
+ inet_frags_init_net(&net->ipv6.frags);
+
return ip6_frags_sysctl_register(net);
}
--
1.5.3.4
^ permalink raw reply related
* [PATCH net-2.6.25 2/10][NETNS][FRAGS]: Make the inet_frag_queue lookup work in namespaces.
From: Pavel Emelyanov @ 2008-01-22 13:57 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List, devel
In-Reply-To: <4795F524.8060204@openvz.org>
Since fragment management code is consolidated, we
cannot have the pointer from inet_frag_queue to
struct net, since we must know what kind of fragment
this is.
So, I introduce the netns_frags structure. This one
is currently empty, but will be eventually filled with
per-namespace attributes. Each inet_frag_queue is
tagged with this one.
The conntrack_reasm is not "netns-izated", so it has
one static netns_frags instance to keep working in
init namespace.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/net/inet_frag.h | 8 ++++++--
include/net/netns/ipv4.h | 4 ++++
include/net/netns/ipv6.h | 1 +
net/ipv4/inet_fragment.c | 27 +++++++++++++++------------
net/ipv4/ip_fragment.c | 8 +++++---
net/ipv6/netfilter/nf_conntrack_reasm.c | 3 ++-
net/ipv6/reassembly.c | 8 +++++---
7 files changed, 38 insertions(+), 21 deletions(-)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 954def4..8ab6df6 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -1,8 +1,12 @@
#ifndef __NET_FRAG_H__
#define __NET_FRAG_H__
+struct netns_frags {
+};
+
struct inet_frag_queue {
struct hlist_node list;
+ struct netns_frags *net;
struct list_head lru_list; /* lru list member */
spinlock_t lock;
atomic_t refcnt;
@@ -55,8 +59,8 @@ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
void inet_frag_destroy(struct inet_frag_queue *q,
struct inet_frags *f, int *work);
int inet_frag_evictor(struct inet_frags *f);
-struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
- unsigned int hash);
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
+ struct inet_frags *f, void *key, unsigned int hash);
static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
{
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 3872aa7..80680e0 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -5,6 +5,8 @@
#ifndef __NETNS_IPV4_H__
#define __NETNS_IPV4_H__
+#include <net/inet_frag.h>
+
struct ctl_table_header;
struct ipv4_devconf;
struct fib_rules_ops;
@@ -22,5 +24,7 @@ struct netns_ipv4 {
#endif
struct hlist_head *fib_table_hash;
struct sock *fibnl;
+
+ struct netns_frags frags;
};
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 06b4dc0..057c8e4 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -30,5 +30,6 @@ struct netns_ipv6 {
struct netns_sysctl_ipv6 sysctl;
struct ipv6_devconf *devconf_all;
struct ipv6_devconf *devconf_dflt;
+ struct netns_frags frags;
};
#endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 7379107..158c5f6 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -174,8 +174,9 @@ int inet_frag_evictor(struct inet_frags *f)
}
EXPORT_SYMBOL(inet_frag_evictor);
-static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
- struct inet_frags *f, unsigned int hash, void *arg)
+static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
+ struct inet_frag_queue *qp_in, struct inet_frags *f,
+ unsigned int hash, void *arg)
{
struct inet_frag_queue *qp;
#ifdef CONFIG_SMP
@@ -189,7 +190,7 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
* promoted read lock to write lock.
*/
hlist_for_each_entry(qp, n, &f->hash[hash], list) {
- if (f->match(qp, arg)) {
+ if (qp->net == nf && f->match(qp, arg)) {
atomic_inc(&qp->refcnt);
write_unlock(&f->lock);
qp_in->last_in |= COMPLETE;
@@ -210,7 +211,8 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
return qp;
}
-static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
+static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+ struct inet_frags *f, void *arg)
{
struct inet_frag_queue *q;
@@ -223,31 +225,32 @@ static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
atomic_set(&q->refcnt, 1);
+ q->net = nf;
return q;
}
-static struct inet_frag_queue *inet_frag_create(struct inet_frags *f,
- void *arg, unsigned int hash)
+static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
+ struct inet_frags *f, void *arg, unsigned int hash)
{
struct inet_frag_queue *q;
- q = inet_frag_alloc(f, arg);
+ q = inet_frag_alloc(nf, f, arg);
if (q == NULL)
return NULL;
- return inet_frag_intern(q, f, hash, arg);
+ return inet_frag_intern(nf, q, f, hash, arg);
}
-struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
- unsigned int hash)
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
+ struct inet_frags *f, void *key, unsigned int hash)
{
struct inet_frag_queue *q;
struct hlist_node *n;
read_lock(&f->lock);
hlist_for_each_entry(q, n, &f->hash[hash], list) {
- if (f->match(q, key)) {
+ if (q->net == nf && f->match(q, key)) {
atomic_inc(&q->refcnt);
read_unlock(&f->lock);
return q;
@@ -255,6 +258,6 @@ struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
}
read_unlock(&f->lock);
- return inet_frag_create(f, key, hash);
+ return inet_frag_create(nf, f, key, hash);
}
EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a53463e..56211ef 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -236,7 +236,7 @@ out:
/* Find the correct entry in the "incomplete datagrams" queue for
* this IP datagram, and create new one, if nothing is found.
*/
-static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
+static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
{
struct inet_frag_queue *q;
struct ip4_create_arg arg;
@@ -246,7 +246,7 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
arg.user = user;
hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
- q = inet_frag_find(&ip4_frags, &arg, hash);
+ q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
if (q == NULL)
goto out_nomem;
@@ -582,15 +582,17 @@ out_fail:
int ip_defrag(struct sk_buff *skb, u32 user)
{
struct ipq *qp;
+ struct net *net;
IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
+ net = skb->dev->nd_net;
/* Start by cleaning up the memory. */
if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
ip_evictor();
/* Lookup (or create) queue header */
- if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
+ if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
int ret;
spin_lock(&qp->q.lock);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d631631..18accd4 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -78,6 +78,7 @@ static struct inet_frags_ctl nf_frags_ctl __read_mostly = {
};
static struct inet_frags nf_frags;
+static struct netns_frags nf_init_frags;
#ifdef CONFIG_SYSCTL
struct ctl_table nf_ct_ipv6_sysctl_table[] = {
@@ -212,7 +213,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
arg.dst = dst;
hash = ip6qhashfn(id, src, dst);
- q = inet_frag_find(&nf_frags, &arg, hash);
+ q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
if (q == NULL)
goto oom;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1815ff0..ab2d53b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -234,7 +234,7 @@ out:
}
static __inline__ struct frag_queue *
-fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
+fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst,
struct inet6_dev *idev)
{
struct inet_frag_queue *q;
@@ -246,7 +246,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
arg.dst = dst;
hash = ip6qhashfn(id, src, dst);
- q = inet_frag_find(&ip6_frags, &arg, hash);
+ q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
if (q == NULL)
goto oom;
@@ -568,6 +568,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct net *net;
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
@@ -598,10 +599,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
+ net = skb->dev->nd_net;
if (atomic_read(&ip6_frags.mem) > init_net.ipv6.sysctl.frags.high_thresh)
ip6_evictor(ip6_dst_idev(skb->dst));
- if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
+ if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
ip6_dst_idev(skb->dst))) != NULL) {
int ret;
--
1.5.3.4
^ permalink raw reply related
* [PATCH net-2.6.25 1/10][NETNS][FRAGS]: Move ctl tables around.
From: Pavel Emelyanov @ 2008-01-22 13:55 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List, devel
In-Reply-To: <4795F524.8060204@openvz.org>
This is a preparation for sysctl netns-ization.
Move the ctl tables to the files, where the tuning
variables reside. Plus make the helpers to register
the tables.
This will simplify the later patches and will keep
similar things closer to each other.
ipv4, ipv6 and conntrack_reasm are patched differently,
but the result is all the tables are in appropriate files.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/net/ip.h | 5 --
include/net/ipv6.h | 1 -
include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 4 +-
net/ipv4/ip_fragment.c | 74 +++++++++++++++++++++++-
net/ipv4/sysctl_net_ipv4.c | 42 -------------
net/ipv6/af_inet6.c | 5 --
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 29 ---------
net/ipv6/netfilter/nf_conntrack_reasm.c | 31 ++++++++++-
net/ipv6/reassembly.c | 66 ++++++++++++++++++++-
net/ipv6/sysctl_net_ipv6.c | 40 +------------
10 files changed, 169 insertions(+), 128 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h
index 2ad4d2f..ff14fc8 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -179,11 +179,6 @@ extern int sysctl_ip_nonlocal_bind;
extern struct ctl_path net_ipv4_ctl_path[];
-/* From ip_fragment.c */
-struct inet_frags_ctl;
-extern struct inet_frags_ctl ip4_frags_ctl;
-extern int sysctl_ipfrag_max_dist;
-
/* From inetpeer.c */
extern int inet_peer_threshold;
extern int inet_peer_minttl;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 3712cae..87ca1bf 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -587,7 +587,6 @@ extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
#ifdef CONFIG_PROC_FS
extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
-extern void ipv6_frag_sysctl_init(struct net *net);
extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
extern int ac6_proc_init(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index f703533..abc55ad 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -16,6 +16,8 @@ extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
int (*okfn)(struct sk_buff *));
struct inet_frags_ctl;
-extern struct inet_frags_ctl nf_frags_ctl;
+
+#include <linux/sysctl.h>
+extern struct ctl_table nf_ct_ipv6_sysctl_table[];
#endif /* _NF_CONNTRACK_IPV6_H*/
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2143bf3..a53463e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -50,7 +50,7 @@
* as well. Or notify me, at least. --ANK
*/
-int sysctl_ipfrag_max_dist __read_mostly = 64;
+static int sysctl_ipfrag_max_dist __read_mostly = 64;
struct ipfrag_skb_cb
{
@@ -74,7 +74,7 @@ struct ipq {
struct inet_peer *peer;
};
-struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
+static struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
/*
* Fragment cache limits. We will commit 256K at one time. Should we
* cross that limit we will prune down to 192K. This should cope with
@@ -607,8 +607,78 @@ int ip_defrag(struct sk_buff *skb, u32 user)
return -ENOMEM;
}
+#ifdef CONFIG_SYSCTL
+static int zero;
+
+static struct ctl_table ip4_frags_ctl_table[] = {
+ {
+ .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
+ .procname = "ipfrag_high_thresh",
+ .data = &ip4_frags_ctl.high_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
+ .procname = "ipfrag_low_thresh",
+ .data = &ip4_frags_ctl.low_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV4_IPFRAG_TIME,
+ .procname = "ipfrag_time",
+ .data = &ip4_frags_ctl.timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ .strategy = &sysctl_jiffies
+ },
+ {
+ .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
+ .procname = "ipfrag_secret_interval",
+ .data = &ip4_frags_ctl.secret_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ .strategy = &sysctl_jiffies
+ },
+ {
+ .procname = "ipfrag_max_dist",
+ .data = &sysctl_ipfrag_max_dist,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = &zero
+ },
+ { }
+};
+
+static int ip4_frags_ctl_register(struct net *net)
+{
+ struct ctl_table_header *hdr;
+
+ hdr = register_net_sysctl_table(net, net_ipv4_ctl_path,
+ ip4_frags_ctl_table);
+ return hdr == NULL ? -ENOMEM : 0;
+}
+#else
+static inline int ip4_frags_ctl_register(struct net *net)
+{
+ return 0;
+}
+#endif
+
+static int ipv4_frags_init_net(struct net *net)
+{
+ return ip4_frags_ctl_register(net);
+}
+
void __init ipfrag_init(void)
{
+ ipv4_frags_init_net(&init_net);
ip4_frags.ctl = &ip4_frags_ctl;
ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 45536a9..82cdf23 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -284,22 +284,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = &proc_dointvec
},
{
- .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
- .procname = "ipfrag_high_thresh",
- .data = &ip4_frags_ctl.high_thresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
- .procname = "ipfrag_low_thresh",
- .data = &ip4_frags_ctl.low_thresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
.ctl_name = NET_IPV4_DYNADDR,
.procname = "ip_dynaddr",
.data = &sysctl_ip_dynaddr,
@@ -308,15 +292,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = &proc_dointvec
},
{
- .ctl_name = NET_IPV4_IPFRAG_TIME,
- .procname = "ipfrag_time",
- .data = &ip4_frags_ctl.timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies
- },
- {
.ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME,
.procname = "tcp_keepalive_time",
.data = &sysctl_tcp_keepalive_time,
@@ -659,23 +634,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = &proc_dointvec
},
{
- .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
- .procname = "ipfrag_secret_interval",
- .data = &ip4_frags_ctl.secret_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies
- },
- {
- .procname = "ipfrag_max_dist",
- .data = &sysctl_ipfrag_max_dist,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .extra1 = &zero
- },
- {
.ctl_name = NET_TCP_NO_METRICS_SAVE,
.procname = "tcp_no_metrics_save",
.data = &sysctl_tcp_nometrics_save,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 6738a7b..bddac0e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -721,10 +721,6 @@ static void cleanup_ipv6_mibs(void)
static int inet6_net_init(struct net *net)
{
net->ipv6.sysctl.bindv6only = 0;
- net->ipv6.sysctl.frags.high_thresh = 256 * 1024;
- net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
- net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
- net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
net->ipv6.sysctl.flush_delay = 0;
net->ipv6.sysctl.ip6_rt_max_size = 4096;
net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
@@ -734,7 +730,6 @@ static int inet6_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
- ipv6_frag_sysctl_init(net);
return 0;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index cf42f5c..2d7b024 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -297,35 +297,6 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
},
};
-#ifdef CONFIG_SYSCTL
-static ctl_table nf_ct_ipv6_sysctl_table[] = {
- {
- .procname = "nf_conntrack_frag6_timeout",
- .data = &nf_frags_ctl.timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
- .procname = "nf_conntrack_frag6_low_thresh",
- .data = &nf_frags_ctl.low_thresh,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
- .procname = "nf_conntrack_frag6_high_thresh",
- .data = &nf_frags_ctl.high_thresh,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- { .ctl_name = 0 }
-};
-#endif
-
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
#include <linux/netfilter/nfnetlink.h>
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index e170c67..d631631 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -70,7 +70,7 @@ struct nf_ct_frag6_queue
__u16 nhoffset;
};
-struct inet_frags_ctl nf_frags_ctl __read_mostly = {
+static struct inet_frags_ctl nf_frags_ctl __read_mostly = {
.high_thresh = 256 * 1024,
.low_thresh = 192 * 1024,
.timeout = IPV6_FRAG_TIMEOUT,
@@ -79,6 +79,35 @@ struct inet_frags_ctl nf_frags_ctl __read_mostly = {
static struct inet_frags nf_frags;
+#ifdef CONFIG_SYSCTL
+struct ctl_table nf_ct_ipv6_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_frag6_timeout",
+ .data = &nf_frags_ctl.timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
+ .procname = "nf_conntrack_frag6_low_thresh",
+ .data = &nf_frags_ctl.low_thresh,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
+ .procname = "nf_conntrack_frag6_high_thresh",
+ .data = &nf_frags_ctl.high_thresh,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { .ctl_name = 0 }
+};
+#endif
+
static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
struct in6_addr *daddr)
{
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4dfcddc..1815ff0 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -625,12 +625,70 @@ static struct inet6_protocol frag_protocol =
.flags = INET6_PROTO_NOPOLICY,
};
-void ipv6_frag_sysctl_init(struct net *net)
+#ifdef CONFIG_SYSCTL
+static struct ctl_table ip6_frags_ctl_table[] = {
+ {
+ .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
+ .procname = "ip6frag_high_thresh",
+ .data = &init_net.ipv6.sysctl.frags.high_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH,
+ .procname = "ip6frag_low_thresh",
+ .data = &init_net.ipv6.sysctl.frags.low_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV6_IP6FRAG_TIME,
+ .procname = "ip6frag_time",
+ .data = &init_net.ipv6.sysctl.frags.timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ .strategy = &sysctl_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL,
+ .procname = "ip6frag_secret_interval",
+ .data = &init_net.ipv6.sysctl.frags.secret_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ .strategy = &sysctl_jiffies
+ },
+ { }
+};
+
+static int ip6_frags_sysctl_register(struct net *net)
+{
+ struct ctl_table_header *hdr;
+
+ hdr = register_net_sysctl_table(net, net_ipv6_ctl_path,
+ ip6_frags_ctl_table);
+ return hdr == NULL ? -ENOMEM : 0;
+}
+#else
+static inline int ip6_frags_sysctl_register(struct net *net)
{
- if (net != &init_net)
- return;
+ return 0;
+}
+#endif
+static int ipv6_frags_init_net(struct net *net)
+{
ip6_frags.ctl = &net->ipv6.sysctl.frags;
+
+ net->ipv6.sysctl.frags.high_thresh = 256 * 1024;
+ net->ipv6.sysctl.frags.low_thresh = 192 * 1024;
+ net->ipv6.sysctl.frags.timeout = IPV6_FRAG_TIMEOUT;
+ net->ipv6.sysctl.frags.secret_interval = 10 * 60 * HZ;
+
+ return ip6_frags_sysctl_register(net);
}
int __init ipv6_frag_init(void)
@@ -641,6 +699,8 @@ int __init ipv6_frag_init(void)
if (ret)
goto out;
+ ipv6_frags_init_net(&init_net);
+
ip6_frags.hashfn = ip6_hashfn;
ip6_frags.constructor = ip6_frag_init;
ip6_frags.destructor = NULL;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7197eb7..408691b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -38,40 +38,6 @@ static ctl_table ipv6_table_template[] = {
.proc_handler = &proc_dointvec
},
{
- .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
- .procname = "ip6frag_high_thresh",
- .data = &init_net.ipv6.sysctl.frags.high_thresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH,
- .procname = "ip6frag_low_thresh",
- .data = &init_net.ipv6.sysctl.frags.low_thresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_IPV6_IP6FRAG_TIME,
- .procname = "ip6frag_time",
- .data = &init_net.ipv6.sysctl.frags.timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
- },
- {
- .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL,
- .procname = "ip6frag_secret_interval",
- .data = &init_net.ipv6.sysctl.frags.secret_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies
- },
- {
.ctl_name = NET_IPV6_MLD_MAX_MSF,
.procname = "mld_max_msf",
.data = &sysctl_mld_max_msf,
@@ -126,16 +92,12 @@ static int ipv6_sysctl_net_init(struct net *net)
ipv6_table[1].child = ipv6_icmp_table;
ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
- ipv6_table[3].data = &net->ipv6.sysctl.frags.high_thresh;
- ipv6_table[4].data = &net->ipv6.sysctl.frags.low_thresh;
- ipv6_table[5].data = &net->ipv6.sysctl.frags.timeout;
- ipv6_table[6].data = &net->ipv6.sysctl.frags.secret_interval;
/* We don't want this value to be per namespace, it should be global
to all namespaces, so make it read-only when we are not in the
init network namespace */
if (net != &init_net)
- ipv6_table[7].mode = 0444;
+ ipv6_table[3].mode = 0444;
net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
ipv6_table);
--
1.5.3.4
^ permalink raw reply related
* Re: [PATCH 1/3 v2][NET] gen_estimator: faster gen_kill_estimator
From: jamal @ 2008-01-22 13:54 UTC (permalink / raw)
To: Jarek Poplawski; +Cc: David Miller, netdev, slavon, kaber
In-Reply-To: <20080122122927.GE2079@ff.dom.local>
On Tue, 2008-22-01 at 13:29 +0100, Jarek Poplawski wrote:
> On Tue, Jan 22, 2008 at 06:42:07AM -0500, jamal wrote:
> ...
> > Jarek,
> >
> > That looks different from the suggestion from Dave.
>
> Hmm..., I'm not sure you mean my or your suggestion here, but you
> are right anyway...
Your idea to grab a pointer to the estimator so you can quickly find it
upon destruction is a good one.
The challenge was not to break the ABI to user space.
Dave suggested to use a different struct for the kernel side and
maintain the user one as is[1]. Your patch didnt do this, hence my
statement;->
> Maybe I miss something, but there still could be a lot of this walking
Indeed, that is possible in the case of many estimators configured with
the same interval - because they will all fall in the same table bucket
and the idx is not that useful to begin with.
I was wrong given the nature of interval - the majority of the users
will have an estimator interval of say 1 sec which will put everything
in one bucket still.
We could introduce a proper index that will allow proper distribution
and have that stored by the class. I am not sure i made sense.
But you are coding - and your idea sounds better.
cheers,
jamal
[1] This is _not uncommon_ (note the usage of double negation here for
emphasis;->) technique actually; ones that go further for example can be
found all over the net/sched code (struct tcf_police vs tc_police) etc.
^ permalink raw reply
* [PATCH net-2.6.25 0/10] Make fragments live in net namespaces
From: Pavel Emelyanov @ 2008-01-22 13:52 UTC (permalink / raw)
To: David Miller; +Cc: Linux Netdev List, devel
The overall design I propose is to keep the hash table
global and tag inet_frag_queue with the net. Since the
fragments hash is going to be re-sizable, this is OK to
keep fragments from different namespace in one hash.
To speedup the evicting process LRU list is made per
namespace.
As far as the CTL-tuned variables are concerned, the
timeout and thresholds are made per namespace, since
they have the per namespace sense, but the secret rebuild
interval is read-only in sub-namespaces.
Since fragment management code is consolidated for ipv4
and ipv6 I make them all in one go. The conntrack_reasm
netns-ization is not done - we have to make at least the
core netfilter per namespace first, but this reasm code
is patched to keep working in the initial namespace.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
^ permalink raw reply
* Re: [PATCH 1/3 v2][NET] gen_estimator: faster gen_kill_estimator
From: Jarek Poplawski @ 2008-01-22 12:29 UTC (permalink / raw)
To: jamal; +Cc: David Miller, netdev, slavon, kaber
In-Reply-To: <1201002127.4443.32.camel@localhost>
On Tue, Jan 22, 2008 at 06:42:07AM -0500, jamal wrote:
...
> Jarek,
>
> That looks different from the suggestion from Dave.
Hmm..., I'm not sure you mean my or your suggestion here, but you
are right anyway...
> May i throw in another bone? Theoretically i can see why it would be a
> really bad idea to walk 50K estimators every time you delete one - which
> is horrible if you are trying to destroy the say 50K of them and gets
> worse as the number of schedulers with 50K classes goes up.
>
> But i am wondering why a simpler list couldnt be walked, meaning:
>
> In gen_kill_estimator(), instead of:
>
> for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
>
> Would deriving a better initial index be a big improvement?
> for (e = elist[est->interval].list; e; e = e->next) {
Maybe I miss something, but there still could be a lot of this walking
and IMHO any such longer waiting with BHs disabled is hard to accept
with current memory sizes and low-latencies prices. And currently time
seems to be even more precious here: RCU can't even free any
gen_estimator memory during such large qdisc with classes deletion.
Thanks,
Jarek P.
^ permalink raw reply
* forcedeth oops
From: Andrew Brooks @ 2008-01-22 11:54 UTC (permalink / raw)
To: netdev
Hello
I'm getting an oops in forcedeth whenever I shutdown, details below.
I've tried kernel 2.6.16.59 and the latest forcedeth.c from nvidia.com
which is package-1.23 version-0.62 date-2007/04/27.
How can I download the latest forcedeth.c (including 2008-01-13 patches) ?
It's not in the latest snapshot linux-2.6.24-rc8.
Also, why is the version on nvidia.com not just older than the one in
the kernel, but it appears to have forked back in May 2006. Has there
been independent development on each version? They should be the same!
Here's the diff:
< * 0.56: 22 Mar 2006: Additional ethtool and moduleparam support.
< * 0.57: 14 May 2006: Moved mac address writes to nv_probe and nv_remove.
< * 0.58: 20 May 2006: Optimized rx and tx data paths.
< * 0.59: 31 May 2006: Added support for sideband management unit.
< * 0.60: 31 May 2006: Added support for recoverable error.
< * 0.61: 18 Jul 2006: Added support for suspend/resume.
< * 0.62: 16 Jan 2007: Fixed statistics, mgmt communication, and low phy speed on S5.
---
> * 0.56: 22 Mar 2006: Additional ethtool config and moduleparam support.
> * 0.57: 14 May 2006: Mac address set in probe/remove and order corrections.
> * 0.58: 30 Oct 2006: Added support for sideband management unit.
> * 0.59: 30 Oct 2006: Added support for recoverable error.
> * 0.60: 20 Jan 2007: Code optimizations for rings, rx & tx data paths, and stats.
Here's the details of the oops:
md: md0 switched to read-only mode.
Unable to handle kernel NULL pointer dereference at virtual address 00000000
printing eip:
f8ccdd55
*pde = 36c6a001
Oops: 0000 [#1]
SMP
Modules linked in: nvidia ... forcedeth ... sata_nv
CPU: 1
EIP:
EFLAGS: 00010286 (2.6.16.59 #1)
EIP is at nv_suspend+0x85/0x350 [forcedeth]
eax:
esi:
ds:
Process reboot
Stack:
Call Trace:
show_stack_log
show_registers
die
do_page_fault
error_code
nv_reboot_handler
notifier_call_chain
kernel_restart_prepare
kernel_restart
sys_reboot
sysenter_past_esp
Code: 8b 8c 3a 98 01 00 00 01 c8 8b ...
INIT: no more processes left in this runlevel
Andrew
^ permalink raw reply
* Re: [PATCH 1/3 v2][NET] gen_estimator: faster gen_kill_estimator
From: jamal @ 2008-01-22 11:42 UTC (permalink / raw)
To: Jarek Poplawski; +Cc: David Miller, netdev, slavon, kaber
In-Reply-To: <20080122072152.GA977@ff.dom.local>
On Tue, 2008-22-01 at 08:21 +0100, Jarek Poplawski wrote:
> On 22-01-2008 01:29, David Miller wrote:
> ...
> > Fix this right, make a structure like:
> >
> > struct kernel_gnet_stats_rate_est {
> > struct gnet_stats_rate_est est;
> > void *gen_estimator;
> > }
> >
> > And update all the code as needed.
>
> Thanks!
> I'll try this...
Jarek,
That looks different from the suggestion from Dave.
May i throw in another bone? Theoretically i can see why it would be a
really bad idea to walk 50K estimators every time you delete one - which
is horrible if you are trying to destroy the say 50K of them and gets
worse as the number of schedulers with 50K classes goes up.
But i am wondering why a simpler list couldnt be walked, meaning:
In gen_kill_estimator(), instead of:
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
Would deriving a better initial index be a big improvement?
for (e = elist[est->interval].list; e; e = e->next) {
cheers,
jamal
^ permalink raw reply
* Re: [PATCH] bluetooth : move children of connection device to NULL before connection down
From: Marcel Holtmann @ 2008-01-22 11:39 UTC (permalink / raw)
To: Dave Young
Cc: David Miller, netdev, linux-kernel, bluez-devel, cornelia.huck,
gombasg, htejun, viro, kay.sievers, greg
In-Reply-To: <a8e1da0801220024u30e9c814va74e44252fc8b11e@mail.gmail.com>
Hi Dave,
> could you tell something more about your coding style?
> I would like to submit patches about bluetooth according to your style
> later, if I have any.
>
> Maybe you could put it on the bluez web site or anywhere.
it closely follows the kernel coding style as laid out in the kernel
documentation. However there are some minor style things, that I am
going to enforce from time to time. Like having the container_of or
get_user_data calls at the top of the variable declaration. This has
never formalized as far as I recall, but makes from my point of view the
code clearer and easier to read.
Some other times I like an extra empty line to more visual separate
different code blocks. For this some people might agree with me others
might disagree. It is fully a personal more liking one way over the
other.
When it comes to indentation and placement of braces etc. it is 100% the
kernel coding style and nothing else. If not, then it needs fixing and
is an oversight from the old days.
Regards
Marcel
^ permalink raw reply
* [Bug 9750] dev: avoid a race that triggers assertion failure
From: Matti Linnanvuori @ 2008-01-22 11:27 UTC (permalink / raw)
To: netdev, jgarzik; +Cc: bugme-daemon
From: Matti Linnanvuori <mattilinnanvuori@yahoo.com>
There is a race in Linux kernel file net/core/dev.c, function dev_close.
The function calls function dev_deactivate, which calls function
dev_watchdog_down that deletes the watchdog timer. However, after that, a
driver can call netif_carrier_on, which calls function
__netdev_watchdog_up that can add the watchdog timer again. Function
unregister_netdevice calls function dev_shutdown that traps the bug
!timer_pending(&dev->watchdog_timer).
Signed-off-by: Matti Linnanvuori <mattilinnanvuori@yahoo.com>
---
--- linux-2.6.23.8/net/core/dev.c 2007-11-16 20:14:27.000000000 +0200
+++ linux-2.6.23.15/net/core/dev.c 2008-01-22 13:16:12.347125794 +0200
@@ -1013,8 +1013,6 @@ int dev_close(struct net_device *dev)
*/
raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
- dev_deactivate(dev);
-
clear_bit(__LINK_STATE_START, &dev->state);
/* Synchronize to scheduled poll. We cannot touch poll list,
@@ -1029,6 +1027,8 @@ int dev_close(struct net_device *dev)
msleep(1);
}
+ dev_deactivate(dev);
+
/*
* Call the device specific close. This cannot fail.
* Only if device is UP
____________________________________________________________________________________
Looking for last minute shopping deals?
Find them fast with Yahoo! Search. http://tools.search.yahoo.com/newsearch/category.php?category=shopping
^ permalink raw reply
* [DST]: shrinks sizeof(struct rtable) by 64 bytes on x86_64
From: Eric Dumazet @ 2008-01-22 10:50 UTC (permalink / raw)
To: David Miller; +Cc: netdev@vger.kernel.org
On x86_64, sizeof(struct rtable) is 0x148, which is rounded up to 0x180
bytes by SLAB allocator.
We can reduce this to exactly 0x140 bytes, without alignment overhead,
and store 12 struct rtable per PAGE instead of 10.
rate_tokens is currently defined as an "unsigned long", while its content
should not exceed 6*HZ. It can safely be converted to an unsigned int.
Moving tclassid right after rate_tokens to fill the 4 bytes hole permits
to save 8 bytes on 'struct dst_entry', which finally permits to save 8
bytes on 'struct rtable'
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
diff --git a/include/net/dst.h b/include/net/dst.h
index c45dcc3..e3ac7d0 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -56,7 +56,11 @@ struct dst_entry
struct dst_entry *path;
unsigned long rate_last; /* rate limiting for ICMP */
- unsigned long rate_tokens;
+ unsigned int rate_tokens;
+
+#ifdef CONFIG_NET_CLS_ROUTE
+ __u32 tclassid;
+#endif
struct neighbour *neighbour;
struct hh_cache *hh;
@@ -65,10 +69,6 @@ struct dst_entry
int (*input)(struct sk_buff*);
int (*output)(struct sk_buff*);
-#ifdef CONFIG_NET_CLS_ROUTE
- __u32 tclassid;
-#endif
-
struct dst_ops *ops;
unsigned long lastuse;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7ed8c50..1dbe89c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -275,18 +275,19 @@ static inline void icmp_xmit_unlock(void)
#define XRLIM_BURST_FACTOR 6
int xrlim_allow(struct dst_entry *dst, int timeout)
{
- unsigned long now;
+ unsigned long now, token = dst->rate_tokens;
int rc = 0;
now = jiffies;
- dst->rate_tokens += now - dst->rate_last;
+ token += now - dst->rate_last;
dst->rate_last = now;
- if (dst->rate_tokens > XRLIM_BURST_FACTOR * timeout)
- dst->rate_tokens = XRLIM_BURST_FACTOR * timeout;
- if (dst->rate_tokens >= timeout) {
- dst->rate_tokens -= timeout;
+ if (token > XRLIM_BURST_FACTOR * timeout)
+ token = XRLIM_BURST_FACTOR * timeout;
+ if (token >= timeout) {
+ token -= timeout;
rc = 1;
}
+ dst->rate_tokens = token;
return rc;
}
^ permalink raw reply related
* Re: 2.6.24-rc8-mm1 : net tcp_input.c warnings
From: Ilpo Järvinen @ 2008-01-22 10:47 UTC (permalink / raw)
To: Dave Young; +Cc: LKML, David Miller, Netdev, Andrew Morton
In-Reply-To: <a8e1da0801220109v6bf8931ev50f2210402c3ba41@mail.gmail.com>
[-- Attachment #1: Type: TEXT/PLAIN, Size: 11373 bytes --]
On Tue, 22 Jan 2008, Dave Young wrote:
> On Jan 22, 2008 12:37 PM, Dave Young <hidave.darkstar@gmail.com> wrote:
> >
> > On Jan 22, 2008 5:14 AM, Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> wrote:
> > >
> > > On Mon, 21 Jan 2008, Dave Young wrote:
> > >
> > > > Please see the kernel messages following,(trigged while using some qemu session)
> > > > BTW, seems there's some e100 error message as well.
> > > >
> > > > PCI: Setting latency timer of device 0000:00:1b.0 to 64
> > > > e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI
> > > > e100: Copyright(c) 1999-2006 Intel Corporation
> > > > ACPI: PCI Interrupt 0000:03:08.0[A] -> GSI 20 (level, low) -> IRQ 20
> > > > modprobe:2331 conflicting cache attribute efaff000-efb00000 uncached<->default
> > > > e100: 0000:03:08.0: e100_probe: Cannot map device registers, aborting.
> > > > ACPI: PCI interrupt for device 0000:03:08.0 disabled
> > > > e100: probe of 0000:03:08.0 failed with error -12
> > > > eth0: setting full-duplex.
> > > > ------------[ cut here ]------------
> > > > WARNING: at net/ipv4/tcp_input.c:2169 tcp_mark_head_lost+0x121/0x150()
> > > > Modules linked in: snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss eeprom e100 psmouse snd_hda_intel snd_pcm snd_timer btusb rtc_cmos thermal bluetooth rtc_core serio_raw intel_agp button processor sg snd rtc_lib i2c_i801 evdev agpgart soundcore dcdbas 3c59x pcspkr snd_page_alloc
> > > > Pid: 0, comm: swapper Not tainted 2.6.24-rc8-mm1 #4
> > > > [<c0132100>] ? printk+0x0/0x20
> > > > [<c0131834>] warn_on_slowpath+0x54/0x80
> > > > [<c03e8df8>] ? ip_finish_output+0x128/0x2e0
> > > > [<c03e9527>] ? ip_output+0xe7/0x100
> > > > [<c03e8a88>] ? ip_local_out+0x18/0x20
> > > > [<c03e991c>] ? ip_queue_xmit+0x3dc/0x470
> > > > [<c043641e>] ? _spin_unlock_irqrestore+0x5e/0x70
> > > > [<c0186be1>] ? check_pad_bytes+0x61/0x80
> > > > [<c03f6031>] tcp_mark_head_lost+0x121/0x150
> > > > [<c03f60ac>] tcp_update_scoreboard+0x4c/0x170
> > > > [<c03f6e0a>] tcp_fastretrans_alert+0x48a/0x6b0
> > > > [<c03f7d93>] tcp_ack+0x1b3/0x3a0
> > > > [<c03fa14b>] tcp_rcv_established+0x3eb/0x710
> > > > [<c04015c5>] tcp_v4_do_rcv+0xe5/0x100
> > > > [<c0401bbb>] tcp_v4_rcv+0x5db/0x660
> > >
> > > Doh, once more these S+L things..., the rest are symptom of the first
> > > problem.
> >
> > What is the S+L thing? Could you explain a bit?
It means that one of the skbs is both SACKed and marked as LOST at the
same time in the counters (might be due to miscount of lost/sacked_out
too, not necessarily in the ->sacked bits). Such state is logically
invalid because it would mean that the sender thinks that the same packet
both reached the receiver and is lost in the network.
Traditionally TCP has just silently "corrected" over-estimates
(sacked_out+lost_out > packets_out). I changed this couple of releases ago
because those over-estimates often are due to bugs that should be fixed
(there have been couple of them but it has been very quiet on this front
long time, months or even half year already; but I might have broken
something with the early Dec changes).
These problems may originate from a bug that occurred a number of ACKs
before the WARN_ON triggered, therefore they are a bit tricky to track,
those WARN_ON serve just for alerting purposes and usually do not point
out where the bug actually occurred.
I usually just asked people to include an exhaustive verifier which compares
->sacked bitmaps with sacked/lost_out counters and report immediately when
the problem shows up, rather than waiting for the cheaper S+L check we do
in the WARN_ON to trigger. I tried to collect tracking patch from the
previous efforts (hopefully got it right after modifications).
> > I'm a bit worried about its
> > > reproducibility if it takes this far to see it...
> > >
>
> It's trigged again in my pc, just while using firefox.
...Good, then there's some chance to catch it.
--
i.
[PATCH] [TCP]: debug S+L
---
include/net/tcp.h | 8 +++-
net/ipv4/tcp_input.c | 6 +++
net/ipv4/tcp_ipv4.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++
net/ipv4/tcp_output.c | 21 +++++++---
4 files changed, 129 insertions(+), 7 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7de4ea3..0685035 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -272,6 +272,8 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val)
#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
+extern void tcp_verify_wq(struct sock *sk);
+
extern void tcp_v4_err(struct sk_buff *skb, u32);
extern void tcp_shutdown (struct sock *sk, int how);
@@ -768,7 +770,11 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
}
/* Use define here intentionally to get WARN_ON location shown at the caller */
-#define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out)
+#define tcp_verify_left_out(tp) \
+ do { \
+ WARN_ON(tcp_left_out(tp) > tp->packets_out); \
+ tcp_verify_wq((struct sock *)tp); \
+ } while(0)
extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fa2c85c..0bda0e1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2645,6 +2645,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
tcp_update_scoreboard(sk, fast_rexmit);
tcp_cwnd_down(sk, flag);
+
+ WARN_ON(tcp_write_queue_head(sk) == NULL);
+ WARN_ON(!tp->packets_out);
+
tcp_xmit_retransmit_queue(sk);
}
@@ -2848,6 +2852,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
tcp_clear_all_retrans_hints(tp);
}
+ tcp_verify_left_out(tp);
+
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9aea88b..21f5888 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -108,6 +108,107 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};
+void tcp_print_queue(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb;
+ char s[50+1];
+ char h[50+1];
+ int idx = 0;
+ int i;
+
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
+
+ for (i = 0; i < tcp_skb_pcount(skb); i++) {
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) {
+ s[idx] = 'S';
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
+ s[idx] = 'B';
+
+ } else if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+ s[idx] = 'L';
+ } else {
+ s[idx] = ' ';
+ }
+ if (s[idx] != ' ' && skb->len < tp->mss_cache)
+ s[idx] += 'a' - 'A';
+
+ if (i == 0) {
+ if (TCP_SKB_CB(skb)->seq == tcp_highest_sack_seq(tp))
+ h[idx] = 'h';
+ else
+ h[idx] = '+';
+ } else {
+ h[idx] = '-';
+ }
+
+ if (++idx >= 50) {
+ s[idx] = 0;
+ h[idx] = 0;
+ printk(KERN_ERR "TCP wq(s) %s\n", s);
+ printk(KERN_ERR "TCP wq(h) %s\n", h);
+ idx = 0;
+ }
+ }
+ }
+ if (idx) {
+ s[idx] = '<';
+ s[idx+1] = 0;
+ h[idx] = '<';
+ h[idx+1] = 0;
+ printk(KERN_ERR "TCP wq(s) %s\n", s);
+ printk(KERN_ERR "TCP wq(h) %s\n", h);
+ }
+ printk(KERN_ERR "l%u s%u f%u p%u seq: su%u hs%u sn%u\n",
+ tp->lost_out, tp->sacked_out, tp->fackets_out, tp->packets_out,
+ tp->snd_una, tcp_highest_sack_seq(tp), tp->snd_nxt);
+}
+
+void tcp_verify_wq(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 lost = 0;
+ u32 sacked = 0;
+ u32 packets = 0;
+ struct sk_buff *skb;
+
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
+
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) {
+ sacked += tcp_skb_pcount(skb);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
+ printk(KERN_ERR "Sacked bitmap S+L: %u %u-%u/%u\n",
+ TCP_SKB_CB(skb)->sacked,
+ TCP_SKB_CB(skb)->end_seq - tp->snd_una,
+ TCP_SKB_CB(skb)->seq - tp->snd_una,
+ tp->snd_una);
+ }
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
+ lost += tcp_skb_pcount(skb);
+
+ packets += tcp_skb_pcount(skb);
+ }
+
+ WARN_ON(lost != tp->lost_out);
+ WARN_ON(sacked != tp->sacked_out);
+ WARN_ON(packets != tp->packets_out);
+ if ((lost != tp->lost_out) ||
+ (sacked != tp->sacked_out) ||
+ (packets != tp->packets_out)) {
+ printk(KERN_ERR "P: %u L: %u vs %u S: %u vs %u w: %u-%u (%u)\n",
+ tp->packets_out,
+ lost, tp->lost_out,
+ sacked, tp->sacked_out,
+ tp->snd_una, tp->snd_nxt,
+ tp->rx_opt.sack_ok);
+ tcp_print_queue(sk);
+ }
+}
+
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
return inet_csk_get_port(&tcp_hashinfo, sk, snum,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 89f0188..648340f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -779,10 +779,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
tp->lost_out -= diff;
/* Adjust Reno SACK estimate. */
- if (tcp_is_reno(tp) && diff > 0) {
+ if (tcp_is_reno(tp) && diff > 0)
tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
- tcp_verify_left_out(tp);
- }
+
tcp_adjust_fackets_out(sk, skb, diff);
}
@@ -790,6 +789,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
skb_header_release(buff);
tcp_insert_write_queue_after(skb, buff, sk);
+ tcp_verify_left_out(tp);
+
return 0;
}
@@ -1463,6 +1464,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
} else if (result > 0) {
sent_pkts = 1;
}
+ tcp_verify_left_out(tp);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
@@ -1764,6 +1766,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
tcp_clear_retrans_hints_partial(tp);
sk_wmem_free_skb(sk, next_skb);
+ tcp_verify_left_out(tp);
}
/* Do a simple retransmit without using the backoff mechanisms in
@@ -1795,13 +1798,13 @@ void tcp_simple_retransmit(struct sock *sk)
}
}
+ tcp_verify_left_out(tp);
+
tcp_clear_all_retrans_hints(tp);
if (!lost)
return;
- tcp_verify_left_out(tp);
-
/* Don't muck with the congestion window here.
* Reason is that we do not increase amount of _data_
* in network, but units changed and effective
@@ -1970,8 +1973,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
* packet to be MSS sized and all the
* packet counting works out.
*/
- if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+ if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) {
+ tcp_verify_left_out(tp);
return;
+ }
if (sacked & TCPCB_LOST) {
if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
@@ -1997,6 +2002,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
}
}
+ tcp_verify_left_out(tp);
+
/* OK, demanded retransmission is finished. */
/* Forward retransmissions are possible only during Recovery. */
@@ -2054,6 +2061,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
}
+
+ tcp_verify_left_out(tp);
}
/* Send a fin. The caller locks the socket for us. This cannot be
--
1.5.2.2
^ permalink raw reply related
* Re: [GIT PULL] [IPV6,IPV4]: Fix several sparse warnings.
From: David Miller @ 2008-01-22 10:44 UTC (permalink / raw)
To: dada1; +Cc: yoshfuji, netdev
In-Reply-To: <20080122110312.10445043.dada1@cosmosbay.com>
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 22 Jan 2008 11:03:12 +0100
> On Tue, 22 Jan 2008 18:56:32 +0900 (JST)
> YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org> wrote:
>
> > @@ -418,7 +418,7 @@ out:
> >
> > void udp_err(struct sk_buff *skb, u32 info)
> > {
> > - return __udp4_lib_err(skb, info, udp_hash);
> > + __udp4_lib_err(skb, info, udp_hash);
> > }
>
> Hum... On this one, I would say Sparse is picky, not to say buggy :(
Agreed, but making this change is harmless :-)
^ permalink raw reply
* Re: [GIT PULL] [IPV6,IPV4]: Fix several sparse warnings.
From: David Miller @ 2008-01-22 10:42 UTC (permalink / raw)
To: yoshfuji; +Cc: netdev
In-Reply-To: <20080122.185632.09970660.yoshfuji@linux-ipv6.org>
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Tue, 22 Jan 2008 18:56:32 +0900 (JST)
> Dave, please consider pulling following changes on top of net-2.6.25 tree:
> git://git.linux-ipv6.org/gitroot/yoshfuji/linux-2.6-dev.git net-2.6-dev-20080122
Pulled, thank you.
^ permalink raw reply
* Re: [PATCH 2/3] virtio: Net header needs gso_hdr_len
From: Herbert Xu @ 2008-01-22 10:36 UTC (permalink / raw)
To: Rusty Russell; +Cc: netdev, virtualization
In-Reply-To: <200801161519.03339.rusty@rustcorp.com.au>
On Wed, Jan 16, 2008 at 03:19:03PM +1100, Rusty Russell wrote:
> > > It's far easier to deal with GSO if we don't have to parse the packet
> > > to figure out the header length. Add the field to the virtio_net_hdr
> > > struct (and fix the spaces that somehow crept in there).
>
> > Why do we need this? When receiving GSO packets from an untrusted
> > source the network stack will fill in the transport header offset
> > after verifying that the headers are sane.
>
> Thanks for clarifying; it simplifies things.
Actually now that I've tried your test program I can see that this
field exists not because of GSO, but because of SG. It tells you
how many bytes you want to put in the skb head as opposed to the
frag array.
So this field is fine with me as long as it is named as such to
avoid confusion since it really has nothing to do with GSO as you
also need it for SG with large MTUs.
I think this is more flexible than the Xen approach where this is
essentially hard-coded to 64 bytes.
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [IPV4] ip_gre: should take care of CONFIG_IPV6_MODULE
From: David Miller @ 2008-01-22 10:22 UTC (permalink / raw)
To: kaber; +Cc: dada1, netdev
In-Reply-To: <4795A786.5080606@trash.net>
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 22 Jan 2008 09:21:26 +0100
> Eric Dumazet wrote:
> > If IPV6 is configured as a module, GRE code misses some IPV6 parts.
>
>
> I believe this is intentional to avoid a runtime dependency on ipv6.
> Fixing this without pulling in the ipv6 module would be preferable.
Unfortunately this is true.
The only symbol it really needs that isn't provided statically
is icmpv6_send() which is very unfortunate.
Other things it wants like ipv6_addr_type() are already provided
statically in net/ipv6/addrconf_core.c and the appropriate
net/ipv6/Makefile:obj-y rules.
^ permalink raw reply
* Re: RFC: igb: Intel 82575 gigabit ethernet driver (take #3)
From: Jeff Garzik @ 2008-01-22 10:05 UTC (permalink / raw)
To: Kok, Auke
Cc: NetDev, Arjan van de Ven, Jesse Brandeburg, Ronciak, John,
Andrew Morton
In-Reply-To: <4786AB0C.6010202@intel.com>
Kok, Auke wrote:
> All,
>
> here is the third version of the igb (82575) ethernet controller driver. This
> driver was previously posted 2007-07-13 and 2007-12-11. Many comments received
> were addressed:
>
> - removed indirection wrappers in the same way as e1000e and ixgbe.
> - cleaned up largely against sparse, checkpatch
> - removed module parameters and moved functionality to ethtool ioctls
> - new NAPI API rewrites
> - by default the driver runs in multiqueue mode with 2 to 40 RX queues enabled.
>
> and specifically in this version:
>
> - register macro's were condensed for readability
> - fixed namespace collisions by renaming functions to igb_*
>
> Since the driver is still too large (allthough the patch shrunk from 558k to 416k
> to 407k, almost 38% of its size) to post to this list I am attaching the bzipped
> patch here. You can get the same driver alternatively from here:
>
> http://foo-projects.org/~sofar/0001-igb-PCI-Express-82575-Gigabit-Ethernet-driver.patch
> [407k]
> http://foo-projects.org/~sofar/0001-igb-PCI-Express-82575-Gigabit-Ethernet-driver.patch.bz2
> [74k]
>
> or through git:
> git://lost.foo-projects.org/~ahkok/git/linux-2.6 #igb
>
>
> There are several concerns still open for this driver:
> - hardware code is still a large API. we're expecting more hardware to be
> supported by this driver in the future. The API has already been scrubbed but we
> anticipate that the remaining hooks will be used in the future.
> - The register defines are still named "E1000_" as they are mostly identical to
> the e1000 chipsets (igb register space is a superset of most recent e1000 register
> sets).
>
>
> Please review,
>
>
> Cheers,
>
> Auke
>
> ---
>
>>From 4ec9e52f44de0c1c41265c5f326b573643f24da7 Mon Sep 17 00:00:00 2001
> From: Auke Kok <auke-jan.h.kok@intel.com>
> Date: Thu, 10 Jan 2008 14:55:46 -0800
> Subject: [PATCH] igb: PCI-Express 82575 Gigabit Ethernet driver
>
> We are pleased to announce a new Gigabit Ethernet product and its
> driver to the linux community. This product is the Intel(R) 82575
> Gigabit Ethernet adapter family. Physical adapters will be available
> to the public soon. These adapters come in 2- and 4-port versions
> (copper PHY) currently. Other variants will be available later.
>
> The 82575 chipset supports significantly different features that
> warrant a new driver. The descriptor format is (just like the
> ixgbe driver) different. The device can use multiple MSI-X vectors
> and multiple queues for both send and receive. This allows us to
> optimize some of the driver code specifically as well compared to
> the e1000-supported devices.
>
> This version of the igb driver no longer uses fake netdevices and
> incorporates napi_struct members for each ring to do the multi-
> queue polling. multi-queue is enabled by default and the driver
> supports NAPI mode only.
>
> All the namespace collisions should be gone in this version too. The
> register macro's have been condensed to improve readability.
>
> Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
> ---
> drivers/net/Kconfig | 22 +
> drivers/net/Makefile | 1 +
> drivers/net/igb/Makefile | 37 +
> drivers/net/igb/e1000_82575.c | 1269 ++++++++++++
> drivers/net/igb/e1000_82575.h | 150 ++
> drivers/net/igb/e1000_defines.h | 772 ++++++++
> drivers/net/igb/e1000_hw.h | 599 ++++++
> drivers/net/igb/e1000_mac.c | 1505 ++++++++++++++
> drivers/net/igb/e1000_mac.h | 98 +
> drivers/net/igb/e1000_nvm.c | 605 ++++++
> drivers/net/igb/e1000_nvm.h | 40 +
> drivers/net/igb/e1000_phy.c | 1807 +++++++++++++++++
> drivers/net/igb/e1000_phy.h | 98 +
> drivers/net/igb/e1000_regs.h | 270 +++
> drivers/net/igb/igb.h | 300 +++
> drivers/net/igb/igb_ethtool.c | 1927 ++++++++++++++++++
> drivers/net/igb/igb_main.c | 4138 +++++++++++++++++++++++++++++++++++++++
> 17 files changed, 13638 insertions(+), 0 deletions(-)
> create mode 100644 drivers/net/igb/Makefile
> create mode 100644 drivers/net/igb/e1000_82575.c
> create mode 100644 drivers/net/igb/e1000_82575.h
> create mode 100644 drivers/net/igb/e1000_defines.h
> create mode 100644 drivers/net/igb/e1000_hw.h
> create mode 100644 drivers/net/igb/e1000_mac.c
> create mode 100644 drivers/net/igb/e1000_mac.h
> create mode 100644 drivers/net/igb/e1000_nvm.c
> create mode 100644 drivers/net/igb/e1000_nvm.h
> create mode 100644 drivers/net/igb/e1000_phy.c
> create mode 100644 drivers/net/igb/e1000_phy.h
> create mode 100644 drivers/net/igb/e1000_regs.h
> create mode 100644 drivers/net/igb/igb.h
> create mode 100644 drivers/net/igb/igb_ethtool.c
> create mode 100644 drivers/net/igb/igb_main.c
applied
^ permalink raw reply
* Re: [GIT PULL] [IPV6,IPV4]: Fix several sparse warnings.
From: Eric Dumazet @ 2008-01-22 10:03 UTC (permalink / raw)
To: YOSHIFUJI Hideaki / 吉藤英明; +Cc: davem, netdev
In-Reply-To: <20080122.185632.09970660.yoshfuji@linux-ipv6.org>
On Tue, 22 Jan 2008 18:56:32 +0900 (JST)
YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org> wrote:
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index cb2411c..ecd9d91 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -418,7 +418,7 @@ out:
>
> void udp_err(struct sk_buff *skb, u32 info)
> {
> - return __udp4_lib_err(skb, info, udp_hash);
> + __udp4_lib_err(skb, info, udp_hash);
> }
Hum... On this one, I would say Sparse is picky, not to say buggy :(
^ permalink raw reply
* [GIT PULL] [IPV6,IPV4]: Fix several sparse warnings.
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2008-01-22 9:56 UTC (permalink / raw)
To: davem; +Cc: yoshfuji, netdev
Dave, please consider pulling following changes on top of net-2.6.25 tree:
git://git.linux-ipv6.org/gitroot/yoshfuji/linux-2.6-dev.git net-2.6-dev-20080122
Thank you.
HEADLINES
---------
[IPV4] UDP,UDPLITE: Sparse: {__udp4_lib,udp,udplite}_err() are of void.
[IPV6] UDP,UDPLITE: Sparse: {__udp6_lib,udp,udplite}_err() are of void.
[IPV6] UDPLITE: Sparse: Declare non-static symbols in header.
[IPV6] ADDRLABEL: Sparse: Make several functions static.
[IPV6]: Sparse: Declare non-static ipv6_{route,icmp,frag}_sysctl_init() in header.
[IPV6] ADDRCONF: Sparse: Make inet6_dump_addr() code paths more straight-forward.
[IPV6] NDISC: Sparse: Use different variable name for local use.
DIFFSTAT
--------
include/net/ipv6.h | 4 ++++
net/ipv4/udp.c | 2 +-
net/ipv4/udplite.c | 2 +-
net/ipv6/addrconf.c | 38 ++++++++++++++++++--------------------
net/ipv6/addrlabel.c | 20 ++++++++++----------
net/ipv6/af_inet6.c | 2 --
net/ipv6/ndisc.c | 10 +++++-----
net/ipv6/sysctl_net_ipv6.c | 3 ---
net/ipv6/udp.c | 2 +-
net/ipv6/udp_impl.h | 1 +
net/ipv6/udplite.c | 2 +-
11 files changed, 42 insertions(+), 44 deletions(-)
CHANGESETS
----------
commit 9c14555fec7d209c90ae5079c59dc9a338620fd7
Author: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue Jan 22 17:05:31 2008 +0900
[IPV4] UDP,UDPLITE: Sparse: {__udp4_lib,udp,udplite}_err() are of void.
Fix following sparse warnings:
| net/ipv4/udp.c:421:2: warning: returning void-valued expression
| net/ipv4/udplite.c:38:2: warning: returning void-valued expression
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cb2411c..ecd9d91 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -418,7 +418,7 @@ out:
void udp_err(struct sk_buff *skb, u32 info)
{
- return __udp4_lib_err(skb, info, udp_hash);
+ __udp4_lib_err(skb, info, udp_hash);
}
/*
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index f5baeb3..001b881 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -35,7 +35,7 @@ static int udplite_rcv(struct sk_buff *skb)
static void udplite_err(struct sk_buff *skb, u32 info)
{
- return __udp4_lib_err(skb, info, udplite_hash);
+ __udp4_lib_err(skb, info, udplite_hash);
}
static struct net_protocol udplite_protocol = {
---
commit feafbe254cd11496370192a08dbdc1d0ddda226f
Author: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue Jan 22 17:09:55 2008 +0900
[IPV6] UDP,UDPLITE: Sparse: {__udp6_lib,udp,udplite}_err() are of void.
Fix following sparse warnings:
| net/ipv6/udp.c:262:2: warning: returning void-valued expression
| net/ipv6/udplite.c:29:2: warning: returning void-valued expression
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index bf58aca..bd4b9df 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -259,7 +259,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
struct inet6_skb_parm *opt, int type,
int code, int offset, __be32 info )
{
- return __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash);
+ __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash);
}
int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 39f0705..87d4202 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -26,7 +26,7 @@ static void udplitev6_err(struct sk_buff *skb,
struct inet6_skb_parm *opt,
int type, int code, int offset, __be32 info)
{
- return __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash);
+ __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash);
}
static struct inet6_protocol udplitev6_protocol = {
---
commit ce97db1c7fa125b3f24a3d424a6373824a0bca37
Author: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue Jan 22 17:25:46 2008 +0900
[IPV6] UDPLITE: Sparse: Declare non-static symbols in header.
Fix the following sparse warnings:
| net/ipv6/udplite.c:45:14: warning: symbol 'udplitev6_prot' was not declared. Should it be static?
| net/ipv6/udplite.c:80:12: warning: symbol 'udplitev6_init' was not declared. Should it be static?
| net/ipv6/udplite.c:99:6: warning: symbol 'udplitev6_exit' was not declared. Should it be static?
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 2d3fda6..21be3a8 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -5,6 +5,7 @@
#include <net/protocol.h>
#include <net/addrconf.h>
#include <net/inet_common.h>
+#include <net/transp_v6.h>
extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int );
extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
---
commit c70651db4683cdaec05d83b91b6a53560f045a27
Author: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue Jan 22 17:12:50 2008 +0900
[IPV6] ADDRLABEL: Sparse: Make several functions static.
Fix following sparse warnings:
| net/ipv6/addrlabel.c:172:25: warning: symbol 'ip6addrlbl_alloc' was not declared. Should it be static?
| net/ipv6/addrlabel.c:219:5: warning: symbol '__ip6addrlbl_add' was not declared. Should it be static?
| net/ipv6/addrlabel.c:260:5: warning: symbol 'ip6addrlbl_add' was not declared. Should it be static?
| net/ipv6/addrlabel.c:285:5: warning: symbol '__ip6addrlbl_del' was not declared. Should it be static?
| net/ipv6/addrlabel.c:311:5: warning: symbol 'ip6addrlbl_del' was not declared. Should it be static?
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 6f1ca60..3867412 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -169,9 +169,9 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
}
/* allocate one entry */
-struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
- int prefixlen, int ifindex,
- u32 label)
+static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
+ int prefixlen, int ifindex,
+ u32 label)
{
struct ip6addrlbl_entry *newp;
int addrtype;
@@ -216,7 +216,7 @@ struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
}
/* add a label */
-int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
+static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
{
int ret = 0;
@@ -257,8 +257,8 @@ out:
}
/* add a label */
-int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
- int ifindex, u32 label, int replace)
+static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
+ int ifindex, u32 label, int replace)
{
struct ip6addrlbl_entry *newp;
int ret = 0;
@@ -282,8 +282,8 @@ int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
}
/* remove a label */
-int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
- int ifindex)
+static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
+ int ifindex)
{
struct ip6addrlbl_entry *p = NULL;
struct hlist_node *pos, *n;
@@ -308,8 +308,8 @@ int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
return ret;
}
-int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
- int ifindex)
+static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
+ int ifindex)
{
struct in6_addr prefix_buf;
int ret;
---
commit 50207356bc5026b53dbf99a3e86c28a683ae6745
Author: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue Jan 22 17:18:38 2008 +0900
[IPV6]: Sparse: Declare non-static ipv6_{route,icmp,frag}_sysctl_init() in header.
Fix the following sparse warnings:
| net/ipv6/route.c:2491:18: warning: symbol 'ipv6_route_sysctl_init' was not declared. Should it be static?
| net/ipv6/icmp.c:922:18: warning: symbol 'ipv6_icmp_sysctl_init' was not declared. Should it be static?
| net/ipv6/reassembly.c:628:6: warning: symbol 'ipv6_frag_sysctl_init' was not declared. Should it be static?
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c8e8cb2..3712cae 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -586,6 +586,10 @@ extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
int __user *optlen);
#ifdef CONFIG_PROC_FS
+extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
+extern void ipv6_frag_sysctl_init(struct net *net);
+extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
+
extern int ac6_proc_init(void);
extern void ac6_proc_exit(void);
extern int raw6_proc_init(void);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3150c4b..6738a7b 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -72,8 +72,6 @@ MODULE_LICENSE("GPL");
static struct list_head inetsw6[SOCK_MAX];
static DEFINE_SPINLOCK(inetsw6_lock);
-void ipv6_frag_sysctl_init(struct net *net);
-
static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 5e0af4d..7197eb7 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -14,9 +14,6 @@
#include <net/addrconf.h>
#include <net/inet_frag.h>
-extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
-extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
-
static ctl_table ipv6_table_template[] = {
{
.ctl_name = NET_IPV6_ROUTE,
---
commit 2aa6b4e605b700e10943afd9f34cd0527304f3a3
Author: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue Jan 22 17:29:40 2008 +0900
[IPV6] ADDRCONF: Sparse: Make inet6_dump_addr() code paths more straight-forward.
Fix the following sparse warning:
| net/ipv6/addrconf.c:3384:2: warning: context imbalance in 'inet6_dump_addr' - different lock contexts for basic block
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index aba7b5d..e40213d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3335,11 +3335,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
ifa = ifa->if_next, ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
- if ((err = inet6_fill_ifaddr(skb, ifa,
- NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, RTM_NEWADDR,
- NLM_F_MULTI)) <= 0)
- goto done;
+ err = inet6_fill_ifaddr(skb, ifa,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWADDR,
+ NLM_F_MULTI);
}
break;
case MULTICAST_ADDR:
@@ -3348,11 +3348,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
ifmca = ifmca->next, ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
- if ((err = inet6_fill_ifmcaddr(skb, ifmca,
- NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
- NLM_F_MULTI)) <= 0)
- goto done;
+ err = inet6_fill_ifmcaddr(skb, ifmca,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_GETMULTICAST,
+ NLM_F_MULTI);
}
break;
case ANYCAST_ADDR:
@@ -3361,11 +3361,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
ifaca = ifaca->aca_next, ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
- if ((err = inet6_fill_ifacaddr(skb, ifaca,
- NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, RTM_GETANYCAST,
- NLM_F_MULTI)) <= 0)
- goto done;
+ err = inet6_fill_ifacaddr(skb, ifaca,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_GETANYCAST,
+ NLM_F_MULTI);
}
break;
default:
@@ -3373,14 +3373,12 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
}
read_unlock_bh(&idev->lock);
in6_dev_put(idev);
+
+ if (err <= 0)
+ break;
cont:
idx++;
}
-done:
- if (err <= 0) {
- read_unlock_bh(&idev->lock);
- in6_dev_put(idev);
- }
cb->args[0] = idx;
cb->args[1] = ip_idx;
return skb->len;
---
commit b825a8d0e0d210bffeec948b1790c3be4c3c5448
Author: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue Jan 22 17:32:53 2008 +0900
[IPV6] NDISC: Sparse: Use different variable name for local use.
Fix the following sparse warnings:
| net/ipv6/ndisc.c:1300:21: warning: symbol 'opt' shadows an earlier one
| net/ipv6/ndisc.c:1078:7: originally declared here
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e1554ba..92b6775 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1297,11 +1297,11 @@ skip_defrtr:
}
if (ndopts.nd_useropts) {
- struct nd_opt_hdr *opt;
- for (opt = ndopts.nd_useropts;
- opt;
- opt = ndisc_next_useropt(opt, ndopts.nd_useropts_end)) {
- ndisc_ra_useropt(skb, opt);
+ struct nd_opt_hdr *p;
+ for (p = ndopts.nd_useropts;
+ p;
+ p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
+ ndisc_ra_useropt(skb, p);
}
}
---
--
YOSHIFUJI Hideaki @ USAGI Project <yoshfuji@linux-ipv6.org>
GPG-FP : 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA
^ permalink raw reply related
* Re: 2.6.24-rc8-mm1 : net tcp_input.c warnings
From: Dave Young @ 2008-01-22 9:18 UTC (permalink / raw)
To: Ilpo Järvinen; +Cc: LKML, David Miller, Netdev, Andrew Morton
In-Reply-To: <a8e1da0801220109v6bf8931ev50f2210402c3ba41@mail.gmail.com>
On Jan 22, 2008 5:09 PM, Dave Young <hidave.darkstar@gmail.com> wrote:
>
> On Jan 22, 2008 12:37 PM, Dave Young <hidave.darkstar@gmail.com> wrote:
> >
> > On Jan 22, 2008 5:14 AM, Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> wrote:
> > >
> > > On Mon, 21 Jan 2008, Dave Young wrote:
> > >
> > > > Please see the following kernel messages (triggered while using some qemu session).
> > > > BTW, seems there's some e100 error message as well.
> > > >
> > > > PCI: Setting latency timer of device 0000:00:1b.0 to 64
> > > > e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI
> > > > e100: Copyright(c) 1999-2006 Intel Corporation
> > > > ACPI: PCI Interrupt 0000:03:08.0[A] -> GSI 20 (level, low) -> IRQ 20
> > > > modprobe:2331 conflicting cache attribute efaff000-efb00000 uncached<->default
> > > > e100: 0000:03:08.0: e100_probe: Cannot map device registers, aborting.
> > > > ACPI: PCI interrupt for device 0000:03:08.0 disabled
> > > > e100: probe of 0000:03:08.0 failed with error -12
> > > > eth0: setting full-duplex.
> > > > ------------[ cut here ]------------
> > > > WARNING: at net/ipv4/tcp_input.c:2169 tcp_mark_head_lost+0x121/0x150()
> > > > Modules linked in: snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss eeprom e100 psmouse snd_hda_intel snd_pcm snd_timer btusb rtc_cmos thermal bluetooth rtc_core serio_raw intel_agp button processor sg snd rtc_lib i2c_i801 evdev agpgart soundcore dcdbas 3c59x pcspkr snd_page_alloc
> > > > Pid: 0, comm: swapper Not tainted 2.6.24-rc8-mm1 #4
> > > > [<c0132100>] ? printk+0x0/0x20
> > > > [<c0131834>] warn_on_slowpath+0x54/0x80
> > > > [<c03e8df8>] ? ip_finish_output+0x128/0x2e0
> > > > [<c03e9527>] ? ip_output+0xe7/0x100
> > > > [<c03e8a88>] ? ip_local_out+0x18/0x20
> > > > [<c03e991c>] ? ip_queue_xmit+0x3dc/0x470
> > > > [<c043641e>] ? _spin_unlock_irqrestore+0x5e/0x70
> > > > [<c0186be1>] ? check_pad_bytes+0x61/0x80
> > > > [<c03f6031>] tcp_mark_head_lost+0x121/0x150
> > > > [<c03f60ac>] tcp_update_scoreboard+0x4c/0x170
> > > > [<c03f6e0a>] tcp_fastretrans_alert+0x48a/0x6b0
> > > > [<c03f7d93>] tcp_ack+0x1b3/0x3a0
> > > > [<c03fa14b>] tcp_rcv_established+0x3eb/0x710
> > > > [<c04015c5>] tcp_v4_do_rcv+0xe5/0x100
> > > > [<c0401bbb>] tcp_v4_rcv+0x5db/0x660
> > >
> > > Doh, once more these S+L things..., the rest are symptom of the first
> > > problem.
> >
> > What is the S+L thing? Could you explain a bit?
> >
> > >
> > > What is strange is that it doesn't show up until now, the last TCP
> > > changes that could have some significance are from early Dec/Nov. Is
> > > there some reason why you haven't seen this before this (e.g., not
> > > tested with similar cfg or so)?
> >
> > Hmm, don't know how to answer ...
> >
> >
> > I'm a bit worried about its
> > > reproducibility if it takes this far to see it...
> > >
>
> It was triggered again on my PC, just while using Firefox.
Maybe it is related to the e100 error; I will apply Jiri Slaby's
e100-iomap-mem-accesses patch to test.
>
> > >
> > > --
> > > i.
> > >
> >
>
^ permalink raw reply
* Re: 2.6.24-rc8-mm1 : net tcp_input.c warnings
From: Dave Young @ 2008-01-22 9:09 UTC (permalink / raw)
To: Ilpo Järvinen; +Cc: LKML, David Miller, Netdev, Andrew Morton
In-Reply-To: <a8e1da0801212037uaa34a10xc2239ac7309a4ed0@mail.gmail.com>
On Jan 22, 2008 12:37 PM, Dave Young <hidave.darkstar@gmail.com> wrote:
>
> On Jan 22, 2008 5:14 AM, Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> wrote:
> >
> > On Mon, 21 Jan 2008, Dave Young wrote:
> >
> > > Please see the following kernel messages (triggered while using some qemu session).
> > > BTW, seems there's some e100 error message as well.
> > >
> > > PCI: Setting latency timer of device 0000:00:1b.0 to 64
> > > e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI
> > > e100: Copyright(c) 1999-2006 Intel Corporation
> > > ACPI: PCI Interrupt 0000:03:08.0[A] -> GSI 20 (level, low) -> IRQ 20
> > > modprobe:2331 conflicting cache attribute efaff000-efb00000 uncached<->default
> > > e100: 0000:03:08.0: e100_probe: Cannot map device registers, aborting.
> > > ACPI: PCI interrupt for device 0000:03:08.0 disabled
> > > e100: probe of 0000:03:08.0 failed with error -12
> > > eth0: setting full-duplex.
> > > ------------[ cut here ]------------
> > > WARNING: at net/ipv4/tcp_input.c:2169 tcp_mark_head_lost+0x121/0x150()
> > > Modules linked in: snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss eeprom e100 psmouse snd_hda_intel snd_pcm snd_timer btusb rtc_cmos thermal bluetooth rtc_core serio_raw intel_agp button processor sg snd rtc_lib i2c_i801 evdev agpgart soundcore dcdbas 3c59x pcspkr snd_page_alloc
> > > Pid: 0, comm: swapper Not tainted 2.6.24-rc8-mm1 #4
> > > [<c0132100>] ? printk+0x0/0x20
> > > [<c0131834>] warn_on_slowpath+0x54/0x80
> > > [<c03e8df8>] ? ip_finish_output+0x128/0x2e0
> > > [<c03e9527>] ? ip_output+0xe7/0x100
> > > [<c03e8a88>] ? ip_local_out+0x18/0x20
> > > [<c03e991c>] ? ip_queue_xmit+0x3dc/0x470
> > > [<c043641e>] ? _spin_unlock_irqrestore+0x5e/0x70
> > > [<c0186be1>] ? check_pad_bytes+0x61/0x80
> > > [<c03f6031>] tcp_mark_head_lost+0x121/0x150
> > > [<c03f60ac>] tcp_update_scoreboard+0x4c/0x170
> > > [<c03f6e0a>] tcp_fastretrans_alert+0x48a/0x6b0
> > > [<c03f7d93>] tcp_ack+0x1b3/0x3a0
> > > [<c03fa14b>] tcp_rcv_established+0x3eb/0x710
> > > [<c04015c5>] tcp_v4_do_rcv+0xe5/0x100
> > > [<c0401bbb>] tcp_v4_rcv+0x5db/0x660
> >
> > Doh, once more these S+L things..., the rest are symptom of the first
> > problem.
>
> What is the S+L thing? Could you explain a bit?
>
> >
> > What is strange is that it doesn't show up until now, the last TCP
> > changes that could have some significance are from early Dec/Nov. Is
> > there some reason why you haven't seen this before this (e.g., not
> > tested with similar cfg or so)?
>
> Hmm, don't know how to answer ...
>
>
> I'm a bit worried about its
> > reproducibility if it takes this far to see it...
> >
It was triggered again on my PC, just while using Firefox.
> >
> > --
> > i.
> >
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox