netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Patrick McHardy <kaber@trash.net>
To: "David S. Miller" <davem@davemloft.net>
Cc: netdev@oss.sgi.com
Subject: Re: [PATCH 2.6]: keep fragment queues private to each user
Date: Thu, 27 Jan 2005 00:30:56 +0100	[thread overview]
Message-ID: <41F82830.9010009@trash.net> (raw)
In-Reply-To: <20050125214158.4be3798c.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 669 bytes --]

David S. Miller wrote:

>On Tue, 25 Jan 2005 05:57:47 +0100
>Patrick McHardy <kaber@trash.net> wrote:
>
>>This patch keeps fragment queues private to each ip_defrag user to avoid
>>skbs jumping between different callers. It shouldn't change any wanted
>>behaviour, the only questionable one was ip_call_ra_chain, but the RA
>>option is included in each fragment. If you're fine with the patch I'm going
>>to send a 2.4 version later.
>>
>
>Please fix the CONFIG_SMP build.  You use some variable "user"
>in ip_frag_intern(), yet don't add this as an argument or
>anything like that.
>
Sorry, new patch attached. I'll send the 2.4 patch later tonight.

Regards
Patrick


[-- Attachment #2: x --]
[-- Type: text/plain, Size: 11412 bytes --]

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/01/27 00:20:54+01:00 kaber@coreworks.de 
#   [IPV4]: Keep fragment queues private to each user
#   
#   Signed-off-by: Patrick McHardy <kaber@trash.net>
# 
# net/ipv4/netfilter/ip_nat_standalone.c
#   2005/01/27 00:20:46+01:00 kaber@coreworks.de +1 -1
#   [IPV4]: Keep fragment queues private to each user
#   
#   Signed-off-by: Patrick McHardy <kaber@trash.net>
# 
# net/ipv4/netfilter/ip_conntrack_standalone.c
#   2005/01/27 00:20:46+01:00 kaber@coreworks.de +4 -7
#   [IPV4]: Keep fragment queues private to each user
#   
#   Signed-off-by: Patrick McHardy <kaber@trash.net>
# 
# net/ipv4/netfilter/ip_conntrack_core.c
#   2005/01/27 00:20:46+01:00 kaber@coreworks.de +2 -9
#   [IPV4]: Keep fragment queues private to each user
#   
#   Signed-off-by: Patrick McHardy <kaber@trash.net>
# 
# net/ipv4/ipvs/ip_vs_core.c
#   2005/01/27 00:20:46+01:00 kaber@coreworks.de +11 -8
#   [IPV4]: Keep fragment queues private to each user
#   
#   Signed-off-by: Patrick McHardy <kaber@trash.net>
# 
# net/ipv4/ip_input.c
#   2005/01/27 00:20:46+01:00 kaber@coreworks.de +2 -2
#   [IPV4]: Keep fragment queues private to each user
#   
#   Signed-off-by: Patrick McHardy <kaber@trash.net>
# 
# net/ipv4/ip_fragment.c
#   2005/01/27 00:20:46+01:00 kaber@coreworks.de +13 -20
#   [IPV4]: Keep fragment queues private to each user
#   
#   Signed-off-by: Patrick McHardy <kaber@trash.net>
# 
# include/net/ip.h
#   2005/01/27 00:20:46+01:00 kaber@coreworks.de +14 -3
#   [IPV4]: Keep fragment queues private to each user
#   
#   Signed-off-by: Patrick McHardy <kaber@trash.net>
# 
# include/linux/netfilter_ipv4/ip_conntrack.h
#   2005/01/27 00:20:46+01:00 kaber@coreworks.de +1 -2
#   [IPV4]: Keep fragment queues private to each user
#   
#   Signed-off-by: Patrick McHardy <kaber@trash.net>
# 
diff -Nru a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
--- a/include/linux/netfilter_ipv4/ip_conntrack.h	2005-01-27 00:22:21 +01:00
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h	2005-01-27 00:22:21 +01:00
@@ -262,10 +262,9 @@
 /* Fake conntrack entry for untracked connections */
 extern struct ip_conntrack ip_conntrack_untracked;
 
-extern int ip_ct_no_defrag;
 /* Returns new sk_buff, or NULL */
 struct sk_buff *
-ip_ct_gather_frags(struct sk_buff *skb);
+ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user);
 
 /* Iterate over all conntracks: if iter returns true, it's deleted. */
 extern void
diff -Nru a/include/net/ip.h b/include/net/ip.h
--- a/include/net/ip.h	2005-01-27 00:22:21 +01:00
+++ b/include/net/ip.h	2005-01-27 00:22:21 +01:00
@@ -286,9 +286,20 @@
 /*
  *	Functions provided by ip_fragment.o
  */
- 
-struct sk_buff *ip_defrag(struct sk_buff *skb);
-extern void ipfrag_flush(void);
+
+enum ip_defrag_users
+{
+	IP_DEFRAG_LOCAL_DELIVER,
+	IP_DEFRAG_CALL_RA_CHAIN,
+	IP_DEFRAG_CONNTRACK_IN,
+	IP_DEFRAG_CONNTRACK_OUT,
+	IP_DEFRAG_NAT_OUT,
+	IP_DEFRAG_VS_IN,
+	IP_DEFRAG_VS_OUT,
+	IP_DEFRAG_VS_FWD
+};
+
+struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user);
 extern int ip_frag_nqueues;
 extern atomic_t ip_frag_mem;
 
diff -Nru a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
--- a/net/ipv4/ip_fragment.c	2005-01-27 00:22:21 +01:00
+++ b/net/ipv4/ip_fragment.c	2005-01-27 00:22:21 +01:00
@@ -73,6 +73,7 @@
 struct ipq {
 	struct ipq	*next;		/* linked list pointers			*/
 	struct list_head lru_list;	/* lru list member 			*/
+	u32		user;
 	u32		saddr;
 	u32		daddr;
 	u16		id;
@@ -243,13 +244,13 @@
 /* Memory limiting on fragments.  Evictor trashes the oldest 
  * fragment queue until we are back under the threshold.
  */
-static void __ip_evictor(int threshold)
+static void ip_evictor(void)
 {
 	struct ipq *qp;
 	struct list_head *tmp;
 	int work;
 
-	work = atomic_read(&ip_frag_mem) - threshold;
+	work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
 	if (work <= 0)
 		return;
 
@@ -274,11 +275,6 @@
 	}
 }
 
-static inline void ip_evictor(void)
-{
-	__ip_evictor(sysctl_ipfrag_low_thresh);
-}
-
 /*
  * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
  */
@@ -325,7 +321,8 @@
 		if(qp->id == qp_in->id		&&
 		   qp->saddr == qp_in->saddr	&&
 		   qp->daddr == qp_in->daddr	&&
-		   qp->protocol == qp_in->protocol) {
+		   qp->protocol == qp_in->protocol &&
+		   qp->user == qp_in->user) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&ipfrag_lock);
 			qp_in->last_in |= COMPLETE;
@@ -352,7 +349,7 @@
 }
 
 /* Add an entry to the 'ipq' queue for a newly received IP datagram. */
-static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph)
+static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
 {
 	struct ipq *qp;
 
@@ -364,6 +361,7 @@
 	qp->id = iph->id;
 	qp->saddr = iph->saddr;
 	qp->daddr = iph->daddr;
+	qp->user = user;
 	qp->len = 0;
 	qp->meat = 0;
 	qp->fragments = NULL;
@@ -386,7 +384,7 @@
 /* Find the correct entry in the "incomplete datagrams" queue for
  * this IP datagram, and create new one, if nothing is found.
  */
-static inline struct ipq *ip_find(struct iphdr *iph)
+static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 {
 	__u16 id = iph->id;
 	__u32 saddr = iph->saddr;
@@ -400,7 +398,8 @@
 		if(qp->id == id		&&
 		   qp->saddr == saddr	&&
 		   qp->daddr == daddr	&&
-		   qp->protocol == protocol) {
+		   qp->protocol == protocol &&
+		   qp->user == user) {
 			atomic_inc(&qp->refcnt);
 			read_unlock(&ipfrag_lock);
 			return qp;
@@ -408,7 +407,7 @@
 	}
 	read_unlock(&ipfrag_lock);
 
-	return ip_frag_create(hash, iph);
+	return ip_frag_create(hash, iph, user);
 }
 
 /* Add new segment to existing queue. */
@@ -642,7 +641,7 @@
 }
 
 /* Process an incoming IP datagram fragment. */
-struct sk_buff *ip_defrag(struct sk_buff *skb)
+struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
 {
 	struct iphdr *iph = skb->nh.iph;
 	struct ipq *qp;
@@ -657,7 +656,7 @@
 	dev = skb->dev;
 
 	/* Lookup (or create) queue header */
-	if ((qp = ip_find(iph)) != NULL) {
+	if ((qp = ip_find(iph, user)) != NULL) {
 		struct sk_buff *ret = NULL;
 
 		spin_lock(&qp->lock);
@@ -689,10 +688,4 @@
 	add_timer(&ipfrag_secret_timer);
 }
 
-void ipfrag_flush(void)
-{
-	__ip_evictor(0);
-}
-
 EXPORT_SYMBOL(ip_defrag);
-EXPORT_SYMBOL(ipfrag_flush);
diff -Nru a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
--- a/net/ipv4/ip_input.c	2005-01-27 00:22:21 +01:00
+++ b/net/ipv4/ip_input.c	2005-01-27 00:22:21 +01:00
@@ -172,7 +172,7 @@
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 			if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
-				skb = ip_defrag(skb);
+				skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
 				if (skb == NULL) {
 					read_unlock(&ip_ra_lock);
 					return 1;
@@ -273,7 +273,7 @@
 	 */
 
 	if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
-		skb = ip_defrag(skb);
+		skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
 		if (!skb)
 			return 0;
 	}
diff -Nru a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
--- a/net/ipv4/ipvs/ip_vs_core.c	2005-01-27 00:22:21 +01:00
+++ b/net/ipv4/ipvs/ip_vs_core.c	2005-01-27 00:22:21 +01:00
@@ -544,9 +544,9 @@
 }
 
 static inline struct sk_buff *
-ip_vs_gather_frags(struct sk_buff *skb)
+ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
-	skb = ip_defrag(skb);
+	skb = ip_defrag(skb, user);
 	if (skb)
 		ip_send_check(skb->nh.iph);
 	return skb;
@@ -620,7 +620,7 @@
 
 	/* reassemble IP fragments */
 	if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
-		skb = ip_vs_gather_frags(skb);
+		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
 		if (!skb)
 			return NF_STOLEN;
 		*pskb = skb;
@@ -759,7 +759,7 @@
 	/* reassemble IP fragments */
 	if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
 		     !pp->dont_defrag)) {
-		skb = ip_vs_gather_frags(skb);
+		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
 		if (!skb)
 			return NF_STOLEN;
 		iph = skb->nh.iph;
@@ -839,7 +839,8 @@
  *	forward to the right destination host if relevant.
  *	Currently handles error types - unreachable, quench, ttl exceeded.
  */
-static int ip_vs_in_icmp(struct sk_buff **pskb, int *related)
+static int 
+ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 {
 	struct sk_buff *skb = *pskb;
 	struct iphdr *iph;
@@ -853,7 +854,9 @@
 
 	/* reassemble IP fragments */
 	if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
-		skb = ip_vs_gather_frags(skb);
+		skb = ip_vs_gather_frags(skb,
+		                         hooknum == NF_IP_LOCAL_IN ?
+					 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
 		if (!skb)
 			return NF_STOLEN;
 		*pskb = skb;
@@ -962,7 +965,7 @@
 
 	iph = skb->nh.iph;
 	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
-		int related, verdict = ip_vs_in_icmp(pskb, &related);
+		int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
 
 		if (related)
 			return verdict;
@@ -1057,7 +1060,7 @@
 	if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
 		return NF_ACCEPT;
 
-	return ip_vs_in_icmp(pskb, &r);
+	return ip_vs_in_icmp(pskb, &r, hooknum);
 }
 
 
diff -Nru a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
--- a/net/ipv4/netfilter/ip_conntrack_core.c	2005-01-27 00:22:21 +01:00
+++ b/net/ipv4/netfilter/ip_conntrack_core.c	2005-01-27 00:22:21 +01:00
@@ -936,29 +936,22 @@
 	}
 }
 
-int ip_ct_no_defrag;
-
 /* Returns new sk_buff, or NULL */
 struct sk_buff *
-ip_ct_gather_frags(struct sk_buff *skb)
+ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
 	struct sock *sk = skb->sk;
 #ifdef CONFIG_NETFILTER_DEBUG
 	unsigned int olddebug = skb->nf_debug;
 #endif
 
-	if (unlikely(ip_ct_no_defrag)) {
-		kfree_skb(skb);
-		return NULL;
-	}
-
 	if (sk) {
 		sock_hold(sk);
 		skb_orphan(skb);
 	}
 
 	local_bh_disable(); 
-	skb = ip_defrag(skb);
+	skb = ip_defrag(skb, user);
 	local_bh_enable();
 
 	if (!skb) {
diff -Nru a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c	2005-01-27 00:22:21 +01:00
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c	2005-01-27 00:22:21 +01:00
@@ -391,7 +391,10 @@
 
 	/* Gather fragments. */
 	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
-		*pskb = ip_ct_gather_frags(*pskb);
+		*pskb = ip_ct_gather_frags(*pskb,
+		                           hooknum == NF_IP_PRE_ROUTING ? 
+					   IP_DEFRAG_CONNTRACK_IN :
+					   IP_DEFRAG_CONNTRACK_OUT);
 		if (!*pskb)
 			return NF_STOLEN;
 	}
@@ -823,12 +826,6 @@
  cleanup_defraglocalops:
 	nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
  cleanup_defragops:
-	/* Frag queues may hold fragments with skb->dst == NULL */
-	ip_ct_no_defrag = 1;
-	synchronize_net();
-	local_bh_disable();
-	ipfrag_flush();
-	local_bh_enable();
 	nf_unregister_hook(&ip_conntrack_defrag_ops);
  cleanup_proc_stat:
 #ifdef CONFIG_PROC_FS
diff -Nru a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
--- a/net/ipv4/netfilter/ip_nat_standalone.c	2005-01-27 00:22:21 +01:00
+++ b/net/ipv4/netfilter/ip_nat_standalone.c	2005-01-27 00:22:21 +01:00
@@ -195,7 +195,7 @@
 	   I'm starting to have nightmares about fragments.  */
 
 	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
-		*pskb = ip_ct_gather_frags(*pskb);
+		*pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT);
 
 		if (!*pskb)
 			return NF_STOLEN;

  reply	other threads:[~2005-01-26 23:30 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-01-25  4:57 [PATCH 2.6]: keep fragment queues private to each user Patrick McHardy
2005-01-25 11:57 ` Herbert Xu
2005-01-25 14:41   ` Patrick McHardy
2005-01-25 20:47     ` Herbert Xu
2005-01-26  5:41 ` David S. Miller
2005-01-26 23:30   ` Patrick McHardy [this message]
2005-01-27  2:44     ` [PATCH 2.4]: " Patrick McHardy
2005-01-27  6:29     ` [PATCH 2.6]: " David S. Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=41F82830.9010009@trash.net \
    --to=kaber@trash.net \
    --cc=davem@davemloft.net \
    --cc=netdev@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).