netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: itkes@imec.msu.ru
To: netdev@oss.sgi.com
Subject: A bug in the Kernel?
Date: Tue, 12 Apr 2005 16:31:41 +0400 (MSD)	[thread overview]
Message-ID: <3558.193.232.114.87.1113309101.squirrel@mx> (raw)

[-- Attachment #1: Type: text/plain, Size: 1580 bytes --]

Hello.

I think, I have found a bug in the Linux Kernel. It is caused by working
with lists by indexes in such operstions as routing tables dump and
routing rules dump (functions fib_rules_dump in fib_rules.c and a few dump
functions in fib_hash.c). If some elements are removed form the list, the
index may identify not the element it identified earlier. As a result,
there may happen that an application that requested the routes or rules
dump, will not receive the entire table. There is how
to make an application to lose some rules.

1. Add 150 rules to kernel table.
2. Application A sends an RTM_GETRULE message with flag NLM_F_DUMP. The
kernel puts first 107 rules to the buffer.
3. Application B deletes first 20 rules.
4. Application A receives the first couple of data from kernel. The kernel
puts next couple of rules to the buffer, begining from 108-th rule, that
was 128-th earlier.
As a result, 20 rules had not been sent to the application, without being
deleted or modified during the dump operation.

Routes can be lost, too, if you add certain 5000 routes, request their
dump and remove 1000 from them during the dump.

In the patch (against kernel 2.6.11) attached, I have corrected these
bugs. In the modified kernel, both dumps of routes and routing rules seems
to work properly. But, I think, there can be other bugs similar to this in
other dump operations.

Alex Itkes (Moscow State University).

P.S. I am awfully sorry if you got this message more then once. There were
some problems with my mailbox so my previous message or your answer might
be lost.

[-- Attachment #2: patch-2.6.11-nl01 --]
[-- Type: text/plain, Size: 9517 bytes --]

diff -urN linux-2.6.11/include/linux/netlink.h linux-2.6.11-nl01/include/linux/netlink.h
--- linux-2.6.11/include/linux/netlink.h	2005-03-02 23:04:19.000000000 +0300
+++ linux-2.6.11-nl01/include/linux/netlink.h	2005-03-08 15:03:11.000000000 +0300
@@ -145,9 +145,11 @@
 	int		(*dump)(struct sk_buff * skb, struct netlink_callback *cb);
 	int		(*done)(struct netlink_callback *cb);
 	int		family;
-	long		args[4];
+	long		args[5];
 };
 
+#define NLCB_ZERO(cb_,k_)	memset((cb_)->args+(k_),0,sizeof((cb_)->args)-(k_)*sizeof((cb_)->args[0]))
+
 struct netlink_notify
 {
 	int pid;
diff -urN linux-2.6.11/net/core/rtnetlink.c linux-2.6.11-nl01/net/core/rtnetlink.c
--- linux-2.6.11/net/core/rtnetlink.c	2005-03-02 23:04:21.000000000 +0300
+++ linux-2.6.11-nl01/net/core/rtnetlink.c	2005-03-08 17:27:28.000000000 +0300
@@ -425,7 +425,7 @@
 		    rtnetlink_links[idx][type].dumpit == NULL)
 			continue;
 		if (idx > s_idx)
-			memset(&cb->args[0], 0, sizeof(cb->args));
+		    NLCB_ZERO(cb,0);
 		if (rtnetlink_links[idx][type].dumpit(skb, cb))
 			break;
 	}
diff -urN linux-2.6.11/net/ipv4/fib_frontend.c linux-2.6.11-nl01/net/ipv4/fib_frontend.c
--- linux-2.6.11/net/ipv4/fib_frontend.c	2005-03-02 23:04:21.000000000 +0300
+++ linux-2.6.11-nl01/net/ipv4/fib_frontend.c	2005-03-08 17:33:40.000000000 +0300
@@ -347,7 +347,7 @@
 	for (t=s_t; t<=RT_TABLE_MAX; t++) {
 		if (t < s_t) continue;
 		if (t > s_t)
-			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+		    NLCB_ZERO(cb,1);
 		if ((tb = fib_get_table(t))==NULL)
 			continue;
 		if (tb->tb_dump(tb, skb, cb) < 0) 
diff -urN linux-2.6.11/net/ipv4/fib_hash.c linux-2.6.11-nl01/net/ipv4/fib_hash.c
--- linux-2.6.11/net/ipv4/fib_hash.c	2005-03-02 23:04:21.000000000 +0300
+++ linux-2.6.11-nl01/net/ipv4/fib_hash.c	2005-03-08 17:37:28.000000000 +0300
@@ -52,6 +52,9 @@
 	struct hlist_node	fn_hash;
 	struct list_head	fn_alias;
 	u32			fn_key;
+	
+	u32			fn_nl_links;
+	u8			fn_nl_dead;
 };
 
 struct fn_zone {
@@ -256,7 +259,7 @@
 
 		head = &fz->fz_hash[fn_hash(k, fz)];
 		hlist_for_each_entry(f, node, head, fn_hash) {
-			if (f->fn_key != k)
+			if (f->fn_key != k || f->fn_nl_dead)
 				continue;
 
 			err = fib_semantic_match(&f->fn_alias,
@@ -296,8 +299,14 @@
 	hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
 		struct fib_alias *fa;
 
+		if(f->fn_nl_dead){
+		    continue;
+		};
 		list_for_each_entry(fa, &f->fn_alias, fa_list) {
 			struct fib_info *next_fi = fa->fa_info;
+			if (fa->fa_nl_dead){
+			    continue;
+			};
 
 			if (fa->fa_scope != res->scope ||
 			    fa->fa_type != RTN_UNICAST)
@@ -497,6 +506,8 @@
 		INIT_HLIST_NODE(&new_f->fn_hash);
 		INIT_LIST_HEAD(&new_f->fn_alias);
 		new_f->fn_key = key;
+		new_f->fn_nl_links=0;
+		new_f->fn_nl_dead=0;
 		f = new_f;
 	}
 
@@ -506,6 +517,8 @@
 	new_fa->fa_scope = r->rtm_scope;
 	new_fa->fa_state = 0;
 
+	new_fa->fa_nl_links=0;
+	new_fa->fa_nl_dead=0;
 	/*
 	 * Insert new entry to the list.
 	 */
@@ -591,8 +604,17 @@
 		int kill_fn;
 
 		fa = fa_to_delete;
+		if(fa->fa_nl_dead){
+		    tb->tb_flush(tb);
+		    return -ESRCH;
+		};
 		rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req);
-
+		
+		if(fa->fa_nl_links){
+		    fa->fa_nl_dead=1;
+		    tb->tb_flush(tb);
+		    return 0;
+		};
 		kill_fn = 0;
 		write_lock_bh(&fib_hash_lock);
 		list_del(&fa->fa_list);
@@ -630,7 +652,7 @@
 		list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
 			struct fib_info *fi = fa->fa_info;
 
-			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
+			if ((fi && (fi->fib_flags&RTNH_F_DEAD))||(fa->fa_nl_dead&&(fa->fa_nl_links==0))) {
 				write_lock_bh(&fib_hash_lock);
 				list_del(&fa->fa_list);
 				if (list_empty(&f->fn_alias)) {
@@ -675,17 +697,42 @@
 {
 	struct hlist_node *node;
 	struct fib_node *f;
-	int i, s_i;
-
-	s_i = cb->args[3];
-	i = 0;
+	int flag1=0;
+	int flag2=0;
+	
+	if(cb->args[3]==0){
+	    flag1=1;
+	};
 	hlist_for_each_entry(f, node, head, fn_hash) {
 		struct fib_alias *fa;
 
+		if(flag1==0){
+		    if((long)(f)==cb->args[3]){
+			--(f->fn_nl_links);
+			flag1=1;
+		    }else{
+			continue;
+		    };
+		};
+		if(cb->args[4]==0){
+		    flag2=1;
+		};
+		if(f->fn_nl_dead){
+		    continue;
+		};
 		list_for_each_entry(fa, &f->fn_alias, fa_list) {
-			if (i < s_i)
-				goto next;
 
+			if(flag2==0){
+		            if((long)(fa)==cb->args[4]){
+				--(fa->fa_nl_links);
+				flag2=1;
+			    }else{
+				continue;
+			    };
+			};
+			if(fa->fa_nl_dead){
+			    continue;
+			};
 			if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
 					  cb->nlh->nlmsg_seq,
 					  RTM_NEWROUTE,
@@ -696,14 +743,16 @@
 					  fz->fz_order,
 					  fa->fa_tos,
 					  fa->fa_info) < 0) {
-				cb->args[3] = i;
+				++(fa->fa_nl_links);
+				++(f->fn_nl_links);
+				cb->args[4]=(long)(fa);
+				cb->args[3]=(long)(f);
 				return -1;
 			}
-		next:
-			i++;
 		}
 	}
-	cb->args[3] = i;
+	cb->args[4]=0;
+	cb->args[3]=0;
 	return skb->len;
 }
 
@@ -718,8 +767,7 @@
 	for (h=0; h < fz->fz_divisor; h++) {
 		if (h < s_h) continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0,
-			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			NLCB_ZERO(cb,3);
 		if (fz->fz_hash == NULL ||
 		    hlist_empty(&fz->fz_hash[h]))
 			continue;
@@ -734,25 +782,27 @@
 
 static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int m, s_m;
 	struct fn_zone *fz;
 	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
 
-	s_m = cb->args[1];
 	read_lock(&fib_hash_lock);
-	for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
-		if (m < s_m) continue;
-		if (m > s_m)
-			memset(&cb->args[2], 0,
-			       sizeof(cb->args) - 2*sizeof(cb->args[0]));
+	if(cb->args[1]){
+	    fz=(struct fn_zone*)(cb->args[1]);
+	}else{
+	    fz=table->fn_zone_list;
+	};
+	for (; fz; fz = fz->fz_next) {
+		if((long)(fz)!=cb->args[1]){
+		    NLCB_ZERO(cb,2);
+		};
 		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
-			cb->args[1] = m;
+			cb->args[1] = (long)(fz);
 			read_unlock(&fib_hash_lock);
 			return -1;
 		}
 	}
 	read_unlock(&fib_hash_lock);
-	cb->args[1] = m;
+	cb->args[1] = 0;
 	return skb->len;
 }
 
diff -urN linux-2.6.11/net/ipv4/fib_lookup.h linux-2.6.11-nl01/net/ipv4/fib_lookup.h
--- linux-2.6.11/net/ipv4/fib_lookup.h	2005-03-02 23:04:21.000000000 +0300
+++ linux-2.6.11-nl01/net/ipv4/fib_lookup.h	2005-03-08 12:51:36.000000000 +0300
@@ -12,6 +12,9 @@
 	u8			fa_type;
 	u8			fa_scope;
 	u8			fa_state;
+	
+	u32			fa_nl_links;
+	u8			fa_nl_dead;
 };
 
 #define FA_S_ACCESSED	0x01
diff -urN linux-2.6.11/net/ipv4/fib_rules.c linux-2.6.11-nl01/net/ipv4/fib_rules.c
--- linux-2.6.11/net/ipv4/fib_rules.c	2005-03-02 23:04:21.000000000 +0300
+++ linux-2.6.11-nl01/net/ipv4/fib_rules.c	2005-03-08 17:33:25.000000000 +0300
@@ -74,6 +74,9 @@
 #endif
 	char		r_ifname[IFNAMSIZ];
 	int		r_dead;
+	
+	u32		r_nl_links;
+	u8		r_nl_dead;
 };
 
 static struct fib_rule default_rule = {
@@ -101,6 +104,28 @@
 static struct fib_rule *fib_rules = &local_rule;
 static DEFINE_RWLOCK(fib_rules_lock);
 
+static void nl_rules_flush(void)
+{
+	struct fib_rule *r, **rp;
+
+	for(rp=&fib_rules;(r=*rp)!=NULL;rp=&r->r_next){
+	    next:;
+	    if(r->r_nl_dead&&(r->r_nl_links==0)){
+		write_lock_bh(&fib_rules_lock);
+		*rp = r->r_next;
+		r->r_dead = 1;
+		write_unlock_bh(&fib_rules_lock);
+		fib_rule_put(r);
+		r=*rp;
+		if(r==0){
+		    return;
+		}else{
+        	    goto next;
+		};
+	    };
+	};
+}
+
 int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct rtattr **rta = arg;
@@ -121,10 +146,18 @@
 		    (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
 		    (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
 		    (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
+			if(r->r_nl_dead==1){
+			    nl_rules_flush();
+			    break;
+			};
 			err = -EPERM;
 			if (r == &local_rule)
 				break;
-
+			if(r->r_nl_links!=0){
+			    r->r_nl_dead=1;
+			    nl_rules_flush();
+			    return 0;
+			};
 			write_lock_bh(&fib_rules_lock);
 			*rp = r->r_next;
 			r->r_dead = 1;
@@ -198,6 +231,8 @@
 	new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
 	new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len);
 	new_r->r_tos = rtm->rtm_tos;
+	new_r->r_nl_links=0;
+	new_r->r_nl_dead=0;
 #ifdef CONFIG_IP_ROUTE_FWMARK
 	if (rta[RTA_PROTOINFO-1])
 		memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
@@ -293,6 +328,9 @@
 	NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
 	read_lock(&fib_rules_lock);
 	for (r = fib_rules; r; r=r->r_next) {
+		if(r->r_nl_dead){
+		    continue;
+		};
 		if (((saddr^r->r_src) & r->r_srcmask) ||
 		    ((daddr^r->r_dst) & r->r_dstmask) ||
 		    (r->r_tos && r->r_tos != flp->fl4_tos) ||
@@ -414,19 +452,30 @@
 
 int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int idx;
-	int s_idx = cb->args[0];
 	struct fib_rule *r;
-
+	
+	if(cb->args[0]==(-1)){
+	    return 0;
+	};
 	read_lock(&fib_rules_lock);
-	for (r=fib_rules, idx=0; r; r = r->r_next, idx++) {
-		if (idx < s_idx)
-			continue;
-		if (inet_fill_rule(skb, r, cb) < 0)
-			break;
+	if(cb->args[0]){
+	    r=(struct fib_rule*)(cb->args[0]);
+	    --(r->r_nl_links);
+	}else{
+	    r=fib_rules;
+	};
+	for (; r; r = r->r_next) {
+		if(r->r_nl_dead){
+		    continue;
+		};
+		if (inet_fill_rule(skb, r, cb) < 0){
+			++(r->r_nl_links);
+			cb->args[0]=(long)(r);
+			return skb->len;
+		};
 	}
+	cb->args[0]=(-1);
 	read_unlock(&fib_rules_lock);
-	cb->args[0] = idx;
 
 	return skb->len;
 }

             reply	other threads:[~2005-04-12 12:31 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-04-12 12:31 itkes [this message]
2005-04-12 13:18 ` A bug in the Kernel? jamal
  -- strict thread matches above, loose matches on Subject: below --
2005-03-18 17:44 itkes
2005-03-18 17:59 ` Stephen Hemminger
2005-03-11 11:27 itkes
2005-03-11 12:31 ` jamal
2005-02-27 16:00 itkes
2005-02-27 16:26 ` Patrick McHardy
2005-02-27 19:33   ` David S. Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3558.193.232.114.87.1113309101.squirrel@mx \
    --to=itkes@imec.msu.ru \
    --cc=netdev@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).