public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Petr Vandrovec <vandrove@vc.cvut.cz>
To: akpm@diego.com
Cc: linux-kernel@vger.kernel.org, bwindle@fint.org, acme@conectiva.com.br
Subject: Re: 2.5.46-bk3: BUG in skbuff.c:178
Date: Fri, 8 Nov 2002 23:02:15 +0100	[thread overview]
Message-ID: <20021108220215.GA2437@vana> (raw)
In-Reply-To: <6F45EB551A2@vcnet.vc.cvut.cz>

On Fri, Nov 08, 2002 at 09:33:24PM +0200, Petr Vandrovec wrote:
> On  8 Nov 02 at 12:01, Andrew Morton wrote:
> > > Single-CPU system, running 2.5.46-bk3. Whiling compiling bk4, and running
> > > a script that was pinging every host on my subnet (I was running arp -a
> > > to see what was in the arp table at the time), I hit this BUG.
> > 
> > I'd be suspecting the seq_file conversion in arp.c.  The read_lock_bh()
> > stuff in there looks, umm, unclear ;)
> 
> Yes, see my emails from 23th Oct, 25th Oct (2.5.44: Strange oopses from 
> userspace), from Nov 6th + Nov 7th: Preempt count check when leaving
> IRQ.
> 
> But while yesterday I had no idea, today I have one (it looks like that
> nobody else is going to fix it for me :-( ) :
> seq subsystem can call arp_seq_start/next/stop several times, but
> state->is_pneigh is set to 0 only once, by memset in arp_seq_open :-(
> 
> I think that arp_seq_start should do
> 
>   {
> +   struct arp_iter_state* state = seq->private;
> +   seq->is_pneigh = 0;
> +   seq->bucket = 0;
>     read_lock_bh(&arp_tbl.lock);
>     return *pos ? arp_get_bucket(seq, pos) : (void *)1;
>   }
> 
> and we can drop memset from arp_seq_open. I'll try it, and if it will
> survive my tests, I'll send real patch.  

It was not that trivial: arp was storing current position in three fields:
pos, bucket and is_pneigh - so any code seeking in /proc/net/arp just
returned random data, and eventually locked up box...

Patch below removes 'bucket' from arp_iter_state, and merges it to
the pos. It is based on assumption that there is no more than 16M of
entries in each bucket, and that NEIGH_HASHMASK + 1 + PNEIGH_HASHMASK + 1 < 127
(currently it is 48). As loff_t is 64bit even on i386, there is plenty
of space to grow, but it could require apps compiled with O_LARGEFILE,
so I decided to use only 31bit space.

I also removed __inline__ from neigh_get_bucket. This way all functions
were compiled by gcc-2.95.4 on i386 without local variables on stack...

Because of there is now only one entry in arp_iter_state, it is possible
to use seq->private directly instead of allocating memory for arp_iter_state.
Also whole lock obtaining in arp_seq_start could be greatly simplified,
but I'd like to hear your opinions whether merging pos + bucket together
in the way I did is way to go or not, before I'll dig into this more.

I tested code below here: box no more crashes, and I believe that
whole arp table is visible in /proc/net/arp.

					Best regards,
						Petr Vandrovec
						vandrove@vc.cvut.cz


--- linux-2.5.46-c985.dist/net/ipv4/arp.c	2002-11-08 21:44:01.000000000 +0100
+++ linux-2.5.46-c985/net/ipv4/arp.c	2002-11-08 22:46:44.000000000 +0100
@@ -1139,23 +1139,39 @@
 #endif /* CONFIG_AX25 */
 
 struct arp_iter_state {
-	int is_pneigh, bucket;
+	int is_pneigh;
 };
 
-static __inline__ struct neighbour *neigh_get_bucket(struct seq_file *seq,
+#define ARP_FIRST_NEIGH		(1)
+#define ARP_FIRST_PNEIGH	(ARP_FIRST_NEIGH + NEIGH_HASHMASK + 1)
+
+static inline unsigned int get_arp_pos(loff_t pos, unsigned int* idx) {
+	*idx = pos & 0x00FFFFFF;
+	return pos >> 24;
+}
+
+static inline unsigned int make_arp_pos(unsigned int bucket, unsigned int idx) {
+	return (bucket << 24) | idx;
+}
+
+static inline loff_t next_bucket(loff_t pos) {
+	return (pos + 0x00FFFFFF) & ~0x00FFFFFF;
+}
+
+static struct neighbour *neigh_get_bucket(struct seq_file *seq,
 						     loff_t *pos)
 {
 	struct neighbour *n = NULL;
-	struct arp_iter_state* state = seq->private;
-	loff_t l = *pos;
+	unsigned int l;
+	unsigned int bucket = get_arp_pos(*pos, &l) - ARP_FIRST_NEIGH;
 	int i;
 
-	for (; state->bucket <= NEIGH_HASHMASK; ++state->bucket)
-		for (i = 0, n = arp_tbl.hash_buckets[state->bucket]; n;
+	for (; bucket <= NEIGH_HASHMASK; ++bucket)
+		for (i = 0, n = arp_tbl.hash_buckets[bucket]; n;
 		     ++i, n = n->next)
 			/* Do not confuse users "arp -a" with magic entries */
 			if ((n->nud_state & ~NUD_NOARP) && !l--) {
-				*pos = i;
+				*pos = make_arp_pos(bucket + ARP_FIRST_NEIGH, i);
 				goto out;
 			}
 out:
@@ -1166,15 +1182,15 @@
 							 loff_t *pos)
 {
 	struct pneigh_entry *n = NULL;
-	struct arp_iter_state* state = seq->private;
-	loff_t l = *pos;
+	unsigned int l;
+	unsigned int bucket = get_arp_pos(*pos, &l) - ARP_FIRST_PNEIGH;
 	int i;
 
-	for (; state->bucket <= PNEIGH_HASHMASK; ++state->bucket)
-		for (i = 0, n = arp_tbl.phash_buckets[state->bucket]; n;
+	for (; bucket <= PNEIGH_HASHMASK; ++bucket)
+		for (i = 0, n = arp_tbl.phash_buckets[bucket]; n;
 		     ++i, n = n->next)
 			if (!l--) {
-				*pos = i;
+				*pos = make_arp_pos(bucket + ARP_FIRST_PNEIGH, i);
 				goto out;
 			}
 out:
@@ -1190,8 +1206,7 @@
 
 		read_unlock_bh(&arp_tbl.lock);
 		state->is_pneigh = 1;
-		state->bucket	 = 0;
-		*pos		 = 0;
+		*pos		 = make_arp_pos(ARP_FIRST_PNEIGH, 0);
 		rc = pneigh_get_bucket(seq, pos);
 	}
 	return rc;
@@ -1199,8 +1214,21 @@
 
 static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 {
+	struct arp_iter_state* state = seq->private;
+	unsigned int idx;
+	unsigned int bucket;
+	
+	state->is_pneigh = 0;
 	read_lock_bh(&arp_tbl.lock);
-	return *pos ? arp_get_bucket(seq, pos) : (void *)1;
+	bucket = get_arp_pos(*pos, &idx);
+	if (bucket < ARP_FIRST_NEIGH)
+		return (void *)1;
+	if (bucket < ARP_FIRST_PNEIGH) {
+		return arp_get_bucket(seq, pos);
+	}
+	read_unlock_bh(&arp_tbl.lock);
+	state->is_pneigh = 1;
+	return pneigh_get_bucket(seq, pos);
 }
 
 static void *arp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -1209,34 +1237,33 @@
 	struct arp_iter_state* state;
 
 	if (v == (void *)1) {
+		*pos = make_arp_pos(1, 0);
 		rc = arp_get_bucket(seq, pos);
 		goto out;
 	}
-
 	state = seq->private;
 	if (!state->is_pneigh) {
 		struct neighbour *n = v;
 
+		BUG_ON((*pos < make_arp_pos(ARP_FIRST_NEIGH, 0)) || (*pos >= make_arp_pos(ARP_FIRST_PNEIGH, 0)));
 		rc = n = n->next;
 		if (n)
 			goto out;
-		*pos = 0;
-		++state->bucket;
+		*pos = next_bucket(*pos);
 		rc = neigh_get_bucket(seq, pos);
 		if (rc)
 			goto out;
 		read_unlock_bh(&arp_tbl.lock);
 		state->is_pneigh = 1;
-		state->bucket	 = 0;
-		*pos		 = 0;
+		*pos		 = make_arp_pos(ARP_FIRST_PNEIGH, 0);
 		rc = pneigh_get_bucket(seq, pos);
 	} else {
 		struct pneigh_entry *pn = v;
 
+		BUG_ON(*pos < make_arp_pos(ARP_FIRST_PNEIGH, 0));
 		pn = pn->next;
 		if (!pn) {
-			++state->bucket;
-			*pos = 0;
+			*pos = next_bucket(*pos);
 			pn   = pneigh_get_bucket(seq, pos);
 		}
 		rc = pn;

  reply	other threads:[~2002-11-08 21:55 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-11-08 19:33 2.5.46-bk3: BUG in skbuff.c:178 Petr Vandrovec
2002-11-08 22:02 ` Petr Vandrovec [this message]
2002-11-10  4:18   ` Arnaldo Carvalho de Melo
2002-11-11  2:26     ` Petr Vandrovec
2002-11-11  2:42       ` Arnaldo Carvalho de Melo
  -- strict thread matches above, loose matches on Subject: below --
2002-11-08 19:42 Burton Windle
2002-11-08 20:01 ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20021108220215.GA2437@vana \
    --to=vandrove@vc.cvut.cz \
    --cc=acme@conectiva.com.br \
    --cc=akpm@diego.com \
    --cc=bwindle@fint.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox