netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Patch resubmission: RFC2863 operstatus for 2.5.49
@ 2002-11-26  9:22 Stefan Rompf
  2002-11-26 10:15 ` David S. Miller
  2002-11-29 12:56 ` Patch resubmission: RFC2863 operstatus for 2.5.49 jamal
  0 siblings, 2 replies; 17+ messages in thread
From: Stefan Rompf @ 2002-11-26  9:22 UTC (permalink / raw)
  To: davem; +Cc: netdev

[-- Attachment #1: Type: text/plain, Size: 177 bytes --]

Hi David,

I've updated my RFC2863 operstatus and userspace forwarding patch to
2.5.49. As discussion on netdev seems to be complete, I need some
comment from your side.

Stefan

[-- Attachment #2: patch-rfc2863-2.5.49 --]
[-- Type: text/plain, Size: 12659 bytes --]

diff -uprNX dontdiff linux-2.5.49/include/linux/netdevice.h linux-2.5.49-lw/include/linux/netdevice.h
--- linux-2.5.49/include/linux/netdevice.h	2002-11-22 22:41:08.000000000 +0100
+++ linux-2.5.49-lw/include/linux/netdevice.h	2002-11-25 22:52:29.000000000 +0100
@@ -204,10 +204,23 @@ enum netdev_state_t
 {
 	__LINK_STATE_XOFF=0,
 	__LINK_STATE_START,
-	__LINK_STATE_PRESENT,
+	__LINK_STATE_PRESENT_OBSOLETE,
 	__LINK_STATE_SCHED,
-	__LINK_STATE_NOCARRIER,
-	__LINK_STATE_RX_SCHED
+	__LINK_STATE_NOCARRIER_OBSOLETE,
+	__LINK_STATE_RX_SCHED,
+	__LINK_STATE_LINKWATCH_PENDING
+};
+
+
+/* Device operative state as per RFC2863 */
+enum netdev_operstate_t {
+	NETDEV_OPER_UP = 1,
+	NETDEV_OPER_DOWN, /* Obsoletes LINK_STATE_NOCARRIER */
+	NETDEV_OPER_TESTING,
+	NETDEV_OPER_UNKNOWN,
+	NETDEV_OPER_DORMANT,
+	NETDEV_OPER_NOTPRESENT, /* Obsoletes !LINK_STATE_PRESENT */
+	NETDEV_OPER_LOWERDOWN
 };
 
 
@@ -308,6 +321,10 @@ struct net_device
 					  * which this device is member of.
 					  */
 
+	/* Operative state, access semaphore */
+	rwlock_t                operstate_lock;
+	unsigned char           operstate;
+
 	/* Interface address info. */
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address	*/
@@ -631,34 +648,76 @@ static inline void dev_put(struct net_de
  * who is responsible for serialization of these calls.
  */
 
+#ifdef CONFIG_LINKWATCH
+extern void linkwatch_fire_event(struct net_device *dev);
+#endif
+
+static inline unsigned char netif_set_operstate(struct net_device *dev, unsigned char newstate)
+{
+	unsigned long flags;
+	unsigned char oldstate;
+
+	write_lock_irqsave(&dev->operstate_lock, flags);
+	oldstate = dev->operstate;
+	dev->operstate = newstate;
+	write_unlock_irqrestore(&dev->operstate_lock, flags);
+
+#ifdef CONFIG_LINKWATCH
+	if (oldstate != newstate) linkwatch_fire_event(dev);
+#endif
+
+	return oldstate;
+}
+
+static inline unsigned char netif_get_operstate(struct net_device *dev)
+{
+	unsigned long flags;
+	unsigned char state;
+
+	read_lock_irqsave(&dev->operstate_lock, flags);
+	state = dev->operstate;
+	read_unlock_irqrestore(&dev->operstate_lock, flags);
+
+	return state;
+}
+
 static inline int netif_carrier_ok(struct net_device *dev)
 {
-	return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
+	return netif_get_operstate(dev) != NETDEV_OPER_UP;
+}
+
+static inline int netif_operstate_to_iff_running(struct net_device *dev)
+{
+	unsigned char state = netif_get_operstate(dev);
+
+	return((1 << state) &
+	       (1 << NETDEV_OPER_UP | 1 << NETDEV_OPER_UNKNOWN));
 }
 
 extern void __netdev_watchdog_up(struct net_device *dev);
 
+
 static inline void netif_carrier_on(struct net_device *dev)
 {
-	clear_bit(__LINK_STATE_NOCARRIER, &dev->state);
+	netif_set_operstate(dev, NETDEV_OPER_UP);
 	if (netif_running(dev))
 		__netdev_watchdog_up(dev);
 }
 
 static inline void netif_carrier_off(struct net_device *dev)
 {
-	set_bit(__LINK_STATE_NOCARRIER, &dev->state);
+	netif_set_operstate(dev, NETDEV_OPER_DOWN);
 }
 
 /* Hot-plugging. */
 static inline int netif_device_present(struct net_device *dev)
 {
-	return test_bit(__LINK_STATE_PRESENT, &dev->state);
+	return netif_get_operstate(dev) != NETDEV_OPER_NOTPRESENT;
 }
 
 static inline void netif_device_detach(struct net_device *dev)
 {
-	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+	if (netif_set_operstate(dev, NETDEV_OPER_NOTPRESENT) != NETDEV_OPER_NOTPRESENT &&
 	    netif_running(dev)) {
 		netif_stop_queue(dev);
 	}
@@ -666,7 +725,7 @@ static inline void netif_device_detach(s
 
 static inline void netif_device_attach(struct net_device *dev)
 {
-	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+	if (netif_set_operstate(dev, NETDEV_OPER_UNKNOWN) == NETDEV_OPER_NOTPRESENT &&
 	    netif_running(dev)) {
 		netif_wake_queue(dev);
  		__netdev_watchdog_up(dev);
diff -uprNX dontdiff linux-2.5.49/net/Kconfig linux-2.5.49-lw/net/Kconfig
--- linux-2.5.49/net/Kconfig	2002-11-22 22:40:16.000000000 +0100
+++ linux-2.5.49-lw/net/Kconfig	2002-11-25 22:58:39.000000000 +0100
@@ -265,6 +265,20 @@ config ATM_MPOA
 config VLAN_8021Q
 	tristate "802.1Q VLAN Support"
 
+config LINKWATCH
+	bool "Device link state notification (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  When this option is enabled, the kernel will forward changes in the
+	  operative ("RUNNING") state of an interface via the netlink socket.
+	  This is most useful when running linux as a router.
+	
+	  Note that currently not many drivers support this, compliant ones
+	  can be found by watching the the RUNNING flag in ifconfig output
+	  that should follow operative state.
+	
+	  If unsure, say 'N'.
+
 config LLC
 	tristate "ANSI/IEEE 802.2 - aka LLC (IPX, Appletalk, Token Ring)"
 	help
diff -uprNX dontdiff linux-2.5.49/net/core/Makefile linux-2.5.49-lw/net/core/Makefile
--- linux-2.5.49/net/core/Makefile	2002-11-22 22:41:08.000000000 +0100
+++ linux-2.5.49-lw/net/core/Makefile	2002-11-25 22:52:33.000000000 +0100
@@ -22,4 +22,6 @@ obj-$(CONFIG_NET_RADIO) += wireless.o
 # Ugly. I wish all wireless drivers were moved in drivers/net/wireless
 obj-$(CONFIG_NET_PCMCIA_RADIO) += wireless.o
 
+obj-$(CONFIG_LINKWATCH) += link_watch.o
+
 include $(TOPDIR)/Rules.make
diff -uprNX dontdiff linux-2.5.49/net/core/dev.c linux-2.5.49-lw/net/core/dev.c
--- linux-2.5.49/net/core/dev.c	2002-11-22 22:40:43.000000000 +0100
+++ linux-2.5.49-lw/net/core/dev.c	2002-11-25 22:52:33.000000000 +0100
@@ -199,7 +199,6 @@ int netdev_fastroute;
 int netdev_fastroute_obstacles;
 #endif
 
-
 /*******************************************************************************
 
 		Protocol management and registration routines
@@ -262,6 +261,9 @@ void dev_add_pack(struct packet_type *pt
 	br_write_unlock_bh(BR_NETPROTO_LOCK);
 }
 
+#ifdef CONFIG_LINKWATCH
+void linkwatch_run_queue(void);
+#endif
 
 /**
  *	dev_remove_pack	 - remove packet handler
@@ -2018,7 +2020,7 @@ static int dev_ifsioc(struct ifreq *ifr,
 							 IFF_RUNNING)) | 
 					 (dev->gflags & (IFF_PROMISC |
 							 IFF_ALLMULTI));
-			if (netif_running(dev) && netif_carrier_ok(dev))
+			if (netif_running(dev) && netif_operstate_to_iff_running(dev))
 				ifr->ifr_flags |= IFF_RUNNING;
 			return 0;
 
@@ -2433,6 +2435,10 @@ int register_netdevice(struct net_device
 		goto out;
 #endif /* CONFIG_NET_DIVERT */
 
+	/* Initial operstate */
+	dev->operstate_lock = RW_LOCK_UNLOCKED;
+	dev->operstate = NETDEV_OPER_UNKNOWN;
+
 	dev->iflink = -1;
 
 	/* Init, if this function is available */
@@ -2458,13 +2464,6 @@ int register_netdevice(struct net_device
 	if (!dev->rebuild_header)
 		dev->rebuild_header = default_rebuild_header;
 
-	/*
-	 *	Default initial state at registry is that the
-	 *	device is present.
-	 */
-
-	set_bit(__LINK_STATE_PRESENT, &dev->state);
-
 	dev->next = NULL;
 	dev_init_scheduler(dev);
 	write_lock_bh(&dev_base_lock);
@@ -2642,6 +2641,17 @@ int unregister_netdevice(struct net_devi
 			/* Rebroadcast unregister notification */
 			notifier_call_chain(&netdev_chain,
 					    NETDEV_UNREGISTER, dev);
+
+#ifdef CONFIG_LINKWATCH
+			if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
+				/* We must not have linkwatch events pending
+				 * on unregister. If this happens, we simply
+				 * run the queue unscheduled, resulting in a
+				 * noop for this device
+				 */
+				linkwatch_run_queue();
+			}
+#endif
 		}
 		current->state = TASK_INTERRUPTIBLE;
 		schedule_timeout(HZ / 4);
@@ -2736,6 +2746,8 @@ static int __init net_dev_init(void)
 #ifdef CONFIG_NET_FASTROUTE
 		dev->fastpath_lock = RW_LOCK_UNLOCKED;
 #endif
+		dev->operstate_lock = RW_LOCK_UNLOCKED;
+		dev->operstate = NETDEV_OPER_UNKNOWN;
 		dev->xmit_lock_owner = -1;
 		dev->iflink = -1;
 		dev_hold(dev);
@@ -2768,7 +2780,6 @@ static int __init net_dev_init(void)
 			if (!dev->rebuild_header)
 				dev->rebuild_header = default_rebuild_header;
 			dev_init_scheduler(dev);
-			set_bit(__LINK_STATE_PRESENT, &dev->state);
 		}
 	}
 
@@ -2849,3 +2860,5 @@ static int net_run_sbin_hotplug(struct n
 	return call_usermodehelper(argv [0], argv, envp);
 }
 #endif
+
+
diff -uprNX dontdiff linux-2.5.49/net/core/link_watch.c linux-2.5.49-lw/net/core/link_watch.c
--- linux-2.5.49/net/core/link_watch.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.49-lw/net/core/link_watch.c	2002-11-25 22:52:33.000000000 +0100
@@ -0,0 +1,134 @@
+/*
+ * Linux network device link state notifaction
+ *
+ * Author:
+ *     Stefan Rompf <sux@isg.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/workqueue.h>
+#include <linux/config.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <asm/bitops.h>
+#include <asm/types.h>
+
+
+enum lw_bits {
+	LW_RUNNING = 0,
+	LW_SE_USED
+};
+
+static unsigned long linkwatch_flags = 0;
+static unsigned long linkwatch_nextevent = 0;
+
+static void linkwatch_event(void *dummy);
+static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL);
+
+static LIST_HEAD(lweventlist);
+static spinlock_t lweventlist_lock = SPIN_LOCK_UNLOCKED;
+
+struct lw_event {
+	struct list_head list;
+	struct net_device *dev;
+};
+
+/* Avoid kmalloc() for most systems */
+struct lw_event singleevent;
+
+/* Must be called with the rtnl semaphore held */
+void linkwatch_run_queue(void) {
+	LIST_HEAD(head);
+	struct list_head *n, *next;
+
+	spin_lock_irq(&lweventlist_lock);
+	list_splice_init(&lweventlist, &head);
+	spin_unlock_irq(&lweventlist_lock);
+
+	list_for_each_safe(n, next, &head) {
+		struct lw_event *event = list_entry(n, struct lw_event, list);
+		struct net_device *dev = event->dev;
+
+		if (event == &singleevent) {
+			clear_bit(LW_SE_USED, &linkwatch_flags);
+		} else {
+			kfree(event);
+		}
+
+		/* We are about to handle this device,
+		 * so new events can be accepted
+		 */
+		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+
+		if (dev->flags & IFF_UP) {
+			netdev_state_change(dev);
+		}
+
+		dev_put(dev);
+	}
+}       
+
+
+static void linkwatch_event(void *dummy)
+{
+	/* Limit the number of linkwatch events to one
+	 * per second so that a runaway driver does not
+	 * cause a storm of messages on the netlink
+	 * socket
+	 */	
+	linkwatch_nextevent = jiffies + HZ;
+	clear_bit(LW_RUNNING, &linkwatch_flags);
+
+	rtnl_lock();
+	linkwatch_run_queue();
+	rtnl_unlock();
+}
+
+
+void linkwatch_fire_event(struct net_device *dev)
+{
+	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
+		unsigned long flags;
+		struct lw_event *event;
+
+		if (test_and_set_bit(LW_SE_USED, &linkwatch_flags)) {
+			event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC);
+
+			if (unlikely(event == NULL)) {
+				clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+				return;
+			}
+		} else {
+			event = &singleevent;
+		}
+
+		dev_hold(dev);
+		event->dev = dev;
+
+		spin_lock_irqsave(&lweventlist_lock, flags);
+		list_add_tail(&event->list, &lweventlist);
+		spin_unlock_irqrestore(&lweventlist_lock, flags);
+
+		if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) {
+			unsigned long thisevent = jiffies;
+
+			if (thisevent >= linkwatch_nextevent) {
+				schedule_work(&linkwatch_work);
+			} else {
+				schedule_delayed_work(&linkwatch_work, linkwatch_nextevent - thisevent);
+			}
+		}
+	}
+}
+
diff -uprNX dontdiff linux-2.5.49/net/core/rtnetlink.c linux-2.5.49-lw/net/core/rtnetlink.c
--- linux-2.5.49/net/core/rtnetlink.c	2002-11-22 22:41:13.000000000 +0100
+++ linux-2.5.49-lw/net/core/rtnetlink.c	2002-11-25 22:52:33.000000000 +0100
@@ -165,7 +165,7 @@ static int rtnetlink_fill_ifinfo(struct 
 	r->ifi_flags = dev->flags;
 	r->ifi_change = change;
 
-	if (!netif_running(dev) || !netif_carrier_ok(dev))
+	if (!netif_running(dev) || !netif_operstate_to_iff_running(dev))
 		r->ifi_flags &= ~IFF_RUNNING;
 	else
 		r->ifi_flags |= IFF_RUNNING;
diff -uprNX dontdiff linux-2.5.49/net/netsyms.c linux-2.5.49-lw/net/netsyms.c
--- linux-2.5.49/net/netsyms.c	2002-11-22 22:40:39.000000000 +0100
+++ linux-2.5.49-lw/net/netsyms.c	2002-11-25 22:52:33.000000000 +0100
@@ -632,4 +632,8 @@ extern void wireless_send_event(struct n
 EXPORT_SYMBOL(wireless_send_event);
 #endif	/* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
 
+#ifdef CONFIG_LINKWATCH
+EXPORT_SYMBOL(linkwatch_fire_event);
+#endif
+
 #endif  /* CONFIG_NET */


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.49
  2002-11-26  9:22 Patch resubmission: RFC2863 operstatus for 2.5.49 Stefan Rompf
@ 2002-11-26 10:15 ` David S. Miller
  2002-11-26 15:36   ` Stefan Rompf
  2002-12-03 22:22   ` Patch resubmission: RFC2863 operstatus for 2.5.50 Stefan Rompf
  2002-11-29 12:56 ` Patch resubmission: RFC2863 operstatus for 2.5.49 jamal
  1 sibling, 2 replies; 17+ messages in thread
From: David S. Miller @ 2002-11-26 10:15 UTC (permalink / raw)
  To: srompf; +Cc: netdev


This locking below achieves nothing.

+	read_lock_irqsave(&dev->operstate_lock, flags);
+	state = dev->operstate;
+	read_unlock_irqrestore(&dev->operstate_lock, flags);

In fact, the other side, locking when setting this value, can
be done with a simple spinlock.  Probably something else in
the device struct can be reused.

I also don't think this should be conditional, either we want
it or we don't.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.49
  2002-11-26 10:15 ` David S. Miller
@ 2002-11-26 15:36   ` Stefan Rompf
  2002-12-03 22:22   ` Patch resubmission: RFC2863 operstatus for 2.5.50 Stefan Rompf
  1 sibling, 0 replies; 17+ messages in thread
From: Stefan Rompf @ 2002-11-26 15:36 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

Hi,

> This locking below achieves nothing.

Ok, so I was too cautious by locking read access to a one byte
structure. I'll change that and read additional documentation on SMP ;-)

> Probably something else in
> the device struct can be reused.

Right now, I don't see which. There are other spinlocks available in the
net_device structure, but they are used by the queuing code and we
should not give up the semantic that netif_set_operstate() can be called
from everywhere. One global spinlock may be acceptable for this special
case.

> I also don't think this should be conditional, either we want
> it or we don't.

The conditional stuff is inspired from my first 2.4 version, but I'm
happy to remove it.

Btw, can you also have a look the 2.4 backport of my link state
notification feature (posting available under
http://marc.theaimsgroup.com/?l=linux-netdev&m=103722967214290&w=2, just
one typo fixed in Configure.help since then). Is this stuff acceptable
for 2.4?

Cheers, Stefan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.49
  2002-11-26  9:22 Patch resubmission: RFC2863 operstatus for 2.5.49 Stefan Rompf
  2002-11-26 10:15 ` David S. Miller
@ 2002-11-29 12:56 ` jamal
  2002-12-03 23:04   ` Stefan Rompf
  1 sibling, 1 reply; 17+ messages in thread
From: jamal @ 2002-11-29 12:56 UTC (permalink / raw)
  To: Stefan Rompf; +Cc: davem, netdev



On Tue, 26 Nov 2002, Stefan Rompf wrote:

> Hi David,
>
> I've updated my RFC2863 operstatus and userspace forwarding patch to
> 2.5.49. As discussion on netdev seems to be complete, I need some
> comment from your side.
>

Stefan,
Just thought of something, it may be a little tricky but valuable and
i am not quiet sure if it should part of your patch: We probably need to
flush the qdiscs software queues; maybe even the DMA ring i.e simulate
admin down followed by admin up.
Thoughts?

cheers,
jamal

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-11-26 10:15 ` David S. Miller
  2002-11-26 15:36   ` Stefan Rompf
@ 2002-12-03 22:22   ` Stefan Rompf
  2002-12-04  0:52     ` Jeff Garzik
  2002-12-04 19:39     ` David S. Miller
  1 sibling, 2 replies; 17+ messages in thread
From: Stefan Rompf @ 2002-12-03 22:22 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

[-- Attachment #1: Type: text/plain, Size: 931 bytes --]

Hi David,

here is an updated version with the following changes:

-split the patch: First adds the userspace notification, the other
(depends on the first) RFC2863 semantics. I am aware that we are well
beyond feature freeze, and it makes the code more readable.
-simplified locking as suggested
-made the notification unconditional. We want it ;-)
-fixed a bug in RFC2863 netif_carrier_ok() emulation (was !=, should be
==)

Can you have a look?

Stefan


"David S. Miller" wrote:
> 
> This locking below achieves nothing.
> 
> +       read_lock_irqsave(&dev->operstate_lock, flags);
> +       state = dev->operstate;
> +       read_unlock_irqrestore(&dev->operstate_lock, flags);
> 
> In fact, the other side, locking when setting this value, can
> be done with a simple spinlock.  Probably something else in
> the device struct can be reused.
> 
> I also don't think this should be conditional, either we want
> it or we don't.

[-- Attachment #2: patch-lw-2.5.50 --]
[-- Type: text/plain, Size: 6507 bytes --]

diff -urN linux-2.5.50/include/linux/netdevice.h linux-2.5.50-lw/include/linux/netdevice.h
--- linux-2.5.50/include/linux/netdevice.h	2002-11-22 22:41:08.000000000 +0100
+++ linux-2.5.50-lw/include/linux/netdevice.h	2002-12-02 22:56:34.000000000 +0100
@@ -207,7 +207,8 @@
 	__LINK_STATE_PRESENT,
 	__LINK_STATE_SCHED,
 	__LINK_STATE_NOCARRIER,
-	__LINK_STATE_RX_SCHED
+	__LINK_STATE_RX_SCHED,
+	__LINK_STATE_LINKWATCH_PENDING
 };
 
 
@@ -631,6 +632,8 @@
  * who is responsible for serialization of these calls.
  */
 
+extern void linkwatch_fire_event(struct net_device *dev);
+
 static inline int netif_carrier_ok(struct net_device *dev)
 {
 	return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
@@ -640,14 +643,16 @@
 
 static inline void netif_carrier_on(struct net_device *dev)
 {
-	clear_bit(__LINK_STATE_NOCARRIER, &dev->state);
+	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
+		linkwatch_fire_event(dev);
 	if (netif_running(dev))
 		__netdev_watchdog_up(dev);
 }
 
 static inline void netif_carrier_off(struct net_device *dev)
 {
-	set_bit(__LINK_STATE_NOCARRIER, &dev->state);
+	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
+		linkwatch_fire_event(dev);
 }
 
 /* Hot-plugging. */
diff -urN linux-2.5.50/net/core/Makefile linux-2.5.50-lw/net/core/Makefile
--- linux-2.5.50/net/core/Makefile	2002-12-02 22:37:15.000000000 +0100
+++ linux-2.5.50-lw/net/core/Makefile	2002-12-02 23:16:18.000000000 +0100
@@ -12,7 +12,7 @@
 
 obj-$(CONFIG_FILTER) += filter.o
 
-obj-$(CONFIG_NET) += dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o
+obj-$(CONFIG_NET) += dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o link_watch.o
 
 obj-$(CONFIG_NETFILTER) += netfilter.o
 obj-$(CONFIG_NET_DIVERT) += dv.o
diff -urN linux-2.5.50/net/core/dev.c linux-2.5.50-lw/net/core/dev.c
--- linux-2.5.50/net/core/dev.c	2002-11-22 22:40:43.000000000 +0100
+++ linux-2.5.50-lw/net/core/dev.c	2002-12-02 23:08:47.000000000 +0100
@@ -262,6 +262,7 @@
 	br_write_unlock_bh(BR_NETPROTO_LOCK);
 }
 
+void linkwatch_run_queue(void);
 
 /**
  *	dev_remove_pack	 - remove packet handler
@@ -2642,6 +2643,15 @@
 			/* Rebroadcast unregister notification */
 			notifier_call_chain(&netdev_chain,
 					    NETDEV_UNREGISTER, dev);
+
+			if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
+				/* We must not have linkwatch events pending
+				 * on unregister. If this happens, we simply
+				 * run the queue unscheduled, resulting in a
+				 * noop for this device
+				 */
+				linkwatch_run_queue();
+			}
 		}
 		current->state = TASK_INTERRUPTIBLE;
 		schedule_timeout(HZ / 4);
diff -urN linux-2.5.50/net/core/link_watch.c linux-2.5.50-lw/net/core/link_watch.c
--- linux-2.5.50/net/core/link_watch.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.50-lw/net/core/link_watch.c	2002-12-02 22:35:00.000000000 +0100
@@ -0,0 +1,134 @@
+/*
+ * Linux network device link state notifaction
+ *
+ * Author:
+ *     Stefan Rompf <sux@loplof.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/workqueue.h>
+#include <linux/config.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <asm/bitops.h>
+#include <asm/types.h>
+
+
+enum lw_bits {
+	LW_RUNNING = 0,
+	LW_SE_USED
+};
+
+static unsigned long linkwatch_flags = 0;
+static unsigned long linkwatch_nextevent = 0;
+
+static void linkwatch_event(void *dummy);
+static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL);
+
+static LIST_HEAD(lweventlist);
+static spinlock_t lweventlist_lock = SPIN_LOCK_UNLOCKED;
+
+struct lw_event {
+	struct list_head list;
+	struct net_device *dev;
+};
+
+/* Avoid kmalloc() for most systems */
+static struct lw_event singleevent;
+
+/* Must be called with the rtnl semaphore held */
+void linkwatch_run_queue(void) {
+	LIST_HEAD(head);
+	struct list_head *n, *next;
+
+	spin_lock_irq(&lweventlist_lock);
+	list_splice_init(&lweventlist, &head);
+	spin_unlock_irq(&lweventlist_lock);
+
+	list_for_each_safe(n, next, &head) {
+		struct lw_event *event = list_entry(n, struct lw_event, list);
+		struct net_device *dev = event->dev;
+
+		if (event == &singleevent) {
+			clear_bit(LW_SE_USED, &linkwatch_flags);
+		} else {
+			kfree(event);
+		}
+
+		/* We are about to handle this device,
+		 * so new events can be accepted
+		 */
+		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+
+		if (dev->flags & IFF_UP) {
+			netdev_state_change(dev);
+		}
+
+		dev_put(dev);
+	}
+}       
+
+
+static void linkwatch_event(void *dummy)
+{
+	/* Limit the number of linkwatch events to one
+	 * per second so that a runaway driver does not
+	 * cause a storm of messages on the netlink
+	 * socket
+	 */	
+	linkwatch_nextevent = jiffies + HZ;
+	clear_bit(LW_RUNNING, &linkwatch_flags);
+
+	rtnl_lock();
+	linkwatch_run_queue();
+	rtnl_unlock();
+}
+
+
+void linkwatch_fire_event(struct net_device *dev)
+{
+	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
+		unsigned long flags;
+		struct lw_event *event;
+
+		if (test_and_set_bit(LW_SE_USED, &linkwatch_flags)) {
+			event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC);
+
+			if (unlikely(event == NULL)) {
+				clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+				return;
+			}
+		} else {
+			event = &singleevent;
+		}
+
+		dev_hold(dev);
+		event->dev = dev;
+
+		spin_lock_irqsave(&lweventlist_lock, flags);
+		list_add_tail(&event->list, &lweventlist);
+		spin_unlock_irqrestore(&lweventlist_lock, flags);
+
+		if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) {
+			unsigned long thisevent = jiffies;
+
+			if (thisevent >= linkwatch_nextevent) {
+				schedule_work(&linkwatch_work);
+			} else {
+				schedule_delayed_work(&linkwatch_work, linkwatch_nextevent - thisevent);
+			}
+		}
+	}
+}
+
diff -urN linux-2.5.50/net/netsyms.c linux-2.5.50-lw/net/netsyms.c
--- linux-2.5.50/net/netsyms.c	2002-11-22 22:40:39.000000000 +0100
+++ linux-2.5.50-lw/net/netsyms.c	2002-12-02 23:09:51.000000000 +0100
@@ -632,4 +632,6 @@
 EXPORT_SYMBOL(wireless_send_event);
 #endif	/* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
 
+EXPORT_SYMBOL(linkwatch_fire_event);
+
 #endif  /* CONFIG_NET */

[-- Attachment #3: patch-lw-2.5.50-rfc2863 --]
[-- Type: text/plain, Size: 5887 bytes --]

diff -urN linux-2.5.50-lw/include/linux/netdevice.h linux-2.5.50-lw-rfc2863/include/linux/netdevice.h
--- linux-2.5.50-lw/include/linux/netdevice.h	2002-12-02 22:56:34.000000000 +0100
+++ linux-2.5.50-lw-rfc2863/include/linux/netdevice.h	2002-12-02 22:45:14.000000000 +0100
@@ -204,14 +204,26 @@
 {
 	__LINK_STATE_XOFF=0,
 	__LINK_STATE_START,
-	__LINK_STATE_PRESENT,
+	__LINK_STATE_PRESENT_OBSOLETE,
 	__LINK_STATE_SCHED,
-	__LINK_STATE_NOCARRIER,
+	__LINK_STATE_NOCARRIER_OBSOLETE,
 	__LINK_STATE_RX_SCHED,
 	__LINK_STATE_LINKWATCH_PENDING
 };
 
 
+/* Device operative state as per RFC2863 */
+enum netdev_operstate_t {
+	NETDEV_OPER_UP = 1,
+	NETDEV_OPER_DOWN, /* Obsoletes LINK_STATE_NOCARRIER */
+	NETDEV_OPER_TESTING,
+	NETDEV_OPER_UNKNOWN,
+	NETDEV_OPER_DORMANT,
+	NETDEV_OPER_NOTPRESENT, /* Obsoletes !LINK_STATE_PRESENT */
+	NETDEV_OPER_LOWERDOWN
+};
+
+
 /*
  * This structure holds at boot time configured netdevice settings. They
  * are then used in the device probing. 
@@ -309,6 +321,10 @@
 					  * which this device is member of.
 					  */
 
+	/* Operative state, access semaphore */
+	spinlock_t              operstate_lock;
+	unsigned char           operstate;
+
 	/* Interface address info. */
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address	*/
@@ -634,36 +650,63 @@
 
 extern void linkwatch_fire_event(struct net_device *dev);
 
+static inline unsigned char netif_set_operstate(struct net_device *dev, unsigned char newstate)
+{
+	unsigned long flags;
+	unsigned char oldstate;
+
+	spin_lock_irqsave(&dev->operstate_lock, flags);
+	oldstate = dev->operstate;
+	dev->operstate = newstate;
+	spin_unlock_irqrestore(&dev->operstate_lock, flags);
+
+	if (oldstate != newstate) linkwatch_fire_event(dev);
+
+	return oldstate;
+}
+
+static inline unsigned char netif_get_operstate(struct net_device *dev)
+{
+	return(dev->operstate);
+}
+
 static inline int netif_carrier_ok(struct net_device *dev)
 {
-	return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
+	return netif_get_operstate(dev) == NETDEV_OPER_UP;
+}
+
+static inline int netif_operstate_to_iff_running(struct net_device *dev)
+{
+	unsigned char state = netif_get_operstate(dev);
+
+	return((1 << state) &
+	       (1 << NETDEV_OPER_UP | 1 << NETDEV_OPER_UNKNOWN));
 }
 
 extern void __netdev_watchdog_up(struct net_device *dev);
 
+
 static inline void netif_carrier_on(struct net_device *dev)
 {
-	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
-		linkwatch_fire_event(dev);
+	netif_set_operstate(dev, NETDEV_OPER_UP);
 	if (netif_running(dev))
 		__netdev_watchdog_up(dev);
 }
 
 static inline void netif_carrier_off(struct net_device *dev)
 {
-	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
-		linkwatch_fire_event(dev);
+	netif_set_operstate(dev, NETDEV_OPER_DOWN);
 }
 
 /* Hot-plugging. */
 static inline int netif_device_present(struct net_device *dev)
 {
-	return test_bit(__LINK_STATE_PRESENT, &dev->state);
+	return netif_get_operstate(dev) != NETDEV_OPER_NOTPRESENT;
 }
 
 static inline void netif_device_detach(struct net_device *dev)
 {
-	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+	if (netif_set_operstate(dev, NETDEV_OPER_NOTPRESENT) != NETDEV_OPER_NOTPRESENT &&
 	    netif_running(dev)) {
 		netif_stop_queue(dev);
 	}
@@ -671,7 +714,7 @@
 
 static inline void netif_device_attach(struct net_device *dev)
 {
-	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+	if (netif_set_operstate(dev, NETDEV_OPER_UNKNOWN) == NETDEV_OPER_NOTPRESENT &&
 	    netif_running(dev)) {
 		netif_wake_queue(dev);
  		__netdev_watchdog_up(dev);
diff -urN linux-2.5.50-lw/net/core/dev.c linux-2.5.50-lw-rfc2863/net/core/dev.c
--- linux-2.5.50-lw/net/core/dev.c	2002-12-02 23:08:47.000000000 +0100
+++ linux-2.5.50-lw-rfc2863/net/core/dev.c	2002-12-02 23:22:03.000000000 +0100
@@ -199,7 +199,6 @@
 int netdev_fastroute_obstacles;
 #endif
 
-
 /*******************************************************************************
 
 		Protocol management and registration routines
@@ -2019,7 +2018,7 @@
 							 IFF_RUNNING)) | 
 					 (dev->gflags & (IFF_PROMISC |
 							 IFF_ALLMULTI));
-			if (netif_running(dev) && netif_carrier_ok(dev))
+			if (netif_running(dev) && netif_operstate_to_iff_running(dev))
 				ifr->ifr_flags |= IFF_RUNNING;
 			return 0;
 
@@ -2434,6 +2433,10 @@
 		goto out;
 #endif /* CONFIG_NET_DIVERT */
 
+	/* Initial operstate */
+	spin_lock_init(&dev->operstate_lock);
+	dev->operstate = NETDEV_OPER_UNKNOWN;
+
 	dev->iflink = -1;
 
 	/* Init, if this function is available */
@@ -2459,13 +2462,6 @@
 	if (!dev->rebuild_header)
 		dev->rebuild_header = default_rebuild_header;
 
-	/*
-	 *	Default initial state at registry is that the
-	 *	device is present.
-	 */
-
-	set_bit(__LINK_STATE_PRESENT, &dev->state);
-
 	dev->next = NULL;
 	dev_init_scheduler(dev);
 	write_lock_bh(&dev_base_lock);
@@ -2746,6 +2742,8 @@
 #ifdef CONFIG_NET_FASTROUTE
 		dev->fastpath_lock = RW_LOCK_UNLOCKED;
 #endif
+		spin_lock_init(&dev->operstate_lock);
+		dev->operstate = NETDEV_OPER_UNKNOWN;
 		dev->xmit_lock_owner = -1;
 		dev->iflink = -1;
 		dev_hold(dev);
@@ -2778,7 +2776,6 @@
 			if (!dev->rebuild_header)
 				dev->rebuild_header = default_rebuild_header;
 			dev_init_scheduler(dev);
-			set_bit(__LINK_STATE_PRESENT, &dev->state);
 		}
 	}
 
diff -urN linux-2.5.50-lw/net/core/rtnetlink.c linux-2.5.50-lw-rfc2863/net/core/rtnetlink.c
--- linux-2.5.50-lw/net/core/rtnetlink.c	2002-11-22 22:41:13.000000000 +0100
+++ linux-2.5.50-lw-rfc2863/net/core/rtnetlink.c	2002-12-02 22:35:00.000000000 +0100
@@ -165,7 +165,7 @@
 	r->ifi_flags = dev->flags;
 	r->ifi_change = change;
 
-	if (!netif_running(dev) || !netif_carrier_ok(dev))
+	if (!netif_running(dev) || !netif_operstate_to_iff_running(dev))
 		r->ifi_flags &= ~IFF_RUNNING;
 	else
 		r->ifi_flags |= IFF_RUNNING;

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.49
  2002-11-29 12:56 ` Patch resubmission: RFC2863 operstatus for 2.5.49 jamal
@ 2002-12-03 23:04   ` Stefan Rompf
  2002-12-04 13:06     ` jamal
  0 siblings, 1 reply; 17+ messages in thread
From: Stefan Rompf @ 2002-12-03 23:04 UTC (permalink / raw)
  To: jamal; +Cc: davem, netdev

Hi Jamal,

> Just thought of something, it may be a little tricky but valuable and
> i am not quiet sure if it should part of your patch: We probably need to
> flush the qdiscs software queues; maybe even the DMA ring i.e simulate
> admin down followed by admin up.

dev_close() is doing quite a lot of stuff, so we should do nothing more
than flush the qdiscs when the link comes up. But is it really useful?
Normally, the queues are short anyway to keep latencies low, and fifty
additional packets don't hurt. I rather think about clearing neighbor
tables and the route cache whenever the operstatus goes down.

Anyway, implementation and usage of the notification should not mix up,
so it let it be a separate patch.

Stefan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-12-03 22:22   ` Patch resubmission: RFC2863 operstatus for 2.5.50 Stefan Rompf
@ 2002-12-04  0:52     ` Jeff Garzik
  2002-12-04  9:44       ` Stefan Rompf
  2002-12-04 13:11       ` jamal
  2002-12-04 19:39     ` David S. Miller
  1 sibling, 2 replies; 17+ messages in thread
From: Jeff Garzik @ 2002-12-04  0:52 UTC (permalink / raw)
  To: Stefan Rompf; +Cc: David S. Miller, netdev

Pardon my dumb question, but what parts of RFC2863 require kernel 
additions over and above your link state patch?

Your second patch I am less enthusiastic about than the first... :(

I wonder if userspace cannot figure out ifOperStatus from existing data 
we make available?

	Jeff

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-12-04  0:52     ` Jeff Garzik
@ 2002-12-04  9:44       ` Stefan Rompf
  2002-12-04 18:15         ` Jeff Garzik
  2002-12-04 13:11       ` jamal
  1 sibling, 1 reply; 17+ messages in thread
From: Stefan Rompf @ 2002-12-04  9:44 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: David S. Miller, netdev

Hi,

Jeff Garzik wrote:

> Pardon my dumb question, but what parts of RFC2863 require kernel
> additions over and above your link state patch?

the kernel does not know LOWERLAYERDOWN, TESTING, DORMANT, UNKNOWN. They
can be useful when drivers adopt to this scheme.

> Your second patch I am less enthusiastic about than the first... :(

Well, with your opinion I count two against two: I want it, Jamal has
proposed the semantics, and Alexey doesn't want to waste a single bit of
a netlink message for this.

What now?

Stefan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.49
  2002-12-03 23:04   ` Stefan Rompf
@ 2002-12-04 13:06     ` jamal
  0 siblings, 0 replies; 17+ messages in thread
From: jamal @ 2002-12-04 13:06 UTC (permalink / raw)
  To: Stefan Rompf; +Cc: davem, netdev



On Wed, 4 Dec 2002, Stefan Rompf wrote:

> Hi Jamal,
>
> > Just thought of something, it may be a little tricky but valuable and
> > i am not quiet sure if it should part of your patch: We probably need to
> > flush the qdiscs software queues; maybe even the DMA ring i.e simulate
> > admin down followed by admin up.
>
> dev_close() is doing quite a lot of stuff, so we should do nothing more
> than flush the qdiscs when the link comes up. But is it really useful?

Indeed it is, infact i have been bitten by buffered packets confusing
an upstream switch with buffered VRRP packets when someone stepped on a
cable that i later reconnected. Typically about 30 seconds
later an arp fixes the problem - if the queue had been properly flushed
thered be no problem.
I think maybe flushing all routes and neighbors pointing out that
device is also useful.

> Normally, the queues are short anyway to keep latencies low, and fifty
> additional packets don't hurt. I rather think about clearing neighbor
> tables and the route cache whenever the operstatus goes down.
>
> Anyway, implementation and usage of the notification should not mix up,
> so it let it be a separate patch.

Agreed; just queueing more work for you ;->

cheers,
jamal

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-12-04  0:52     ` Jeff Garzik
  2002-12-04  9:44       ` Stefan Rompf
@ 2002-12-04 13:11       ` jamal
  2002-12-04 17:42         ` Jeff Garzik
  1 sibling, 1 reply; 17+ messages in thread
From: jamal @ 2002-12-04 13:11 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Stefan Rompf, David S. Miller, netdev




On Tue, 3 Dec 2002, Jeff Garzik wrote:

> Pardon my dumb question, but what parts of RFC2863 require kernel
> additions over and above your link state patch?
>
> Your second patch I am less enthusiastic about than the first... :(
>
> I wonder if userspace cannot figure out ifOperStatus from existing data
> we make available?
>

Stefans curtrent patch makes the info available via netlink.
What dont you like about it Jeff? Take a quick look at RFC2863
and scan for IfAdminStatus and IfOperStatus. The modelling RFC2863
has is pretty good and thats what Stefan has followed (after we weeded
a few crappy pieces off the RFC; we discussed on netdev).

cheers,
jamal

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-12-04 13:11       ` jamal
@ 2002-12-04 17:42         ` Jeff Garzik
  2002-12-09 13:27           ` jamal
  0 siblings, 1 reply; 17+ messages in thread
From: Jeff Garzik @ 2002-12-04 17:42 UTC (permalink / raw)
  To: jamal; +Cc: Stefan Rompf, David S. Miller, netdev

jamal wrote:
> Stefans curtrent patch makes the info available via netlink.

He has two patches:  link change notification, and RFC2863 operstatus. 
I agree with the first one and support its inclusion; the second one I 
question its need.

(just for context, I am referring to message <3DED2EA9.D812C881@isg.de> 
dated Dec 3)


> What dont you like about it Jeff? Take a quick look at RFC2863
> and scan for IfAdminStatus and IfOperStatus. The modelling RFC2863
> has is pretty good and thats what Stefan has followed (after we weeded
> a few crappy pieces off the RFC; we discussed on netdev).

I had looked at ifAdminStatus and ifOperStatus before I posted :)

My argument is, _after_ Stefan's link state patch is applied, why do we 
need the additional patch?  [this question is meant to be delivered in 
an honest, not snide way...]

For ifAdminStatus, you have "up", "down", and "testing" states.  Since 
we have no concept of a testing state, if we eliminate that we have "up" 
and "down", two states we can obviously handle.

For ifOperStatus, we have "up", "down" and "testing", which are 
applicable (or not) to Linux as with ifAdminStatus.  Further we have 
states "dormant", "unknown", "notPresent", "lowerLayerDown".  I'll 
discuss each of these in detail.

"dormant" - not used in Stefan's patch, which I agree with.
"unknown" - only used in Stefan's patch before interface is first up'd, 
which is IMO inaccurate.  Accurate use of "up" and "down", to me, 
implies -no- use of "unknown" state.  Because as soon as we are 
initialized, all ethernet details are known, thus "down" is more 
applicable.  The Linux net stack's atomicity is such that leaking an 
"unknown" state would be a bug, too.
"notPresent" - analagous to Linux's netif_device_xxx, and Stefan's patch 
acknowledges this.  However, the use of netif_device_xxx in drivers is 
really only used when the hardware is suspended.  If hardware goes away, 
the interface goes away too, almost immediately.
"lowerLayerDown" - not used in Stefan's patch, which I agree with.

So, Stefan's 2nd patch really only adds "unknown" and "notPresent" 
states to current behavior -- and the applicability of those states to 
Linux is IMO questionable.

Questions/comments requested.

Regards,

	Jeff

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-12-04  9:44       ` Stefan Rompf
@ 2002-12-04 18:15         ` Jeff Garzik
  0 siblings, 0 replies; 17+ messages in thread
From: Jeff Garzik @ 2002-12-04 18:15 UTC (permalink / raw)
  To: Stefan Rompf; +Cc: David S. Miller, netdev

Stefan Rompf wrote:
>>Your second patch I am less enthusiastic about than the first... :(
> 
> 
> Well, with your opinion I count two against two: I want it, Jamal has
> proposed the semantics, and Alexey doesn't want to waste a single bit of
> a netlink message for this.


Well, for generic net stack issues, I weight Jamal and Alexey's opinion 
more than my own ;-)

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-12-03 22:22   ` Patch resubmission: RFC2863 operstatus for 2.5.50 Stefan Rompf
  2002-12-04  0:52     ` Jeff Garzik
@ 2002-12-04 19:39     ` David S. Miller
  1 sibling, 0 replies; 17+ messages in thread
From: David S. Miller @ 2002-12-04 19:39 UTC (permalink / raw)
  To: srompf; +Cc: netdev


I've applied your first patch to 2.5.x, I'm awaiting more discussion
wrt.  Jeff's comments on your second one :-)

Thanks.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-12-04 17:42         ` Jeff Garzik
@ 2002-12-09 13:27           ` jamal
  2002-12-09 23:10             ` Stefan Rompf
  2002-12-10  3:55             ` Jeff Garzik
  0 siblings, 2 replies; 17+ messages in thread
From: jamal @ 2002-12-09 13:27 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Stefan Rompf, David S. Miller, netdev




On Wed, 4 Dec 2002, Jeff Garzik wrote:

> jamal wrote:
> > Stefans curtrent patch makes the info available via netlink.
>
> He has two patches:  link change notification, and RFC2863 operstatus.
> I agree with the first one and support its inclusion; the second one I
> question its need.
>
> (just for context, I am referring to message <3DED2EA9.D812C881@isg.de>
> dated Dec 3)
>
>
> > What dont you like about it Jeff? Take a quick look at RFC2863
> > and scan for IfAdminStatus and IfOperStatus. The modelling RFC2863
> > has is pretty good and thats what Stefan has followed (after we weeded
> > a few crappy pieces off the RFC; we discussed on netdev).
>
> I had looked at ifAdminStatus and ifOperStatus before I posted :)
>
> My argument is, _after_ Stefan's link state patch is applied, why do we
> need the additional patch?  [this question is meant to be delivered in
> an honest, not snide way...]
>

I somehow deleted the original email he sent with the patches. What two
patches? I thought he had one which was a backport and another which
was for 2.5.x (sorry, i actually have seen those patches a few times so i
didnt bother reviewing anything); In any case, when you look at this stuff
think as well of devices that are software netddevices example VLANs or
PPP or some of the USB, Irda etc and you want the status properly
reflected (and some of that status may not make sense to ethernet for
example).

> For ifAdminStatus, you have "up", "down", and "testing" states.  Since
> we have no concept of a testing state, if we eliminate that we have "up"
> and "down", two states we can obviously handle.
>
> For ifOperStatus, we have "up", "down" and "testing", which are
> applicable (or not) to Linux as with ifAdminStatus.  Further we have
> states "dormant", "unknown", "notPresent", "lowerLayerDown".  I'll
> discuss each of these in detail.
>
> "dormant" - not used in Stefan's patch, which I agree with.
> "unknown" - only used in Stefan's patch before interface is first up'd,
> which is IMO inaccurate.  Accurate use of "up" and "down", to me,
> implies -no- use of "unknown" state.  Because as soon as we are
> initialized, all ethernet details are known, thus "down" is more
> applicable.  The Linux net stack's atomicity is such that leaking an
> "unknown" state would be a bug, too.
> "notPresent" - analagous to Linux's netif_device_xxx, and Stefan's patch
> acknowledges this.  However, the use of netif_device_xxx in drivers is
> really only used when the hardware is suspended.  If hardware goes away,
> the interface goes away too, almost immediately.
> "lowerLayerDown" - not used in Stefan's patch, which I agree with.
>
> So, Stefan's 2nd patch really only adds "unknown" and "notPresent"
> states to current behavior -- and the applicability of those states to
> Linux is IMO questionable.
>

If all he is adding are some enumerated types, theres no harm. I cant
remember the details of the discussions - but we had long winding
discussions on the different states.
Note you need both admin and operational status and physically thet may
mean different things (think PPP and ethernet for example). You also need
to properly reflect the status for all sorts of netdevices. As long as
those requirements are met, then all is good. The RFC is not the final
word but it draws from experiences people had for years doing this kind
of stuff - so it is a good idea to draw from their experiences (but ok to
ignore it if it sounds unrealistic).

cheers,
jamal

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-12-09 13:27           ` jamal
@ 2002-12-09 23:10             ` Stefan Rompf
  2002-12-10  3:55             ` Jeff Garzik
  1 sibling, 0 replies; 17+ messages in thread
From: Stefan Rompf @ 2002-12-09 23:10 UTC (permalink / raw)
  To: netdev; +Cc: jamal, Jeff Garzik, David S. Miller

Hi,

> > My argument is, _after_ Stefan's link state patch is applied, why do we
> > need the additional patch?  [this question is meant to be delivered in
> > an honest, not snide way...]
> >
> 
> I somehow deleted the original email he sent with the patches. What two
> patches? I thought he had one which was a backport and another which
> was for 2.5.x

I've splitted the patch into userspace notification and RFC2863 part to
separate the features clearly and get the discussion running again. That
seem to have worked ;-)

> (sorry, i actually have seen those patches a few times so i
> didnt bother reviewing anything); In any case, when you look at this stuff
> think as well of devices that are software netddevices example VLANs or
> PPP or some of the USB, Irda etc and you want the status properly
> reflected (and some of that status may not make sense to ethernet for
> example).

As an example, how do we flag a sleeping dial on demand device with the
current Linux semantics? Is it oper up, because it may be able to
transmit packets and should be considered, or is it oper down, simply as
no protocol has been negotiated. RFC2863 can provice a clear state:
Dormant.

Beside this, we currently have the situation that we can put a device
into admin up and not present, simply by calling netdev_carrier_on() and
netdev_detach(). I consider that broken, a device cannot be both removed
and ready.

I'm aware that I did not provide updates to drivers to use the new
states, but that's just the nature of infrastructure creating patches.
Also, I cannot forwarding the RFC2863 state to userspace as long as
Alexey refuses space in the netlink message for it.

Ok, IMHO the pro and contra arguments are said. So let's decide: Do we
want RFC2863 semantics (of course yes ;-) and David either accepts the
second part of the patch or I continue working on it, or do we just drop
that part?

Stefan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-12-09 13:27           ` jamal
  2002-12-09 23:10             ` Stefan Rompf
@ 2002-12-10  3:55             ` Jeff Garzik
  2002-12-12 13:21               ` jamal
  1 sibling, 1 reply; 17+ messages in thread
From: Jeff Garzik @ 2002-12-10  3:55 UTC (permalink / raw)
  To: jamal; +Cc: Stefan Rompf, David S. Miller, netdev

On Mon, Dec 09, 2002 at 08:27:39AM -0500, jamal wrote:
> On Wed, 4 Dec 2002, Jeff Garzik wrote:
> > For ifAdminStatus, you have "up", "down", and "testing" states.  Since
> > we have no concept of a testing state, if we eliminate that we have "up"
> > and "down", two states we can obviously handle.
> >
> > For ifOperStatus, we have "up", "down" and "testing", which are
> > applicable (or not) to Linux as with ifAdminStatus.  Further we have
> > states "dormant", "unknown", "notPresent", "lowerLayerDown".  I'll
> > discuss each of these in detail.
> >
> > "dormant" - not used in Stefan's patch, which I agree with.
> > "unknown" - only used in Stefan's patch before interface is first up'd,
> > which is IMO inaccurate.  Accurate use of "up" and "down", to me,
> > implies -no- use of "unknown" state.  Because as soon as we are
> > initialized, all ethernet details are known, thus "down" is more
> > applicable.  The Linux net stack's atomicity is such that leaking an
> > "unknown" state would be a bug, too.
> > "notPresent" - analagous to Linux's netif_device_xxx, and Stefan's patch
> > acknowledges this.  However, the use of netif_device_xxx in drivers is
> > really only used when the hardware is suspended.  If hardware goes away,
> > the interface goes away too, almost immediately.
> > "lowerLayerDown" - not used in Stefan's patch, which I agree with.
> >
> > So, Stefan's 2nd patch really only adds "unknown" and "notPresent"
> > states to current behavior -- and the applicability of those states to
> > Linux is IMO questionable.
> >
> 
> If all he is adding are some enumerated types, theres no harm. I cant
> remember the details of the discussions - but we had long winding
> discussions on the different states.

I would summarize his patch as adding variable to represent literally
ifOperStatus, along with a lock and apparatus to set this variable.

The value may be deduced, without having to literally track it.


> Note you need both admin and operational status and physically thet may
> mean different things (think PPP and ethernet for example). You also need
> to properly reflect the status for all sorts of netdevices. As long as
> those requirements are met, then all is good. The RFC is not the final
> word but it draws from experiences people had for years doing this kind
> of stuff - so it is a good idea to draw from their experiences (but ok to
> ignore it if it sounds unrealistic).

I think we do agree on interpretation as you describe it here.
And the linkwatch patch is now in Linux 2.5.51.  I just think the
further patch to "track literally" ifOperStatus is not needed.  However,
that is presented as an opinion and RFC, not a statement of fact :)

	Jeff

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Patch resubmission: RFC2863 operstatus for 2.5.50
  2002-12-10  3:55             ` Jeff Garzik
@ 2002-12-12 13:21               ` jamal
  0 siblings, 0 replies; 17+ messages in thread
From: jamal @ 2002-12-12 13:21 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Stefan Rompf, David S. Miller, netdev



On Mon, 9 Dec 2002, Jeff Garzik wrote:

> I would summarize his patch as adding variable to represent literally
> ifOperStatus, along with a lock and apparatus to set this variable.
>
> The value may be deduced, without having to literally track it.

If it can be deduced then no point in tracking it.

i think my shepherding of the patch is done at this point. I'll let you
take it from here.

BTW, I still think theres need to clean up the qdisc queues when a link
goes operationaly down.

cheers,
jamal

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2002-12-12 13:21 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-11-26  9:22 Patch resubmission: RFC2863 operstatus for 2.5.49 Stefan Rompf
2002-11-26 10:15 ` David S. Miller
2002-11-26 15:36   ` Stefan Rompf
2002-12-03 22:22   ` Patch resubmission: RFC2863 operstatus for 2.5.50 Stefan Rompf
2002-12-04  0:52     ` Jeff Garzik
2002-12-04  9:44       ` Stefan Rompf
2002-12-04 18:15         ` Jeff Garzik
2002-12-04 13:11       ` jamal
2002-12-04 17:42         ` Jeff Garzik
2002-12-09 13:27           ` jamal
2002-12-09 23:10             ` Stefan Rompf
2002-12-10  3:55             ` Jeff Garzik
2002-12-12 13:21               ` jamal
2002-12-04 19:39     ` David S. Miller
2002-11-29 12:56 ` Patch resubmission: RFC2863 operstatus for 2.5.49 jamal
2002-12-03 23:04   ` Stefan Rompf
2002-12-04 13:06     ` jamal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).