Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH RFC] tun: export underlying socket
From: Michael S. Tsirkin @ 2009-09-11  4:59 UTC (permalink / raw)
  To: Paul Moore; +Cc: David Miller, netdev, herbert
In-Reply-To: <200909110017.27668.paul.moore@hp.com>

On Fri, Sep 11, 2009 at 12:17:27AM -0400, Paul Moore wrote:
> On Thursday 10 September 2009 08:59:29 am Michael S. Tsirkin wrote:
> > Tun device looks similar to a packet socket
> > in that both pass complete frames from/to userspace.
> > 
> > This patch fills in enough fields in the socket underlying tun driver
> > to support sendmsg/recvmsg operations, and exports access to this socket
> > to modules.
> > 
> > This way, code using raw sockets to inject packets
> > into a physical device, can support injecting
> > packets into host network stack almost without modification.
> > 
> > First user of this interface will be vhost virtualization
> > accelerator.
> 
> No comments on the code at this point - I'm just trying to understand the 
> intended user right now which I'm assuming is the vhost-net bits you sent 
> previously? 

Yes - these now use a raw socket; I'll add
something like:

	sock = tun_get_socket(file)
	if (!IS_ERR(sock)) {
		return sock;
	}


> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > 
> > This patch is on top of net-next master.
> > An alternative approach would be to add an ioctl to tun, to export the
> > underlying socket to userspace: a uniform way to work with a network
> > device and the host stack might be useful there, as well.
> > Kernel users could then do sockfd_lookup to get the socket.
> > I decided against it for now as it requires more code.
> > Please comment.
> > 
> >  drivers/net/tun.c      |   78
> >  +++++++++++++++++++++++++++++++++++++++++++---- include/linux/if_tun.h |  
> >  14 ++++++++
> >  2 files changed, 85 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > index 589a44a..76f5faa 100644
> > --- a/drivers/net/tun.c
> > +++ b/drivers/net/tun.c
> > @@ -151,6 +151,7 @@ static int tun_attach(struct tun_struct *tun, struct
> >  file *file) err = 0;
> >  	tfile->tun = tun;
> >  	tun->tfile = tfile;
> > +	tun->socket.file = file;
> >  	dev_hold(tun->dev);
> >  	sock_hold(tun->socket.sk);
> >  	atomic_inc(&tfile->count);
> > @@ -165,6 +166,7 @@ static void __tun_detach(struct tun_struct *tun)
> >  	/* Detach from net device */
> >  	netif_tx_lock_bh(tun->dev);
> >  	tun->tfile = NULL;
> > +	tun->socket.file = NULL;
> >  	netif_tx_unlock_bh(tun->dev);
> > 
> >  	/* Drop read queue */
> > @@ -750,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct
> >  tun_struct *tun, len = min_t(int, skb->len, len);
> > 
> >  	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
> > -	total += len;
> > +	total += skb->len;
> > 
> >  	tun->dev->stats.tx_packets++;
> >  	tun->dev->stats.tx_bytes += len;
> > @@ -758,12 +760,10 @@ static __inline__ ssize_t tun_put_user(struct
> >  tun_struct *tun, return total;
> >  }
> > 
> > -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec
> >  *iv, -			    unsigned long count, loff_t pos)
> > +static ssize_t tun_do_read(struct tun_struct *tun,
> > +			   struct kiocb *iocb, const struct iovec *iv,
> > +			   unsigned long count, int noblock)
> >  {
> > -	struct file *file = iocb->ki_filp;
> > -	struct tun_file *tfile = file->private_data;
> > -	struct tun_struct *tun = __tun_get(tfile);
> >  	DECLARE_WAITQUEUE(wait, current);
> >  	struct sk_buff *skb;
> >  	ssize_t len, ret = 0;
> > @@ -785,7 +785,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb,
> >  const struct iovec *iv,
> > 
> >  		/* Read frames from the queue */
> >  		if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
> > -			if (file->f_flags & O_NONBLOCK) {
> > +			if (noblock) {
> >  				ret = -EAGAIN;
> >  				break;
> >  			}
> > @@ -813,6 +813,21 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb,
> >  const struct iovec *iv, remove_wait_queue(&tun->socket.wait, &wait);
> > 
> >  out:
> > +	return ret;
> > +}
> > +
> > +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec
> >  *iv, +			    unsigned long count, loff_t pos)
> > +{
> > +	struct file *file = iocb->ki_filp;
> > +	struct tun_file *tfile = file->private_data;
> > +	struct tun_struct *tun = __tun_get(tfile);
> > +	ssize_t ret;
> > +
> > +	if (!tun)
> > +		return -EBADFD;
> > +	ret = tun_do_read(tun, iocb, iv, count, file->f_flags & O_NONBLOCK);
> > +	ret = min_t(ssize_t, ret, count);
> >  	tun_put(tun);
> >  	return ret;
> >  }
> > @@ -865,6 +880,37 @@ static void tun_sock_destruct(struct sock *sk)
> >  	free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev);
> >  }
> > 
> > +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
> > +		       struct msghdr *m, size_t total_len)
> > +{
> > +	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> > +	return tun_get_user(tun, m->msg_iov, total_len,
> > +			    m->msg_flags & MSG_DONTWAIT);
> > +}
> > +
> > +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
> > +		       struct msghdr *m, size_t total_len,
> > +		       int flags)
> > +{
> > +	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> > +	int ret;
> > +	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
> > +		return -EINVAL;
> > +	ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
> > +			  flags & MSG_DONTWAIT);
> > +	if (ret > total_len) {
> > +		m->msg_flags |= MSG_TRUNC;
> > +		ret = flags & MSG_TRUNC ? ret : total_len;
> > +	}
> > +	return ret;
> > +}
> > +
> > +/* Ops structure to mimic raw sockets with tun */
> > +static const struct proto_ops tun_socket_ops = {
> > +	.sendmsg = tun_sendmsg,
> > +	.recvmsg = tun_recvmsg,
> > +};
> > +
> >  static struct proto tun_proto = {
> >  	.name		= "tun",
> >  	.owner		= THIS_MODULE,
> > @@ -982,6 +1028,7 @@ static int tun_set_iff(struct net *net, struct file
> >  *file, struct ifreq *ifr) goto err_free_dev;
> > 
> >  		init_waitqueue_head(&tun->socket.wait);
> > +		tun->socket.ops = &tun_socket_ops;
> >  		sock_init_data(&tun->socket, sk);
> >  		sk->sk_write_space = tun_sock_write_space;
> >  		sk->sk_sndbuf = INT_MAX;
> > @@ -1483,6 +1530,23 @@ static void tun_cleanup(void)
> >  	rtnl_link_unregister(&tun_link_ops);
> >  }
> > 
> > +/* Get an underlying socket object from tun file.  Returns error unless
> >  file is + * attached to a device.  The returned object works like a packet
> >  socket, it + * can be used for sock_sendmsg/sock_recvmsg.  The caller is
> >  responsible for + * holding a reference to the file for as long as the
> >  socket is in use. */ +struct socket *tun_get_socket(struct file *file)
> > +{
> > +	struct tun_struct *tun;
> > +	if (file->f_op != &tun_fops)
> > +		return ERR_PTR(-EINVAL);
> > +	tun = tun_get(file);
> > +	if (!tun)
> > +		return ERR_PTR(-EBADFD);
> > +	tun_put(tun);
> > +	return &tun->socket;
> > +}
> > +EXPORT_SYMBOL_GPL(tun_get_socket);
> > +
> >  module_init(tun_init);
> >  module_exit(tun_cleanup);
> >  MODULE_DESCRIPTION(DRV_DESCRIPTION);
> > diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
> > index 3f5fd52..404abe0 100644
> > --- a/include/linux/if_tun.h
> > +++ b/include/linux/if_tun.h
> > @@ -86,4 +86,18 @@ struct tun_filter {
> >  	__u8   addr[0][ETH_ALEN];
> >  };
> > 
> > +#ifdef __KERNEL__
> > +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
> > +struct socket *tun_get_socket(struct file *);
> > +#else
> > +#include <linux/err.h>
> > +#include <linux/errno.h>
> > +struct file;
> > +struct socket;
> > +static inline struct socket *tun_get_socket(struct file *f)
> > +{
> > +	return ERR_PTR(-EINVAL);
> > +}
> > +#endif /* CONFIG_TUN */
> > +#endif /* __KERNEL__ */
> >  #endif /* __IF_TUN_H */
> > 
> 
> -- 
> paul moore
> linux @ hp

^ permalink raw reply

* Re: [PATCH RFC] tun: export underlying socket
From: Paul Moore @ 2009-09-11  4:17 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: David Miller, m.s.tsirkin, netdev, herbert
In-Reply-To: <20090910125929.GA32593@redhat.com>

On Thursday 10 September 2009 08:59:29 am Michael S. Tsirkin wrote:
> Tun device looks similar to a packet socket
> in that both pass complete frames from/to userspace.
> 
> This patch fills in enough fields in the socket underlying tun driver
> to support sendmsg/recvmsg operations, and exports access to this socket
> to modules.
> 
> This way, code using raw sockets to inject packets
> into a physical device, can support injecting
> packets into host network stack almost without modification.
> 
> First user of this interface will be vhost virtualization
> accelerator.

No comments on the code at this point - I'm just trying to understand the 
intended user right now which I'm assuming is the vhost-net bits you sent 
previously? 

> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> 
> This patch is on top of net-next master.
> An alternative approach would be to add an ioctl to tun, to export the
> underlying socket to userspace: a uniform way to work with a network
> device and the host stack might be useful there, as well.
> Kernel users could then do sockfd_lookup to get the socket.
> I decided against it for now as it requires more code.
> Please comment.
> 
>  drivers/net/tun.c      |   78
>  +++++++++++++++++++++++++++++++++++++++++++---- include/linux/if_tun.h |  
>  14 ++++++++
>  2 files changed, 85 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 589a44a..76f5faa 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -151,6 +151,7 @@ static int tun_attach(struct tun_struct *tun, struct
>  file *file) err = 0;
>  	tfile->tun = tun;
>  	tun->tfile = tfile;
> +	tun->socket.file = file;
>  	dev_hold(tun->dev);
>  	sock_hold(tun->socket.sk);
>  	atomic_inc(&tfile->count);
> @@ -165,6 +166,7 @@ static void __tun_detach(struct tun_struct *tun)
>  	/* Detach from net device */
>  	netif_tx_lock_bh(tun->dev);
>  	tun->tfile = NULL;
> +	tun->socket.file = NULL;
>  	netif_tx_unlock_bh(tun->dev);
> 
>  	/* Drop read queue */
> @@ -750,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct
>  tun_struct *tun, len = min_t(int, skb->len, len);
> 
>  	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
> -	total += len;
> +	total += skb->len;
> 
>  	tun->dev->stats.tx_packets++;
>  	tun->dev->stats.tx_bytes += len;
> @@ -758,12 +760,10 @@ static __inline__ ssize_t tun_put_user(struct
>  tun_struct *tun, return total;
>  }
> 
> -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec
>  *iv, -			    unsigned long count, loff_t pos)
> +static ssize_t tun_do_read(struct tun_struct *tun,
> +			   struct kiocb *iocb, const struct iovec *iv,
> +			   unsigned long count, int noblock)
>  {
> -	struct file *file = iocb->ki_filp;
> -	struct tun_file *tfile = file->private_data;
> -	struct tun_struct *tun = __tun_get(tfile);
>  	DECLARE_WAITQUEUE(wait, current);
>  	struct sk_buff *skb;
>  	ssize_t len, ret = 0;
> @@ -785,7 +785,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb,
>  const struct iovec *iv,
> 
>  		/* Read frames from the queue */
>  		if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
> -			if (file->f_flags & O_NONBLOCK) {
> +			if (noblock) {
>  				ret = -EAGAIN;
>  				break;
>  			}
> @@ -813,6 +813,21 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb,
>  const struct iovec *iv, remove_wait_queue(&tun->socket.wait, &wait);
> 
>  out:
> +	return ret;
> +}
> +
> +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec
>  *iv, +			    unsigned long count, loff_t pos)
> +{
> +	struct file *file = iocb->ki_filp;
> +	struct tun_file *tfile = file->private_data;
> +	struct tun_struct *tun = __tun_get(tfile);
> +	ssize_t ret;
> +
> +	if (!tun)
> +		return -EBADFD;
> +	ret = tun_do_read(tun, iocb, iv, count, file->f_flags & O_NONBLOCK);
> +	ret = min_t(ssize_t, ret, count);
>  	tun_put(tun);
>  	return ret;
>  }
> @@ -865,6 +880,37 @@ static void tun_sock_destruct(struct sock *sk)
>  	free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev);
>  }
> 
> +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
> +		       struct msghdr *m, size_t total_len)
> +{
> +	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> +	return tun_get_user(tun, m->msg_iov, total_len,
> +			    m->msg_flags & MSG_DONTWAIT);
> +}
> +
> +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
> +		       struct msghdr *m, size_t total_len,
> +		       int flags)
> +{
> +	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> +	int ret;
> +	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
> +		return -EINVAL;
> +	ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
> +			  flags & MSG_DONTWAIT);
> +	if (ret > total_len) {
> +		m->msg_flags |= MSG_TRUNC;
> +		ret = flags & MSG_TRUNC ? ret : total_len;
> +	}
> +	return ret;
> +}
> +
> +/* Ops structure to mimic raw sockets with tun */
> +static const struct proto_ops tun_socket_ops = {
> +	.sendmsg = tun_sendmsg,
> +	.recvmsg = tun_recvmsg,
> +};
> +
>  static struct proto tun_proto = {
>  	.name		= "tun",
>  	.owner		= THIS_MODULE,
> @@ -982,6 +1028,7 @@ static int tun_set_iff(struct net *net, struct file
>  *file, struct ifreq *ifr) goto err_free_dev;
> 
>  		init_waitqueue_head(&tun->socket.wait);
> +		tun->socket.ops = &tun_socket_ops;
>  		sock_init_data(&tun->socket, sk);
>  		sk->sk_write_space = tun_sock_write_space;
>  		sk->sk_sndbuf = INT_MAX;
> @@ -1483,6 +1530,23 @@ static void tun_cleanup(void)
>  	rtnl_link_unregister(&tun_link_ops);
>  }
> 
> +/* Get an underlying socket object from tun file.  Returns error unless
>  file is + * attached to a device.  The returned object works like a packet
>  socket, it + * can be used for sock_sendmsg/sock_recvmsg.  The caller is
>  responsible for + * holding a reference to the file for as long as the
>  socket is in use. */ +struct socket *tun_get_socket(struct file *file)
> +{
> +	struct tun_struct *tun;
> +	if (file->f_op != &tun_fops)
> +		return ERR_PTR(-EINVAL);
> +	tun = tun_get(file);
> +	if (!tun)
> +		return ERR_PTR(-EBADFD);
> +	tun_put(tun);
> +	return &tun->socket;
> +}
> +EXPORT_SYMBOL_GPL(tun_get_socket);
> +
>  module_init(tun_init);
>  module_exit(tun_cleanup);
>  MODULE_DESCRIPTION(DRV_DESCRIPTION);
> diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
> index 3f5fd52..404abe0 100644
> --- a/include/linux/if_tun.h
> +++ b/include/linux/if_tun.h
> @@ -86,4 +86,18 @@ struct tun_filter {
>  	__u8   addr[0][ETH_ALEN];
>  };
> 
> +#ifdef __KERNEL__
> +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
> +struct socket *tun_get_socket(struct file *);
> +#else
> +#include <linux/err.h>
> +#include <linux/errno.h>
> +struct file;
> +struct socket;
> +static inline struct socket *tun_get_socket(struct file *f)
> +{
> +	return ERR_PTR(-EINVAL);
> +}
> +#endif /* CONFIG_TUN */
> +#endif /* __KERNEL__ */
>  #endif /* __IF_TUN_H */
> 

-- 
paul moore
linux @ hp

^ permalink raw reply

* Re: [net-next PATCH] igb: Use Intel OUI for VF MAC addresses
From: Joe Perches @ 2009-09-11  3:02 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Jeff Kirsher, davem, netdev, gospo, Greg Rose, Don Skidmore
In-Reply-To: <20090910190703.25d14533@nehalam>

On Thu, 2009-09-10 at 19:07 -0700, Stephen Hemminger wrote:
> On Thu, 10 Sep 2009 18:48:27 -0700
> Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:
> > From: Gregory Rose <gregory.v.rose@intel.com>
> > This patch changes the default VF MAC address generation to use an Intel
> > Organizational Unit Identifier (OUI), instead of a fully randomized
> > Ethernet address.  This is to help prevent accidental MAC address
> > collisions.

I think this is not a very good idea.

> How can probability of collision be lower when the address space
> is smaller? If you are going to use Intel OUI, then you should constrain
> the selection to a portion of that space that is not being used
> by other hardware. I.e if you know Intel assigns addresses to their
> devices in ranges, choose a range that is not in use.

Some other possibilities might be:

o Apply for a Linux-specific OUI, maybe via the Linux Foundation,
  and assign random MAC addresses using only that OUI.
o Avoid assigning random MAC addresses that use the initial values
  of already assigned OUIs.

  http://standards.ieee.org/regauth/oui/oui.txt

  Unfortunately, this is not a complete list because several
  vendors have just picked a number at random.

  Nominally assigned leading bytes are:

  00 02 04 08 0C
  10 11 14 18 1C
  20 24 28
  30 34 3C
  40 44 48
  58
  60 64 68 6C
  70 74 78 7C
  80 88
  90 94 98 9C
  A0 A4 A8 AA AC
  B0 B4 B8 BC
  C0 C4 C8 CC
  D4 D8 DC
  E0 E4 E8 EC
  F0 F4

  Maybe just pick an unused specific leading byte.



^ permalink raw reply

* [patch] igb: Tidy vf initialisation
From: Simon Horman @ 2009-09-11  2:18 UTC (permalink / raw)
  To: e1000-devel, netdev
  Cc: John Ronciak, Bruce Allan, Jesse Brandeburg, Jeff Kirsher

This is purely cosmetic, but to my mind it makes the code a lot easier
to follow.

Signed-off-by: Simon Horman <horms@verge.net.au>

Index: net-next-2.6/drivers/net/igb/igb_main.c
===================================================================
--- net-next-2.6.orig/drivers/net/igb/igb_main.c	2009-09-09 15:04:34.000000000 +1000
+++ net-next-2.6/drivers/net/igb/igb_main.c	2009-09-09 16:06:59.000000000 +1000
@@ -1194,6 +1194,44 @@ static const struct net_device_ops igb_n
 #endif
 };
 
+/* Since IOV functionality isn't critical to base device function we can
+ * accept failure.  If it fails we don't allow IOV to be enabled */
+static void __devinit igb_probe_vf(struct pci_dev *pdev,
+				   struct igb_adapter *adapter)
+{
+#ifdef CONFIG_PCI_IOV
+	/* 82576 supports a maximum of 7 VFs in addition to the PF */
+	unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
+	int i;
+	unsigned char mac_addr[ETH_ALEN];
+
+	if (adapter->hw.mac.type != e1000_82576 || !num_vfs)
+		return;
+
+	adapter->vf_data = kcalloc(num_vfs, sizeof(struct vf_data_storage),
+				   GFP_KERNEL);
+	if (!adapter->vf_data) {
+		dev_err(&pdev->dev, "Could not allocate VF private data - "
+				    "IOV enable failed\n");
+		return;
+	}
+
+	if (pci_enable_sriov(pdev, num_vfs)) {
+		kfree(adapter->vf_data);
+		adapter->vf_data = NULL;
+		return;
+	}
+
+	adapter->vfs_allocated_count = num_vfs;
+	dev_info(&pdev->dev, "%d vfs allocated\n", num_vfs);
+	for (i = 0; i < adapter->vfs_allocated_count; i++) {
+		random_ether_addr(mac_addr);
+		igb_set_vf_mac(adapter, i, mac_addr);
+	}
+#endif
+	return;
+}
+
 /**
  * igb_probe - Device Initialization Routine
  * @pdev: PCI device information struct
@@ -1307,46 +1345,8 @@ static int __devinit igb_probe(struct pc
 	if (err)
 		goto err_sw_init;
 
-#ifdef CONFIG_PCI_IOV
-	/* since iov functionality isn't critical to base device function we
-	 * can accept failure.  If it fails we don't allow iov to be enabled */
-	if (hw->mac.type == e1000_82576) {
-		/* 82576 supports a maximum of 7 VFs in addition to the PF */
-		unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
-		int i;
-		unsigned char mac_addr[ETH_ALEN];
-
-		if (num_vfs) {
-			adapter->vf_data = kcalloc(num_vfs,
-						sizeof(struct vf_data_storage),
-						GFP_KERNEL);
-			if (!adapter->vf_data) {
-				dev_err(&pdev->dev,
-				        "Could not allocate VF private data - "
-					"IOV enable failed\n");
-			} else {
-				err = pci_enable_sriov(pdev, num_vfs);
-				if (!err) {
-					adapter->vfs_allocated_count = num_vfs;
-					dev_info(&pdev->dev,
-					         "%d vfs allocated\n",
-					         num_vfs);
-					for (i = 0;
-					     i < adapter->vfs_allocated_count;
-					     i++) {
-						random_ether_addr(mac_addr);
-						igb_set_vf_mac(adapter, i,
-						               mac_addr);
-					}
-				} else {
-					kfree(adapter->vf_data);
-					adapter->vf_data = NULL;
-				}
-			}
-		}
-	}
+	igb_probe_vf(pdev, adapter);
 
-#endif
 	/* setup the private structure */
 	err = igb_sw_init(adapter);
 	if (err)

------------------------------------------------------------------------------
Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day 
trial. Simplify your report design, integration and deployment - and focus on 
what you do best, core application coding. Discover what's new with 
Crystal Reports now.  http://p.sf.net/sfu/bobj-july

^ permalink raw reply

* [patch] e1000: Tidy up logic in e1000_enable_tx_pkt_filtering()
From: Simon Horman @ 2009-09-11  2:17 UTC (permalink / raw)
  To: e1000-devel, netdev
  Cc: John Ronciak, Bruce Allan, Jesse Brandeburg, Jeff Kirsher

This is purely cosmetic, but to my mind it makes the code a lot easier
to follow.

Signed-off-by: Simon Horman <horms@verge.net.au>

--- 

Lightly tested.

Index: net-next-2.6/drivers/net/e1000/e1000_hw.c
===================================================================
--- net-next-2.6.orig/drivers/net/e1000/e1000_hw.c	2009-08-31 17:44:07.000000000 +1000
+++ net-next-2.6/drivers/net/e1000/e1000_hw.c	2009-09-09 11:40:14.000000000 +1000
@@ -7604,31 +7604,33 @@ bool e1000_enable_tx_pkt_filtering(struc
 {
     /* called in init as well as watchdog timer functions */
 
-    s32 ret_val, checksum;
-    bool tx_filter = false;
+    s32 checksum;
+    bool tx_filter = true;
     struct e1000_host_mng_dhcp_cookie *hdr = &(hw->mng_cookie);
     u8 *buffer = (u8 *) &(hw->mng_cookie);
 
-    if (e1000_check_mng_mode(hw)) {
-        ret_val = e1000_mng_enable_host_if(hw);
-        if (ret_val == E1000_SUCCESS) {
-            ret_val = e1000_host_if_read_cookie(hw, buffer);
-            if (ret_val == E1000_SUCCESS) {
-                checksum = hdr->checksum;
-                hdr->checksum = 0;
-                if ((hdr->signature == E1000_IAMT_SIGNATURE) &&
-                    checksum == e1000_calculate_mng_checksum((char *)buffer,
-                                               E1000_MNG_DHCP_COOKIE_LENGTH)) {
-                    if (hdr->status &
-                        E1000_MNG_DHCP_COOKIE_STATUS_PARSING_SUPPORT)
-                        tx_filter = true;
-                } else
-                    tx_filter = true;
-            } else
-                tx_filter = true;
-        }
-    }
+    if (!e1000_check_mng_mode(hw))
+        goto out_false;
+
+    if (e1000_mng_enable_host_if(hw) != E1000_SUCCESS)
+        goto out_false;
+
+    if (e1000_host_if_read_cookie(hw, buffer) != E1000_SUCCESS)
+	goto out_true;
 
+    checksum = hdr->checksum;
+    hdr->checksum = 0;
+    if ((hdr->signature != E1000_IAMT_SIGNATURE) ||
+        checksum != e1000_calculate_mng_checksum((char *)buffer,
+                                   E1000_MNG_DHCP_COOKIE_LENGTH))
+	goto out_true;
+
+    if (hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING_SUPPORT)
+	goto out_true;
+
+out_false:
+    tx_filter = false;
+out_true:
     hw->tx_pkt_filtering = tx_filter;
     return tx_filter;
 }

------------------------------------------------------------------------------
Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day 
trial. Simplify your report design, integration and deployment - and focus on 
what you do best, core application coding. Discover what's new with 
Crystal Reports now.  http://p.sf.net/sfu/bobj-july

^ permalink raw reply

* Re: [net-next PATCH] igb: Use Intel OUI for VF MAC addresses
From: Stephen Hemminger @ 2009-09-11  2:07 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: davem, netdev, gospo, Greg Rose, Jeff Kirsher, Don Skidmore
In-Reply-To: <20090911014757.19631.66570.stgit@localhost.localdomain>

On Thu, 10 Sep 2009 18:48:27 -0700
Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:

> From: Gregory Rose <gregory.v.rose@intel.com>
> 
> This patch changes the default VF MAC address generation to use an Intel
> Organizational Unit Identifier (OUI), instead of a fully randomized
> Ethernet address.  This is to help prevent accidental MAC address
> collisions.

How can the probability of a collision be lower when the address space
is smaller? If you are going to use the Intel OUI, then you should constrain
the selection to a portion of that space that is not being used
by other hardware. I.e., if you know Intel assigns addresses to their
devices in ranges, choose a range that is not in use.

^ permalink raw reply

* [net-next PATCH] igb: Use Intel OUI for VF MAC addresses
From: Jeff Kirsher @ 2009-09-11  1:48 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Greg Rose, Jeff Kirsher, Don Skidmore

From: Gregory Rose <gregory.v.rose@intel.com>

This patch changes the default VF MAC address generation to use an Intel
Organizationally Unique Identifier (OUI), instead of a fully randomized
Ethernet address.  This is to help prevent accidental MAC address
collisions.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
---

 drivers/net/igb/igb.h      |    1 +
 drivers/net/igb/igb_main.c |    2 ++
 2 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h
index 7126fea..463d178 100644
--- a/drivers/net/igb/igb.h
+++ b/drivers/net/igb/igb.h
@@ -65,6 +65,7 @@ struct igb_adapter;
 #define IGB_MAX_VF_MC_ENTRIES              30
 #define IGB_MAX_VF_FUNCTIONS               8
 #define IGB_MAX_VFTA_ENTRIES               128
+#define OUI_LEN                            3
 
 struct vf_data_storage {
 	unsigned char vf_mac_addresses[ETH_ALEN];
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 943186b..290555c 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -1315,6 +1315,7 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 		unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
 		int i;
 		unsigned char mac_addr[ETH_ALEN];
+		unsigned char oui[OUI_LEN] = {0x02, 0xAA, 0x00};
 
 		if (num_vfs) {
 			adapter->vf_data = kcalloc(num_vfs,
@@ -1335,6 +1336,7 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 					     i < adapter->vfs_allocated_count;
 					     i++) {
 						random_ether_addr(mac_addr);
+						memcpy(mac_addr, oui, OUI_LEN);
 						igb_set_vf_mac(adapter, i,
 						               mac_addr);
 					}


^ permalink raw reply related

* Re: netfilter 00/31: netfilter 2.6.32 update
From: David Miller @ 2009-09-11  1:25 UTC (permalink / raw)
  To: kaber; +Cc: netdev, netfilter-devel
In-Reply-To: <20090910161142.31179.5256.sendpatchset@x2.localnet>

From: Patrick McHardy <kaber@trash.net>
Date: Thu, 10 Sep 2009 18:11:46 +0200 (MEST)

> Please apply or pull from:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6.git master

Pulled, thanks Patrick!

^ permalink raw reply

* Re: igb bandwidth allocation configuration
From: Simon Horman @ 2009-09-11  0:38 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: e1000-devel, netdev
In-Reply-To: <4AA8E939.6050208@trash.net>

On Thu, Sep 10, 2009 at 01:55:37PM +0200, Patrick McHardy wrote:
> Patrick McHardy wrote:
> > Simon Horman wrote:
> >>
> >> I have been looking into adding support the 82586's per-PF/VF
> >> bandwidth allocation to the igb driver. It seems that the trickiest
> >> part is working out how to expose things to user-space.
> >>
> >> ...
> >> Internally it seems that actually the limits are applied to HW Tx queues
> >> rather than directly VMs. There are 16 such queues. Accordingly it might
> >> be useful to design an interface to set limits per-queue using ethtool.
> >> But this would seem to also require exposing which queues are associated
> >> with which PF/VF.
> > 
> > Just an idea since I don't know much about this stuff:
> > 
> > Since we now have the mq packet scheduler, which exposes the device
> > queues as qdisc classes, how about adding driver-specific configuration
> > attributes that are passed to the driver by the mq scheduler? This
> > would allow to configure per-queue bandwidth limits using regular TC
> > commands and also use those limits without VFs for any kind of traffic.
> > Drivers not supporting this would refuse unsupported options.
> 
> Attached patch demonstrates the idea. Compile-tested only.
> 

Thanks, that seems like a pretty good idea to me.
I'll see if I can make it work.


^ permalink raw reply

* Re: [PATCH net-next-2.6] bonding: make ab_arp select active slaves as other modes
From: Jay Vosburgh @ 2009-09-11  0:32 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, bonding-devel, nicolas.2p.debian
In-Reply-To: <20090831210937.GA3152@psychotron.redhat.com>

Jiri Pirko <jpirko@redhat.com> wrote:

>When I was implementing primary_passive option (formely named primary_lazy) I've
>run into troubles with ab_arp. This is the only mode which is not using
>bond_select_active_slave() function to select active slave and instead it
>selects it itself. This seems to be not the right behaviour and it would be
>better to do it in bond_select_active_slave() for all cases. This patch makes
>this happen. Please review.

	Sorry for the delay in response; was out of the office.

	My first question is whether this affects the "current_arp_slave"
behavior, i.e., the round-robining of the ARP probes when no slaves are
active.  Is that something you checked?

	I'll give this a test tomorrow as well.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

>Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 7c0e0bd..6ebd88d 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -1093,15 +1093,8 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
> 			return NULL; /* still no slave, return NULL */
> 	}
>
>-	/*
>-	 * first try the primary link; if arping, a link must tx/rx
>-	 * traffic before it can be considered the curr_active_slave.
>-	 * also, we would skip slaves between the curr_active_slave
>-	 * and primary_slave that may be up and able to arp
>-	 */
> 	if ((bond->primary_slave) &&
>-	    (!bond->params.arp_interval) &&
>-	    (IS_UP(bond->primary_slave->dev))) {
>+	    bond->primary_slave->link == BOND_LINK_UP) {
> 		new_active = bond->primary_slave;
> 	}
>
>@@ -1109,15 +1102,14 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
> 	old_active = new_active;
>
> 	bond_for_each_slave_from(bond, new_active, i, old_active) {
>-		if (IS_UP(new_active->dev)) {
>-			if (new_active->link == BOND_LINK_UP) {
>-				return new_active;
>-			} else if (new_active->link == BOND_LINK_BACK) {
>-				/* link up, but waiting for stabilization */
>-				if (new_active->delay < mintime) {
>-					mintime = new_active->delay;
>-					bestslave = new_active;
>-				}
>+		if (new_active->link == BOND_LINK_UP) {
>+			return new_active;
>+		} else if (new_active->link == BOND_LINK_BACK &&
>+			   IS_UP(new_active->dev)) {
>+			/* link up, but waiting for stabilization */
>+			if (new_active->delay < mintime) {
>+				mintime = new_active->delay;
>+				bestslave = new_active;

	Is there a functional reason for rearranging this (i.e., did the
use of select_active_slave need this for some reason)?


> 			}
> 		}
> 	}
>@@ -2929,18 +2921,6 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
> 		}
> 	}
>
>-	read_lock(&bond->curr_slave_lock);
>-
>-	/*
>-	 * Trigger a commit if the primary option setting has changed.
>-	 */
>-	if (bond->primary_slave &&
>-	    (bond->primary_slave != bond->curr_active_slave) &&
>-	    (bond->primary_slave->link == BOND_LINK_UP))
>-		commit++;
>-
>-	read_unlock(&bond->curr_slave_lock);
>-
> 	return commit;
> }
>
>@@ -2961,90 +2941,58 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
> 			continue;
>
> 		case BOND_LINK_UP:
>-			write_lock_bh(&bond->curr_slave_lock);
>-
>-			if (!bond->curr_active_slave &&
>-			    time_before_eq(jiffies, dev_trans_start(slave->dev) +
>-					   delta_in_ticks)) {
>+			if ((!bond->curr_active_slave &&
>+			     time_before_eq(jiffies,
>+					    dev_trans_start(slave->dev) +
>+					    delta_in_ticks)) ||
>+			    bond->curr_active_slave != slave) {
> 				slave->link = BOND_LINK_UP;
>-				bond_change_active_slave(bond, slave);
> 				bond->current_arp_slave = NULL;
>
> 				pr_info(DRV_NAME
>-				       ": %s: %s is up and now the "
>-				       "active interface\n",
>-				       bond->dev->name, slave->dev->name);
>-
>-			} else if (bond->curr_active_slave != slave) {
>-				/* this slave has just come up but we
>-				 * already have a current slave; this can
>-				 * also happen if bond_enslave adds a new
>-				 * slave that is up while we are searching
>-				 * for a new slave
>-				 */
>-				slave->link = BOND_LINK_UP;
>-				bond_set_slave_inactive_flags(slave);
>-				bond->current_arp_slave = NULL;
>+					": %s: link status definitely "
>+					"up for interface %s.\n",
>+					bond->dev->name, slave->dev->name);
>
>-				pr_info(DRV_NAME
>-				       ": %s: backup interface %s is now up\n",
>-				       bond->dev->name, slave->dev->name);
>-			}
>+				if (!bond->curr_active_slave ||
>+				    (slave == bond->primary_slave))
>+					goto do_failover;
>
>-			write_unlock_bh(&bond->curr_slave_lock);
>+			}
>
>-			break;
>+			continue;
>
> 		case BOND_LINK_DOWN:
> 			if (slave->link_failure_count < UINT_MAX)
> 				slave->link_failure_count++;
>
> 			slave->link = BOND_LINK_DOWN;
>+			bond_set_slave_inactive_flags(slave);
>
>-			if (slave == bond->curr_active_slave) {
>-				pr_info(DRV_NAME
>-				       ": %s: link status down for active "
>-				       "interface %s, disabling it\n",
>-				       bond->dev->name, slave->dev->name);
>-
>-				bond_set_slave_inactive_flags(slave);
>-
>-				write_lock_bh(&bond->curr_slave_lock);
>-
>-				bond_select_active_slave(bond);
>-				if (bond->curr_active_slave)
>-					bond->curr_active_slave->jiffies =
>-						jiffies;
>-
>-				write_unlock_bh(&bond->curr_slave_lock);
>+			pr_info(DRV_NAME
>+				": %s: link status definitely down for "
>+				"interface %s, disabling it\n",
>+				bond->dev->name, slave->dev->name);
>
>+			if (slave == bond->curr_active_slave) {
> 				bond->current_arp_slave = NULL;
>-
>-			} else if (slave->state == BOND_STATE_BACKUP) {
>-				pr_info(DRV_NAME
>-				       ": %s: backup interface %s is now down\n",
>-				       bond->dev->name, slave->dev->name);
>-
>-				bond_set_slave_inactive_flags(slave);
>+				goto do_failover;
> 			}
>-			break;
>+
>+			continue;
>
> 		default:
> 			pr_err(DRV_NAME
> 			       ": %s: impossible: new_link %d on slave %s\n",
> 			       bond->dev->name, slave->new_link,
> 			       slave->dev->name);
>+			continue;
> 		}
>-	}
>
>-	/*
>-	 * No race with changes to primary via sysfs, as we hold rtnl.
>-	 */
>-	if (bond->primary_slave &&
>-	    (bond->primary_slave != bond->curr_active_slave) &&
>-	    (bond->primary_slave->link == BOND_LINK_UP)) {
>+do_failover:
>+		ASSERT_RTNL();
> 		write_lock_bh(&bond->curr_slave_lock);
>-		bond_change_active_slave(bond, bond->primary_slave);
>+		bond_select_active_slave(bond);
> 		write_unlock_bh(&bond->curr_slave_lock);
> 	}
>
>--
>To unsubscribe from this list: send the line "unsubscribe netdev" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html




^ permalink raw reply

* Re: [PATCH net-next-2.6 v2] bonding: introduce primary_passive option
From: Jay Vosburgh @ 2009-09-11  0:08 UTC (permalink / raw)
  To: =?ISO-8859-1?Q?Nicolas_de_Peslo=FCan?=
  Cc: Jiri Pirko, netdev, davem, bonding-devel
In-Reply-To: <4AA97855.8070301@free.fr>

Nicolas de Pesloüan <nicolas.2p.debian@free.fr> wrote:

>Jiri Pirko wrote:
>> (updated 2)
>>
>> In some cases it is not desirable to switch back to primary interface when
>> its link recovers and rather stay with currently active one. We need to avoid
>> packetloss as much as we can in some cases. This is solved by introducing
>> primary_passive option. Note that enslaved primary slave is set as current
>> active no matter what.
>>
>> This patch depends on the following one:
>> [net-next-2.6] bonding: make ab_arp select active slaves as other modes
>> http://patchwork.ozlabs.org/patch/32684/
>>
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>>
>> diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
>> index d5181ce..e5f2c31 100644
>> --- a/Documentation/networking/bonding.txt
>> +++ b/Documentation/networking/bonding.txt
>> @@ -614,6 +614,32 @@ primary
>>   	The primary option is only valid for active-backup mode.
>>  +primary_passive
>> +
>> +	Specifies the behavior of the current active slave when the primary was
>> +	down and comes back up.  This option is designed to prevent
>> +	flip-flopping between the primary slave and other slaves.  The possible
>> +	values and their respective effects are:
>> +
>> +	disabled or 0 (default)
>> +
>> +		The primary slave becomes the active slave whenever it comes
>> +		back up.
>> +
>> +	better or 1
>> +
>> +		The primary slave becomes the active slave when it comes back
>> +		up, if the speed and duplex of the primary slave is better
>> +		than the speed and duplex of the current active slave.
>> +
>> +	always or 2
>> +
>> +		The primary slave becomes the active slave only if the current
>> +		active slave fails and the primary slave is up.
>> +
>> +	When no slave are active, if the primary comes back up, it becomes the
>> +	active slave, regardless of the value of primary_return
>> +
>>  updelay
>
>My feeling is that using primary_passive=disabled/better/always is far
>less clear than primary_return=always/better/failure_only.
>
>The normal (current) behavior is to always return to primary if it is
>up. You want to add the ability to return to it only on failure of the
>current active slave and I suggested to add the ability to return to it
>if it is "better" than the current active slave.

	Since this has changed from a real option to a "modify behavior
of another option" option, I'd call it something along the lines of
"primary_reselect" with the "always / better / failure" set of choices.

	Also, if "better" isn't implemented, leave it out entirely
(don't define the label or option stuff).  In the future, then, it can
be added in a complete patch, rather than splitting it across two
patches.

>Hence the suggested name for the option and option values.
>
>>   	Specifies the time, in milliseconds, to wait before enabling a
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index a7e731f..193eca4 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -94,6 +94,7 @@ static int downdelay;
>>  static int use_carrier	= 1;
>>  static char *mode;
>>  static char *primary;
>> +static char *primary_passive;
>>  static char *lacp_rate;
>>  static char *ad_select;
>>  static char *xmit_hash_policy;
>> @@ -126,6 +127,13 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
>>  		       "6 for balance-alb");
>>  module_param(primary, charp, 0);
>>  MODULE_PARM_DESC(primary, "Primary network device to use");
>> +module_param(primary_passive, charp, 0);
>> +MODULE_PARM_DESC(primary_passive, "Do not set primary slave active "
>> +				  "once it comes up; "
>> +				  "0 for disabled (default), "
>> +				  "1 for on only if speed of primary is not "
>> +				  "better, "
>> +				  "2 for always on");
>
>You have a double negative assertion here : a "do not" option whose value
>is "disabled". For clarity, I suggest to have a "do" option whose value is
>"enabled".
>
>This is probably the reason why I suggested primary_return instead of
>primary_passive. primary_passive means "refuse to return to primary" and
>when disabled, it becomes "don't refuse to return to primary", which
>should be "accept to return to primary" instead :-)
>
>It might be seen as not being that important, but having an option whose
>name and values are easy to understand leads to an easy to use
>option... :-)
>
>>  module_param(lacp_rate, charp, 0);
>>  MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
>>  			    "(slow/fast)");
>> @@ -200,6 +208,13 @@ const struct bond_parm_tbl fail_over_mac_tbl[] = {
>>  {	NULL,			-1},
>>  };
>>  +const struct bond_parm_tbl pri_passive_tbl[] = {
>> +{	"disabled",		BOND_PRI_PASSIVE_DISABLED},
>> +{	"better",		BOND_PRI_PASSIVE_BETTER},
>> +{	"always",		BOND_PRI_PASSIVE_ALWAYS},
>> +{	NULL,			-1},
>> +};
>> +
>>  struct bond_parm_tbl ad_select_tbl[] = {
>>  {	"stable",	BOND_AD_STABLE},
>>  {	"bandwidth",	BOND_AD_BANDWIDTH},
>> @@ -1070,6 +1085,25 @@ out:
>>   }
>>  +static bool bond_should_loose_active(struct bonding *bond)

	I'm not sure what this function name means (beyond the
misspelling of "lose"); it's really "bond_should_change_active" as a
question, correct?

>> +{
>> +	struct slave *prim = bond->primary_slave;
>> +	struct slave *curr = bond->curr_active_slave;
>> +
>> +	if (!prim || !curr || curr->link != BOND_LINK_UP)
>> +		return true;
>> +	if (bond->force_primary) {
>> +		bond->force_primary = false;
>> +		return true;
>> +	}
>> +	if (bond->params.primary_passive == 1 &&
>
>You should use the constants you defined in bonding.h and used in pri_passive_tbl :
>
>if (bond->params.primary_passive == BOND_PRI_PASSIVE_BETTER &&
>
>> +	    (prim->speed < curr->speed ||
>> +	     (prim->speed == curr->speed && prim->duplex <= curr->duplex)))
>> +		return false;
>> +	if (bond->params.primary_passive == 2)

	Ah, ok, passive really is implemented; I just hunted for the
BETTER label.  So, yes, use the defined labels.

>You should use the constants you defined in bonding.h and used in pri_passive_tbl :
>
>if (bond->params.primary_passive == BOND_PRI_PASSIVE_ALWAYS)
>
>> +		return false;
>> +	return true;
>> +}
>>   /**
>>   * find_best_interface - select the best available slave to be the active one
>> @@ -1093,15 +1127,9 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
>>  			return NULL; /* still no slave, return NULL */
>>  	}
>>  -	/*
>> -	 * first try the primary link; if arping, a link must tx/rx
>> -	 * traffic before it can be considered the curr_active_slave.
>> -	 * also, we would skip slaves between the curr_active_slave
>> -	 * and primary_slave that may be up and able to arp
>> -	 */
>>  	if ((bond->primary_slave) &&
>> -	    (!bond->params.arp_interval) &&
>> -	    (IS_UP(bond->primary_slave->dev))) {
>> +	    bond->primary_slave->link == BOND_LINK_UP &&
>> +	    bond_should_loose_active(bond)) {
>>  		new_active = bond->primary_slave;
>>  	}
>>  @@ -1109,15 +1137,14 @@ static struct slave
>> *bond_find_best_slave(struct bonding *bond)
>>  	old_active = new_active;
>>   	bond_for_each_slave_from(bond, new_active, i, old_active) {
>> -		if (IS_UP(new_active->dev)) {
>> -			if (new_active->link == BOND_LINK_UP) {
>> -				return new_active;
>> -			} else if (new_active->link == BOND_LINK_BACK) {
>> -				/* link up, but waiting for stabilization */
>> -				if (new_active->delay < mintime) {
>> -					mintime = new_active->delay;
>> -					bestslave = new_active;
>> -				}
>> +		if (new_active->link == BOND_LINK_UP) {
>> +			return new_active;
>> +		} else if (new_active->link == BOND_LINK_BACK &&
>> +			   IS_UP(new_active->dev)) {
>> +			/* link up, but waiting for stabilization */
>> +			if (new_active->delay < mintime) {
>> +				mintime = new_active->delay;
>> +				bestslave = new_active;
>>  			}
>>  		}
>>  	}
>> @@ -1683,8 +1710,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
>>   	if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
>>  		/* if there is a primary slave, remember it */
>> -		if (strcmp(bond->params.primary, new_slave->dev->name) == 0)
>> +		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
>>  			bond->primary_slave = new_slave;
>> +			bond->force_primary = true;
>> +		}
>>  	}
>>   	write_lock_bh(&bond->curr_slave_lock);
>> @@ -2929,18 +2958,6 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
>>  		}
>>  	}
>>  -	read_lock(&bond->curr_slave_lock);
>> -
>> -	/*
>> -	 * Trigger a commit if the primary option setting has changed.
>> -	 */
>> -	if (bond->primary_slave &&
>> -	    (bond->primary_slave != bond->curr_active_slave) &&
>> -	    (bond->primary_slave->link == BOND_LINK_UP))
>> -		commit++;
>> -
>> -	read_unlock(&bond->curr_slave_lock);
>> -
>>  	return commit;
>>  }
>>  @@ -2961,90 +2978,58 @@ static void bond_ab_arp_commit(struct bonding
>> *bond, int delta_in_ticks)
>>  			continue;
>>   		case BOND_LINK_UP:
>> -			write_lock_bh(&bond->curr_slave_lock);
>> -
>> -			if (!bond->curr_active_slave &&
>> -			    time_before_eq(jiffies, dev_trans_start(slave->dev) +
>> -					   delta_in_ticks)) {
>> +			if ((!bond->curr_active_slave &&
>> +			     time_before_eq(jiffies,
>> +					    dev_trans_start(slave->dev) +
>> +					    delta_in_ticks)) ||
>> +			    bond->curr_active_slave != slave) {
>>  				slave->link = BOND_LINK_UP;
>> -				bond_change_active_slave(bond, slave);
>>  				bond->current_arp_slave = NULL;
>>   				pr_info(DRV_NAME
>> -				       ": %s: %s is up and now the "
>> -				       "active interface\n",
>> -				       bond->dev->name, slave->dev->name);
>> -
>> -			} else if (bond->curr_active_slave != slave) {
>> -				/* this slave has just come up but we
>> -				 * already have a current slave; this can
>> -				 * also happen if bond_enslave adds a new
>> -				 * slave that is up while we are searching
>> -				 * for a new slave
>> -				 */
>> -				slave->link = BOND_LINK_UP;
>> -				bond_set_slave_inactive_flags(slave);
>> -				bond->current_arp_slave = NULL;
>> +					": %s: link status definitely "
>> +					"up for interface %s.\n",
>> +					bond->dev->name, slave->dev->name);
>>  -				pr_info(DRV_NAME
>> -				       ": %s: backup interface %s is now up\n",
>> -				       bond->dev->name, slave->dev->name);
>> -			}
>> +				if (!bond->curr_active_slave ||
>> +				    (slave == bond->primary_slave))
>> +					goto do_failover;
>>  -			write_unlock_bh(&bond->curr_slave_lock);
>> +			}
>>  -			break;
>> +			continue;
>>   		case BOND_LINK_DOWN:
>>  			if (slave->link_failure_count < UINT_MAX)
>>  				slave->link_failure_count++;
>>   			slave->link = BOND_LINK_DOWN;
>> +			bond_set_slave_inactive_flags(slave);
>>  -			if (slave == bond->curr_active_slave) {
>> -				pr_info(DRV_NAME
>> -				       ": %s: link status down for active "
>> -				       "interface %s, disabling it\n",
>> -				       bond->dev->name, slave->dev->name);
>> -
>> -				bond_set_slave_inactive_flags(slave);
>> -
>> -				write_lock_bh(&bond->curr_slave_lock);
>> -
>> -				bond_select_active_slave(bond);
>> -				if (bond->curr_active_slave)
>> -					bond->curr_active_slave->jiffies =
>> -						jiffies;
>> -
>> -				write_unlock_bh(&bond->curr_slave_lock);
>> +			pr_info(DRV_NAME
>> +				": %s: link status definitely down for "
>> +				"interface %s, disabling it\n",
>> +				bond->dev->name, slave->dev->name);
>>  +			if (slave == bond->curr_active_slave) {
>>  				bond->current_arp_slave = NULL;
>> -
>> -			} else if (slave->state == BOND_STATE_BACKUP) {
>> -				pr_info(DRV_NAME
>> -				       ": %s: backup interface %s is now down\n",
>> -				       bond->dev->name, slave->dev->name);
>> -
>> -				bond_set_slave_inactive_flags(slave);
>> +				goto do_failover;
>>  			}
>> -			break;
>> +
>> +			continue;
>>   		default:
>>  			pr_err(DRV_NAME
>>  			       ": %s: impossible: new_link %d on slave %s\n",
>>  			       bond->dev->name, slave->new_link,
>>  			       slave->dev->name);
>> +			continue;
>>  		}
>> -	}
>>  -	/*
>> -	 * No race with changes to primary via sysfs, as we hold rtnl.
>> -	 */
>> -	if (bond->primary_slave &&
>> -	    (bond->primary_slave != bond->curr_active_slave) &&
>> -	    (bond->primary_slave->link == BOND_LINK_UP)) {
>> +do_failover:
>> +		ASSERT_RTNL();
>>  		write_lock_bh(&bond->curr_slave_lock);
>> -		bond_change_active_slave(bond, bond->primary_slave);
>> +		bond_select_active_slave(bond);
>>  		write_unlock_bh(&bond->curr_slave_lock);
>>  	}
>>  @@ -4695,7 +4680,7 @@ int bond_parse_parm(const char *buf, const struct
>> bond_parm_tbl *tbl)
>>   static int bond_check_params(struct bond_params *params)
>>  {
>> -	int arp_validate_value, fail_over_mac_value;
>> +	int arp_validate_value, fail_over_mac_value, primary_passive_value;
>>   	/*
>>  	 * Convert string parameters.
>> @@ -4994,6 +4979,20 @@ static int bond_check_params(struct bond_params *params)
>>  		primary = NULL;
>>  	}
>>  +	if (primary && primary_passive) {
>> +		primary_passive_value = bond_parse_parm(primary_passive,
>> +							pri_passive_tbl);
>> +		if (primary_passive_value == -1) {
>> +			pr_err(DRV_NAME
>> +			       ": Error: Invalid primary_passive \"%s\"\n",
>> +			       primary_passive ==
>> +					NULL ? "NULL" : primary_passive);
>> +			return -EINVAL;
>> +		}
>> +	} else {
>> +		primary_passive_value = BOND_PRI_PASSIVE_DISABLED;
>> +	}
>> +
>>  	if (fail_over_mac) {
>>  		fail_over_mac_value = bond_parse_parm(fail_over_mac,
>>  						      fail_over_mac_tbl);
>> @@ -5025,6 +5024,7 @@ static int bond_check_params(struct bond_params *params)
>>  	params->use_carrier = use_carrier;
>>  	params->lacp_fast = lacp_fast;
>>  	params->primary[0] = 0;
>> +	params->primary_passive = primary_passive_value;
>>  	params->fail_over_mac = fail_over_mac_value;
>>   	if (primary) {
>> diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
>> index 6044e12..84ce933 100644
>> --- a/drivers/net/bonding/bond_sysfs.c
>> +++ b/drivers/net/bonding/bond_sysfs.c
>> @@ -1212,6 +1212,59 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
>>  		   bonding_show_primary, bonding_store_primary);
>>   /*
>> + * Show and set the primary_passive flag.
>> + */
>> +static ssize_t bonding_show_primary_passive(struct device *d,
>> +					    struct device_attribute *attr,
>> +					    char *buf)
>> +{
>> +	struct bonding *bond = to_bond(d);
>> +
>> +	return sprintf(buf, "%s %d\n",
>> +		       pri_passive_tbl[bond->params.primary_passive].modename,
>> +		       bond->params.primary_passive);
>> +}
>> +
>> +static ssize_t bonding_store_primary_passive(struct device *d,
>> +					     struct device_attribute *attr,
>> +					     const char *buf, size_t count)
>> +{
>> +	int new_value, ret = count;
>> +	struct bonding *bond = to_bond(d);
>> +
>> +	if (!rtnl_trylock())
>> +		return restart_syscall();
>> +
>> +	new_value = bond_parse_parm(buf, pri_passive_tbl);
>> +	if (new_value < 0)  {
>> +		pr_err(DRV_NAME
>> +		       ": %s: Ignoring invalid primary_passive value %.*s.\n",
>> +		       bond->dev->name,
>> +		       (int) strlen(buf) - 1, buf);
>> +		ret = -EINVAL;
>> +		goto out;
>> +	} else {
>> +		bond->params.primary_passive = new_value;
>> +		pr_info(DRV_NAME ": %s: setting primary_passive to %s (%d).\n",
>> +		       bond->dev->name, pri_passive_tbl[new_value].modename,
>> +		       new_value);
>> +		if (new_value == 0 || new_value == 1) {

	Magic numbers again, but this test shouldn't be necessary.  The
new_value is known to be valid if bond_parse_parm didn't return failure.

>> +			bond->force_primary = true;
>> +			read_lock(&bond->lock);
>> +			write_lock_bh(&bond->curr_slave_lock);
>> +			bond_select_active_slave(bond);
>> +			write_unlock_bh(&bond->curr_slave_lock);
>> +			read_unlock(&bond->lock);
>> +		}
>> +	}
>> +out:
>> +	rtnl_unlock();
>> +	return ret;
>> +}
>> +static DEVICE_ATTR(primary_passive, S_IRUGO | S_IWUSR,
>> +		   bonding_show_primary_passive, bonding_store_primary_passive);
>> +
>> +/*
>>   * Show and set the use_carrier flag.
>>   */
>>  static ssize_t bonding_show_carrier(struct device *d,
>> @@ -1500,6 +1553,7 @@ static struct attribute *per_bond_attrs[] = {
>>  	&dev_attr_num_unsol_na.attr,
>>  	&dev_attr_miimon.attr,
>>  	&dev_attr_primary.attr,
>> +	&dev_attr_primary_passive.attr,
>>  	&dev_attr_use_carrier.attr,
>>  	&dev_attr_active_slave.attr,
>>  	&dev_attr_mii_status.attr,
>> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>> index 6824771..9a9e0c7 100644
>> --- a/drivers/net/bonding/bonding.h
>> +++ b/drivers/net/bonding/bonding.h
>> @@ -131,6 +131,7 @@ struct bond_params {
>>  	int lacp_fast;
>>  	int ad_select;
>>  	char primary[IFNAMSIZ];
>> +	int primary_passive;
>>  	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
>>  };
>>  @@ -190,6 +191,7 @@ struct bonding {
>>  	struct   slave *curr_active_slave;
>>  	struct   slave *current_arp_slave;
>>  	struct   slave *primary_slave;
>> +	bool     force_primary;
>>  	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
>>  	rwlock_t lock;
>>  	rwlock_t curr_slave_lock;
>> @@ -258,6 +260,10 @@ static inline bool bond_is_lb(const struct bonding *bond)
>>  		|| bond->params.mode == BOND_MODE_ALB;
>>  }
>>  +#define BOND_PRI_PASSIVE_DISABLED	0
>> +#define BOND_PRI_PASSIVE_BETTER		1
>> +#define BOND_PRI_PASSIVE_ALWAYS		2
>> +
>>  #define BOND_FOM_NONE			0
>>  #define BOND_FOM_ACTIVE			1
>>  #define BOND_FOM_FOLLOW			2
>> @@ -348,6 +354,7 @@ extern const struct bond_parm_tbl bond_mode_tbl[];
>>  extern const struct bond_parm_tbl xmit_hashtype_tbl[];
>>  extern const struct bond_parm_tbl arp_validate_tbl[];
>>  extern const struct bond_parm_tbl fail_over_mac_tbl[];
>> +extern const struct bond_parm_tbl pri_passive_tbl[];
>>  extern struct bond_parm_tbl ad_select_tbl[];
>>   #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
>>
>
>	Nicolas.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply

* [PATCH -next] isdn: fix netjet/isdnhdlc build errors
From: Randy Dunlap @ 2009-09-10 22:15 UTC (permalink / raw)
  To: netdev; +Cc: davem, Karsten Keil

From: Randy Dunlap <randy.dunlap@oracle.com>

The netjet driver uses isdnhdlc interfaces (as does another ISDN
driver), so move that driver source file into the top-level
drivers/isdn/ directory and make it available to any ISDN drivers.

Fixes these build errors:

ERROR: "isdnhdlc_decode" [drivers/isdn/hardware/mISDN/netjet.ko] undefined!
ERROR: "isdnhdlc_encode" [drivers/isdn/hardware/mISDN/netjet.ko] undefined!
ERROR: "isdnhdlc_out_init" [drivers/isdn/hardware/mISDN/netjet.ko] undefined!
ERROR: "isdnhdlc_rcv_init" [drivers/isdn/hardware/mISDN/netjet.ko] undefined!

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Karsten Keil <isdn@linux-pingi.de>
---
 drivers/isdn/Kconfig        |    5 
 drivers/isdn/Makefile       |    2 
 drivers/isdn/i4l/Kconfig    |    6 
 drivers/isdn/i4l/Makefile   |    1 
 drivers/isdn/i4l/isdnhdlc.c |  630 ----------------------------------
 drivers/isdn/isdnhdlc.c     |  630 ++++++++++++++++++++++++++++++++++
 6 files changed, 637 insertions(+), 637 deletions(-)

--- linux-next-20090909.orig/drivers/isdn/Makefile
+++ linux-next-20090909/drivers/isdn/Makefile
@@ -15,3 +15,5 @@ obj-$(CONFIG_ISDN_DRV_LOOP)		+= isdnloop
 obj-$(CONFIG_ISDN_DRV_ACT2000)		+= act2000/
 obj-$(CONFIG_HYSDN)			+= hysdn/
 obj-$(CONFIG_ISDN_DRV_GIGASET)		+= gigaset/
+
+obj-$(CONFIG_ISDN_HDLC)		+= isdnhdlc.o
--- linux-next-20090909.orig/drivers/isdn/i4l/Makefile
+++ linux-next-20090909/drivers/isdn/i4l/Makefile
@@ -4,7 +4,6 @@
 
 obj-$(CONFIG_ISDN_I4L)		+= isdn.o
 obj-$(CONFIG_ISDN_PPP_BSDCOMP)	+= isdn_bsdcomp.o
-obj-$(CONFIG_ISDN_HDLC)		+= isdnhdlc.o
 
 # Multipart objects.
 
--- linux-next-20090909.orig/drivers/isdn/i4l/isdnhdlc.c
+++ /dev/null
@@ -1,630 +0,0 @@
-/*
- * isdnhdlc.c  --  General purpose ISDN HDLC decoder.
- *
- * Copyright (C)
- *	2009	Karsten Keil		<keil@b1-systems.de>
- *	2002	Wolfgang Mües		<wolfgang@iksw-muees.de>
- *	2001	Frode Isaksen		<fisaksen@bewan.com>
- *      2001	Kai Germaschewski	<kai.germaschewski@gmx.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/crc-ccitt.h>
-#include <linux/isdn/hdlc.h>
-#include <linux/bitrev.h>
-
-/*-------------------------------------------------------------------*/
-
-MODULE_AUTHOR("Wolfgang Mües <wolfgang@iksw-muees.de>, "
-	      "Frode Isaksen <fisaksen@bewan.com>, "
-	      "Kai Germaschewski <kai.germaschewski@gmx.de>");
-MODULE_DESCRIPTION("General purpose ISDN HDLC decoder");
-MODULE_LICENSE("GPL");
-
-/*-------------------------------------------------------------------*/
-
-enum {
-	HDLC_FAST_IDLE, HDLC_GET_FLAG_B0, HDLC_GETFLAG_B1A6, HDLC_GETFLAG_B7,
-	HDLC_GET_DATA, HDLC_FAST_FLAG
-};
-
-enum {
-	HDLC_SEND_DATA, HDLC_SEND_CRC1, HDLC_SEND_FAST_FLAG,
-	HDLC_SEND_FIRST_FLAG, HDLC_SEND_CRC2, HDLC_SEND_CLOSING_FLAG,
-	HDLC_SEND_IDLE1, HDLC_SEND_FAST_IDLE, HDLC_SENDFLAG_B0,
-	HDLC_SENDFLAG_B1A6, HDLC_SENDFLAG_B7, STOPPED, HDLC_SENDFLAG_ONE
-};
-
-void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features)
-{
-	memset(hdlc, 0, sizeof(struct isdnhdlc_vars));
-	hdlc->state = HDLC_GET_DATA;
-	if (features & HDLC_56KBIT)
-		hdlc->do_adapt56 = 1;
-	if (features & HDLC_BITREVERSE)
-		hdlc->do_bitreverse = 1;
-}
-EXPORT_SYMBOL(isdnhdlc_out_init);
-
-void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features)
-{
-	memset(hdlc, 0, sizeof(struct isdnhdlc_vars));
-	if (features & HDLC_DCHANNEL) {
-		hdlc->dchannel = 1;
-		hdlc->state = HDLC_SEND_FIRST_FLAG;
-	} else {
-		hdlc->dchannel = 0;
-		hdlc->state = HDLC_SEND_FAST_FLAG;
-		hdlc->ffvalue = 0x7e;
-	}
-	hdlc->cbin = 0x7e;
-	if (features & HDLC_56KBIT) {
-		hdlc->do_adapt56 = 1;
-		hdlc->state = HDLC_SENDFLAG_B0;
-	} else
-		hdlc->data_bits = 8;
-	if (features & HDLC_BITREVERSE)
-		hdlc->do_bitreverse = 1;
-}
-EXPORT_SYMBOL(isdnhdlc_rcv_init);
-
-static int
-check_frame(struct isdnhdlc_vars *hdlc)
-{
-	int status;
-
-	if (hdlc->dstpos < 2) 	/* too small - framing error */
-		status = -HDLC_FRAMING_ERROR;
-	else if (hdlc->crc != 0xf0b8)	/* crc error */
-		status = -HDLC_CRC_ERROR;
-	else {
-		/* remove CRC */
-		hdlc->dstpos -= 2;
-		/* good frame */
-		status = hdlc->dstpos;
-	}
-	return status;
-}
-
-/*
-  isdnhdlc_decode - decodes HDLC frames from a transparent bit stream.
-
-  The source buffer is scanned for valid HDLC frames looking for
-  flags (01111110) to indicate the start of a frame. If the start of
-  the frame is found, the bit stuffing is removed (0 after 5 1's).
-  When a new flag is found, the complete frame has been received
-  and the CRC is checked.
-  If a valid frame is found, the function returns the frame length
-  excluding the CRC with the bit HDLC_END_OF_FRAME set.
-  If the beginning of a valid frame is found, the function returns
-  the length.
-  If a framing error is found (too many 1s and not a flag) the function
-  returns the length with the bit HDLC_FRAMING_ERROR set.
-  If a CRC error is found the function returns the length with the
-  bit HDLC_CRC_ERROR set.
-  If the frame length exceeds the destination buffer size, the function
-  returns the length with the bit HDLC_LENGTH_ERROR set.
-
-  src - source buffer
-  slen - source buffer length
-  count - number of bytes removed (decoded) from the source buffer
-  dst _ destination buffer
-  dsize - destination buffer size
-  returns - number of decoded bytes in the destination buffer and status
-  flag.
- */
-int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen,
-	int *count, u8 *dst, int dsize)
-{
-	int status = 0;
-
-	static const unsigned char fast_flag[] = {
-		0x00, 0x00, 0x00, 0x20, 0x30, 0x38, 0x3c, 0x3e, 0x3f
-	};
-
-	static const unsigned char fast_flag_value[] = {
-		0x00, 0x7e, 0xfc, 0xf9, 0xf3, 0xe7, 0xcf, 0x9f, 0x3f
-	};
-
-	static const unsigned char fast_abort[] = {
-		0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff
-	};
-
-#define handle_fast_flag(h) \
-	do {\
-		if (h->cbin == fast_flag[h->bit_shift]) {\
-			h->ffvalue = fast_flag_value[h->bit_shift];\
-			h->state = HDLC_FAST_FLAG;\
-			h->ffbit_shift = h->bit_shift;\
-			h->bit_shift = 1;\
-		} else {\
-			h->state = HDLC_GET_DATA;\
-			h->data_received = 0;\
-		} \
-	} while (0)
-
-#define handle_abort(h) \
-	do {\
-		h->shift_reg = fast_abort[h->ffbit_shift - 1];\
-		h->hdlc_bits1 = h->ffbit_shift - 2;\
-		if (h->hdlc_bits1 < 0)\
-			h->hdlc_bits1 = 0;\
-		h->data_bits = h->ffbit_shift - 1;\
-		h->state = HDLC_GET_DATA;\
-		h->data_received = 0;\
-	} while (0)
-
-	*count = slen;
-
-	while (slen > 0) {
-		if (hdlc->bit_shift == 0) {
-			/* the code is for bitreverse streams */
-			if (hdlc->do_bitreverse == 0)
-				hdlc->cbin = bitrev8(*src++);
-			else
-				hdlc->cbin = *src++;
-			slen--;
-			hdlc->bit_shift = 8;
-			if (hdlc->do_adapt56)
-				hdlc->bit_shift--;
-		}
-
-		switch (hdlc->state) {
-		case STOPPED:
-			return 0;
-		case HDLC_FAST_IDLE:
-			if (hdlc->cbin == 0xff) {
-				hdlc->bit_shift = 0;
-				break;
-			}
-			hdlc->state = HDLC_GET_FLAG_B0;
-			hdlc->hdlc_bits1 = 0;
-			hdlc->bit_shift = 8;
-			break;
-		case HDLC_GET_FLAG_B0:
-			if (!(hdlc->cbin & 0x80)) {
-				hdlc->state = HDLC_GETFLAG_B1A6;
-				hdlc->hdlc_bits1 = 0;
-			} else {
-				if ((!hdlc->do_adapt56) &&
-				    (++hdlc->hdlc_bits1 >= 8) &&
-				    (hdlc->bit_shift == 1))
-						hdlc->state = HDLC_FAST_IDLE;
-			}
-			hdlc->cbin <<= 1;
-			hdlc->bit_shift--;
-			break;
-		case HDLC_GETFLAG_B1A6:
-			if (hdlc->cbin & 0x80) {
-				hdlc->hdlc_bits1++;
-				if (hdlc->hdlc_bits1 == 6)
-					hdlc->state = HDLC_GETFLAG_B7;
-			} else
-				hdlc->hdlc_bits1 = 0;
-			hdlc->cbin <<= 1;
-			hdlc->bit_shift--;
-			break;
-		case HDLC_GETFLAG_B7:
-			if (hdlc->cbin & 0x80) {
-				hdlc->state = HDLC_GET_FLAG_B0;
-			} else {
-				hdlc->state = HDLC_GET_DATA;
-				hdlc->crc = 0xffff;
-				hdlc->shift_reg = 0;
-				hdlc->hdlc_bits1 = 0;
-				hdlc->data_bits = 0;
-				hdlc->data_received = 0;
-			}
-			hdlc->cbin <<= 1;
-			hdlc->bit_shift--;
-			break;
-		case HDLC_GET_DATA:
-			if (hdlc->cbin & 0x80) {
-				hdlc->hdlc_bits1++;
-				switch (hdlc->hdlc_bits1) {
-				case 6:
-					break;
-				case 7:
-					if (hdlc->data_received)
-						/* bad frame */
-						status = -HDLC_FRAMING_ERROR;
-					if (!hdlc->do_adapt56) {
-						if (hdlc->cbin == fast_abort
-						    [hdlc->bit_shift + 1]) {
-							hdlc->state =
-								HDLC_FAST_IDLE;
-							hdlc->bit_shift = 1;
-							break;
-						}
-					} else
-						hdlc->state = HDLC_GET_FLAG_B0;
-					break;
-				default:
-					hdlc->shift_reg >>= 1;
-					hdlc->shift_reg |= 0x80;
-					hdlc->data_bits++;
-					break;
-				}
-			} else {
-				switch (hdlc->hdlc_bits1) {
-				case 5:
-					break;
-				case 6:
-					if (hdlc->data_received)
-						status = check_frame(hdlc);
-					hdlc->crc = 0xffff;
-					hdlc->shift_reg = 0;
-					hdlc->data_bits = 0;
-					if (!hdlc->do_adapt56)
-						handle_fast_flag(hdlc);
-					else {
-						hdlc->state = HDLC_GET_DATA;
-						hdlc->data_received = 0;
-					}
-					break;
-				default:
-					hdlc->shift_reg >>= 1;
-					hdlc->data_bits++;
-					break;
-				}
-				hdlc->hdlc_bits1 = 0;
-			}
-			if (status) {
-				hdlc->dstpos = 0;
-				*count -= slen;
-				hdlc->cbin <<= 1;
-				hdlc->bit_shift--;
-				return status;
-			}
-			if (hdlc->data_bits == 8) {
-				hdlc->data_bits = 0;
-				hdlc->data_received = 1;
-				hdlc->crc = crc_ccitt_byte(hdlc->crc,
-						hdlc->shift_reg);
-
-				/* good byte received */
-				if (hdlc->dstpos < dsize)
-					dst[hdlc->dstpos++] = hdlc->shift_reg;
-				else {
-					/* frame too long */
-					status = -HDLC_LENGTH_ERROR;
-					hdlc->dstpos = 0;
-				}
-			}
-			hdlc->cbin <<= 1;
-			hdlc->bit_shift--;
-			break;
-		case HDLC_FAST_FLAG:
-			if (hdlc->cbin == hdlc->ffvalue) {
-				hdlc->bit_shift = 0;
-				break;
-			} else {
-				if (hdlc->cbin == 0xff) {
-					hdlc->state = HDLC_FAST_IDLE;
-					hdlc->bit_shift = 0;
-				} else if (hdlc->ffbit_shift == 8) {
-					hdlc->state = HDLC_GETFLAG_B7;
-					break;
-				} else
-					handle_abort(hdlc);
-			}
-			break;
-		default:
-			break;
-		}
-	}
-	*count -= slen;
-	return 0;
-}
-EXPORT_SYMBOL(isdnhdlc_decode);
-/*
-  isdnhdlc_encode - encodes HDLC frames to a transparent bit stream.
-
-  The bit stream starts with a beginning flag (01111110). After
-  that each byte is added to the bit stream with bit stuffing added
-  (0 after 5 1's).
-  When the last byte has been removed from the source buffer, the
-  CRC (2 bytes is added) and the frame terminates with the ending flag.
-  For the dchannel, the idle character (all 1's) is also added at the end.
-  If this function is called with empty source buffer (slen=0), flags or
-  idle character will be generated.
-
-  src - source buffer
-  slen - source buffer length
-  count - number of bytes removed (encoded) from source buffer
-  dst _ destination buffer
-  dsize - destination buffer size
-  returns - number of encoded bytes in the destination buffer
-*/
-int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen,
-	int *count, u8 *dst, int dsize)
-{
-	static const unsigned char xfast_flag_value[] = {
-		0x7e, 0x3f, 0x9f, 0xcf, 0xe7, 0xf3, 0xf9, 0xfc, 0x7e
-	};
-
-	int len = 0;
-
-	*count = slen;
-
-	/* special handling for one byte frames */
-	if ((slen == 1) && (hdlc->state == HDLC_SEND_FAST_FLAG))
-		hdlc->state = HDLC_SENDFLAG_ONE;
-	while (dsize > 0) {
-		if (hdlc->bit_shift == 0) {
-			if (slen && !hdlc->do_closing) {
-				hdlc->shift_reg = *src++;
-				slen--;
-				if (slen == 0)
-					/* closing sequence, CRC + flag(s) */
-					hdlc->do_closing = 1;
-				hdlc->bit_shift = 8;
-			} else {
-				if (hdlc->state == HDLC_SEND_DATA) {
-					if (hdlc->data_received) {
-						hdlc->state = HDLC_SEND_CRC1;
-						hdlc->crc ^= 0xffff;
-						hdlc->bit_shift = 8;
-						hdlc->shift_reg =
-							hdlc->crc & 0xff;
-					} else if (!hdlc->do_adapt56)
-						hdlc->state =
-							HDLC_SEND_FAST_FLAG;
-					else
-						hdlc->state =
-							HDLC_SENDFLAG_B0;
-				}
-
-			}
-		}
-
-		switch (hdlc->state) {
-		case STOPPED:
-			while (dsize--)
-				*dst++ = 0xff;
-			return dsize;
-		case HDLC_SEND_FAST_FLAG:
-			hdlc->do_closing = 0;
-			if (slen == 0) {
-				/* the code is for bitreverse streams */
-				if (hdlc->do_bitreverse == 0)
-					*dst++ = bitrev8(hdlc->ffvalue);
-				else
-					*dst++ = hdlc->ffvalue;
-				len++;
-				dsize--;
-				break;
-			}
-			/* fall through */
-		case HDLC_SENDFLAG_ONE:
-			if (hdlc->bit_shift == 8) {
-				hdlc->cbin = hdlc->ffvalue >>
-					(8 - hdlc->data_bits);
-				hdlc->state = HDLC_SEND_DATA;
-				hdlc->crc = 0xffff;
-				hdlc->hdlc_bits1 = 0;
-				hdlc->data_received = 1;
-			}
-			break;
-		case HDLC_SENDFLAG_B0:
-			hdlc->do_closing = 0;
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			hdlc->hdlc_bits1 = 0;
-			hdlc->state = HDLC_SENDFLAG_B1A6;
-			break;
-		case HDLC_SENDFLAG_B1A6:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			hdlc->cbin++;
-			if (++hdlc->hdlc_bits1 == 6)
-				hdlc->state = HDLC_SENDFLAG_B7;
-			break;
-		case HDLC_SENDFLAG_B7:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (slen == 0) {
-				hdlc->state = HDLC_SENDFLAG_B0;
-				break;
-			}
-			if (hdlc->bit_shift == 8) {
-				hdlc->state = HDLC_SEND_DATA;
-				hdlc->crc = 0xffff;
-				hdlc->hdlc_bits1 = 0;
-				hdlc->data_received = 1;
-			}
-			break;
-		case HDLC_SEND_FIRST_FLAG:
-			hdlc->data_received = 1;
-			if (hdlc->data_bits == 8) {
-				hdlc->state = HDLC_SEND_DATA;
-				hdlc->crc = 0xffff;
-				hdlc->hdlc_bits1 = 0;
-				break;
-			}
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (hdlc->shift_reg & 0x01)
-				hdlc->cbin++;
-			hdlc->shift_reg >>= 1;
-			hdlc->bit_shift--;
-			if (hdlc->bit_shift == 0) {
-				hdlc->state = HDLC_SEND_DATA;
-				hdlc->crc = 0xffff;
-				hdlc->hdlc_bits1 = 0;
-			}
-			break;
-		case HDLC_SEND_DATA:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (hdlc->hdlc_bits1 == 5) {
-				hdlc->hdlc_bits1 = 0;
-				break;
-			}
-			if (hdlc->bit_shift == 8)
-				hdlc->crc = crc_ccitt_byte(hdlc->crc,
-					hdlc->shift_reg);
-			if (hdlc->shift_reg & 0x01) {
-				hdlc->hdlc_bits1++;
-				hdlc->cbin++;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			} else {
-				hdlc->hdlc_bits1 = 0;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			}
-			break;
-		case HDLC_SEND_CRC1:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (hdlc->hdlc_bits1 == 5) {
-				hdlc->hdlc_bits1 = 0;
-				break;
-			}
-			if (hdlc->shift_reg & 0x01) {
-				hdlc->hdlc_bits1++;
-				hdlc->cbin++;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			} else {
-				hdlc->hdlc_bits1 = 0;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			}
-			if (hdlc->bit_shift == 0) {
-				hdlc->shift_reg = (hdlc->crc >> 8);
-				hdlc->state = HDLC_SEND_CRC2;
-				hdlc->bit_shift = 8;
-			}
-			break;
-		case HDLC_SEND_CRC2:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (hdlc->hdlc_bits1 == 5) {
-				hdlc->hdlc_bits1 = 0;
-				break;
-			}
-			if (hdlc->shift_reg & 0x01) {
-				hdlc->hdlc_bits1++;
-				hdlc->cbin++;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			} else {
-				hdlc->hdlc_bits1 = 0;
-				hdlc->shift_reg >>= 1;
-				hdlc->bit_shift--;
-			}
-			if (hdlc->bit_shift == 0) {
-				hdlc->shift_reg = 0x7e;
-				hdlc->state = HDLC_SEND_CLOSING_FLAG;
-				hdlc->bit_shift = 8;
-			}
-			break;
-		case HDLC_SEND_CLOSING_FLAG:
-			hdlc->cbin <<= 1;
-			hdlc->data_bits++;
-			if (hdlc->hdlc_bits1 == 5) {
-				hdlc->hdlc_bits1 = 0;
-				break;
-			}
-			if (hdlc->shift_reg & 0x01)
-				hdlc->cbin++;
-			hdlc->shift_reg >>= 1;
-			hdlc->bit_shift--;
-			if (hdlc->bit_shift == 0) {
-				hdlc->ffvalue =
-					xfast_flag_value[hdlc->data_bits];
-				if (hdlc->dchannel) {
-					hdlc->ffvalue = 0x7e;
-					hdlc->state = HDLC_SEND_IDLE1;
-					hdlc->bit_shift = 8-hdlc->data_bits;
-					if (hdlc->bit_shift == 0)
-						hdlc->state =
-							HDLC_SEND_FAST_IDLE;
-				} else {
-					if (!hdlc->do_adapt56) {
-						hdlc->state =
-							HDLC_SEND_FAST_FLAG;
-						hdlc->data_received = 0;
-					} else {
-						hdlc->state = HDLC_SENDFLAG_B0;
-						hdlc->data_received = 0;
-					}
-					/* Finished this frame, send flags */
-					if (dsize > 1)
-						dsize = 1;
-				}
-			}
-			break;
-		case HDLC_SEND_IDLE1:
-			hdlc->do_closing = 0;
-			hdlc->cbin <<= 1;
-			hdlc->cbin++;
-			hdlc->data_bits++;
-			hdlc->bit_shift--;
-			if (hdlc->bit_shift == 0) {
-				hdlc->state = HDLC_SEND_FAST_IDLE;
-				hdlc->bit_shift = 0;
-			}
-			break;
-		case HDLC_SEND_FAST_IDLE:
-			hdlc->do_closing = 0;
-			hdlc->cbin = 0xff;
-			hdlc->data_bits = 8;
-			if (hdlc->bit_shift == 8) {
-				hdlc->cbin = 0x7e;
-				hdlc->state = HDLC_SEND_FIRST_FLAG;
-			} else {
-				/* the code is for bitreverse streams */
-				if (hdlc->do_bitreverse == 0)
-					*dst++ = bitrev8(hdlc->cbin);
-				else
-					*dst++ = hdlc->cbin;
-				hdlc->bit_shift = 0;
-				hdlc->data_bits = 0;
-				len++;
-				dsize = 0;
-			}
-			break;
-		default:
-			break;
-		}
-		if (hdlc->do_adapt56) {
-			if (hdlc->data_bits == 7) {
-				hdlc->cbin <<= 1;
-				hdlc->cbin++;
-				hdlc->data_bits++;
-			}
-		}
-		if (hdlc->data_bits == 8) {
-			/* the code is for bitreverse streams */
-			if (hdlc->do_bitreverse == 0)
-				*dst++ = bitrev8(hdlc->cbin);
-			else
-				*dst++ = hdlc->cbin;
-			hdlc->data_bits = 0;
-			len++;
-			dsize--;
-		}
-	}
-	*count -= slen;
-
-	return len;
-}
-EXPORT_SYMBOL(isdnhdlc_encode);
--- /dev/null
+++ linux-next-20090909/drivers/isdn/isdnhdlc.c
@@ -0,0 +1,630 @@
+/*
+ * isdnhdlc.c  --  General purpose ISDN HDLC decoder.
+ *
+ * Copyright (C)
+ *	2009	Karsten Keil		<keil@b1-systems.de>
+ *	2002	Wolfgang Mües		<wolfgang@iksw-muees.de>
+ *	2001	Frode Isaksen		<fisaksen@bewan.com>
+ *      2001	Kai Germaschewski	<kai.germaschewski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/crc-ccitt.h>
+#include <linux/isdn/hdlc.h>
+#include <linux/bitrev.h>
+
+/*-------------------------------------------------------------------*/
+
+MODULE_AUTHOR("Wolfgang Mües <wolfgang@iksw-muees.de>, "
+	      "Frode Isaksen <fisaksen@bewan.com>, "
+	      "Kai Germaschewski <kai.germaschewski@gmx.de>");
+MODULE_DESCRIPTION("General purpose ISDN HDLC decoder");
+MODULE_LICENSE("GPL");
+
+/*-------------------------------------------------------------------*/
+
+enum {
+	HDLC_FAST_IDLE, HDLC_GET_FLAG_B0, HDLC_GETFLAG_B1A6, HDLC_GETFLAG_B7,
+	HDLC_GET_DATA, HDLC_FAST_FLAG
+};
+
+enum {
+	HDLC_SEND_DATA, HDLC_SEND_CRC1, HDLC_SEND_FAST_FLAG,
+	HDLC_SEND_FIRST_FLAG, HDLC_SEND_CRC2, HDLC_SEND_CLOSING_FLAG,
+	HDLC_SEND_IDLE1, HDLC_SEND_FAST_IDLE, HDLC_SENDFLAG_B0,
+	HDLC_SENDFLAG_B1A6, HDLC_SENDFLAG_B7, STOPPED, HDLC_SENDFLAG_ONE
+};
+
+void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features)
+{
+	memset(hdlc, 0, sizeof(struct isdnhdlc_vars));
+	hdlc->state = HDLC_GET_DATA;
+	if (features & HDLC_56KBIT)
+		hdlc->do_adapt56 = 1;
+	if (features & HDLC_BITREVERSE)
+		hdlc->do_bitreverse = 1;
+}
+EXPORT_SYMBOL(isdnhdlc_out_init);
+
+void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features)
+{
+	memset(hdlc, 0, sizeof(struct isdnhdlc_vars));
+	if (features & HDLC_DCHANNEL) {
+		hdlc->dchannel = 1;
+		hdlc->state = HDLC_SEND_FIRST_FLAG;
+	} else {
+		hdlc->dchannel = 0;
+		hdlc->state = HDLC_SEND_FAST_FLAG;
+		hdlc->ffvalue = 0x7e;
+	}
+	hdlc->cbin = 0x7e;
+	if (features & HDLC_56KBIT) {
+		hdlc->do_adapt56 = 1;
+		hdlc->state = HDLC_SENDFLAG_B0;
+	} else
+		hdlc->data_bits = 8;
+	if (features & HDLC_BITREVERSE)
+		hdlc->do_bitreverse = 1;
+}
+EXPORT_SYMBOL(isdnhdlc_rcv_init);
+
+static int
+check_frame(struct isdnhdlc_vars *hdlc)
+{
+	int status;
+
+	if (hdlc->dstpos < 2) 	/* too small - framing error */
+		status = -HDLC_FRAMING_ERROR;
+	else if (hdlc->crc != 0xf0b8)	/* crc error */
+		status = -HDLC_CRC_ERROR;
+	else {
+		/* remove CRC */
+		hdlc->dstpos -= 2;
+		/* good frame */
+		status = hdlc->dstpos;
+	}
+	return status;
+}
+
+/*
+  isdnhdlc_decode - decodes HDLC frames from a transparent bit stream.
+
+  The source buffer is scanned for valid HDLC frames looking for
+  flags (01111110) to indicate the start of a frame. If the start of
+  the frame is found, the bit stuffing is removed (0 after 5 1's).
+  When a new flag is found, the complete frame has been received
+  and the CRC is checked.
+  If a valid frame is found, the function returns the frame length
+  excluding the CRC with the bit HDLC_END_OF_FRAME set.
+  If the beginning of a valid frame is found, the function returns
+  the length.
+  If a framing error is found (too many 1s and not a flag) the function
+  returns the length with the bit HDLC_FRAMING_ERROR set.
+  If a CRC error is found the function returns the length with the
+  bit HDLC_CRC_ERROR set.
+  If the frame length exceeds the destination buffer size, the function
+  returns the length with the bit HDLC_LENGTH_ERROR set.
+
+  src - source buffer
+  slen - source buffer length
+  count - number of bytes removed (decoded) from the source buffer
+  dst - destination buffer
+  dsize - destination buffer size
+  returns - number of decoded bytes in the destination buffer and status
+  flag.
+ */
+int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen,
+	int *count, u8 *dst, int dsize)
+{
+	int status = 0;
+
+	static const unsigned char fast_flag[] = {
+		0x00, 0x00, 0x00, 0x20, 0x30, 0x38, 0x3c, 0x3e, 0x3f
+	};
+
+	static const unsigned char fast_flag_value[] = {
+		0x00, 0x7e, 0xfc, 0xf9, 0xf3, 0xe7, 0xcf, 0x9f, 0x3f
+	};
+
+	static const unsigned char fast_abort[] = {
+		0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff
+	};
+
+#define handle_fast_flag(h) \
+	do {\
+		if (h->cbin == fast_flag[h->bit_shift]) {\
+			h->ffvalue = fast_flag_value[h->bit_shift];\
+			h->state = HDLC_FAST_FLAG;\
+			h->ffbit_shift = h->bit_shift;\
+			h->bit_shift = 1;\
+		} else {\
+			h->state = HDLC_GET_DATA;\
+			h->data_received = 0;\
+		} \
+	} while (0)
+
+#define handle_abort(h) \
+	do {\
+		h->shift_reg = fast_abort[h->ffbit_shift - 1];\
+		h->hdlc_bits1 = h->ffbit_shift - 2;\
+		if (h->hdlc_bits1 < 0)\
+			h->hdlc_bits1 = 0;\
+		h->data_bits = h->ffbit_shift - 1;\
+		h->state = HDLC_GET_DATA;\
+		h->data_received = 0;\
+	} while (0)
+
+	*count = slen;
+
+	while (slen > 0) {
+		if (hdlc->bit_shift == 0) {
+			/* the code is for bitreverse streams */
+			if (hdlc->do_bitreverse == 0)
+				hdlc->cbin = bitrev8(*src++);
+			else
+				hdlc->cbin = *src++;
+			slen--;
+			hdlc->bit_shift = 8;
+			if (hdlc->do_adapt56)
+				hdlc->bit_shift--;
+		}
+
+		switch (hdlc->state) {
+		case STOPPED:
+			return 0;
+		case HDLC_FAST_IDLE:
+			if (hdlc->cbin == 0xff) {
+				hdlc->bit_shift = 0;
+				break;
+			}
+			hdlc->state = HDLC_GET_FLAG_B0;
+			hdlc->hdlc_bits1 = 0;
+			hdlc->bit_shift = 8;
+			break;
+		case HDLC_GET_FLAG_B0:
+			if (!(hdlc->cbin & 0x80)) {
+				hdlc->state = HDLC_GETFLAG_B1A6;
+				hdlc->hdlc_bits1 = 0;
+			} else {
+				if ((!hdlc->do_adapt56) &&
+				    (++hdlc->hdlc_bits1 >= 8) &&
+				    (hdlc->bit_shift == 1))
+						hdlc->state = HDLC_FAST_IDLE;
+			}
+			hdlc->cbin <<= 1;
+			hdlc->bit_shift--;
+			break;
+		case HDLC_GETFLAG_B1A6:
+			if (hdlc->cbin & 0x80) {
+				hdlc->hdlc_bits1++;
+				if (hdlc->hdlc_bits1 == 6)
+					hdlc->state = HDLC_GETFLAG_B7;
+			} else
+				hdlc->hdlc_bits1 = 0;
+			hdlc->cbin <<= 1;
+			hdlc->bit_shift--;
+			break;
+		case HDLC_GETFLAG_B7:
+			if (hdlc->cbin & 0x80) {
+				hdlc->state = HDLC_GET_FLAG_B0;
+			} else {
+				hdlc->state = HDLC_GET_DATA;
+				hdlc->crc = 0xffff;
+				hdlc->shift_reg = 0;
+				hdlc->hdlc_bits1 = 0;
+				hdlc->data_bits = 0;
+				hdlc->data_received = 0;
+			}
+			hdlc->cbin <<= 1;
+			hdlc->bit_shift--;
+			break;
+		case HDLC_GET_DATA:
+			if (hdlc->cbin & 0x80) {
+				hdlc->hdlc_bits1++;
+				switch (hdlc->hdlc_bits1) {
+				case 6:
+					break;
+				case 7:
+					if (hdlc->data_received)
+						/* bad frame */
+						status = -HDLC_FRAMING_ERROR;
+					if (!hdlc->do_adapt56) {
+						if (hdlc->cbin == fast_abort
+						    [hdlc->bit_shift + 1]) {
+							hdlc->state =
+								HDLC_FAST_IDLE;
+							hdlc->bit_shift = 1;
+							break;
+						}
+					} else
+						hdlc->state = HDLC_GET_FLAG_B0;
+					break;
+				default:
+					hdlc->shift_reg >>= 1;
+					hdlc->shift_reg |= 0x80;
+					hdlc->data_bits++;
+					break;
+				}
+			} else {
+				switch (hdlc->hdlc_bits1) {
+				case 5:
+					break;
+				case 6:
+					if (hdlc->data_received)
+						status = check_frame(hdlc);
+					hdlc->crc = 0xffff;
+					hdlc->shift_reg = 0;
+					hdlc->data_bits = 0;
+					if (!hdlc->do_adapt56)
+						handle_fast_flag(hdlc);
+					else {
+						hdlc->state = HDLC_GET_DATA;
+						hdlc->data_received = 0;
+					}
+					break;
+				default:
+					hdlc->shift_reg >>= 1;
+					hdlc->data_bits++;
+					break;
+				}
+				hdlc->hdlc_bits1 = 0;
+			}
+			if (status) {
+				hdlc->dstpos = 0;
+				*count -= slen;
+				hdlc->cbin <<= 1;
+				hdlc->bit_shift--;
+				return status;
+			}
+			if (hdlc->data_bits == 8) {
+				hdlc->data_bits = 0;
+				hdlc->data_received = 1;
+				hdlc->crc = crc_ccitt_byte(hdlc->crc,
+						hdlc->shift_reg);
+
+				/* good byte received */
+				if (hdlc->dstpos < dsize)
+					dst[hdlc->dstpos++] = hdlc->shift_reg;
+				else {
+					/* frame too long */
+					status = -HDLC_LENGTH_ERROR;
+					hdlc->dstpos = 0;
+				}
+			}
+			hdlc->cbin <<= 1;
+			hdlc->bit_shift--;
+			break;
+		case HDLC_FAST_FLAG:
+			if (hdlc->cbin == hdlc->ffvalue) {
+				hdlc->bit_shift = 0;
+				break;
+			} else {
+				if (hdlc->cbin == 0xff) {
+					hdlc->state = HDLC_FAST_IDLE;
+					hdlc->bit_shift = 0;
+				} else if (hdlc->ffbit_shift == 8) {
+					hdlc->state = HDLC_GETFLAG_B7;
+					break;
+				} else
+					handle_abort(hdlc);
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	*count -= slen;
+	return 0;
+}
+EXPORT_SYMBOL(isdnhdlc_decode);
+/*
+  isdnhdlc_encode - encodes HDLC frames to a transparent bit stream.
+
+  The bit stream starts with a beginning flag (01111110). After
+  that each byte is added to the bit stream with bit stuffing added
+  (0 after 5 1's).
+  When the last byte has been removed from the source buffer, the
+  CRC (2 bytes) is added and the frame terminates with the ending flag.
+  For the dchannel, the idle character (all 1's) is also added at the end.
+  If this function is called with empty source buffer (slen=0), flags or
+  idle character will be generated.
+
+  src - source buffer
+  slen - source buffer length
+  count - number of bytes removed (encoded) from source buffer
+  dst - destination buffer
+  dsize - destination buffer size
+  returns - number of encoded bytes in the destination buffer
+*/
+int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen,
+	int *count, u8 *dst, int dsize)
+{
+	static const unsigned char xfast_flag_value[] = {
+		0x7e, 0x3f, 0x9f, 0xcf, 0xe7, 0xf3, 0xf9, 0xfc, 0x7e
+	};
+
+	int len = 0;
+
+	*count = slen;
+
+	/* special handling for one byte frames */
+	if ((slen == 1) && (hdlc->state == HDLC_SEND_FAST_FLAG))
+		hdlc->state = HDLC_SENDFLAG_ONE;
+	while (dsize > 0) {
+		if (hdlc->bit_shift == 0) {
+			if (slen && !hdlc->do_closing) {
+				hdlc->shift_reg = *src++;
+				slen--;
+				if (slen == 0)
+					/* closing sequence, CRC + flag(s) */
+					hdlc->do_closing = 1;
+				hdlc->bit_shift = 8;
+			} else {
+				if (hdlc->state == HDLC_SEND_DATA) {
+					if (hdlc->data_received) {
+						hdlc->state = HDLC_SEND_CRC1;
+						hdlc->crc ^= 0xffff;
+						hdlc->bit_shift = 8;
+						hdlc->shift_reg =
+							hdlc->crc & 0xff;
+					} else if (!hdlc->do_adapt56)
+						hdlc->state =
+							HDLC_SEND_FAST_FLAG;
+					else
+						hdlc->state =
+							HDLC_SENDFLAG_B0;
+				}
+
+			}
+		}
+
+		switch (hdlc->state) {
+		case STOPPED:
+			while (dsize--)
+				*dst++ = 0xff;
+			return dsize;
+		case HDLC_SEND_FAST_FLAG:
+			hdlc->do_closing = 0;
+			if (slen == 0) {
+				/* the code is for bitreverse streams */
+				if (hdlc->do_bitreverse == 0)
+					*dst++ = bitrev8(hdlc->ffvalue);
+				else
+					*dst++ = hdlc->ffvalue;
+				len++;
+				dsize--;
+				break;
+			}
+			/* fall through */
+		case HDLC_SENDFLAG_ONE:
+			if (hdlc->bit_shift == 8) {
+				hdlc->cbin = hdlc->ffvalue >>
+					(8 - hdlc->data_bits);
+				hdlc->state = HDLC_SEND_DATA;
+				hdlc->crc = 0xffff;
+				hdlc->hdlc_bits1 = 0;
+				hdlc->data_received = 1;
+			}
+			break;
+		case HDLC_SENDFLAG_B0:
+			hdlc->do_closing = 0;
+			hdlc->cbin <<= 1;
+			hdlc->data_bits++;
+			hdlc->hdlc_bits1 = 0;
+			hdlc->state = HDLC_SENDFLAG_B1A6;
+			break;
+		case HDLC_SENDFLAG_B1A6:
+			hdlc->cbin <<= 1;
+			hdlc->data_bits++;
+			hdlc->cbin++;
+			if (++hdlc->hdlc_bits1 == 6)
+				hdlc->state = HDLC_SENDFLAG_B7;
+			break;
+		case HDLC_SENDFLAG_B7:
+			hdlc->cbin <<= 1;
+			hdlc->data_bits++;
+			if (slen == 0) {
+				hdlc->state = HDLC_SENDFLAG_B0;
+				break;
+			}
+			if (hdlc->bit_shift == 8) {
+				hdlc->state = HDLC_SEND_DATA;
+				hdlc->crc = 0xffff;
+				hdlc->hdlc_bits1 = 0;
+				hdlc->data_received = 1;
+			}
+			break;
+		case HDLC_SEND_FIRST_FLAG:
+			hdlc->data_received = 1;
+			if (hdlc->data_bits == 8) {
+				hdlc->state = HDLC_SEND_DATA;
+				hdlc->crc = 0xffff;
+				hdlc->hdlc_bits1 = 0;
+				break;
+			}
+			hdlc->cbin <<= 1;
+			hdlc->data_bits++;
+			if (hdlc->shift_reg & 0x01)
+				hdlc->cbin++;
+			hdlc->shift_reg >>= 1;
+			hdlc->bit_shift--;
+			if (hdlc->bit_shift == 0) {
+				hdlc->state = HDLC_SEND_DATA;
+				hdlc->crc = 0xffff;
+				hdlc->hdlc_bits1 = 0;
+			}
+			break;
+		case HDLC_SEND_DATA:
+			hdlc->cbin <<= 1;
+			hdlc->data_bits++;
+			if (hdlc->hdlc_bits1 == 5) {
+				hdlc->hdlc_bits1 = 0;
+				break;
+			}
+			if (hdlc->bit_shift == 8)
+				hdlc->crc = crc_ccitt_byte(hdlc->crc,
+					hdlc->shift_reg);
+			if (hdlc->shift_reg & 0x01) {
+				hdlc->hdlc_bits1++;
+				hdlc->cbin++;
+				hdlc->shift_reg >>= 1;
+				hdlc->bit_shift--;
+			} else {
+				hdlc->hdlc_bits1 = 0;
+				hdlc->shift_reg >>= 1;
+				hdlc->bit_shift--;
+			}
+			break;
+		case HDLC_SEND_CRC1:
+			hdlc->cbin <<= 1;
+			hdlc->data_bits++;
+			if (hdlc->hdlc_bits1 == 5) {
+				hdlc->hdlc_bits1 = 0;
+				break;
+			}
+			if (hdlc->shift_reg & 0x01) {
+				hdlc->hdlc_bits1++;
+				hdlc->cbin++;
+				hdlc->shift_reg >>= 1;
+				hdlc->bit_shift--;
+			} else {
+				hdlc->hdlc_bits1 = 0;
+				hdlc->shift_reg >>= 1;
+				hdlc->bit_shift--;
+			}
+			if (hdlc->bit_shift == 0) {
+				hdlc->shift_reg = (hdlc->crc >> 8);
+				hdlc->state = HDLC_SEND_CRC2;
+				hdlc->bit_shift = 8;
+			}
+			break;
+		case HDLC_SEND_CRC2:
+			hdlc->cbin <<= 1;
+			hdlc->data_bits++;
+			if (hdlc->hdlc_bits1 == 5) {
+				hdlc->hdlc_bits1 = 0;
+				break;
+			}
+			if (hdlc->shift_reg & 0x01) {
+				hdlc->hdlc_bits1++;
+				hdlc->cbin++;
+				hdlc->shift_reg >>= 1;
+				hdlc->bit_shift--;
+			} else {
+				hdlc->hdlc_bits1 = 0;
+				hdlc->shift_reg >>= 1;
+				hdlc->bit_shift--;
+			}
+			if (hdlc->bit_shift == 0) {
+				hdlc->shift_reg = 0x7e;
+				hdlc->state = HDLC_SEND_CLOSING_FLAG;
+				hdlc->bit_shift = 8;
+			}
+			break;
+		case HDLC_SEND_CLOSING_FLAG:
+			hdlc->cbin <<= 1;
+			hdlc->data_bits++;
+			if (hdlc->hdlc_bits1 == 5) {
+				hdlc->hdlc_bits1 = 0;
+				break;
+			}
+			if (hdlc->shift_reg & 0x01)
+				hdlc->cbin++;
+			hdlc->shift_reg >>= 1;
+			hdlc->bit_shift--;
+			if (hdlc->bit_shift == 0) {
+				hdlc->ffvalue =
+					xfast_flag_value[hdlc->data_bits];
+				if (hdlc->dchannel) {
+					hdlc->ffvalue = 0x7e;
+					hdlc->state = HDLC_SEND_IDLE1;
+					hdlc->bit_shift = 8-hdlc->data_bits;
+					if (hdlc->bit_shift == 0)
+						hdlc->state =
+							HDLC_SEND_FAST_IDLE;
+				} else {
+					if (!hdlc->do_adapt56) {
+						hdlc->state =
+							HDLC_SEND_FAST_FLAG;
+						hdlc->data_received = 0;
+					} else {
+						hdlc->state = HDLC_SENDFLAG_B0;
+						hdlc->data_received = 0;
+					}
+					/* Finished this frame, send flags */
+					if (dsize > 1)
+						dsize = 1;
+				}
+			}
+			break;
+		case HDLC_SEND_IDLE1:
+			hdlc->do_closing = 0;
+			hdlc->cbin <<= 1;
+			hdlc->cbin++;
+			hdlc->data_bits++;
+			hdlc->bit_shift--;
+			if (hdlc->bit_shift == 0) {
+				hdlc->state = HDLC_SEND_FAST_IDLE;
+				hdlc->bit_shift = 0;
+			}
+			break;
+		case HDLC_SEND_FAST_IDLE:
+			hdlc->do_closing = 0;
+			hdlc->cbin = 0xff;
+			hdlc->data_bits = 8;
+			if (hdlc->bit_shift == 8) {
+				hdlc->cbin = 0x7e;
+				hdlc->state = HDLC_SEND_FIRST_FLAG;
+			} else {
+				/* the code is for bitreverse streams */
+				if (hdlc->do_bitreverse == 0)
+					*dst++ = bitrev8(hdlc->cbin);
+				else
+					*dst++ = hdlc->cbin;
+				hdlc->bit_shift = 0;
+				hdlc->data_bits = 0;
+				len++;
+				dsize = 0;
+			}
+			break;
+		default:
+			break;
+		}
+		if (hdlc->do_adapt56) {
+			if (hdlc->data_bits == 7) {
+				hdlc->cbin <<= 1;
+				hdlc->cbin++;
+				hdlc->data_bits++;
+			}
+		}
+		if (hdlc->data_bits == 8) {
+			/* the code is for bitreverse streams */
+			if (hdlc->do_bitreverse == 0)
+				*dst++ = bitrev8(hdlc->cbin);
+			else
+				*dst++ = hdlc->cbin;
+			hdlc->data_bits = 0;
+			len++;
+			dsize--;
+		}
+	}
+	*count -= slen;
+
+	return len;
+}
+EXPORT_SYMBOL(isdnhdlc_encode);
--- linux-next-20090909.orig/drivers/isdn/Kconfig
+++ linux-next-20090909/drivers/isdn/Kconfig
@@ -61,4 +61,9 @@ endif # ISDN_CAPI
 
 source "drivers/isdn/gigaset/Kconfig"
 
+config ISDN_HDLC
+	tristate
+	select CRC_CCITT
+	select BITREVERSE
+
 endif # ISDN
--- linux-next-20090909.orig/drivers/isdn/i4l/Kconfig
+++ linux-next-20090909/drivers/isdn/i4l/Kconfig
@@ -140,9 +140,3 @@ endmenu
 # end ISDN_I4L
 endif
 
-config ISDN_HDLC
-	tristate 
-	depends on HISAX_ST5481
-	select CRC_CCITT
-	select BITREVERSE
-




---
~Randy
LPC 2009, Sept. 23-25, Portland, Oregon
http://linuxplumbersconf.org/2009/

^ permalink raw reply

* Re: [PATCH net-next-2.6 v2] bonding: introduce primary_passive option
From: Nicolas de Pesloüan @ 2009-09-10 22:06 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, fubar, bonding-devel
In-Reply-To: <20090910182059.GC2972@psychotron.redhat.com>

Jiri Pirko wrote:
> (updated 2)
> 
> In some cases there is not desirable to switch back to primary interface when
> it's link recovers and rather stay with currently active one. We need to avoid
> packetloss as much as we can in some cases. This is solved by introducing
> primary_passive option. Note that enslaved primary slave is set as current
> active no matter what.
> 
> This patch depends on the following one:
> [net-next-2.6] bonding: make ab_arp select active slaves as other modes
> http://patchwork.ozlabs.org/patch/32684/
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
> 
> diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
> index d5181ce..e5f2c31 100644
> --- a/Documentation/networking/bonding.txt
> +++ b/Documentation/networking/bonding.txt
> @@ -614,6 +614,32 @@ primary
>  
>  	The primary option is only valid for active-backup mode.
>  
> +primary_passive
> +
> +	Specifies the behavior of the current active slave when the primary was
> +	down and comes back up.  This option is designed to prevent
> +	flip-flopping between the primary slave and other slaves.  The possible
> +	values and their respective effects are:
> +
> +	disabled or 0 (default)
> +
> +		The primary slave becomes the active slave whenever it comes
> +		back up.
> +
> +	better or 1
> +
> +		The primary slave becomes the active slave when it comes back
> +		up, if the speed and duplex of the primary slave is better
> +		than the speed and duplex of the current active slave.
> +
> +	always or 2
> +
> +		The primary slave becomes the active slave only if the current
> +		active slave fails and the primary slave is up.
> +
> +	When no slave are active, if the primary comes back up, it becomes the
> +	active slave, regardless of the value of primary_return
> +
>  updelay

My feeling is that using primary_passive=disabled/better/always is far less 
clear than primary_return=always/better/failure_only.

The normal (current) behavior is to always return to the primary if it is up. You
want to add the ability to return to it only on failure of the current active
slave, and I suggested adding the ability to return to it if it is "better" than
the current active slave.

Hence the suggested name for the option and option values.

>  
>  	Specifies the time, in milliseconds, to wait before enabling a
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index a7e731f..193eca4 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -94,6 +94,7 @@ static int downdelay;
>  static int use_carrier	= 1;
>  static char *mode;
>  static char *primary;
> +static char *primary_passive;
>  static char *lacp_rate;
>  static char *ad_select;
>  static char *xmit_hash_policy;
> @@ -126,6 +127,13 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
>  		       "6 for balance-alb");
>  module_param(primary, charp, 0);
>  MODULE_PARM_DESC(primary, "Primary network device to use");
> +module_param(primary_passive, charp, 0);
> +MODULE_PARM_DESC(primary_passive, "Do not set primary slave active "
> +				  "once it comes up; "
> +				  "0 for disabled (default), "
> +				  "1 for on only if speed of primary is not "
> +				  "better, "
> +				  "2 for always on");

You have a double negative here: a "do not" option whose value is
"disabled". For clarity, I suggest having a "do" option whose value is "enabled".

This is probably the reason why I suggested primary_return instead of 
primary_passive. primary_passive means "refuse to return to primary" and when 
disabled, it becomes "don't refuse to return to primary", which should be 
"accept to return to primary" instead :-)

It might be seen as not being that important, but having an option whose name 
and values are easy to understand leads to an easy to use option... :-)

>  module_param(lacp_rate, charp, 0);
>  MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
>  			    "(slow/fast)");
> @@ -200,6 +208,13 @@ const struct bond_parm_tbl fail_over_mac_tbl[] = {
>  {	NULL,			-1},
>  };
>  
> +const struct bond_parm_tbl pri_passive_tbl[] = {
> +{	"disabled",		BOND_PRI_PASSIVE_DISABLED},
> +{	"better",		BOND_PRI_PASSIVE_BETTER},
> +{	"always",		BOND_PRI_PASSIVE_ALWAYS},
> +{	NULL,			-1},
> +};
> +
>  struct bond_parm_tbl ad_select_tbl[] = {
>  {	"stable",	BOND_AD_STABLE},
>  {	"bandwidth",	BOND_AD_BANDWIDTH},
> @@ -1070,6 +1085,25 @@ out:
>  
>  }
>  
> +static bool bond_should_loose_active(struct bonding *bond)
> +{
> +	struct slave *prim = bond->primary_slave;
> +	struct slave *curr = bond->curr_active_slave;
> +
> +	if (!prim || !curr || curr->link != BOND_LINK_UP)
> +		return true;
> +	if (bond->force_primary) {
> +		bond->force_primary = false;
> +		return true;
> +	}
> +	if (bond->params.primary_passive == 1 &&

You should use the constants you defined in bonding.h and used in pri_passive_tbl :

if (bond->params.primary_passive == BOND_PRI_PASSIVE_BETTER &&

> +	    (prim->speed < curr->speed ||
> +	     (prim->speed == curr->speed && prim->duplex <= curr->duplex)))
> +		return false;
> +	if (bond->params.primary_passive == 2)

You should use the constants you defined in bonding.h and used in pri_passive_tbl :

if (bond->params.primary_passive == BOND_PRI_PASSIVE_ALWAYS)

> +		return false;
> +	return true;
> +}
>  
>  /**
>   * find_best_interface - select the best available slave to be the active one
> @@ -1093,15 +1127,9 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
>  			return NULL; /* still no slave, return NULL */
>  	}
>  
> -	/*
> -	 * first try the primary link; if arping, a link must tx/rx
> -	 * traffic before it can be considered the curr_active_slave.
> -	 * also, we would skip slaves between the curr_active_slave
> -	 * and primary_slave that may be up and able to arp
> -	 */
>  	if ((bond->primary_slave) &&
> -	    (!bond->params.arp_interval) &&
> -	    (IS_UP(bond->primary_slave->dev))) {
> +	    bond->primary_slave->link == BOND_LINK_UP &&
> +	    bond_should_loose_active(bond)) {
>  		new_active = bond->primary_slave;
>  	}
>  
> @@ -1109,15 +1137,14 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
>  	old_active = new_active;
>  
>  	bond_for_each_slave_from(bond, new_active, i, old_active) {
> -		if (IS_UP(new_active->dev)) {
> -			if (new_active->link == BOND_LINK_UP) {
> -				return new_active;
> -			} else if (new_active->link == BOND_LINK_BACK) {
> -				/* link up, but waiting for stabilization */
> -				if (new_active->delay < mintime) {
> -					mintime = new_active->delay;
> -					bestslave = new_active;
> -				}
> +		if (new_active->link == BOND_LINK_UP) {
> +			return new_active;
> +		} else if (new_active->link == BOND_LINK_BACK &&
> +			   IS_UP(new_active->dev)) {
> +			/* link up, but waiting for stabilization */
> +			if (new_active->delay < mintime) {
> +				mintime = new_active->delay;
> +				bestslave = new_active;
>  			}
>  		}
>  	}
> @@ -1683,8 +1710,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
>  
>  	if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
>  		/* if there is a primary slave, remember it */
> -		if (strcmp(bond->params.primary, new_slave->dev->name) == 0)
> +		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
>  			bond->primary_slave = new_slave;
> +			bond->force_primary = true;
> +		}
>  	}
>  
>  	write_lock_bh(&bond->curr_slave_lock);
> @@ -2929,18 +2958,6 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
>  		}
>  	}
>  
> -	read_lock(&bond->curr_slave_lock);
> -
> -	/*
> -	 * Trigger a commit if the primary option setting has changed.
> -	 */
> -	if (bond->primary_slave &&
> -	    (bond->primary_slave != bond->curr_active_slave) &&
> -	    (bond->primary_slave->link == BOND_LINK_UP))
> -		commit++;
> -
> -	read_unlock(&bond->curr_slave_lock);
> -
>  	return commit;
>  }
>  
> @@ -2961,90 +2978,58 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
>  			continue;
>  
>  		case BOND_LINK_UP:
> -			write_lock_bh(&bond->curr_slave_lock);
> -
> -			if (!bond->curr_active_slave &&
> -			    time_before_eq(jiffies, dev_trans_start(slave->dev) +
> -					   delta_in_ticks)) {
> +			if ((!bond->curr_active_slave &&
> +			     time_before_eq(jiffies,
> +					    dev_trans_start(slave->dev) +
> +					    delta_in_ticks)) ||
> +			    bond->curr_active_slave != slave) {
>  				slave->link = BOND_LINK_UP;
> -				bond_change_active_slave(bond, slave);
>  				bond->current_arp_slave = NULL;
>  
>  				pr_info(DRV_NAME
> -				       ": %s: %s is up and now the "
> -				       "active interface\n",
> -				       bond->dev->name, slave->dev->name);
> -
> -			} else if (bond->curr_active_slave != slave) {
> -				/* this slave has just come up but we
> -				 * already have a current slave; this can
> -				 * also happen if bond_enslave adds a new
> -				 * slave that is up while we are searching
> -				 * for a new slave
> -				 */
> -				slave->link = BOND_LINK_UP;
> -				bond_set_slave_inactive_flags(slave);
> -				bond->current_arp_slave = NULL;
> +					": %s: link status definitely "
> +					"up for interface %s.\n",
> +					bond->dev->name, slave->dev->name);
>  
> -				pr_info(DRV_NAME
> -				       ": %s: backup interface %s is now up\n",
> -				       bond->dev->name, slave->dev->name);
> -			}
> +				if (!bond->curr_active_slave ||
> +				    (slave == bond->primary_slave))
> +					goto do_failover;
>  
> -			write_unlock_bh(&bond->curr_slave_lock);
> +			}
>  
> -			break;
> +			continue;
>  
>  		case BOND_LINK_DOWN:
>  			if (slave->link_failure_count < UINT_MAX)
>  				slave->link_failure_count++;
>  
>  			slave->link = BOND_LINK_DOWN;
> +			bond_set_slave_inactive_flags(slave);
>  
> -			if (slave == bond->curr_active_slave) {
> -				pr_info(DRV_NAME
> -				       ": %s: link status down for active "
> -				       "interface %s, disabling it\n",
> -				       bond->dev->name, slave->dev->name);
> -
> -				bond_set_slave_inactive_flags(slave);
> -
> -				write_lock_bh(&bond->curr_slave_lock);
> -
> -				bond_select_active_slave(bond);
> -				if (bond->curr_active_slave)
> -					bond->curr_active_slave->jiffies =
> -						jiffies;
> -
> -				write_unlock_bh(&bond->curr_slave_lock);
> +			pr_info(DRV_NAME
> +				": %s: link status definitely down for "
> +				"interface %s, disabling it\n",
> +				bond->dev->name, slave->dev->name);
>  
> +			if (slave == bond->curr_active_slave) {
>  				bond->current_arp_slave = NULL;
> -
> -			} else if (slave->state == BOND_STATE_BACKUP) {
> -				pr_info(DRV_NAME
> -				       ": %s: backup interface %s is now down\n",
> -				       bond->dev->name, slave->dev->name);
> -
> -				bond_set_slave_inactive_flags(slave);
> +				goto do_failover;
>  			}
> -			break;
> +
> +			continue;
>  
>  		default:
>  			pr_err(DRV_NAME
>  			       ": %s: impossible: new_link %d on slave %s\n",
>  			       bond->dev->name, slave->new_link,
>  			       slave->dev->name);
> +			continue;
>  		}
> -	}
>  
> -	/*
> -	 * No race with changes to primary via sysfs, as we hold rtnl.
> -	 */
> -	if (bond->primary_slave &&
> -	    (bond->primary_slave != bond->curr_active_slave) &&
> -	    (bond->primary_slave->link == BOND_LINK_UP)) {
> +do_failover:
> +		ASSERT_RTNL();
>  		write_lock_bh(&bond->curr_slave_lock);
> -		bond_change_active_slave(bond, bond->primary_slave);
> +		bond_select_active_slave(bond);
>  		write_unlock_bh(&bond->curr_slave_lock);
>  	}
>  
> @@ -4695,7 +4680,7 @@ int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl)
>  
>  static int bond_check_params(struct bond_params *params)
>  {
> -	int arp_validate_value, fail_over_mac_value;
> +	int arp_validate_value, fail_over_mac_value, primary_passive_value;
>  
>  	/*
>  	 * Convert string parameters.
> @@ -4994,6 +4979,20 @@ static int bond_check_params(struct bond_params *params)
>  		primary = NULL;
>  	}
>  
> +	if (primary && primary_passive) {
> +		primary_passive_value = bond_parse_parm(primary_passive,
> +							pri_passive_tbl);
> +		if (primary_passive_value == -1) {
> +			pr_err(DRV_NAME
> +			       ": Error: Invalid primary_passive \"%s\"\n",
> +			       primary_passive ==
> +					NULL ? "NULL" : primary_passive);
> +			return -EINVAL;
> +		}
> +	} else {
> +		primary_passive_value = BOND_PRI_PASSIVE_DISABLED;
> +	}
> +
>  	if (fail_over_mac) {
>  		fail_over_mac_value = bond_parse_parm(fail_over_mac,
>  						      fail_over_mac_tbl);
> @@ -5025,6 +5024,7 @@ static int bond_check_params(struct bond_params *params)
>  	params->use_carrier = use_carrier;
>  	params->lacp_fast = lacp_fast;
>  	params->primary[0] = 0;
> +	params->primary_passive = primary_passive_value;
>  	params->fail_over_mac = fail_over_mac_value;
>  
>  	if (primary) {
> diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
> index 6044e12..84ce933 100644
> --- a/drivers/net/bonding/bond_sysfs.c
> +++ b/drivers/net/bonding/bond_sysfs.c
> @@ -1212,6 +1212,59 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
>  		   bonding_show_primary, bonding_store_primary);
>  
>  /*
> + * Show and set the primary_passive flag.
> + */
> +static ssize_t bonding_show_primary_passive(struct device *d,
> +					    struct device_attribute *attr,
> +					    char *buf)
> +{
> +	struct bonding *bond = to_bond(d);
> +
> +	return sprintf(buf, "%s %d\n",
> +		       pri_passive_tbl[bond->params.primary_passive].modename,
> +		       bond->params.primary_passive);
> +}
> +
> +static ssize_t bonding_store_primary_passive(struct device *d,
> +					     struct device_attribute *attr,
> +					     const char *buf, size_t count)
> +{
> +	int new_value, ret = count;
> +	struct bonding *bond = to_bond(d);
> +
> +	if (!rtnl_trylock())
> +		return restart_syscall();
> +
> +	new_value = bond_parse_parm(buf, pri_passive_tbl);
> +	if (new_value < 0)  {
> +		pr_err(DRV_NAME
> +		       ": %s: Ignoring invalid primary_passive value %.*s.\n",
> +		       bond->dev->name,
> +		       (int) strlen(buf) - 1, buf);
> +		ret = -EINVAL;
> +		goto out;
> +	} else {
> +		bond->params.primary_passive = new_value;
> +		pr_info(DRV_NAME ": %s: setting primary_passive to %s (%d).\n",
> +		       bond->dev->name, pri_passive_tbl[new_value].modename,
> +		       new_value);
> +		if (new_value == 0 || new_value == 1) {
> +			bond->force_primary = true;
> +			read_lock(&bond->lock);
> +			write_lock_bh(&bond->curr_slave_lock);
> +			bond_select_active_slave(bond);
> +			write_unlock_bh(&bond->curr_slave_lock);
> +			read_unlock(&bond->lock);
> +		}
> +	}
> +out:
> +	rtnl_unlock();
> +	return ret;
> +}
> +static DEVICE_ATTR(primary_passive, S_IRUGO | S_IWUSR,
> +		   bonding_show_primary_passive, bonding_store_primary_passive);
> +
> +/*
>   * Show and set the use_carrier flag.
>   */
>  static ssize_t bonding_show_carrier(struct device *d,
> @@ -1500,6 +1553,7 @@ static struct attribute *per_bond_attrs[] = {
>  	&dev_attr_num_unsol_na.attr,
>  	&dev_attr_miimon.attr,
>  	&dev_attr_primary.attr,
> +	&dev_attr_primary_passive.attr,
>  	&dev_attr_use_carrier.attr,
>  	&dev_attr_active_slave.attr,
>  	&dev_attr_mii_status.attr,
> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
> index 6824771..9a9e0c7 100644
> --- a/drivers/net/bonding/bonding.h
> +++ b/drivers/net/bonding/bonding.h
> @@ -131,6 +131,7 @@ struct bond_params {
>  	int lacp_fast;
>  	int ad_select;
>  	char primary[IFNAMSIZ];
> +	int primary_passive;
>  	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
>  };
>  
> @@ -190,6 +191,7 @@ struct bonding {
>  	struct   slave *curr_active_slave;
>  	struct   slave *current_arp_slave;
>  	struct   slave *primary_slave;
> +	bool     force_primary;
>  	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
>  	rwlock_t lock;
>  	rwlock_t curr_slave_lock;
> @@ -258,6 +260,10 @@ static inline bool bond_is_lb(const struct bonding *bond)
>  		|| bond->params.mode == BOND_MODE_ALB;
>  }
>  
> +#define BOND_PRI_PASSIVE_DISABLED	0
> +#define BOND_PRI_PASSIVE_BETTER		1
> +#define BOND_PRI_PASSIVE_ALWAYS		2
> +
>  #define BOND_FOM_NONE			0
>  #define BOND_FOM_ACTIVE			1
>  #define BOND_FOM_FOLLOW			2
> @@ -348,6 +354,7 @@ extern const struct bond_parm_tbl bond_mode_tbl[];
>  extern const struct bond_parm_tbl xmit_hashtype_tbl[];
>  extern const struct bond_parm_tbl arp_validate_tbl[];
>  extern const struct bond_parm_tbl fail_over_mac_tbl[];
> +extern const struct bond_parm_tbl pri_passive_tbl[];
>  extern struct bond_parm_tbl ad_select_tbl[];
>  
>  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
> 

	Nicolas.

^ permalink raw reply

* [PATCH] net: force bridge module(s) to be GPL
From: Stephen Hemminger @ 2009-09-10 21:51 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel

The only valid users of the bridge frame hooks are
GPL components (such as the bridge module).
The kernel should not leave a crack in the door for proprietary
networking stacks to slip in.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/net/core/dev.c	2009-09-10 14:27:59.076074483 -0700
+++ b/net/core/dev.c	2009-09-10 14:28:43.659314263 -0700
@@ -2116,7 +2116,7 @@ static inline int deliver_skb(struct sk_
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
-EXPORT_SYMBOL(br_fdb_test_addr_hook);
+EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
 /*
@@ -2125,7 +2125,7 @@ EXPORT_SYMBOL(br_fdb_test_addr_hook);
  */
 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
 					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL(br_handle_frame_hook);
+EXPORT_SYMBOL_GPL(br_handle_frame_hook);
 
 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
 					    struct packet_type **pt_prev, int *ret,

^ permalink raw reply

* [PATCH v3 3/3] ucc_geth: Fix hangs after switching from full to half duplex
From: Anton Vorontsov @ 2009-09-10 21:48 UTC (permalink / raw)
  To: Scott Wood; +Cc: David Miller, netdev, Andy Fleming, Timur Tabi, linuxppc-dev
In-Reply-To: <20090910210935.GA26037@oksana.dev.rtsoft.ru>

MPC8360 QE UCC ethernet controllers hang when changing link duplex
under a load (a bit of NFS activity is enough).

  PHY: mdio@e0102120:00 - Link is Up - 1000/Full
  sh-3.00# ethtool -s eth0 speed 100 duplex half autoneg off
  PHY: mdio@e0102120:00 - Link is Down
  PHY: mdio@e0102120:00 - Link is Up - 100/Half
  NETDEV WATCHDOG: eth0 (ucc_geth): transmit queue 0 timed out
  ------------[ cut here ]------------
  Badness at c01fcbd0 [verbose debug info unavailable]
  NIP: c01fcbd0 LR: c01fcbd0 CTR: c0194e44
  ...

The cure is to disable the controller before changing speed/duplex
and enable it afterwards.

Though, disabling the controller might take quite a while, so we
better not grab any spinlocks in adjust_link(). Instead, we quiesce
the driver's activity, and only then disable the controller.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
---

On Fri, Sep 11, 2009 at 01:09:36AM +0400, Anton Vorontsov wrote:
> On Thu, Sep 10, 2009 at 11:40:53PM +0400, Anton Vorontsov wrote:
> > On Thu, Sep 10, 2009 at 01:04:32PM -0500, Scott Wood wrote:
> > > Anton Vorontsov wrote:
> > > >MPC8360 QE UCC ethernet controllers hang when changing link duplex
> > > >under a load (a bit of NFS activity is enough).
> > > >
> > > >  PHY: mdio@e0102120:00 - Link is Up - 1000/Full
> > > >  sh-3.00# ethtool -s eth0 speed 100 duplex half autoneg off
> > > >  PHY: mdio@e0102120:00 - Link is Down
> > > >  PHY: mdio@e0102120:00 - Link is Up - 100/Half
> > > >  NETDEV WATCHDOG: eth0 (ucc_geth): transmit queue 0 timed out
> > > >  ------------[ cut here ]------------
> > > >  Badness at c01fcbd0 [verbose debug info unavailable]
> > > >  NIP: c01fcbd0 LR: c01fcbd0 CTR: c0194e44
> > > >  ...
> > > >
> > > >The cure is to disable the controller before changing speed/duplex
> > > >and enable it afterwards.
> > > >
> > > >Since ugeth_graceful_stop_{tx,rx} now may be called from an atomic
> > > >context, switch the two functions from msleep() to mdelay().
> > > 
> > > Ouch.
> > 
> > Yeah, right... delaying for 10ms with irqs off isn't good.
> > 
> > > Can we put this in a workqueue or something?
> > 
> > adjust_link() itself isn't called from an atomic context.
> 
> Oops. I though that phylib calls us from a workqueue, not a timer. Hm...
> 
> Will be a little bit more work..

Ignore me. I'm working on two kernel versions in parallel (2.6.21 and
mainline), and it's 2.6.21 where phylib uses a timer. Mainline is OK.

How about this patch?

 drivers/net/ucc_geth.c |   36 +++++++++++++++++++++++++++++++-----
 1 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index 2a2c973..232fef9 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -1560,6 +1560,25 @@ static int ugeth_disable(struct ucc_geth_private *ugeth, enum comm_dir mode)
 	return 0;
 }
 
+static void ugeth_quiesce(struct ucc_geth_private *ugeth)
+{
+	/* Wait for and prevent any further xmits. */
+	netif_tx_disable(ugeth->ndev);
+
+	/* Disable the interrupt to avoid NAPI rescheduling. */
+	disable_irq(ugeth->ug_info->uf_info.irq);
+
+	/* Stop NAPI, and possibly wait for its completion. */
+	napi_disable(&ugeth->napi);
+}
+
+static void ugeth_activate(struct ucc_geth_private *ugeth)
+{
+	napi_enable(&ugeth->napi);
+	enable_irq(ugeth->ug_info->uf_info.irq);
+	netif_tx_wake_all_queues(ugeth->ndev);
+}
+
 /* Called every time the controller might need to be made
  * aware of new link state.  The PHY code conveys this
  * information through variables in the ugeth structure, and this
@@ -1573,14 +1592,11 @@ static void adjust_link(struct net_device *dev)
 	struct ucc_geth __iomem *ug_regs;
 	struct ucc_fast __iomem *uf_regs;
 	struct phy_device *phydev = ugeth->phydev;
-	unsigned long flags;
 	int new_state = 0;
 
 	ug_regs = ugeth->ug_regs;
 	uf_regs = ugeth->uccf->uf_regs;
 
-	spin_lock_irqsave(&ugeth->lock, flags);
-
 	if (phydev->link) {
 		u32 tempval = in_be32(&ug_regs->maccfg2);
 		u32 upsmr = in_be32(&uf_regs->upsmr);
@@ -1631,9 +1647,21 @@ static void adjust_link(struct net_device *dev)
 			ugeth->oldspeed = phydev->speed;
 		}
 
+		/*
+		 * To change the MAC configuration we need to disable the
+		 * controller. To do so, we have to either grab ugeth->lock,
+		 * which is a bad idea since 'graceful stop' commands might
+		 * take quite a while, or we can quiesce driver's activity.
+		 */
+		ugeth_quiesce(ugeth);
+		ugeth_disable(ugeth, COMM_DIR_RX_AND_TX);
+
 		out_be32(&ug_regs->maccfg2, tempval);
 		out_be32(&uf_regs->upsmr, upsmr);
 
+		ugeth_enable(ugeth, COMM_DIR_RX_AND_TX);
+		ugeth_activate(ugeth);
+
 		if (!ugeth->oldlink) {
 			new_state = 1;
 			ugeth->oldlink = 1;
@@ -1647,8 +1675,6 @@ static void adjust_link(struct net_device *dev)
 
 	if (new_state && netif_msg_link(ugeth))
 		phy_print_status(phydev);
-
-	spin_unlock_irqrestore(&ugeth->lock, flags);
 }
 
 /* Initialize TBI PHY interface for communicating with the
-- 
1.6.3.3


^ permalink raw reply related

* Re: UDP regression with packets rates < 10k per sec
From: Christoph Lameter @ 2009-09-10 21:42 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <4AA97183.3030008@gmail.com>

On Thu, 10 Sep 2009, Eric Dumazet wrote:

> and idle=mwait  (or hpet and C2/C3 states are used on this machine and
> wakeups are slow)

We disabled C states here.

^ permalink raw reply

* Re: UDP regression with packets rates < 10k per sec
From: Christoph Lameter @ 2009-09-10 21:39 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <4AA963A4.5080509@gmail.com>

On Thu, 10 Sep 2009, Eric Dumazet wrote:

> 22:28:11.223581 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
> 22:28:11.223678 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32
>
> See how the answer is *very* slow ? Something like > 100 us ?

If you run tcpdump then you create some overhead I guess. 100 usec should
show up as latencies > 50usec in the report.

The latency request includes the timestamp from the source. So the
destination can take another timestamp at receive time. The sending back
of the reply to the timestamp request is not critical at all. The receiver
will just register the time differential established by the sender.

^ permalink raw reply

* Re: UDP regression with packets rates < 10k per sec
From: Eric Dumazet @ 2009-09-10 21:37 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: netdev
In-Reply-To: <4AA963A4.5080509@gmail.com>

Eric Dumazet a écrit :
> Something must be wrong with program or whatever...
> 
> On the receiver I did this to trace the latency messages only
> 
> # tcpdump -i eth0 not host 239.0.192.2 and port 9002 -n
> 
> 22:28:07.223300 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
> 22:28:07.223403 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32
> 
> 22:28:08.223301 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
> 22:28:08.223416 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32
> 
> 22:28:09.223380 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
> 22:28:09.223494 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32
> 
> 22:28:10.223481 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
> 22:28:10.223597 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32
> 
> 22:28:11.223581 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
> 22:28:11.223678 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32
> 
> 
> See how the answer is *very* slow ? Something like > 100 us ?
> 

Never mind, this is OK, after proper irq affinity and tweaks on receiver :
(I should do same on other machine as well...)

(echo 1 >/proc/irq/default_smp_affinity) before up-ing bnx2 NIC

echo performance >/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor

and idle=mwait  (or hpet and C2/C3 states are used on this machine and 
wakeups are slow)

# tcpdump -i eth0 not host 239.0.192.2 and port 9002 -n
23:25:02.137143 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
23:25:02.137212 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32

23:25:03.137204 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
23:25:03.137288 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32

23:25:04.137276 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
23:25:04.137355 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32


I presume the power-saving improvements in recent kernels work against
latency goals...


^ permalink raw reply

* Re: UDP regression with packets rates < 10k per sec
From: Christoph Lameter @ 2009-09-10 21:36 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <4AA963A4.5080509@gmail.com>

On Thu, 10 Sep 2009, Eric Dumazet wrote:

> On the receiver I did this to trace the latency messages only
>
> # tcpdump -i eth0 not host 239.0.192.2 and port 9002 -n

You do not show the multicast traffic. The latency timestamp requests are
sent unicast and at larger intervals: about one per second per mc channel.

^ permalink raw reply

* Re: [Linux-ATM-General] [PATCH] atm/br2684: netif_stop_queue() when atm device busy and netif_wake_queue() when we can send packets again.
From: Karl Hiramoto @ 2009-09-10 21:30 UTC (permalink / raw)
  To: Philip A. Prindeville; +Cc: netdev, linux-atm-general
In-Reply-To: <4AA95838.4010007@redfish-solutions.com>

Philip A. Prindeville wrote:
> I had noticed that with my Solos card, and using Qwest 7062/896kb/s
> service that I was typically only getting ~400kb/s upstream, so I
> thought that delayed transmitter restarts might be the culprit and
> decided to try out this patch.
>
> I'm running 2.6.27.26, and I modified the patch as below.
>
> Has anyone confirmed this patch (Karl's) against 2.6.27?
>
> Thanks,
>
> -Philip
I'd be interested in hearing comparisons with/without the patch.  What
I see is no upstream packet loss with this patch, but slightly
lower total throughput.  I think this is because the upper networking layers
take time to restart the packet flow.  I'm not really sure whether, or how
many, packets the upper layers buffer.  I haven't had time to debug it
further.

I don't have a solos card, but you may have to tweak the solos driver 
sk_sndbuf  value.

--
Karl

^ permalink raw reply

* Re: [PATCH v2 3/3] ucc_geth: Fix hangs after switching from full to half duplex
From: Anton Vorontsov @ 2009-09-10 21:09 UTC (permalink / raw)
  To: Scott Wood; +Cc: David Miller, netdev, Andy Fleming, Timur Tabi, linuxppc-dev
In-Reply-To: <20090910194053.GA24363@oksana.dev.rtsoft.ru>

On Thu, Sep 10, 2009 at 11:40:53PM +0400, Anton Vorontsov wrote:
> On Thu, Sep 10, 2009 at 01:04:32PM -0500, Scott Wood wrote:
> > Anton Vorontsov wrote:
> > >MPC8360 QE UCC ethernet controllers hang when changing link duplex
> > >under a load (a bit of NFS activity is enough).
> > >
> > >  PHY: mdio@e0102120:00 - Link is Up - 1000/Full
> > >  sh-3.00# ethtool -s eth0 speed 100 duplex half autoneg off
> > >  PHY: mdio@e0102120:00 - Link is Down
> > >  PHY: mdio@e0102120:00 - Link is Up - 100/Half
> > >  NETDEV WATCHDOG: eth0 (ucc_geth): transmit queue 0 timed out
> > >  ------------[ cut here ]------------
> > >  Badness at c01fcbd0 [verbose debug info unavailable]
> > >  NIP: c01fcbd0 LR: c01fcbd0 CTR: c0194e44
> > >  ...
> > >
> > >The cure is to disable the controller before changing speed/duplex
> > >and enable it afterwards.
> > >
> > >Since ugeth_graceful_stop_{tx,rx} now may be called from an atomic
> > >context, switch the two functions from msleep() to mdelay().
> > 
> > Ouch.
> 
> Yeah, right... delaying for 10ms with irqs off isn't good.
> 
> > Can we put this in a workqueue or something?
> 
> adjust_link() itself isn't called from an atomic context.

Oops. I thought that phylib calls us from a workqueue, not a timer. Hm...

Will be a little bit more work..

-- 
Anton Vorontsov
email: cbouatmailru@gmail.com
irc://irc.freenode.net/bd2

^ permalink raw reply

* Re: UDP regression with packets rates < 10k per sec
From: Eric Dumazet @ 2009-09-10 20:37 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: netdev
In-Reply-To: <alpine.DEB.1.10.0909091350590.32067@V090114053VZO-1>

Something must be wrong with the program or whatever...

On the receiver I did this to trace the latency messages only

# tcpdump -i eth0 not host 239.0.192.2 and port 9002 -n

22:28:07.223300 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
22:28:07.223403 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32

22:28:08.223301 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
22:28:08.223416 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32

22:28:09.223380 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
22:28:09.223494 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32

22:28:10.223481 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
22:28:10.223597 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32

22:28:11.223581 IP 55.225.18.7.9002 > 55.225.18.5.9002: UDP, length 32
22:28:11.223678 IP 55.225.18.5.9002 > 55.225.18.7.9002: UDP, length 32


See how the answer is *very* slow ? Something like > 100 us ?


^ permalink raw reply

* Re: [Linux-ATM-General] [PATCH] atm/br2684: netif_stop_queue() when atm device busy and netif_wake_queue() when we can send packets again.
From: Philip A. Prindeville @ 2009-09-10 19:49 UTC (permalink / raw)
  To: Karl Hiramoto; +Cc: netdev, linux-atm-general
In-Reply-To: <1251545092-18081-1-git-send-email-karl@hiramoto.org>

[-- Attachment #1: Type: text/plain, Size: 3504 bytes --]

I had noticed that with my Solos card, and using Qwest 7062/896kb/s
service that I was typically only getting ~400kb/s upstream, so I
thought that delayed transmitter restarts might be the culprit and
decided to try out this patch.

I'm running 2.6.27.26, and I modified the patch as below.

Has anyone confirmed this patch (Karl's) against 2.6.27?

Thanks,

-Philip


Karl Hiramoto wrote:
> This patch removes the call to dev_kfree_skb() when the atm device is busy.
> Calling dev_kfree_skb() causes heavy packet loss then the device is under
> heavy load, the more correct behavior should be to stop the upper layers,
> then when the lower device can queue packets again wake the upper layers.
>
> Signed-off-by: Karl Hiramoto <karl@hiramoto.org>
> ---
>  net/atm/br2684.c |   37 +++++++++++++++++++++++++++----------
>  1 files changed, 27 insertions(+), 10 deletions(-)
>
> diff --git a/net/atm/br2684.c b/net/atm/br2684.c
> index 2912665..5c42225 100644
> --- a/net/atm/br2684.c
> +++ b/net/atm/br2684.c
> @@ -69,7 +69,7 @@ struct br2684_vcc {
>  	struct net_device *device;
>  	/* keep old push, pop functions for chaining */
>  	void (*old_push) (struct atm_vcc * vcc, struct sk_buff * skb);
> -	/* void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); */
> +	void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb);
>  	enum br2684_encaps encaps;
>  	struct list_head brvccs;
>  #ifdef CONFIG_ATM_BR2684_IPFILTER
> @@ -142,6 +142,22 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
>  	return NULL;
>  }
>  
> +/* chained vcc->pop function.  Check if we should wake the netif_queue */
> +static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb)
> +{
> +	struct br2684_vcc *brvcc = BR2684_VCC(vcc);
> +	struct net_device *net_dev = skb->dev;
> +
> +	pr_debug("br2684_pop(vcc %p ; net_dev %p )\n", vcc, net_dev);
> +	brvcc->old_pop(vcc, skb);
> +
> +	if (!net_dev)
> +		return;
> +
> +	if (atm_may_send(vcc, 0))
> +		netif_wake_queue(net_dev);
> +
> +}
>  /*
>   * Send a packet out a particular vcc.  Not to useful right now, but paves
>   * the way for multiple vcc's per itf.  Returns true if we can send,
> @@ -200,20 +216,19 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
>  
>  	ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
>  	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
> -	if (!atm_may_send(atmvcc, skb->truesize)) {
> -		/*
> -		 * We free this here for now, because we cannot know in a higher
> -		 * layer whether the skb pointer it supplied wasn't freed yet.
> -		 * Now, it always is.
> -		 */
> -		dev_kfree_skb(skb);
> -		return 0;
> -	}
>  	atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
>  	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
>  	dev->stats.tx_packets++;
>  	dev->stats.tx_bytes += skb->len;
>  	atmvcc->send(atmvcc, skb);
> +
> +	if (!atm_may_send(atmvcc, 0)) {
> +		netif_stop_queue(brvcc->device);
> +		/*check for race with br2684_pop*/
> +		if (atm_may_send(atmvcc, 0))
> +			netif_start_queue(brvcc->device);
> +	}
> +
>  	return 1;
>  }
>  
> @@ -503,8 +518,10 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
>  	atmvcc->user_back = brvcc;
>  	brvcc->encaps = (enum br2684_encaps)be.encaps;
>  	brvcc->old_push = atmvcc->push;
> +	brvcc->old_pop = atmvcc->pop;
>  	barrier();
>  	atmvcc->push = br2684_push;
> +	atmvcc->pop = br2684_pop;
>  
>  	__skb_queue_head_init(&queue);
>  	rq = &sk_atm(atmvcc)->sk_receive_queue;
>   


[-- Attachment #2: linux-2.6.27-br2684.patch --]
[-- Type: text/plain, Size: 2396 bytes --]

--- linux-2.6.27.29-astlinux/net/atm/br2684.c.orig	2009-07-30 16:06:41.000000000 -0700
+++ linux-2.6.27.29-astlinux/net/atm/br2684.c	2009-09-10 12:42:50.000000000 -0700
@@ -69,7 +69,7 @@ struct br2684_vcc {
 	struct net_device *device;
 	/* keep old push, pop functions for chaining */
 	void (*old_push) (struct atm_vcc * vcc, struct sk_buff * skb);
-	/* void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); */
+	void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb);
 	enum br2684_encaps encaps;
 	struct list_head brvccs;
 #ifdef CONFIG_ATM_BR2684_IPFILTER
@@ -143,6 +143,22 @@ static struct net_device *br2684_find_de
 	return NULL;
 }
 
+/* chained vcc->pop function.  Check if we should wake the netif_queue */
+static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+	struct br2684_vcc *brvcc = BR2684_VCC(vcc);
+	struct net_device *net_dev = skb->dev;
+
+	pr_debug("br2684_pop(vcc %p ; net_dev %p )\n", vcc, net_dev);
+	brvcc->old_pop(vcc, skb);
+
+	if (!net_dev)
+		return;
+
+	if (atm_may_send(vcc, 0))
+		netif_wake_queue(net_dev);
+
+}
 /*
  * Send a packet out a particular vcc.  Not to useful right now, but paves
  * the way for multiple vcc's per itf.  Returns true if we can send,
@@ -200,20 +216,19 @@ static int br2684_xmit_vcc(struct sk_buf
 
 	ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
 	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
-	if (!atm_may_send(atmvcc, skb->truesize)) {
-		/*
-		 * We free this here for now, because we cannot know in a higher
-		 * layer whether the skb pointer it supplied wasn't freed yet.
-		 * Now, it always is.
-		 */
-		dev_kfree_skb(skb);
-		return 0;
-	}
 	atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
 	brdev->stats.tx_packets++;
 	brdev->stats.tx_bytes += skb->len;
 	atmvcc->send(atmvcc, skb);
+
+	if (!atm_may_send(atmvcc, 0)) {
+		netif_stop_queue(brvcc->device);
+		/*check for race with br2684_pop*/
+		if (atm_may_send(atmvcc, 0))
+			netif_start_queue(brvcc->device);
+	}
+
 	return 1;
 }
 
@@ -509,8 +524,10 @@ static int br2684_regvcc(struct atm_vcc 
 	atmvcc->user_back = brvcc;
 	brvcc->encaps = (enum br2684_encaps)be.encaps;
 	brvcc->old_push = atmvcc->push;
+	brvcc->old_pop = atmvcc->pop;
 	barrier();
 	atmvcc->push = br2684_push;
+	atmvcc->pop = br2684_pop;
 
 	rq = &sk_atm(atmvcc)->sk_receive_queue;
 

^ permalink raw reply

* Re: [PATCH v2 3/3] ucc_geth: Fix hangs after switching from full to half duplex
From: Anton Vorontsov @ 2009-09-10 19:40 UTC (permalink / raw)
  To: Scott Wood; +Cc: David Miller, netdev, Andy Fleming, Timur Tabi, linuxppc-dev
In-Reply-To: <4AA93FB0.5060802@freescale.com>

On Thu, Sep 10, 2009 at 01:04:32PM -0500, Scott Wood wrote:
> Anton Vorontsov wrote:
> >MPC8360 QE UCC ethernet controllers hang when changing link duplex
> >under a load (a bit of NFS activity is enough).
> >
> >  PHY: mdio@e0102120:00 - Link is Up - 1000/Full
> >  sh-3.00# ethtool -s eth0 speed 100 duplex half autoneg off
> >  PHY: mdio@e0102120:00 - Link is Down
> >  PHY: mdio@e0102120:00 - Link is Up - 100/Half
> >  NETDEV WATCHDOG: eth0 (ucc_geth): transmit queue 0 timed out
> >  ------------[ cut here ]------------
> >  Badness at c01fcbd0 [verbose debug info unavailable]
> >  NIP: c01fcbd0 LR: c01fcbd0 CTR: c0194e44
> >  ...
> >
> >The cure is to disable the controller before changing speed/duplex
> >and enable it afterwards.
> >
> >Since ugeth_graceful_stop_{tx,rx} now may be called from an atomic
> >context, switch the two functions from msleep() to mdelay().
> 
> Ouch.

Yeah, right... delaying for 10ms with irqs off isn't good.

> Can we put this in a workqueue or something?

adjust_link() itself isn't called from an atomic context.

It's that we are grabbing ugeth->lock, i.e. a spinlock. I don't see
why the lock is needed in adjust_link() in its current form,
but if we're going to disable the controller for some time,
we'll have to make sure that no start_xmit() or NAPI is running,
scheduled or will be scheduled until we say so.

I think that a lock-less, and thus completely sleep-able, variant
of adjust_link is doable.

Thanks,

-- 
Anton Vorontsov
email: cbouatmailru@gmail.com
irc://irc.freenode.net/bd2

^ permalink raw reply

* Re: [PATCH] ipv6: Add IFA_F_DADFAILED flag
From: Brian Haley @ 2009-09-10 19:14 UTC (permalink / raw)
  To: Jens Rosenboom; +Cc: david Miller, netdev@vger.kernel.org, YOSHIFUJI Hideaki
In-Reply-To: <1252599080.5980.3.camel@fnki-nb00130>

Hi Jens,

Jens Rosenboom wrote:
>> Ok, how does this look?  I changed it to set the tentative flag as it did
>> before, plus clear the dad_failed flag if the device got restarted,
>> triggering DAD to happen again for any tentative address; that was an
>> oversight on my part.
> 
> Looks fine to me so far, can you also send the patch for userspace? That
> would make testing this a bit easier. ;-)

Iproute2 patch below, I'll re-post both once you have a chance to test.

>> I'd still like to know if using this last ifa_flag is going to be an issue,
>> I actually finished a similar patch that uses a new IFA_ADDRFLAGS structure
>> to pass in/out this additional info.
> 
> IMHO you should stick to this version, if any future feature needs
> another bit, it may happen also to need two of them and so will need a
> new structure then anyway, but why not keep it simple for now?

I'll leave it for now, I might just post as an RFC to get some feedback on it.

Thanks,

-Brian


diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h
index a60c821..fd97404 100644
--- a/include/linux/if_addr.h
+++ b/include/linux/if_addr.h
@@ -41,6 +41,7 @@ enum
 
 #define	IFA_F_NODAD		0x02
 #define IFA_F_OPTIMISTIC	0x04
+#define IFA_F_DADFAILED		0x08
 #define	IFA_F_HOMEADDRESS	0x10
 #define IFA_F_DEPRECATED	0x20
 #define IFA_F_TENTATIVE		0x40
diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index 267ecb3..97c7a8b 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -508,6 +508,10 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n,
 		fprintf(fp, "dynamic ");
 	} else
 		ifa->ifa_flags &= ~IFA_F_PERMANENT;
+	if (ifa->ifa_flags&IFA_F_DADFAILED) {
+		ifa->ifa_flags &= ~IFA_F_DADFAILED;
+		fprintf(fp, "dadfailed ");
+	}
 	if (ifa->ifa_flags)
 		fprintf(fp, "flags %02x ", ifa->ifa_flags);
 	if (rta_tb[IFA_LABEL])

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox