Netdev List
 help / color / mirror / Atom feed
* [PATCH] net/unix: Allow Unix sockets to be treated like normal files.
From: Jeff Hansen @ 2010-10-26  4:21 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Jeff Hansen

This allows Unix sockets to be opened, written, read, and closed, like
normal files.  This can be especially handy from, for example, a shell
script that wants to send a short message to a Unix socket, but doesn't
want to and/or cannot create the socket itself.

This will try to open the Unix socket first in SOCK_DGRAM mode, then
SOCK_STREAM mode if that fails.

Signed-off-by: Jeff Hansen <x@jeffhansen.com>
---
 net/unix/Kconfig   |   10 +++++
 net/unix/af_unix.c |  113 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 0 deletions(-)

diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 5a69733..68df4f1 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -19,3 +19,13 @@ config UNIX
 
 	  Say Y unless you know what you are doing.
 
+config UNIX_FOPS
+	boolean "Allow Unix sockets to be treated like normal files"
+	depends on UNIX
+	---help---
+	  If you say Y here, Unix sockets may be opened, written, read, and
+	  closed, like normal files.  This is handy for sending short commands
+	  to Unix sockets (i.e. from shell scripts), without having to create
+	  a Unix socket.
+
+	  Say Y unless you know what you are doing.
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0ebc777..b5a6655 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -798,6 +798,114 @@ fail:
 	return NULL;
 }
 
+#ifdef CONFIG_UNIX_FOPS
+static int unix_open(struct inode *inode, struct file *filp)
+{
+	int err;
+	struct socket *sock = NULL;
+	struct dentry *dentry = filp->f_dentry;
+	struct sockaddr_un sunaddr = { 0 };
+	char *p;
+
+	if (!filp)
+		return -ENXIO;
+	dentry = filp->f_dentry;
+
+	if (!dentry || !dentry->d_parent)
+		return -ENXIO;
+
+	if (filp->private_data)
+		return -EBUSY;
+
+	sunaddr.sun_family = AF_UNIX;
+	p = d_path(&filp->f_path, sunaddr.sun_path, sizeof(sunaddr.sun_path));
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+	memmove(sunaddr.sun_path, p, p[sizeof(sunaddr.sun_path) - 1] ?
+		sizeof(sunaddr.sun_path) : strlen(p));
+
+	err = sock_create(PF_UNIX, SOCK_DGRAM, 0, &sock);
+	if (err)
+		return err;
+
+	err = unix_dgram_connect(sock, (struct sockaddr *)&sunaddr,
+				 sizeof(sunaddr), 0);
+	if (err) {
+		sock_release(sock);
+
+		err = sock_create(PF_UNIX, SOCK_STREAM, 0, &sock);
+		if (err)
+			return err;
+
+		err = unix_stream_connect(sock, (struct sockaddr *)&sunaddr,
+					  sizeof(sunaddr), 0);
+
+		if (err)
+			return err;
+	}
+	filp->private_data = sock;
+
+	return err;
+}
+
+static int unix_frelease(struct inode *inode, struct file *filp)
+{
+	if (!filp->private_data)
+		return -ENXIO;
+
+	sock_release(filp->private_data);
+	filp->private_data = NULL;
+	return 0;
+}
+
+static ssize_t unix_readwrite(struct file *filp, void *buf,
+	size_t _len, loff_t *ppos, int do_write)
+{
+	struct socket *sock = filp->private_data;
+	int len = (int)_len, err;
+	struct kvec iov = {
+		.iov_base = buf,
+		.iov_len = len,
+	};
+	struct msghdr msg = {
+		/* NB: struct iovec and kvec are equal */
+		.msg_iov = (struct iovec *)&iov,
+		.msg_iovlen = 1,
+	};
+
+	if (!sock)
+		return -ENXIO;
+	if (_len > 0xffffffffLL)
+		return -E2BIG;
+
+	err = do_write ? sock_sendmsg(sock, &msg, len) :
+			 sock_recvmsg(sock, &msg, len, 0);
+	if (err > 0 && ppos)
+		*ppos += err;
+
+	return err;
+}
+
+static ssize_t unix_write(struct file *filp, const char __user *buf,
+	size_t _len, loff_t *ppos)
+{
+	return unix_readwrite(filp, (void *)buf, _len, ppos, 0);
+}
+
+static ssize_t unix_read(struct file *filp, const char __user *buf,
+	size_t _len, loff_t *ppos)
+{
+	return unix_readwrite(filp, (void *)buf, _len, ppos, 0);
+}
+
+const struct file_operations unix_sock_fops = {
+	.owner = THIS_MODULE,
+	.open = unix_open,
+	.release = unix_frelease,
+	.write = unix_write,
+	.read = unix_read,
+};
+#endif /* CONFIG_UNIX_FOPS */
 
 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
@@ -874,6 +982,11 @@ out_mknod_drop_write:
 		mnt_drop_write(nd.path.mnt);
 		if (err)
 			goto out_mknod_dput;
+
+#ifdef CONFIG_UNIX_FOPS
+		dentry->d_inode->i_fop = &unix_sock_fops;
+#endif
+
 		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 		dput(nd.path.dentry);
 		nd.path.dentry = dentry;
-- 
1.7.0.4


^ permalink raw reply related

* Re: [PATCH] ipv6: addrconf: clear IPv6 addresses and routes when losing link
From: Stephen Hemminger @ 2010-10-26  4:38 UTC (permalink / raw)
  To: Lorenzo Colitti; +Cc: netdev
In-Reply-To: <AANLkTikC4pv8aOODM2pOg2bKQGL69wivcUU3f9ZziPhe@mail.gmail.com>

On Mon, 25 Oct 2010 19:08:27 -0700
Lorenzo Colitti <lorenzo@google.com> wrote:

> When roaming between different networks (e.g., changing wireless
> SSIDs, or plugging in to different wired networks), IPv6 addresses and
> routes are not cleared. If the two networks have different IPv6
> subnets assigned, the host maintains both the old and new IPv6
> addresses and gateways, but only the new ones work. If the host
> chooses the wrong source address or gateway, or if the new network
> does not have IPv6 but the old one did, IPv6 connections time out,
> leading to long delays when trying to connect to IPv6 hosts.
> 
> Fix this by ensuring that autoconfigured IPv6 addresses and routes are
> purged when link is lost, not only when the interface goes down.
> 
> Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
> 
> --- a/net/ipv6/addrconf.c	2010-10-20 13:30:22.000000000 -0700
> +++ b/net/ipv6/addrconf.c	2010-10-25 13:55:15.000000000 -0700
> @@ -2524,6 +2524,14 @@
>  		} else {
>  			if (!addrconf_qdisc_ok(dev)) {
>  				/* device is still not ready. */
> +				if (idev && (idev->if_flags & IF_READY)) {
> +					/* Link lost. Clear addresses and
> +					   routes, the device might come back
> +					   on a link where they are no longer
> +					   valid. */
> +					addrconf_ifdown(dev, 0);
> +					idev->if_flags &= ~IF_READY;
> +				}
>  				break;
>  			}

This is incorrect. When link is lost, routes and addresses should not be
flushed. They should be marked as tentative and then go through DAD again
on the new network.

If you do it this way, you break routing protocols when link is brought
down and back up.

^ permalink raw reply

* [net-next] stmmac: enable/disable rx/tx in the core with a single write.
From: Giuseppe CAVALLARO @ 2010-10-26  4:58 UTC (permalink / raw)
  To: netdev; +Cc: avisconti

From: avisconti <armando.visconti@st.com>

This patch enables and disables the rx and tx bits in the MAC control reg
by using a single write operation.
This also solves a possible problem (spotted on SPEAr platforms) at 10Mbps
where two consecutive writes to a MAC control register can take more than
4 phy_clk cycles.

Signed-off-by: Armando Visconti <armando.visconti@st.com>
Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/stmmac/stmmac_main.c |   40 +++++++++----------------------------
 1 files changed, 10 insertions(+), 30 deletions(-)

diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index 823b9e6..06bc603 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -337,33 +337,19 @@ static int stmmac_init_phy(struct net_device *dev)
 	return 0;
 }
 
-static inline void stmmac_mac_enable_rx(void __iomem *ioaddr)
+static inline void stmmac_enable_mac(void __iomem *ioaddr)
 {
 	u32 value = readl(ioaddr + MAC_CTRL_REG);
-	value |= MAC_RNABLE_RX;
-	/* Set the RE (receive enable bit into the MAC CTRL register).  */
-	writel(value, ioaddr + MAC_CTRL_REG);
-}
 
-static inline void stmmac_mac_enable_tx(void __iomem *ioaddr)
-{
-	u32 value = readl(ioaddr + MAC_CTRL_REG);
-	value |= MAC_ENABLE_TX;
-	/* Set the TE (transmit enable bit into the MAC CTRL register).  */
+	value |= MAC_RNABLE_RX | MAC_ENABLE_TX;
 	writel(value, ioaddr + MAC_CTRL_REG);
 }
 
-static inline void stmmac_mac_disable_rx(void __iomem *ioaddr)
+static inline void stmmac_disable_mac(void __iomem *ioaddr)
 {
 	u32 value = readl(ioaddr + MAC_CTRL_REG);
-	value &= ~MAC_RNABLE_RX;
-	writel(value, ioaddr + MAC_CTRL_REG);
-}
 
-static inline void stmmac_mac_disable_tx(void __iomem *ioaddr)
-{
-	u32 value = readl(ioaddr + MAC_CTRL_REG);
-	value &= ~MAC_ENABLE_TX;
+	value &= ~(MAC_ENABLE_TX | MAC_RNABLE_RX);
 	writel(value, ioaddr + MAC_CTRL_REG);
 }
 
@@ -857,8 +843,7 @@ static int stmmac_open(struct net_device *dev)
 	writel(0xffffffff, priv->ioaddr + MMC_LOW_INTR_MASK);
 
 	/* Enable the MAC Rx/Tx */
-	stmmac_mac_enable_rx(priv->ioaddr);
-	stmmac_mac_enable_tx(priv->ioaddr);
+	stmmac_enable_mac(priv->ioaddr);
 
 	/* Set the HW DMA mode and the COE */
 	stmmac_dma_operation_mode(priv);
@@ -928,9 +913,8 @@ static int stmmac_release(struct net_device *dev)
 	/* Release and free the Rx/Tx resources */
 	free_dma_desc_resources(priv);
 
-	/* Disable the MAC core */
-	stmmac_mac_disable_tx(priv->ioaddr);
-	stmmac_mac_disable_rx(priv->ioaddr);
+	/* Disable the MAC Rx/Tx */
+	stmmac_disable_mac(priv->ioaddr);
 
 	netif_carrier_off(dev);
 
@@ -1787,8 +1771,7 @@ static int stmmac_dvr_remove(struct platform_device *pdev)
 	priv->hw->dma->stop_rx(priv->ioaddr);
 	priv->hw->dma->stop_tx(priv->ioaddr);
 
-	stmmac_mac_disable_rx(priv->ioaddr);
-	stmmac_mac_disable_tx(priv->ioaddr);
+	stmmac_disable_mac(priv->ioaddr);
 
 	netif_carrier_off(ndev);
 
@@ -1839,13 +1822,11 @@ static int stmmac_suspend(struct platform_device *pdev, pm_message_t state)
 					     dis_ic);
 		priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size);
 
-		stmmac_mac_disable_tx(priv->ioaddr);
-
 		/* Enable Power down mode by programming the PMT regs */
 		if (device_can_wakeup(priv->device))
 			priv->hw->mac->pmt(priv->ioaddr, priv->wolopts);
 		else
-			stmmac_mac_disable_rx(priv->ioaddr);
+			stmmac_disable_mac(priv->ioaddr);
 	} else {
 		priv->shutdown = 1;
 		/* Although this can appear slightly redundant it actually
@@ -1886,8 +1867,7 @@ static int stmmac_resume(struct platform_device *pdev)
 	netif_device_attach(dev);
 
 	/* Enable the MAC and DMA */
-	stmmac_mac_enable_rx(priv->ioaddr);
-	stmmac_mac_enable_tx(priv->ioaddr);
+	stmmac_enable_mac(priv->ioaddr);
 	priv->hw->dma->start_tx(priv->ioaddr);
 	priv->hw->dma->start_rx(priv->ioaddr);
 
-- 
1.5.5.6


^ permalink raw reply related

* Re: linux-next: Tree for October 25 (netfilter/xt_socket)
From: David Miller @ 2010-10-26  5:02 UTC (permalink / raw)
  To: randy.dunlap; +Cc: sfr, netdev, linux-next, linux-kernel, netfilter-devel
In-Reply-To: <20101025165407.74b0c0a0.randy.dunlap@oracle.com>

From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 25 Oct 2010 16:54:07 -0700

> xt_socket.c:(.text+0x39ff8): undefined reference to `ipv6_find_hdr'
> xt_socket.c:(.init.text+0x1703): undefined reference to `nf_defrag_ipv6_enable'

This ought to be fixed in net-2.6 already, by:

commit f6318e558806c925029dc101f14874be9f9fa78f
Author: KOVACS Krisztian <hidden@balabit.hu>
Date:   Sun Oct 24 23:38:32 2010 +0000

    netfilter: fix module dependency issues with IPv6 defragmentation, ip6tables and xt_TPROXY
    
    One of the previous tproxy related patches split IPv6 defragmentation and
    connection tracking, but did not correctly add Kconfig stanzas to handle the
    new dependencies correctly. This patch fixes that by making the config options
    mirror the setup we have for IPv4: a distinct config option for defragmentation
    that is automatically selected by both connection tracking and
    xt_TPROXY/xt_socket.
    
    The patch also changes the #ifdefs enclosing IPv6 specific code in xt_socket
    and xt_TPROXY: we only compile these in case we have ip6tables support enabled.
    
    Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>
    Signed-off-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply

* Re: linux-next: Tree for October 25 (netfilter/nf_conntrack_reasm)
From: David Miller @ 2010-10-26  5:03 UTC (permalink / raw)
  To: randy.dunlap; +Cc: sfr, netfilter-devel, netdev, linux-next, linux-kernel
In-Reply-To: <20101025165529.9cedfeac.randy.dunlap@oracle.com>

From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 25 Oct 2010 16:55:29 -0700

> On Mon, 25 Oct 2010 14:58:34 +1100 Stephen Rothwell wrote:
> 
>> Hi all,
>> 
>> Reminder: do not add 2.6.38 destined stuff to linux-next until after
>> 2.6.37-rc1 is released.
> 
> net/ipv6/netfilter/nf_conntrack_reasm.c:628: error: 'nf_ct_frag6_sysctl_header' undeclared (first use in this function)
> net/ipv6/netfilter/nf_conntrack_reasm.c:628: error: 'nf_net_netfilter_sysctl_path' undeclared (first use in this function)
> net/ipv6/netfilter/nf_conntrack_reasm.c:629: error: 'nf_ct_frag6_sysctl_table' undeclared (first use in this function)
> net/ipv6/netfilter/nf_conntrack_reasm.c:640: error: 'nf_ct_frag6_sysctl_header' undeclared (first use in this function)
> 
> 
> config file is attached.

Should also be fixed by the commit I just pointed you to.

^ permalink raw reply

* ath9k crashing the kernel
From: Jaswinder Singh @ 2010-10-26  5:06 UTC (permalink / raw)
  To: Linux Kernel Mailing List, linux-wireless, netdev, ath9k-devel

Hello,

ath9k is crashing the kernel :

[   21.276554] BUG: spinlock bad magic on CPU#1, NetworkManager/1056
[   21.277015]  lock: f5be80a8, .magic: 00000000, .owner: <none>/-1,
.owner_cpu: 0
[   21.277015] Pid: 1056, comm: NetworkManager Not tainted 2.6.36-netbook+ #20
[   21.277015] Call Trace:
[   21.277015]  [<c14767a7>] ? printk+0xf/0x11
[   21.277015]  [<c117b823>] spin_bug+0x7c/0x87
[   21.301365]  [<c117b8bd>] do_raw_spin_lock+0x1e/0x125
[   21.301365]  [<c1478d0a>] ? _raw_spin_unlock_bh+0x1a/0x1c
[   21.301365]  [<c1478dc3>] _raw_spin_lock_irqsave+0x17/0x1c
[   21.318857]  [<c1288a74>] ath9k_config+0x255/0x38b
[   21.318857]  [<c1447bdb>] ieee80211_hw_config+0x10a/0x114
[   21.328034]  [<c1453545>] ieee80211_do_open+0x3de/0x4cf
[   21.328034]  [<c1452206>] ? ieee80211_check_concurrent_iface+0x21/0x13a
[   21.328034]  [<c104c470>] ? raw_notifier_call_chain+0xc/0xe
[   21.328034]  [<c1453691>] ieee80211_open+0x5b/0x5e
[   21.328034]  [<c13947cf>] __dev_open+0x80/0xa9
[   21.328034]  [<c13920bb>] __dev_change_flags+0xa1/0x116
[   21.328034]  [<c1394723>] dev_change_flags+0x13/0x3f
[   21.328034]  [<c139d568>] do_setlink+0x226/0x507
[   21.328034]  [<c139d917>] rtnl_setlink+0xce/0xd4
[   21.328034]  [<c11786d2>] ? copy_to_user+0x3a/0x118
[   21.328034]  [<c139d849>] ? rtnl_setlink+0x0/0xd4
[   21.328034]  [<c139df4d>] rtnetlink_rcv_msg+0x17e/0x194
[   21.328034]  [<c139ddcf>] ? rtnetlink_rcv_msg+0x0/0x194
[   21.328034]  [<c13a59c4>] netlink_rcv_skb+0x30/0x76
[   21.328034]  [<c139ddc8>] rtnetlink_rcv+0x1b/0x22
[   21.328034]  [<c13a5772>] netlink_unicast+0x1aa/0x20b
[   21.328034]  [<c13a5eac>] netlink_sendmsg+0x22c/0x27a
[   21.328034]  [<c13862b8>] sock_sendmsg+0xa5/0xbb
[   21.328034]  [<c13862b8>] ? sock_sendmsg+0xa5/0xbb
[   21.328034]  [<c138e05b>] ? verify_iovec+0x3e/0x6b
[   21.328034]  [<c1386ab2>] sys_sendmsg+0x149/0x196
[   21.328034]  [<c10826f5>] ? unlock_page+0x40/0x43
[   21.328034]  [<c10952d5>] ? __do_fault+0x367/0x393
[   21.328034]  [<c1096b8e>] ? handle_mm_fault+0x3bd/0x77a
[   21.328034]  [<c10b6fb3>] ? destroy_inode+0x1f/0x30
[   21.328034]  [<c138694e>] ? sys_recvmsg+0x2b/0x46
[   21.328034]  [<c1387c00>] sys_socketcall+0x146/0x18b
[   21.328034]  [<c1002850>] sysenter_do_call+0x12/0x26
[   21.344846] ADDRCONF(NETDEV_UP): wlan0: link is not ready
[   21.487994] atl1c 0000:03:00.0: irq 42 for MSI/MSI-X
[   21.543870] ADDRCONF(NETDEV_UP): eth0: link is not ready


Linux 2.6.36 f6f94e2ab1 is good
and
229aebb873e2972 is bad

I am trying to bisect it but I am getting compilation error :

  LD      .tmp_vmlinux1
drivers/built-in.o: In function `ath_do_set_opmode':
/home/jaswinder/jaswinder-git/linux-2.6/drivers/net/wireless/ath/ath5k/base.c:567:
undefined reference to `ath_opmode_to_string'
/home/jaswinder/jaswinder-git/linux-2.6/drivers/net/wireless/ath/ath5k/base.c:568:
undefined reference to `ath_opmode_to_string'
make: *** [.tmp_vmlinux1] Error 1
[jaswinder@ linux-2.6]$

Thanks,
--
Jaswinder Singh.

^ permalink raw reply

* Re: [Uclinux-dist-devel] [PATCH 1/5] netdev: bfin_mac: push settings to platform resources
From: David Miller @ 2010-10-26  5:07 UTC (permalink / raw)
  To: vapier; +Cc: netdev, uclinux-dist-devel
In-Reply-To: <AANLkTi=jfho4yd_gSMfKywBPkD_=smgnP_yLtRh=00jv@mail.gmail.com>

From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 25 Oct 2010 21:54:48 -0400

> On Sun, Oct 24, 2010 at 19:51, Mike Frysinger wrote:
>> On Sun, Oct 24, 2010 at 19:45, David Miller wrote:
>>> From: Mike Frysinger
>>>> and i have the Blackfin changes waiting for you to merge this patch.
>>>> i can squash them into this change and have you merge the result, or i
>>>> can merge it.  i dont really care either way.
>>>
>>> Why don't you just apply this to your tree then.  Feel free to add my:
>>
>> thanks, i'll do that
> 
> Linus has taken this via my tree now.  are you going to merge the
> other patches ?  or do i need to resend them ?

Please resend them, thanks.

^ permalink raw reply

* Re: [v3 RFC PATCH 0/4] Implement multiqueue virtio-net
From: Krishna Kumar2 @ 2010-10-26  5:10 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: anthony, arnd, avi, davem, eric.dumazet, kvm, netdev, rusty
In-Reply-To: <20101025161718.GA19559@redhat.com>

"Michael S. Tsirkin" <mst@redhat.com> wrote on 10/25/2010 09:47:18 PM:

> > Any feedback, comments, objections, issues or bugs about the
> > patches? Please let me know if something needs to be done.
>
> I am trying to wrap my head around kernel/user interface here.
> E.g., will we need another incompatible change when we add multiple RX
> queues?

Though I added a 'mq' option to qemu, there shouldn't be
any incompatibility between old and new qemu's wrt vhost
and virtio-net drivers. So the old qemu will run new host
and new guest without issues, and new qemu can also run
old host and old guest. Multiple RXQ will also not add
any incompatibility.

With MQ RX, I will be able to remove the heuristic (idea
from David Stevens).  The idea is: Guest sends out packets
on, say TXQ#2, vhost#2 processes the packets but packets
going out from host to guest might be sent out on a
different RXQ, say RXQ#4.  Guest receives the packet on
RXQ#4, and all future responses on that connection are sent
on TXQ#4.  Now vhost#4 processes both RX and TX packets for
this connection.  Without needing to hash on the connection,
guest can make sure that the same vhost thread will handle
a single connection.

> Also need to think about how robust our single stream heuristic is,
> e.g. what are the chances it will misdetect a bidirectional
> UDP stream as a single TCP?

I think it should not happen. The heuristic code gets
called for handling just the transmit packets, packets
that vhost sends out to the guest skip this path.

I tested unidirectional and bidirectional UDP to confirm:

8 iterations of iperf tests, each iteration of 15 secs,
result is the sum of all 8 iterations in Gbits/sec
__________________________________________
Uni-directional          Bi-directional
  Org      New             Org      New
__________________________________________
  71.78    71.77           71.74   72.07
__________________________________________

Thanks,

- KK


^ permalink raw reply

* RE: [PATCH net-next-2.6 1/2] be2net: Adding an option to use INTx instead of MSI-X
From: Somnath.Kotur @ 2010-10-26  5:24 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <20101025.162535.226782713.davem@davemloft.net>

Dave,
         Could you please ignore this patch and apply the next patch in the series
 ([PATCH 2/2] be2net: Schedule/Destroy worker thread in probe()/remove() rather than open()/close())?

Thanks
Som
________________________________________
From: David Miller [davem@davemloft.net]
Sent: Tuesday, October 26, 2010 4:55 AM
To: bhutchings@solarflare.com
Cc: Kotur, Somnath; netdev@vger.kernel.org; linux-pci@vger.kernel.org
Subject: Re: [PATCH net-next-2.6 1/2] be2net: Adding an option to use INTx instead of MSI-X

From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 25 Oct 2010 23:38:53 +0100

> David Miller wrote:
>> From: Somnath Kotur <somnath.kotur@emulex.com>
>> Date: Mon, 25 Oct 2010 16:42:35 +0530
>>
>> > By default, be2net uses MSIx wherever possible.
>> > Adding a module parameter to use INTx for users who do not want to use MSIx.
>> >
>> > Signed-off-by: Somnath Kotur <somnath.kotur@emulex.com>
>>
>> Either add a new ethtool flag, or use the PCI subsystem facilities
>> for tweaking things to implement this.
>>
>> Do not use a module option, otherwise every other networking driver
>> author will get the same "cool" idea, give the module option
>> different names, and the resulting user experience is terrible.
>
> This has already happened, sadly.  So far as I can see it's mostly done
> to allow users to work around systems with broken MSIs; I'm not aware of
> any other reason to prefer legacy interrupts.  However, the PCI subsystem
> already implements a blacklist and a kernel parameter for disabling MSIs
> on these systems.

The PCI subsystem bits I'm totally fine with.

But in the drivers themselves, that's what I don't want.

^ permalink raw reply

* Re: [PATCH] ipv6: addrconf: clear IPv6 addresses and routes when losing link
From: Lorenzo Colitti @ 2010-10-26  5:44 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev
In-Reply-To: <20101025213841.635b9a15@nehalam>

On Mon, Oct 25, 2010 at 9:38 PM, Stephen Hemminger
<shemminger@vyatta.com> wrote:
> This is incorrect. When link is lost, routes and addresses should not be
> flushed. They should be marked as tentative and then go through DAD again
> on the new network.

That won't help the case I am trying to fix, which is the case where
the new link has a global prefix different than the old link. Marking
the addresses as tentative will simply make them pass DAD and come
back as soon as link comes back. But since they don't match the prefix
that is assigned to the new link, they are unusable, because packets
can't be routed back to them.

> If you do it this way, you break routing protocols when link is brought
> down and back up.

The only addresses and routes flushed in this way should be ones that
aren't manually configured, i.e., the ones created by autoconf
(addrconf.c:2720 onwards). These won't be used by routing protocols,
except for link-local addresses. So I assume you're talking about
link-local here.

Link-local addresses are immediately recreated in a tentative state as
soon as link comes back, because on NETDEV_UP addrconf_notify calls
addrconf_dev_config. So, this patch only makes it so that they become
tentative when link goes away and comes back. In that time, the router
that temporarily loses link is unable to send packets for the brief
period of time that the link is performing DAD, but if the router has
lost link, it will also fail to send the packet while link is lost.
What's the additional failure scenario? Will it help if I make it so
that link-local addresses aren't touched at all?

^ permalink raw reply

* [PATCH] e1000e: add netpoll support for MSI/MSI-X IRQ modes
From: Dongdong Deng @ 2010-10-26  5:54 UTC (permalink / raw)
  To: davem, jesse, jeffrey.t.kirsher, bruce.w.allan
  Cc: alexander.h.duyck, carolyn.wyborny, donald.c.skidmore,
	gregory.v.rose, peter.p.waskiewicz.jr, john.ronciak,
	dongdong.deng, e1000-devel, netdev

When CONFIG_PCI_MSI is enabled, e1000e can work in MSI/MSI-X IRQ mode,
but the netpoll controller does not handle those IRQ modes on e1000e.

This patch adds handling of the MSI/MSI-X IRQ modes to the netpoll
controller, so that netconsole can work with those IRQ modes.

Signed-off-by: Dongdong Deng <dongdong.deng@windriver.com>
---
 drivers/net/e1000e/netdev.c |   49 +++++++++++++++++++++++++++++++++++++++---
 1 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index e561d15..36992ba 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -5369,6 +5369,36 @@ static void e1000_shutdown(struct pci_dev *pdev)
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
+
+static irqreturn_t e1000_intr_msix(int irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	int vector, msix_irq;
+
+	if (adapter->msix_entries) {
+		vector = 0;
+		msix_irq = adapter->msix_entries[vector].vector;
+		disable_irq(msix_irq);
+		e1000_intr_msix_rx(msix_irq, netdev);
+		enable_irq(msix_irq);
+
+		vector++;
+		msix_irq = adapter->msix_entries[vector].vector;
+		disable_irq(msix_irq);
+		e1000_intr_msix_tx(msix_irq, netdev);
+		enable_irq(msix_irq);
+
+		vector++;
+		msix_irq = adapter->msix_entries[vector].vector;
+		disable_irq(msix_irq);
+		e1000_msix_other(msix_irq, netdev);
+		enable_irq(msix_irq);
+	}
+
+	return IRQ_HANDLED;
+}
+
 /*
  * Polling 'interrupt' - used by things like netconsole to send skbs
  * without having to re-enable interrupts. It's not called while
@@ -5378,10 +5408,21 @@ static void e1000_netpoll(struct net_device *netdev)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 
-	disable_irq(adapter->pdev->irq);
-	e1000_intr(adapter->pdev->irq, netdev);
-
-	enable_irq(adapter->pdev->irq);
+	switch (adapter->int_mode) {
+	case E1000E_INT_MODE_MSIX:
+		e1000_intr_msix(adapter->pdev->irq, netdev);
+		break;
+	case E1000E_INT_MODE_MSI:
+		disable_irq(adapter->pdev->irq);
+		e1000_intr_msi(adapter->pdev->irq, netdev);
+		enable_irq(adapter->pdev->irq);
+		break;
+	default: /* E1000E_INT_MODE_LEGACY */
+		disable_irq(adapter->pdev->irq);
+		e1000_intr(adapter->pdev->irq, netdev);
+		enable_irq(adapter->pdev->irq);
+		break;
+	}
 }
 #endif
 
-- 
1.6.0.4


^ permalink raw reply related

* Re: [PATCH 1/2 v3] xps: Improvements in TX queue selection
From: Helmut Schaa @ 2010-10-26  6:18 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, eric.dumazet
In-Reply-To: <alpine.DEB.1.00.1010211303390.30535@pokey.mtv.corp.google.com>

Hi,

Am Donnerstag 21 Oktober 2010 schrieb Tom Herbert:
> In dev_pick_tx, don't do work in calculating queue index or setting
> the index in the sock unless the device has more than one queue.  This
> allows the sock to be set only with a queue index of a multi-queue
> device which is desirable if devices are stacked like in a tunnel.
> 
> We also allow the mapping of a socket to queue to be changed.  To
> maintain in order packet transmission a flag (ooo_okay) has been
> added to the sk_buff structure.  If a transport layer sets this flag
> on a packet, the transmit queue can be changed for the socket.
> Presumably, the transport would set this if there was no possibility
> of creating OOO packets (for instance, there are no packets in flight
> for the socket).  This patch includes the modification in TCP output
> for setting this flag.
> 
> Signed-off-by: Tom Herbert <therbert@google.com>
> ---

[...]

> diff --git a/net/core/dev.c b/net/core/dev.c
> index b2269ac..a538ed5 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2123,28 +2123,32 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
>  					struct sk_buff *skb)
>  {
>  	int queue_index;
> -	const struct net_device_ops *ops = dev->netdev_ops;
>  
> -	if (ops->ndo_select_queue) {
> -		queue_index = ops->ndo_select_queue(dev, skb);
> -		queue_index = dev_cap_txqueue(dev, queue_index);
> -	} else {
> +	if (dev->real_num_tx_queues > 1) {
>  		struct sock *sk = skb->sk;
> +
>  		queue_index = sk_tx_queue_get(sk);
> -		if (queue_index < 0) {
>  
> -			queue_index = 0;
> -			if (dev->real_num_tx_queues > 1)
> +		if (queue_index < 0 || skb->ooo_okay ||
> +		    queue_index >= dev->real_num_tx_queues) {
> +			const struct net_device_ops *ops = dev->netdev_ops;
> +			int old_index = queue_index;
> +
> +			if (ops->ndo_select_queue) {
> +				queue_index = ops->ndo_select_queue(dev, skb);
> +				queue_index = dev_cap_txqueue(dev, queue_index);
> +			} else
>  				queue_index = skb_tx_hash(dev, skb);

Wouldn't that break mac80211 QoS again for bridged AP mode interfaces (see
commit deabc772f39405054a438d711f408d2d94d26d96, "net: fix tx queue selection
for bridged devices implementing select_queue")?

Helmut

^ permalink raw reply

* Re: [PATCH net-next-2.6 1/2] be2net: Adding an option to use INTx instead of MSI-X
From: Michael Ellerman @ 2010-10-26  6:52 UTC (permalink / raw)
  To: David Miller; +Cc: bhutchings, somnath.kotur, netdev, linux-pci
In-Reply-To: <20101025.162535.226782713.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 5022 bytes --]

On Mon, 2010-10-25 at 16:25 -0700, David Miller wrote:
> From: Ben Hutchings <bhutchings@solarflare.com>
> Date: Mon, 25 Oct 2010 23:38:53 +0100
> 
> > David Miller wrote:
> >> From: Somnath Kotur <somnath.kotur@emulex.com>
> >> Date: Mon, 25 Oct 2010 16:42:35 +0530
> >> 
> >> > By default, be2net uses MSIx wherever possible.
> >> > Adding a module parameter to use INTx for users who do not want to use MSIx.
> >> > 
> >> > Signed-off-by: Somnath Kotur <somnath.kotur@emulex.com>
> >> 
> >> Either add a new ethtool flag, or use the PCI subsystem facilities
> >> for tweaking things to implement this.
> >>
> >> Do not use a module option, otherwise every other networking driver
> >> author will get the same "cool" idea, give the module option
> >> different names, and the resulting user experience is terrible.
> > 
> > This has already happened, sadly.  So far as I can see it's mostly done
> > to allow users to work around systems with broken MSIs; I'm not aware of
> > any other reason to prefer legacy interrupts.  However, the PCI subsystem
> > already implements a blacklist and a kernel parameter for disabling MSIs
> > on these systems.
> 
> The PCI subsystem bits I'm totally fine with.
> 
> But in the drivers themselves, that's what I don't want.

That horse has really really bolted, it's gawn.

I count 26 drivers with "disable MSI/X" parameters. Some even have more
than one.

11 of them are network drivers, 9 scsi, 3 ata.

I agree it's a mess for users, but it's probably preferable to a
non-working driver.

Ethtool would be nice, but only for network drivers. Is there a generic
solution? Quirks are obviously not keeping people happy.

cheers



MSI:

Param "msi", 10 users:
  drivers/ata/sata_mv.c:MODULE_PARM_DESC(msi, "Enable use of PCI MSI (0=off, 1=on)");
  drivers/ata/sata_nv.c:MODULE_PARM_DESC(msi, "Enable use of MSI (Default: false)");
  drivers/ata/sata_sil24.c:MODULE_PARM_DESC(msi, "Enable MSI (Default: false)");
  drivers/net/cxgb3/cxgb3_main.c:MODULE_PARM_DESC(msi, "whether to use MSI or MSI-X");
  drivers/net/cxgb4/cxgb4_main.c:MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");
  drivers/net/cxgb4vf/cxgb4vf_main.c:MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
  drivers/net/forcedeth.c:MODULE_PARM_DESC(msi, "MSI interrupts are enabled by setting to 1 and disabled by setting to 0.");
  drivers/net/qla3xxx.c:MODULE_PARM_DESC(msi, "Turn on Message Signaled Interrupts.");
  drivers/scsi/aacraid/aachba.c:MODULE_PARM_DESC(msi, "IRQ handling."
  drivers/scsi/stex.c:MODULE_PARM_DESC(msi, "Enable Message Signaled Interrupts(0=off, 1=on)");

Param "use_msi", 4 users:
  drivers/net/qlcnic/qlcnic_main.c:MODULE_PARM_DESC(use_msi, "MSI interrupt (0=disabled, 1=enabled");
  drivers/scsi/3w-9xxx.c:MODULE_PARM_DESC(use_msi, "Use Message Signaled Interrupts.  Default: 0");
  drivers/scsi/3w-sas.c:MODULE_PARM_DESC(use_msi, "Use Message Signaled Interrupts. Default: 0");
  drivers/scsi/aic94xx/aic94xx_init.c:MODULE_PARM_DESC(use_msi, "\n"

Param "disable_msi", 4 users:
  drivers/scsi/vmw_pvscsi.c:MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
  drivers/net/bnx2.c:MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");
  drivers/net/chelsio/cxgb2.c:MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");
  drivers/net/sky2.c:MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");

Misc:
  sound/pci/hda/hda_intel.c:MODULE_PARM_DESC(enable_msi, "Enable Message Signaled Interrupt (MSI)");
  drivers/message/fusion/mptbase.c:MODULE_PARM_DESC(mpt_msi_enable_spi, " Enable MSI Support for SPI \
  drivers/message/fusion/mptbase.c:MODULE_PARM_DESC(mpt_msi_enable_fc, " Enable MSI Support for FC \
  drivers/message/fusion/mptbase.c:MODULE_PARM_DESC(mpt_msi_enable_sas, " Enable MSI Support for SAS \
  drivers/net/myri10ge/myri10ge.c:MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts");


MSI-X:

Param "msi_x", 2 users:
  drivers/infiniband/hw/mthca/mthca_main.c:MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
  drivers/net/mlx4/main.c:MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

Misc:
  drivers/net/forcedeth.c:MODULE_PARM_DESC(msix, "MSIX interrupts are enabled by setting to 1 and disabled by setting to 0.");
  drivers/net/qlcnic/qlcnic_main.c:MODULE_PARM_DESC(use_msi_x, "MSI-X interrupt (0=disabled, 1=enabled");
  drivers/scsi/bfa/bfad_intr.c:MODULE_PARM_DESC(msix_disable_cb, "Disable MSIX for Brocade-415/425/815/825"
  drivers/scsi/bfa/bfad_intr.c:MODULE_PARM_DESC(msix_disable_ct, "Disable MSIX for Brocade-1010/1020/804"
  drivers/scsi/mpt2sas/mpt2sas_base.c:MODULE_PARM_DESC(msix_disable, " disable msix routed interrupts (default=0)");
  drivers/scsi/qla4xxx/ql4_os.c:MODULE_PARM_DESC(ql4xenablemsix,
  drivers/scsi/vmw_pvscsi.c:MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply

* [PATCH] tunnels: add __rcu annotations
From: Eric Dumazet @ 2010-10-26  7:01 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Add __rcu annotations to:
        (struct ip_tunnel)->prl
        (struct ip_tunnel_prl_entry)->next
        (struct xfrm_tunnel)->next
	struct xfrm_tunnel *tunnel4_handlers
	struct xfrm_tunnel *tunnel64_handlers

And use appropriate rcu primitives to reduce sparse warnings if
CONFIG_SPARSE_RCU_POINTER=y

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/ipip.h |    4 ++--
 include/net/xfrm.h |    2 +-
 net/ipv4/tunnel4.c |   29 +++++++++++++++++++----------
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/include/net/ipip.h b/include/net/ipip.h
index 0403fe4..a32654d 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -34,12 +34,12 @@ struct ip_tunnel {
 #ifdef CONFIG_IPV6_SIT_6RD
 	struct ip_tunnel_6rd_parm	ip6rd;
 #endif
-	struct ip_tunnel_prl_entry	*prl;		/* potential router list */
+	struct ip_tunnel_prl_entry __rcu *prl;		/* potential router list */
 	unsigned int			prl_count;	/* # of entries in PRL */
 };
 
 struct ip_tunnel_prl_entry {
-	struct ip_tunnel_prl_entry	*next;
+	struct ip_tunnel_prl_entry __rcu *next;
 	__be32				addr;
 	u16				flags;
 	struct rcu_head			rcu_head;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index ffcd478..bcfb6b2 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1264,7 +1264,7 @@ struct xfrm_tunnel {
 	int (*handler)(struct sk_buff *skb);
 	int (*err_handler)(struct sk_buff *skb, u32 info);
 
-	struct xfrm_tunnel *next;
+	struct xfrm_tunnel __rcu *next;
 	int priority;
 };
 
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 9a17bd2..ac3b3ee 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,27 +14,32 @@
 #include <net/protocol.h>
 #include <net/xfrm.h>
 
-static struct xfrm_tunnel *tunnel4_handlers __read_mostly;
-static struct xfrm_tunnel *tunnel64_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
 static DEFINE_MUTEX(tunnel4_mutex);
 
-static inline struct xfrm_tunnel **fam_handlers(unsigned short family)
+static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
 {
 	return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
 }
 
 int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
 {
-	struct xfrm_tunnel **pprev;
+	struct xfrm_tunnel __rcu **pprev;
+	struct xfrm_tunnel *t;
+
 	int ret = -EEXIST;
 	int priority = handler->priority;
 
 	mutex_lock(&tunnel4_mutex);
 
-	for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
-		if ((*pprev)->priority > priority)
+	for (pprev = fam_handlers(family);
+	     (t = rcu_dereference_protected(*pprev,
+			lockdep_is_held(&tunnel4_mutex))) != NULL;
+	     pprev = &t->next) {
+		if (t->priority > priority)
 			break;
-		if ((*pprev)->priority == priority)
+		if (t->priority == priority)
 			goto err;
 	}
 
@@ -52,13 +57,17 @@ EXPORT_SYMBOL(xfrm4_tunnel_register);
 
 int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
 {
-	struct xfrm_tunnel **pprev;
+	struct xfrm_tunnel __rcu **pprev;
+	struct xfrm_tunnel *t;
 	int ret = -ENOENT;
 
 	mutex_lock(&tunnel4_mutex);
 
-	for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
-		if (*pprev == handler) {
+	for (pprev = fam_handlers(family);
+	     (t = rcu_dereference_protected(*pprev,
+			lockdep_is_held(&tunnel4_mutex))) != NULL;
+	     pprev = &t->next) {
+		if (t == handler) {
 			*pprev = handler->next;
 			ret = 0;
 			break;



^ permalink raw reply related

* [PATCH] ipv4: add __rcu annotations to routes.c
From: Eric Dumazet @ 2010-10-26  7:02 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Add __rcu annotations to :
        (struct dst_entry)->rt_next
        (struct rt_hash_bucket)->chain

And use appropriate rcu primitives to reduce sparse warnings if
CONFIG_SPARSE_RCU_POINTER=y

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/dst.h |    2 -
 net/ipv4/route.c  |   75 ++++++++++++++++++++++++++------------------
 2 files changed, 47 insertions(+), 30 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index a217c83..ffe9cb7 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -95,7 +95,7 @@ struct dst_entry {
 	unsigned long		lastuse;
 	union {
 		struct dst_entry *next;
-		struct rtable    *rt_next;
+		struct rtable __rcu *rt_next;
 		struct rt6_info   *rt6_next;
 		struct dn_route  *dn_next;
 	};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d6cb2bf..987bf9a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -198,7 +198,7 @@ const __u8 ip_tos2prio[16] = {
  */
 
 struct rt_hash_bucket {
-	struct rtable	*chain;
+	struct rtable __rcu	*chain;
 };
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
@@ -280,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
 	struct rtable *r = NULL;
 
 	for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
-		if (!rt_hash_table[st->bucket].chain)
+		if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
 			continue;
 		rcu_read_lock_bh();
 		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -300,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
 {
 	struct rt_cache_iter_state *st = seq->private;
 
-	r = r->dst.rt_next;
+	r = rcu_dereference_bh(r->dst.rt_next);
 	while (!r) {
 		rcu_read_unlock_bh();
 		do {
 			if (--st->bucket < 0)
 				return NULL;
-		} while (!rt_hash_table[st->bucket].chain);
+		} while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
 		rcu_read_lock_bh();
-		r = rt_hash_table[st->bucket].chain;
+		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
 	}
-	return rcu_dereference_bh(r);
+	return r;
 }
 
 static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -721,19 +721,23 @@ static void rt_do_flush(int process_context)
 	for (i = 0; i <= rt_hash_mask; i++) {
 		if (process_context && need_resched())
 			cond_resched();
-		rth = rt_hash_table[i].chain;
+		rth = rcu_dereference_raw(rt_hash_table[i].chain);
 		if (!rth)
 			continue;
 
 		spin_lock_bh(rt_hash_lock_addr(i));
 #ifdef CONFIG_NET_NS
 		{
-		struct rtable ** prev, * p;
+		struct rtable __rcu **prev;
+		struct rtable *p;
 
-		rth = rt_hash_table[i].chain;
+		rth = rcu_dereference_protected(rt_hash_table[i].chain,
+			lockdep_is_held(rt_hash_lock_addr(i)));
 
 		/* defer releasing the head of the list after spin_unlock */
-		for (tail = rth; tail; tail = tail->dst.rt_next)
+		for (tail = rth; tail;
+		     tail = rcu_dereference_protected(tail->dst.rt_next,
+				lockdep_is_held(rt_hash_lock_addr(i))))
 			if (!rt_is_expired(tail))
 				break;
 		if (rth != tail)
@@ -741,8 +745,12 @@ static void rt_do_flush(int process_context)
 
 		/* call rt_free on entries after the tail requiring flush */
 		prev = &rt_hash_table[i].chain;
-		for (p = *prev; p; p = next) {
-			next = p->dst.rt_next;
+		for (p = rcu_dereference_protected(*prev,
+				lockdep_is_held(rt_hash_lock_addr(i)));
+		     p != NULL;
+		     p = next) {
+			next = rcu_dereference_protected(p->dst.rt_next,
+				lockdep_is_held(rt_hash_lock_addr(i)));
 			if (!rt_is_expired(p)) {
 				prev = &p->dst.rt_next;
 			} else {
@@ -752,14 +760,15 @@ static void rt_do_flush(int process_context)
 		}
 		}
 #else
-		rth = rt_hash_table[i].chain;
-		rt_hash_table[i].chain = NULL;
+		rth = rcu_dereference_protected(rt_hash_table[i].chain,
+			lockdep_is_held(rt_hash_lock_addr(i)));
+		rcu_assign_pointer(rt_hash_table[i].chain, NULL);
 		tail = NULL;
 #endif
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
 		for (; rth != tail; rth = next) {
-			next = rth->dst.rt_next;
+			next = rcu_dereference_protected(rth->dst.rt_next, 1);
 			rt_free(rth);
 		}
 	}
@@ -790,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	while (aux != rth) {
 		if (compare_hash_inputs(&aux->fl, &rth->fl))
 			return 0;
-		aux = aux->dst.rt_next;
+		aux = rcu_dereference_protected(aux->dst.rt_next, 1);
 	}
 	return ONE;
 }
@@ -799,7 +808,8 @@ static void rt_check_expire(void)
 {
 	static unsigned int rover;
 	unsigned int i = rover, goal;
-	struct rtable *rth, **rthp;
+	struct rtable *rth;
+	struct rtable __rcu **rthp;
 	unsigned long samples = 0;
 	unsigned long sum = 0, sum2 = 0;
 	unsigned long delta;
@@ -825,11 +835,12 @@ static void rt_check_expire(void)
 
 		samples++;
 
-		if (*rthp == NULL)
+		if (rcu_dereference_raw(*rthp) == NULL)
 			continue;
 		length = 0;
 		spin_lock_bh(rt_hash_lock_addr(i));
-		while ((rth = *rthp) != NULL) {
+		while ((rth = rcu_dereference_protected(*rthp,
+					lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
 			prefetch(rth->dst.rt_next);
 			if (rt_is_expired(rth)) {
 				*rthp = rth->dst.rt_next;
@@ -941,7 +952,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
 	static unsigned long last_gc;
 	static int rover;
 	static int equilibrium;
-	struct rtable *rth, **rthp;
+	struct rtable *rth;
+	struct rtable __rcu **rthp;
 	unsigned long now = jiffies;
 	int goal;
 	int entries = dst_entries_get_fast(&ipv4_dst_ops);
@@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
 			k = (k + 1) & rt_hash_mask;
 			rthp = &rt_hash_table[k].chain;
 			spin_lock_bh(rt_hash_lock_addr(k));
-			while ((rth = *rthp) != NULL) {
+			while ((rth = rcu_dereference_protected(*rthp,
+					lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
 				if (!rt_is_expired(rth) &&
 					!rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
@@ -1071,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head)
 
 	while (rth) {
 		length += has_noalias(head, rth);
-		rth = rth->dst.rt_next;
+		rth = rcu_dereference_protected(rth->dst.rt_next, 1);
 	}
 	return length >> FRACT_BITS;
 }
@@ -1079,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head)
 static int rt_intern_hash(unsigned hash, struct rtable *rt,
 			  struct rtable **rp, struct sk_buff *skb, int ifindex)
 {
-	struct rtable	*rth, **rthp;
+	struct rtable	*rth, *cand;
+	struct rtable __rcu **rthp, **candp;
 	unsigned long	now;
-	struct rtable *cand, **candp;
 	u32 		min_score;
 	int		chain_length;
 	int attempts = !in_softirq();
@@ -1128,7 +1141,8 @@ restart:
 	rthp = &rt_hash_table[hash].chain;
 
 	spin_lock_bh(rt_hash_lock_addr(hash));
-	while ((rth = *rthp) != NULL) {
+	while ((rth = rcu_dereference_protected(*rthp,
+			lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
 		if (rt_is_expired(rth)) {
 			*rthp = rth->dst.rt_next;
 			rt_free(rth);
@@ -1324,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident);
 
 static void rt_del(unsigned hash, struct rtable *rt)
 {
-	struct rtable **rthp, *aux;
+	struct rtable __rcu **rthp;
+	struct rtable *aux;
 
 	rthp = &rt_hash_table[hash].chain;
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	ip_rt_put(rt);
-	while ((aux = *rthp) != NULL) {
+	while ((aux = rcu_dereference_protected(*rthp,
+			lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
 		if (aux == rt || rt_is_expired(aux)) {
 			*rthp = aux->dst.rt_next;
 			rt_free(aux);
@@ -1346,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 {
 	int i, k;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	struct rtable *rth, **rthp;
+	struct rtable *rth;
+	struct rtable __rcu **rthp;
 	__be32  skeys[2] = { saddr, 0 };
 	int  ikeys[2] = { dev->ifindex, 0 };
 	struct netevent_redirect netevent;
@@ -1379,7 +1396,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
 						rt_genid(net));
 
-			rthp=&rt_hash_table[hash].chain;
+			rthp = &rt_hash_table[hash].chain;
 
 			while ((rth = rcu_dereference(*rthp)) != NULL) {
 				struct rtable *rt;



^ permalink raw reply related

* [PATCH] net: add __rcu annotations to protocol
From: Eric Dumazet @ 2010-10-26  7:02 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Add __rcu annotations to :
        struct net_protocol *inet_protos
        struct net_protocol *inet6_protos

And use appropriate casts to reduce sparse warnings if
CONFIG_SPARSE_RCU_POINTER=y

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/protocol.h |    4 ++--
 net/ipv4/protocol.c    |    8 +++++---
 net/ipv6/protocol.c    |    8 +++++---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/net/protocol.h b/include/net/protocol.h
index f1effdd..dc07495 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -89,10 +89,10 @@ struct inet_protosw {
 #define INET_PROTOSW_PERMANENT 0x02  /* Permanent protocols are unremovable. */
 #define INET_PROTOSW_ICSK      0x04  /* Is this an inet_connection_sock? */
 
-extern const struct net_protocol *inet_protos[MAX_INET_PROTOS];
+extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];
 
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-extern const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
+extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS];
 #endif
 
 extern int	inet_add_protocol(const struct net_protocol *prot, unsigned char num);
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 65699c2..9ae5c01 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 
-const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly;
+const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
 
 /*
  *	Add a protocol handler to the hash tables
@@ -38,7 +38,8 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
 	int hash = protocol & (MAX_INET_PROTOS - 1);
 
-	return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1;
+	return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
+			NULL, prot) ? 0 : -1;
 }
 EXPORT_SYMBOL(inet_add_protocol);
 
@@ -50,7 +51,8 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
 	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
 
-	ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1;
+	ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash],
+		       prot, NULL) == prot) ? 0 : -1;
 
 	synchronize_net();
 
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 9bb936a..9a7978f 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,13 +25,14 @@
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 
-const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
 
 int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
 {
 	int hash = protocol & (MAX_INET_PROTOS - 1);
 
-	return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1;
+	return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+			NULL, prot) ? 0 : -1;
 }
 EXPORT_SYMBOL(inet6_add_protocol);
 
@@ -43,7 +44,8 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
 {
 	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
 
-	ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1;
+	ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+		       prot, NULL) == prot) ? 0 : -1;
 
 	synchronize_net();
 



^ permalink raw reply related

* Re: ath9k crashing the kernel
From: Jaswinder Singh @ 2010-10-26  7:09 UTC (permalink / raw)
  To: Linux Kernel Mailing List, linux-wireless, netdev, ath9k-devel,
	Linus 
In-Reply-To: <AANLkTim=jD-C4DwDS2=KuQnhPOyP+Cnd-hUTdaZ7Opth@mail.gmail.com>

Hello,

On Tue, Oct 26, 2010 at 10:36 AM, Jaswinder Singh
<jaswinderlinux@gmail.com> wrote:
>
> ath9k is crashing the kernel :
>
> [   21.276554] BUG: spinlock bad magic on CPU#1, NetworkManager/1056
> [   21.277015]  lock: f5be80a8, .magic: 00000000, .owner: <none>/-1,
> .owner_cpu: 0
> [   21.277015] Pid: 1056, comm: NetworkManager Not tainted 2.6.36-netbook+ #20
> [   21.277015] Call Trace:
> [   21.277015]  [<c14767a7>] ? printk+0xf/0x11
> [   21.277015]  [<c117b823>] spin_bug+0x7c/0x87
> [   21.301365]  [<c117b8bd>] do_raw_spin_lock+0x1e/0x125
> [   21.301365]  [<c1478d0a>] ? _raw_spin_unlock_bh+0x1a/0x1c
> [   21.301365]  [<c1478dc3>] _raw_spin_lock_irqsave+0x17/0x1c
> [   21.318857]  [<c1288a74>] ath9k_config+0x255/0x38b
> [   21.318857]  [<c1447bdb>] ieee80211_hw_config+0x10a/0x114
> [   21.328034]  [<c1453545>] ieee80211_do_open+0x3de/0x4cf
> [   21.328034]  [<c1452206>] ? ieee80211_check_concurrent_iface+0x21/0x13a
> [   21.328034]  [<c104c470>] ? raw_notifier_call_chain+0xc/0xe
> [   21.328034]  [<c1453691>] ieee80211_open+0x5b/0x5e
> [   21.328034]  [<c13947cf>] __dev_open+0x80/0xa9
> [   21.328034]  [<c13920bb>] __dev_change_flags+0xa1/0x116
> [   21.328034]  [<c1394723>] dev_change_flags+0x13/0x3f
> [   21.328034]  [<c139d568>] do_setlink+0x226/0x507
> [   21.328034]  [<c139d917>] rtnl_setlink+0xce/0xd4
> [   21.328034]  [<c11786d2>] ? copy_to_user+0x3a/0x118
> [   21.328034]  [<c139d849>] ? rtnl_setlink+0x0/0xd4
> [   21.328034]  [<c139df4d>] rtnetlink_rcv_msg+0x17e/0x194
> [   21.328034]  [<c139ddcf>] ? rtnetlink_rcv_msg+0x0/0x194
> [   21.328034]  [<c13a59c4>] netlink_rcv_skb+0x30/0x76
> [   21.328034]  [<c139ddc8>] rtnetlink_rcv+0x1b/0x22
> [   21.328034]  [<c13a5772>] netlink_unicast+0x1aa/0x20b
> [   21.328034]  [<c13a5eac>] netlink_sendmsg+0x22c/0x27a
> [   21.328034]  [<c13862b8>] sock_sendmsg+0xa5/0xbb
> [   21.328034]  [<c13862b8>] ? sock_sendmsg+0xa5/0xbb
> [   21.328034]  [<c138e05b>] ? verify_iovec+0x3e/0x6b
> [   21.328034]  [<c1386ab2>] sys_sendmsg+0x149/0x196
> [   21.328034]  [<c10826f5>] ? unlock_page+0x40/0x43
> [   21.328034]  [<c10952d5>] ? __do_fault+0x367/0x393
> [   21.328034]  [<c1096b8e>] ? handle_mm_fault+0x3bd/0x77a
> [   21.328034]  [<c10b6fb3>] ? destroy_inode+0x1f/0x30
> [   21.328034]  [<c138694e>] ? sys_recvmsg+0x2b/0x46
> [   21.328034]  [<c1387c00>] sys_socketcall+0x146/0x18b
> [   21.328034]  [<c1002850>] sysenter_do_call+0x12/0x26
> [   21.344846] ADDRCONF(NETDEV_UP): wlan0: link is not ready
> [   21.487994] atl1c 0000:03:00.0: irq 42 for MSI/MSI-X
> [   21.543870] ADDRCONF(NETDEV_UP): eth0: link is not ready
>
>
> Linux 2.6.36 f6f94e2ab1 is good
> and
> 229aebb873e2972 is bad
>

After further investigation, the bad commit is:

3430098ae463e31ab16926ac3eb295368a3ca5d9 is the first bad commit
commit 3430098ae463e31ab16926ac3eb295368a3ca5d9
Author: Felix Fietkau <nbd@openwrt.org>
Date:   Sun Oct 10 18:21:52 2010 +0200

    ath9k: implement channel utilization stats for survey

    Results for the active channel are updated whenever a new survey dump
    is requested, the old data is kept to allow multiple processes to
    make their own channel utilization averages.
    All other channels only contain the data for the last time that the
    hardware was on the channel, i.e. the last scan result or other
    off-channel activity.
    Running a background scan does not clear the data for the active
    channel.

    Signed-off-by: Felix Fietkau <nbd@openwrt.org>
    Signed-off-by: John W. Linville <linville@tuxdriver.com>


Thanks,
--
Jaswinder Singh.
> I am trying to bisect it but I am getting compilation error :
>
>  LD      .tmp_vmlinux1
> drivers/built-in.o: In function `ath_do_set_opmode':
> /home/jaswinder/jaswinder-git/linux-2.6/drivers/net/wireless/ath/ath5k/base.c:567:
> undefined reference to `ath_opmode_to_string'
> /home/jaswinder/jaswinder-git/linux-2.6/drivers/net/wireless/ath/ath5k/base.c:568:
> undefined reference to `ath_opmode_to_string'
> make: *** [.tmp_vmlinux1] Error 1
> [jaswinder@ linux-2.6]$
>

^ permalink raw reply

* [PATCH 1/5] netdev: bfin_mac: clean up printk messages
From: Mike Frysinger @ 2010-10-26  7:40 UTC (permalink / raw)
  To: netdev, David S. Miller; +Cc: uclinux-dist-devel

Use netdev_* and pr_* helper funcs for output rather than printk.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 drivers/net/bfin_mac.c |   65 +++++++++++++++++++++--------------------------
 1 files changed, 29 insertions(+), 36 deletions(-)

diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index ce1e5e9..a572bcb 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -8,6 +8,11 @@
  * Licensed under the GPL-2 or later.
  */
 
+#define DRV_VERSION	"1.1"
+#define DRV_DESC	"Blackfin on-chip Ethernet MAC driver"
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -41,12 +46,7 @@
 
 #include "bfin_mac.h"
 
-#define DRV_NAME	"bfin_mac"
-#define DRV_VERSION	"1.1"
-#define DRV_AUTHOR	"Bryan Wu, Luke Yang"
-#define DRV_DESC	"Blackfin on-chip Ethernet MAC driver"
-
-MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_AUTHOR("Bryan Wu, Luke Yang");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION(DRV_DESC);
 MODULE_ALIAS("platform:bfin_mac");
@@ -189,8 +189,7 @@ static int desc_list_init(void)
 		/* allocate a new skb for next time receive */
 		new_skb = dev_alloc_skb(PKT_BUF_SZ + NET_IP_ALIGN);
 		if (!new_skb) {
-			printk(KERN_NOTICE DRV_NAME
-			       ": init: low on mem - packet dropped\n");
+			pr_notice("init: low on mem - packet dropped\n");
 			goto init_error;
 		}
 		skb_reserve(new_skb, NET_IP_ALIGN);
@@ -240,7 +239,7 @@ static int desc_list_init(void)
 
 init_error:
 	desc_list_free();
-	printk(KERN_ERR DRV_NAME ": kmalloc failed\n");
+	pr_err("kmalloc failed\n");
 	return -ENOMEM;
 }
 
@@ -259,8 +258,7 @@ static int bfin_mdio_poll(void)
 	while ((bfin_read_EMAC_STAADD()) & STABUSY) {
 		udelay(1);
 		if (timeout_cnt-- < 0) {
-			printk(KERN_ERR DRV_NAME
-			": wait MDC/MDIO transaction to complete timeout\n");
+			pr_err("wait MDC/MDIO transaction to complete timeout\n");
 			return -ETIMEDOUT;
 		}
 	}
@@ -350,9 +348,9 @@ static void bfin_mac_adjust_link(struct net_device *dev)
 					opmode &= ~RMII_10;
 					break;
 				default:
-					printk(KERN_WARNING
-						"%s: Ack!  Speed (%d) is not 10/100!\n",
-						DRV_NAME, phydev->speed);
+					netdev_warn(dev,
+						"Ack! Speed (%d) is not 10/100!\n",
+						phydev->speed);
 					break;
 				}
 				bfin_write_EMAC_OPMODE(opmode);
@@ -417,14 +415,13 @@ static int mii_probe(struct net_device *dev, int phy_mode)
 
 	/* now we are supposed to have a proper phydev, to attach to... */
 	if (!phydev) {
-		printk(KERN_INFO "%s: Don't found any phy device at all\n",
-			dev->name);
+		netdev_err(dev, "no phy device found\n");
 		return -ENODEV;
 	}
 
 	if (phy_mode != PHY_INTERFACE_MODE_RMII &&
 		phy_mode != PHY_INTERFACE_MODE_MII) {
-		printk(KERN_INFO "%s: Invalid phy interface mode\n", dev->name);
+		netdev_err(dev, "invalid phy interface mode\n");
 		return -EINVAL;
 	}
 
@@ -432,7 +429,7 @@ static int mii_probe(struct net_device *dev, int phy_mode)
 			0, phy_mode);
 
 	if (IS_ERR(phydev)) {
-		printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name);
+		netdev_err(dev, "could not attach PHY\n");
 		return PTR_ERR(phydev);
 	}
 
@@ -453,11 +450,10 @@ static int mii_probe(struct net_device *dev, int phy_mode)
 	lp->old_duplex = -1;
 	lp->phydev = phydev;
 
-	printk(KERN_INFO "%s: attached PHY driver [%s] "
-	       "(mii_bus:phy_addr=%s, irq=%d, mdc_clk=%dHz(mdc_div=%d)"
-	       "@sclk=%dMHz)\n",
-	       DRV_NAME, phydev->drv->name, dev_name(&phydev->dev), phydev->irq,
-	       MDC_CLK, mdc_div, sclk/1000000);
+	pr_info("attached PHY driver [%s] "
+	        "(mii_bus:phy_addr=%s, irq=%d, mdc_clk=%dHz(mdc_div=%d)@sclk=%dMHz)\n",
+	        phydev->drv->name, dev_name(&phydev->dev), phydev->irq,
+	        MDC_CLK, mdc_div, sclk/1000000);
 
 	return 0;
 }
@@ -502,7 +498,7 @@ bfin_mac_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 static void bfin_mac_ethtool_getdrvinfo(struct net_device *dev,
 					struct ethtool_drvinfo *info)
 {
-	strcpy(info->driver, DRV_NAME);
+	strcpy(info->driver, KBUILD_MODNAME);
 	strcpy(info->version, DRV_VERSION);
 	strcpy(info->fw_version, "N/A");
 	strcpy(info->bus_info, dev_name(&dev->dev));
@@ -827,8 +823,7 @@ static void bfin_tx_hwtstamp(struct net_device *netdev, struct sk_buff *skb)
 		while ((!(bfin_read_EMAC_PTP_ISTAT() & TXTL)) && (--timeout_cnt))
 			udelay(1);
 		if (timeout_cnt == 0)
-			printk(KERN_ERR DRV_NAME
-					": fails to timestamp the TX packet\n");
+			netdev_err(netdev, "timestamp the TX packet failed\n");
 		else {
 			struct skb_shared_hwtstamps shhwtstamps;
 			u64 ns;
@@ -1083,8 +1078,7 @@ static void bfin_mac_rx(struct net_device *dev)
 	 * we which case we simply drop the packet
 	 */
 	if (current_rx_ptr->status.status_word & RX_ERROR_MASK) {
-		printk(KERN_NOTICE DRV_NAME
-		       ": rx: receive error - packet dropped\n");
+		netdev_notice(dev, "rx: receive error - packet dropped\n");
 		dev->stats.rx_dropped++;
 		goto out;
 	}
@@ -1094,8 +1088,7 @@ static void bfin_mac_rx(struct net_device *dev)
 
 	new_skb = dev_alloc_skb(PKT_BUF_SZ + NET_IP_ALIGN);
 	if (!new_skb) {
-		printk(KERN_NOTICE DRV_NAME
-		       ": rx: low on mem - packet dropped\n");
+		netdev_notice(dev, "rx: low on mem - packet dropped\n");
 		dev->stats.rx_dropped++;
 		goto out;
 	}
@@ -1213,7 +1206,7 @@ static int bfin_mac_enable(struct phy_device *phydev)
 	int ret;
 	u32 opmode;
 
-	pr_debug("%s: %s\n", DRV_NAME, __func__);
+	pr_debug("%s\n", __func__);
 
 	/* Set RX DMA */
 	bfin_write_DMA1_NEXT_DESC_PTR(&(rx_list_head->desc_a));
@@ -1323,7 +1316,7 @@ static void bfin_mac_set_multicast_list(struct net_device *dev)
 	u32 sysctl;
 
 	if (dev->flags & IFF_PROMISC) {
-		printk(KERN_INFO "%s: set to promisc mode\n", dev->name);
+		netdev_info(dev, "set promisc mode\n");
 		sysctl = bfin_read_EMAC_OPMODE();
 		sysctl |= PR;
 		bfin_write_EMAC_OPMODE(sysctl);
@@ -1393,7 +1386,7 @@ static int bfin_mac_open(struct net_device *dev)
 	 * address using ifconfig eth0 hw ether xx:xx:xx:xx:xx:xx
 	 */
 	if (!is_valid_ether_addr(dev->dev_addr)) {
-		printk(KERN_WARNING DRV_NAME ": no valid ethernet hw addr\n");
+		netdev_warn(dev, "no valid ethernet hw addr\n");
 		return -EINVAL;
 	}
 
@@ -1558,7 +1551,7 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
 	bfin_mac_hwtstamp_init(ndev);
 
 	/* now, print out the card info, in a short format.. */
-	dev_info(&pdev->dev, "%s, Version %s\n", DRV_DESC, DRV_VERSION);
+	netdev_info(ndev, "%s, Version %s\n", DRV_DESC, DRV_VERSION);
 
 	return 0;
 
@@ -1650,7 +1643,7 @@ static int __devinit bfin_mii_bus_probe(struct platform_device *pdev)
 	 * so set the GPIO pins to Ethernet mode
 	 */
 	pin_req = mii_bus_pd->mac_peripherals;
-	rc = peripheral_request_list(pin_req, DRV_NAME);
+	rc = peripheral_request_list(pin_req, KBUILD_MODNAME);
 	if (rc) {
 		dev_err(&pdev->dev, "Requesting peripherals failed!\n");
 		return rc;
@@ -1739,7 +1732,7 @@ static struct platform_driver bfin_mac_driver = {
 	.resume = bfin_mac_resume,
 	.suspend = bfin_mac_suspend,
 	.driver = {
-		.name = DRV_NAME,
+		.name = KBUILD_MODNAME,
 		.owner	= THIS_MODULE,
 	},
 };
-- 
1.7.3.2


^ permalink raw reply related

* [PATCH 2/5] netdev: bfin_mac: mark setup_system_regs as static
From: Mike Frysinger @ 2010-10-26  7:40 UTC (permalink / raw)
  To: netdev, David S. Miller; +Cc: uclinux-dist-devel
In-Reply-To: <1288078827-17495-1-git-send-email-vapier@gentoo.org>

No need for this to be exported since it is only used in this driver.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 drivers/net/bfin_mac.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index a572bcb..e712be4 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -558,7 +558,7 @@ static const struct ethtool_ops bfin_mac_ethtool_ops = {
 };
 
 /**************************************************************************/
-void setup_system_regs(struct net_device *dev)
+static void setup_system_regs(struct net_device *dev)
 {
 	struct bfin_mac_local *lp = netdev_priv(dev);
 	int i;
-- 
1.7.3.2


^ permalink raw reply related

* [PATCH 3/5] netdev: bfin_mac: drop unused Mac data
From: Mike Frysinger @ 2010-10-26  7:40 UTC (permalink / raw)
  To: netdev, David S. Miller; +Cc: uclinux-dist-devel
In-Reply-To: <1288078827-17495-1-git-send-email-vapier@gentoo.org>

We don't use this local "Mac" data anywhere (since we rely on the
netdev's storage), so punt it.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 drivers/net/bfin_mac.h |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h
index aed68be..4827f6b 100644
--- a/drivers/net/bfin_mac.h
+++ b/drivers/net/bfin_mac.h
@@ -68,7 +68,6 @@ struct bfin_mac_local {
 	 */
 	struct net_device_stats stats;
 
-	unsigned char Mac[6];	/* MAC address of the board */
 	spinlock_t lock;
 
 	int wol;		/* Wake On Lan */
-- 
1.7.3.2


^ permalink raw reply related

* [PATCH 4/5] netdev: bfin_mac: let boards set vlan masks
From: Mike Frysinger @ 2010-10-26  7:40 UTC (permalink / raw)
  To: netdev, David S. Miller; +Cc: uclinux-dist-devel
In-Reply-To: <1288078827-17495-1-git-send-email-vapier@gentoo.org>

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 drivers/net/bfin_mac.c   |    7 +++++++
 drivers/net/bfin_mac.h   |    3 +++
 include/linux/bfin_mac.h |    1 +
 3 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index e712be4..0b9fc51 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -588,6 +588,10 @@ static void setup_system_regs(struct net_device *dev)
 
 	bfin_write_EMAC_MMC_CTL(RSTC | CROLL);
 
+	/* Set vlan regs to let 1522 bytes long packets pass through */
+	bfin_write_EMAC_VLAN1(lp->vlan1_mask);
+	bfin_write_EMAC_VLAN2(lp->vlan2_mask);
+
 	/* Initialize the TX DMA channel registers */
 	bfin_write_DMA2_X_COUNT(0);
 	bfin_write_DMA2_X_MODIFY(4);
@@ -1520,6 +1524,9 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
 		goto out_err_mii_probe;
 	}
 
+	lp->vlan1_mask = ETH_P_8021Q | mii_bus_data->vlan1_mask;
+	lp->vlan2_mask = ETH_P_8021Q | mii_bus_data->vlan2_mask;
+
 	/* Fill in the fields of the device structure with ethernet values. */
 	ether_setup(ndev);
 
diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h
index 4827f6b..c1a0d66 100644
--- a/drivers/net/bfin_mac.h
+++ b/drivers/net/bfin_mac.h
@@ -75,6 +75,9 @@ struct bfin_mac_local {
 	struct timer_list tx_reclaim_timer;
 	struct net_device *ndev;
 
+	/* Data for EMAC_VLAN1 regs */
+	u16 vlan1_mask, vlan2_mask;
+
 	/* MII and PHY stuffs */
 	int old_link;          /* used by bf537_adjust_link */
 	int old_speed;
diff --git a/include/linux/bfin_mac.h b/include/linux/bfin_mac.h
index 904dec7..a69554e 100644
--- a/include/linux/bfin_mac.h
+++ b/include/linux/bfin_mac.h
@@ -24,6 +24,7 @@ struct bfin_mii_bus_platform_data {
 	const unsigned short *mac_peripherals;
 	int phy_mode;
 	unsigned int phy_mask;
+	unsigned short vlan1_mask, vlan2_mask;
 };
 
 #endif
-- 
1.7.3.2


^ permalink raw reply related

* [PATCH 5/5] netdev: bfin_mac: disable hardware checksum if writeback cache is enabled
From: Mike Frysinger @ 2010-10-26  7:40 UTC (permalink / raw)
  To: netdev, David S. Miller; +Cc: uclinux-dist-devel, Sonic Zhang
In-Reply-To: <1288078827-17495-1-git-send-email-vapier@gentoo.org>

From: Sonic Zhang <sonic.zhang@analog.com>

With writeback caches, corrupted RX packets will be sent up the stack
without any error markings.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 drivers/net/bfin_mac.h |    7 +++++++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h
index c1a0d66..f8559ac 100644
--- a/drivers/net/bfin_mac.h
+++ b/drivers/net/bfin_mac.h
@@ -17,7 +17,14 @@
 #include <linux/etherdevice.h>
 #include <linux/bfin_mac.h>
 
+/*
+ * Disable hardware checksum for bug #5600 if writeback cache is
+ * enabled. Otherwise, corrupted RX packet will be sent up stack
+ * without error mark.
+ */
+#ifndef CONFIG_BFIN_EXTMEM_WRITEBACK
 #define BFIN_MAC_CSUM_OFFLOAD
+#endif
 
 #define TX_RECLAIM_JIFFIES (HZ / 5)
 
-- 
1.7.3.2


^ permalink raw reply related

* dev_alloc_skb and latency issues
From: Jean-Michel Hautbois @ 2010-10-26  8:04 UTC (permalink / raw)
  To: netdev

Hi Everyone !

I am new to this mailing list, and I hope this kind of question hasn't
already been solved before (didn't find anything in the archives...).
I am facing some latency issues in the network layer (I am using a
bridge in order to transmit data between one interface to another).

I am focusing on allocation of memory using alloc_skb for *every* new
packet, and freeing of each packet before receiving a new one.
My use case is quite easy : I always have similar packets (some bytes
are changed, but the size is the same).
I don't think I am the only one with such a use case, and am thinking
about an optimization in this case (probably for others too) : why do
we have to allocate using kmem_cache for all the new packets ?

We could probably use a little piece of code which would reuse the
buffer if it hasn't to be used by anyone else.
I am thinking that if the buffer is ready to be freed (in kfree_skb or
skb_release_all for instance) then, mark the skb as "free" but do not
actually free memory.
On the next dev_alloc_skb, check this mark, and if it is present, do
not allocate, and just "memset" the skb.

This would be in my point of view really efficient when packets are similar.
Anyway, you probably have ideas about that stuff, and I am waiting for
your advices about that :).

Thanks in advance,
Best Regards,
JM

^ permalink raw reply

* Re: dev_alloc_skb and latency issues
From: Eric Dumazet @ 2010-10-26  8:25 UTC (permalink / raw)
  To: Jean-Michel Hautbois; +Cc: netdev
In-Reply-To: <AANLkTikzQw3gHT8RZUD4cnGny5Xwh6_X==zFXBfi5muS@mail.gmail.com>

Le mardi 26 octobre 2010 à 10:04 +0200, Jean-Michel Hautbois a écrit :
> Hi Everyone !
> 
> I am new to this mailing list, and I hope this kind of question hasn't
> already been solved before (didn't find anything in the archives...).
> I am facing some latency issues in the network layer (I am using a
> bridge in order to transmit data between one interface to another).
> 
> I am focusing on allocation of memory using alloc_skb for *every* new
> packet, and freeing of each packet before receiving a new one.
> My use case is quite easy : I always have similar packets (some bytes
> are changed, but the size is the same).
> I don't think I am the only one with such a use case, and am thinking
> about an optimization in this case (probably for others too) : why do
> we have to allocate using kmem_cache for all the new packets ?
> 
> We could probably use a little piece of code which would reuse the
> buffer if it hasn't to be used by anyone else.
> I am thinking that if the buffer is ready to be freed (in kfree_skb or
> skb_release_all for instance) then, mark the skb as "free" but do not
> actually free memory.
> On the next dev_alloc_skb, check this mark, and if it is present, do
> not allocate, and just "memset" the skb.
> 
> This would be in my point of view really efficient when packets are similar.
> Anyway, you probably have ideas about that stuff, and I am waiting for
> your advices about that :).

Once you add all the code necessary to handle a new cache layer, you end
up in a situation where it brings nothing but extra cost and bugs (check the
recent discussion about the rx_recycle stuff in the gianfar driver).

Really, kmem_cache is pretty fast and scalable. If not, better to work
on this, instead of adding yet another layer.




^ permalink raw reply

* [PATCH net-next-2.6] be2net: Schedule/Destroy worker thread in probe()/remove() rather than open()/close()
From: Somnath Kotur @ 2010-10-26  9:01 UTC (permalink / raw)
  To: netdev

When async mcc compls are rcvd on an i/f that is down (and so interrupts are disabled),
they just lie unprocessed in the compl queue. The compl queue can eventually get filled
up and cause the BE to lock up. The fix is to use be_worker to reap mcc compls when the
i/f is down. be_worker is now launched in be_probe() and canceled in be_remove().

Signed-off-by: Somnath Kotur <somnath.kotur@emulex.com>
---
 drivers/net/benet/be_main.c |   22 ++++++++++++++++++----
 1 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 45b1f66..deb546e 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -1806,6 +1806,20 @@ static void be_worker(struct work_struct *work)
 	struct be_rx_obj *rxo;
 	int i;
 
+	/* when interrupts are not yet enabled, just reap any pending
+	* mcc completions */
+	if (!netif_running(adapter->netdev)) {
+		int mcc_compl, status = 0;
+
+		mcc_compl = be_process_mcc(adapter, &status);
+
+		if (mcc_compl) {
+			struct be_mcc_obj *mcc_obj = &adapter->mcc_obj;
+			be_cq_notify(adapter, mcc_obj->cq.id, false, mcc_compl);
+		}
+		goto reschedule;
+	}
+
 	if (!adapter->stats_ioctl_sent)
 		be_cmd_get_stats(adapter, &adapter->stats_cmd);
 
@@ -1824,6 +1838,7 @@ static void be_worker(struct work_struct *work)
 	if (!adapter->ue_detected)
 		be_detect_dump_ue(adapter);
 
+reschedule:
 	schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
 }
 
@@ -2019,8 +2034,6 @@ static int be_close(struct net_device *netdev)
 	struct be_eq_obj *tx_eq = &adapter->tx_eq;
 	int vec, i;
 
-	cancel_delayed_work_sync(&adapter->work);
-
 	be_async_mcc_disable(adapter);
 
 	netif_stop_queue(netdev);
@@ -2085,8 +2098,6 @@ static int be_open(struct net_device *netdev)
 	/* Now that interrupts are on we can process async mcc */
 	be_async_mcc_enable(adapter);
 
-	schedule_delayed_work(&adapter->work, msecs_to_jiffies(100));
-
 	status = be_cmd_link_status_query(adapter, &link_up, &mac_speed,
 			&link_speed);
 	if (status)
@@ -2715,6 +2726,8 @@ static void __devexit be_remove(struct pci_dev *pdev)
 	if (!adapter)
 		return;
 
+	cancel_delayed_work_sync(&adapter->work);
+
 	unregister_netdev(adapter->netdev);
 
 	be_clear(adapter);
@@ -2870,6 +2883,7 @@ static int __devinit be_probe(struct pci_dev *pdev,
 		goto unsetup;
 
 	dev_info(&pdev->dev, "%s port %d\n", nic_name(pdev), adapter->port_num);
+	schedule_delayed_work(&adapter->work, msecs_to_jiffies(100));
 	return 0;
 
 unsetup:
-- 
1.5.6.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox