* [PATCH 2/5] Fix deadlock in bonding driver resulting from internal locking when using netpoll
From: nhorman @ 2010-10-12 20:29 UTC (permalink / raw)
To: netdev; +Cc: bonding-devel, fubar, davem, andy, amwang, nhorman
In-Reply-To: <1286915377-1612-1-git-send-email-nhorman@tuxdriver.com>
From: Neil Horman <nhorman@tuxdriver.com>
The monitoring paths in the bonding driver take write locks that are shared by
the tx path. If netconsole is in use, these paths can call printk which puts us
in the netpoll tx path, which, if netconsole is attached to the bonding driver,
results in deadlock (the xmit_lock guards are useless in netpoll_send_skb, as the
monitor paths in the bonding driver don't claim the xmit_lock, nor should they).
The solution is to use a per cpu flag internal to the driver to indicate when a
cpu is holding the lock in a path that might recurse into the tx path for the
driver via netconsole. By checking this flag on transmit, we can defer the
sending of the netconsole frames until a later time using the retransmit feature
of netpoll_send_skb that is triggered on the return code NETDEV_TX_BUSY. I've
tested this and am able to transmit via netconsole while causing failover
conditions on the bond slave links.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
---
drivers/net/bonding/bond_main.c | 67 ++++++++++++++++++++++++++++++++++++--
1 files changed, 63 insertions(+), 4 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index eb7d089..1d0d8c5 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -76,6 +76,7 @@
#include <linux/if_vlan.h>
#include <linux/if_bonding.h>
#include <linux/jiffies.h>
+#include <linux/preempt.h>
#include <net/route.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -169,6 +170,35 @@ MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link
/*----------------------------- Global variables ----------------------------*/
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static cpumask_var_t netpoll_block_tx;
+
+static inline void block_netpoll_tx(void
+{
+ preempt_disable();
+ BUG_ON(cpumask_test_and_set_cpu(smp_processor_id(),
+ netpoll_block_tx));
+}
+
+static inline void unblock_netpoll_tx(void)
+{
+ BUG_ON(!cpumask_test_and_clear_cpu(smp_processor_id(),
+ netpoll_block_tx));
+ preempt_enable();
+}
+
+static inline int is_netpoll_tx_blocked(struct net_device *dev)
+{
+ if (unlikely(dev->priv_flags & IFF_IN_NETPOLL))
+ return cpumask_test_cpu(smp_processor_id(), netpoll_block_tx);
+ return 0;
+}
+#else
+#define block_netpoll_tx()
+#define unblock_netpoll_tx()
+#define is_netpoll_tx_blocked(dev)
+#endif
+
static const char * const version =
DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -310,6 +340,7 @@ static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
+ block_netpoll_tx();
write_lock_bh(&bond->lock);
list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
@@ -344,6 +375,7 @@ static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
out:
write_unlock_bh(&bond->lock);
+ unblock_netpoll_tx();
return res;
}
@@ -1804,10 +1836,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_set_carrier(bond);
#ifdef CONFIG_NET_POLL_CONTROLLER
- /*
- * Netpoll and bonding is broken, make sure it is not initialized
- * until it is fixed.
- */
if (disable_netpoll) {
bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
} else {
@@ -1892,6 +1920,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
return -EINVAL;
}
+ block_netpoll_tx();
netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE);
write_lock_bh(&bond->lock);
@@ -1901,6 +1930,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
pr_info("%s: %s not enslaved\n",
bond_dev->name, slave_dev->name);
write_unlock_bh(&bond->lock);
+ unblock_netpoll_tx();
return -EINVAL;
}
@@ -1994,6 +2024,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
}
write_unlock_bh(&bond->lock);
+ unblock_netpoll_tx();
/* must do this from outside any spinlocks */
bond_destroy_slave_symlinks(bond_dev, slave_dev);
@@ -2085,6 +2116,7 @@ static int bond_release_all(struct net_device *bond_dev)
struct net_device *slave_dev;
struct sockaddr addr;
+ block_netpoll_tx();
write_lock_bh(&bond->lock);
netif_carrier_off(bond_dev);
@@ -2183,6 +2215,7 @@ static int bond_release_all(struct net_device *bond_dev)
out:
write_unlock_bh(&bond->lock);
+ unblock_netpoll_tx();
return 0;
}
@@ -2232,9 +2265,11 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
(old_active) &&
(new_active->link == BOND_LINK_UP) &&
IS_UP(new_active->dev)) {
+ block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
bond_change_active_slave(bond, new_active);
write_unlock_bh(&bond->curr_slave_lock);
+ unblock_netpoll_tx();
} else
res = -EINVAL;
@@ -2466,9 +2501,11 @@ static void bond_miimon_commit(struct bonding *bond)
do_failover:
ASSERT_RTNL();
+ block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
+ unblock_netpoll_tx();
}
bond_set_carrier(bond);
@@ -2911,11 +2948,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
}
if (do_failover) {
+ block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
+ unblock_netpoll_tx();
}
re_arm:
@@ -3074,9 +3113,11 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
do_failover:
ASSERT_RTNL();
+ block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
+ unblock_netpoll_tx();
}
bond_set_carrier(bond);
@@ -4564,6 +4605,13 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
+ /*
+ * If we risk deadlock from transmitting this in the
+ * netpoll path, tell netpoll to queue the frame for later tx
+ */
+ if (is_netpoll_tx_blocked(dev))
+ return NETDEV_TX_BUSY;
+
if (TX_QUEUE_OVERRIDE(bond->params.mode)) {
if (!bond_slave_override(bond, skb))
return NETDEV_TX_OK;
@@ -5295,6 +5343,13 @@ static int __init bonding_init(void)
if (res)
goto err;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (!alloc_cpumask_var(&netpoll_block_tx, GFP_KERNEL)) {
+ res = -ENOMEM;
+ bond_destroy_sysfs();
+ goto err;
+ }
+#endif
register_netdevice_notifier(&bond_netdev_notifier);
register_inetaddr_notifier(&bond_inetaddr_notifier);
bond_register_ipv6_notifier();
@@ -5316,6 +5371,10 @@ static void __exit bonding_exit(void)
bond_destroy_sysfs();
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ free_cpumask_var(netpoll_block_tx);
+#endif
+
rtnl_link_unregister(&bond_link_ops);
unregister_pernet_subsys(&bond_net_ops);
}
--
1.7.2.3
^ permalink raw reply related
* Re: [PATCH -next] sundance: Add initial ethtool stats support
From: Joe Perches @ 2010-10-12 20:57 UTC (permalink / raw)
To: Denis Kirjanov
Cc: David Miller, netdev, eric.dumazet, Ben Hutchings, Jeff Garzik
In-Reply-To: <4CB4C6E6.9090805@kernel.org>
On Wed, 2010-10-13 at 00:36 +0400, Denis Kirjanov wrote:
> diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c
Hi Denis.
Just a few trivial misspellings of deffered -> deferred
> index 4283cc5..159f7e7 100644
> --- a/drivers/net/sundance.c
> +++ b/drivers/net/sundance.c
> @@ -363,6 +363,19 @@ struct netdev_private {
> dma_addr_t tx_ring_dma;
> dma_addr_t rx_ring_dma;
> struct timer_list timer; /* Media monitoring timer. */
> + /* ethtool extra stats */
> + struct {
> + u64 tx_multiple_collisions;
> + u64 tx_single_collisions;
> + u64 tx_late_collisions;
> + u64 tx_deffered;
> + u64 tx_deffered_excessive;
1
> + np->xstats.tx_deffered += ioread8(ioaddr + StatsTxDefer);
> + np->xstats.tx_deffered_excessive += ioread8(ioaddr + StatsTxXSDefer);
2
> +static const struct {
> + const char name[ETH_GSTRING_LEN];
> +} sundance_stats[] = {
> + { "tx_multiple_collisions" },
> + { "tx_single_collisions" },
> + { "tx_late_collisions" },
> + { "tx_deffered" },
> + { "tx_deffered_excessive" },
3
> + data[i++] = np->xstats.tx_deffered;
> + data[i++] = np->xstats.tx_deffered_excessive;
4
^ permalink raw reply
* [PATCH] bonding: various fixes for bonding, netpoll & netconsole
From: nhorman @ 2010-10-12 21:55 UTC (permalink / raw)
To: netdev; +Cc: bonding-devel, fubar, davem, andy, amwang, nhorman
Grr, really sorry guys, third time's the charm. netdev greylisted this and several
others pitched it because of a bad mail header. I'm resending through tuxdriver
like I should have done before.
A while ago we tried to enable netpoll on the bonding driver to enable
netconsole. That worked well in a steady state, but deadlocked frequently in
failover conditions due to some recursive lock-taking (as well as a few other
problems). I've gone through the driver, netconsole and netpoll code, fixed up
those deadlocks, and confirmed that, with this patch series, we can use
netconsole on bonding without deadlock in all bonding modes with all slaves,
even across failovers. I've also fixed up some incidental bugs that I ran
across while looking through this code, as described in individual patches.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
^ permalink raw reply
* [PATCH 1/5] Fix bonding drivers improper modification of netpoll structure
From: nhorman @ 2010-10-12 21:55 UTC (permalink / raw)
To: netdev; +Cc: bonding-devel, fubar, davem, andy, amwang, nhorman
In-Reply-To: <1286920552-2173-1-git-send-email-nhorman@tuxdriver.com>
From: Neil Horman <nhorman@tuxdriver.com>
The bonding driver currently modifies the netpoll structure in its xmit path
while sending frames from netpoll. This is racy, as other cpus can access the
netpoll structure in parallel. Since the bonding driver points np->dev to a
slave device, other cpus can inadvertently attempt to send data directly to
slave devices, leading to improper locking with the bonding master, lost frames,
and deadlocks. This patch fixes that up.
This patch also removes the real_dev pointer from the netpoll structure as that
data is really only used by bonding in the poll_controller, and we can emulate
its behavior by checking each slave for IS_UP.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
---
drivers/net/bonding/bond_main.c | 15 +++++++++------
include/linux/netpoll.h | 9 +++++++--
net/core/netpoll.c | 6 +++---
3 files changed, 19 insertions(+), 11 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a0bf35d..eb7d089 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -449,11 +449,9 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) {
struct netpoll *np = bond->dev->npinfo->netpoll;
slave_dev->npinfo = bond->dev->npinfo;
- np->real_dev = np->dev = skb->dev;
slave_dev->priv_flags |= IFF_IN_NETPOLL;
- netpoll_send_skb(np, skb);
+ netpoll_send_skb_on_dev(np, skb, slave_dev);
slave_dev->priv_flags &= ~IFF_IN_NETPOLL;
- np->dev = bond->dev;
} else
#endif
dev_queue_xmit(skb);
@@ -1332,9 +1330,14 @@ static bool slaves_support_netpoll(struct net_device *bond_dev)
static void bond_poll_controller(struct net_device *bond_dev)
{
- struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
- if (dev != bond_dev)
- netpoll_poll_dev(dev);
+ struct bonding *bond = netdev_priv(bond_dev);
+ struct slave *slave;
+ int i;
+
+ bond_for_each_slave(bond, slave, i) {
+ if (slave->dev && IS_UP(slave->dev))
+ netpoll_poll_dev(slave->dev);
+ }
}
static void bond_netpoll_cleanup(struct net_device *bond_dev)
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 50d8009..79358bb 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -14,7 +14,6 @@
struct netpoll {
struct net_device *dev;
- struct net_device *real_dev;
char dev_name[IFNAMSIZ];
const char *name;
void (*rx_hook)(struct netpoll *, int, char *, int);
@@ -53,7 +52,13 @@ void netpoll_set_trap(int trap);
void __netpoll_cleanup(struct netpoll *np);
void netpoll_cleanup(struct netpoll *np);
int __netpoll_rx(struct sk_buff *skb);
-void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
+void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
+ struct net_device *dev);
+static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+{
+ netpoll_send_skb_on_dev(np, skb, np->dev);
+}
+
#ifdef CONFIG_NETPOLL
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 537e01a..4e98ffa 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -288,11 +288,11 @@ static int netpoll_owner_active(struct net_device *dev)
return 0;
}
-void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
+ struct net_device *dev)
{
int status = NETDEV_TX_BUSY;
unsigned long tries;
- struct net_device *dev = np->dev;
const struct net_device_ops *ops = dev->netdev_ops;
/* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo = np->dev->npinfo;
@@ -346,7 +346,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
schedule_delayed_work(&npinfo->tx_work,0);
}
}
-EXPORT_SYMBOL(netpoll_send_skb);
+EXPORT_SYMBOL(netpoll_send_skb_on_dev);
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
--
1.7.2.3
^ permalink raw reply related
* [PATCH 2/5] Fix deadlock in bonding driver resulting from internal locking when using netpoll
From: nhorman @ 2010-10-12 21:55 UTC (permalink / raw)
To: netdev; +Cc: bonding-devel, fubar, davem, andy, amwang, nhorman
In-Reply-To: <1286920552-2173-1-git-send-email-nhorman@tuxdriver.com>
From: Neil Horman <nhorman@tuxdriver.com>
The monitoring paths in the bonding driver take write locks that are shared by
the tx path. If netconsole is in use, these paths can call printk which puts us
in the netpoll tx path, which, if netconsole is attached to the bonding driver,
results in deadlock (the xmit_lock guards are useless in netpoll_send_skb, as the
monitor paths in the bonding driver don't claim the xmit_lock, nor should they).
The solution is to use a per cpu flag internal to the driver to indicate when a
cpu is holding the lock in a path that might recurse into the tx path for the
driver via netconsole. By checking this flag on transmit, we can defer the
sending of the netconsole frames until a later time using the retransmit feature
of netpoll_send_skb that is triggered on the return code NETDEV_TX_BUSY. I've
tested this and am able to transmit via netconsole while causing failover
conditions on the bond slave links.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
---
drivers/net/bonding/bond_main.c | 67 ++++++++++++++++++++++++++++++++++++--
1 files changed, 63 insertions(+), 4 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index eb7d089..1d0d8c5 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -76,6 +76,7 @@
#include <linux/if_vlan.h>
#include <linux/if_bonding.h>
#include <linux/jiffies.h>
+#include <linux/preempt.h>
#include <net/route.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -169,6 +170,35 @@ MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link
/*----------------------------- Global variables ----------------------------*/
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static cpumask_var_t netpoll_block_tx;
+
+static inline void block_netpoll_tx(void
+{
+ preempt_disable();
+ BUG_ON(cpumask_test_and_set_cpu(smp_processor_id(),
+ netpoll_block_tx));
+}
+
+static inline void unblock_netpoll_tx(void)
+{
+ BUG_ON(!cpumask_test_and_clear_cpu(smp_processor_id(),
+ netpoll_block_tx));
+ preempt_enable();
+}
+
+static inline int is_netpoll_tx_blocked(struct net_device *dev)
+{
+ if (unlikely(dev->priv_flags & IFF_IN_NETPOLL))
+ return cpumask_test_cpu(smp_processor_id(), netpoll_block_tx);
+ return 0;
+}
+#else
+#define block_netpoll_tx()
+#define unblock_netpoll_tx()
+#define is_netpoll_tx_blocked(dev)
+#endif
+
static const char * const version =
DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -310,6 +340,7 @@ static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
+ block_netpoll_tx();
write_lock_bh(&bond->lock);
list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
@@ -344,6 +375,7 @@ static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
out:
write_unlock_bh(&bond->lock);
+ unblock_netpoll_tx();
return res;
}
@@ -1804,10 +1836,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_set_carrier(bond);
#ifdef CONFIG_NET_POLL_CONTROLLER
- /*
- * Netpoll and bonding is broken, make sure it is not initialized
- * until it is fixed.
- */
if (disable_netpoll) {
bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
} else {
@@ -1892,6 +1920,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
return -EINVAL;
}
+ block_netpoll_tx();
netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE);
write_lock_bh(&bond->lock);
@@ -1901,6 +1930,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
pr_info("%s: %s not enslaved\n",
bond_dev->name, slave_dev->name);
write_unlock_bh(&bond->lock);
+ unblock_netpoll_tx();
return -EINVAL;
}
@@ -1994,6 +2024,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
}
write_unlock_bh(&bond->lock);
+ unblock_netpoll_tx();
/* must do this from outside any spinlocks */
bond_destroy_slave_symlinks(bond_dev, slave_dev);
@@ -2085,6 +2116,7 @@ static int bond_release_all(struct net_device *bond_dev)
struct net_device *slave_dev;
struct sockaddr addr;
+ block_netpoll_tx();
write_lock_bh(&bond->lock);
netif_carrier_off(bond_dev);
@@ -2183,6 +2215,7 @@ static int bond_release_all(struct net_device *bond_dev)
out:
write_unlock_bh(&bond->lock);
+ unblock_netpoll_tx();
return 0;
}
@@ -2232,9 +2265,11 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
(old_active) &&
(new_active->link == BOND_LINK_UP) &&
IS_UP(new_active->dev)) {
+ block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
bond_change_active_slave(bond, new_active);
write_unlock_bh(&bond->curr_slave_lock);
+ unblock_netpoll_tx();
} else
res = -EINVAL;
@@ -2466,9 +2501,11 @@ static void bond_miimon_commit(struct bonding *bond)
do_failover:
ASSERT_RTNL();
+ block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
+ unblock_netpoll_tx();
}
bond_set_carrier(bond);
@@ -2911,11 +2948,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
}
if (do_failover) {
+ block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
+ unblock_netpoll_tx();
}
re_arm:
@@ -3074,9 +3113,11 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
do_failover:
ASSERT_RTNL();
+ block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
+ unblock_netpoll_tx();
}
bond_set_carrier(bond);
@@ -4564,6 +4605,13 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
+ /*
+ * If we risk deadlock from transmitting this in the
+ * netpoll path, tell netpoll to queue the frame for later tx
+ */
+ if (is_netpoll_tx_blocked(dev))
+ return NETDEV_TX_BUSY;
+
if (TX_QUEUE_OVERRIDE(bond->params.mode)) {
if (!bond_slave_override(bond, skb))
return NETDEV_TX_OK;
@@ -5295,6 +5343,13 @@ static int __init bonding_init(void)
if (res)
goto err;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (!alloc_cpumask_var(&netpoll_block_tx, GFP_KERNEL)) {
+ res = -ENOMEM;
+ bond_destroy_sysfs();
+ goto err;
+ }
+#endif
register_netdevice_notifier(&bond_netdev_notifier);
register_inetaddr_notifier(&bond_inetaddr_notifier);
bond_register_ipv6_notifier();
@@ -5316,6 +5371,10 @@ static void __exit bonding_exit(void)
bond_destroy_sysfs();
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ free_cpumask_var(netpoll_block_tx);
+#endif
+
rtnl_link_unregister(&bond_link_ops);
unregister_pernet_subsys(&bond_net_ops);
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 3/5] Fix napi poll for bonding driver
From: nhorman @ 2010-10-12 21:55 UTC (permalink / raw)
To: netdev; +Cc: bonding-devel, fubar, davem, andy, amwang, nhorman
In-Reply-To: <1286920552-2173-1-git-send-email-nhorman@tuxdriver.com>
From: Neil Horman <nhorman@tuxdriver.com>
Usually the netpoll path, when performing a napi poll, can get away with just
polling all the napi instances of the configured device. That's not the case for
the bonding driver however, as the napi instances which may wind up getting
flagged as needing polling after the poll_controller call don't belong to the
bonded device, but rather to the slave devices. Fix this by checking the device
in question for the IFF_MASTER flag, if set, we know we need to check the full
poll list for this cpu, rather than just the devices napi instance list.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
---
net/core/netpoll.c | 9 ++++++++-
1 files changed, 8 insertions(+), 1 deletions(-)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4e98ffa..d79d221 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -156,8 +156,15 @@ static void poll_napi(struct net_device *dev)
{
struct napi_struct *napi;
int budget = 16;
+ struct softnet_data *sd = &__get_cpu_var(softnet_data);
+ struct list_head *nlist;
- list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (dev->flags & IFF_MASTER)
+ nlist = &sd->poll_list;
+ else
+ nlist = &dev->napi_list;
+
+ list_for_each_entry(napi, nlist, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
budget = poll_one_napi(dev->npinfo, napi, budget);
--
1.7.2.3
^ permalink raw reply related
* [PATCH 4/5] Fix netconsole to not deadlock on rmmod
From: nhorman @ 2010-10-12 21:55 UTC (permalink / raw)
To: netdev; +Cc: bonding-devel, fubar, davem, andy, amwang, nhorman
In-Reply-To: <1286920552-2173-1-git-send-email-nhorman@tuxdriver.com>
From: Neil Horman <nhorman@tuxdriver.com>
Netconsole calls netpoll_cleanup on receipt of a NETDEV_UNREGISTER event.
The notifier subsystem calls these event handlers with rtnl_lock held, which
netpoll_cleanup also takes, resulting in deadlock. Fix this by calling the
__netpoll_cleanup interior function instead, and fixing up the additional
pointers.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
---
drivers/net/netconsole.c | 9 ++++++++-
1 files changed, 8 insertions(+), 1 deletions(-)
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index ca142c4..94255f0 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -678,7 +678,14 @@ static int netconsole_netdev_event(struct notifier_block *this,
strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ);
break;
case NETDEV_UNREGISTER:
- netpoll_cleanup(&nt->np);
+ /*
+ * rtnl_lock already held
+ */
+ if (nt->np.dev) {
+ __netpoll_cleanup(&nt->np);
+ dev_put(nt->np.dev);
+ nt->np.dev = NULL;
+ }
/* Fall through */
case NETDEV_GOING_DOWN:
case NETDEV_BONDING_DESLAVE:
--
1.7.2.3
^ permalink raw reply related
* [PATCH 5/5] Re-enable netpoll over bonding
From: nhorman @ 2010-10-12 21:55 UTC (permalink / raw)
To: netdev; +Cc: bonding-devel, fubar, davem, andy, amwang, nhorman
In-Reply-To: <1286920552-2173-1-git-send-email-nhorman@tuxdriver.com>
From: Neil Horman <nhorman@tuxdriver.com>
With the inclusion of previous fixup patches, netpoll over bonding appears to
work reliably with failover conditions. This reverts Gospo's previous commit
c22d7ac844f1cb9c6a5fd20f89ebadc2feef891b, and allows access again to the netpoll
functionality in the bonding driver.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
---
drivers/net/bonding/bond_main.c | 31 +++++++++++--------------------
1 files changed, 11 insertions(+), 20 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1d0d8c5..3b53360 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -173,7 +173,7 @@ MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link
#ifdef CONFIG_NET_POLL_CONTROLLER
static cpumask_var_t netpoll_block_tx;
-static inline void block_netpoll_tx(void
+static inline void block_netpoll_tx(void)
{
preempt_disable();
BUG_ON(cpumask_test_and_set_cpu(smp_processor_id(),
@@ -209,9 +209,6 @@ static int arp_ip_count;
static int bond_mode = BOND_MODE_ROUNDROBIN;
static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
static int lacp_fast;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static int disable_netpoll = 1;
-#endif
const struct bond_parm_tbl bond_lacp_tbl[] = {
{ "slow", AD_LACP_SLOW},
@@ -1836,19 +1833,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_set_carrier(bond);
#ifdef CONFIG_NET_POLL_CONTROLLER
- if (disable_netpoll) {
+ if (slaves_support_netpoll(bond_dev)) {
+ bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+ if (bond_dev->npinfo)
+ slave_dev->npinfo = bond_dev->npinfo;
+ } else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
- } else {
- if (slaves_support_netpoll(bond_dev)) {
- bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
- if (bond_dev->npinfo)
- slave_dev->npinfo = bond_dev->npinfo;
- } else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
- bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
- pr_info("New slave device %s does not support netpoll\n",
- slave_dev->name);
- pr_info("Disabling netpoll support for %s\n", bond_dev->name);
- }
+ pr_info("New slave device %s does not support netpoll\n",
+ slave_dev->name);
+ pr_info("Disabling netpoll support for %s\n", bond_dev->name);
}
#endif
read_unlock(&bond->lock);
@@ -2055,10 +2048,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
#ifdef CONFIG_NET_POLL_CONTROLLER
read_lock_bh(&bond->lock);
- /* Make sure netpoll over stays disabled until fixed. */
- if (!disable_netpoll)
- if (slaves_support_netpoll(bond_dev))
- bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+ if (slaves_support_netpoll(bond_dev))
+ bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
read_unlock_bh(&bond->lock);
if (slave_dev->netdev_ops->ndo_netpoll_cleanup)
slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev);
--
1.7.2.3
^ permalink raw reply related
* Re: tbf/htb qdisc limitations
From: Jarek Poplawski @ 2010-10-12 21:59 UTC (permalink / raw)
To: Steven Brudenell; +Cc: netdev
In-Reply-To: <AANLkTikQkCcPXtRQGp=MQrjrWtae84VzbENn5x+1yC47@mail.gmail.com>
On Tue, Oct 12, 2010 at 03:31:48PM -0400, Steven Brudenell wrote:
> > Yes, it's not allowed according to Documentation/HOWTO. Btw, as you
> > can see e.g. in sch_hfsc comments, 64-bit division is avoided too.
>
> i see sch_hfsc avoids do_div in critical areas for performance
> reasons, but uses it other places. it should still be alright to
> do_div in tbf_change and htb_change_class, right? it would be nice to
> compute the rtabs in those functions instead of having userspace do
> it.
Right, tbf_change or htb_change_class are on the "slow path". But
to compute these rtabs you need passing more parameters than rate.
And userspace would still do most of it for backward compatibility.
>
> > I can only say there is no versioning, but backward compatibility
> > is crucial, so you need to do some tricks or data duplication.
> > You could probably try to get opinions about it with an RFC on
> > moving tbf and htb schedulers to 64 bits if you're interested
> > (decoupling it from your specific burst problem).
>
> my burst problem is the only semi-legitimate motivation i can think
> of. the only other possible motivations i can imagine are setting
> "limit" to buffer more than 4GB of packets and setting "rate" to
> something more than 32 gigabit; both of these seem kind of dubious. is
> there something else you had in mind?
No, mainly 10 gigabit rates and additionally 64-bit stats.
> looking more at the netlink tc interface: why is it that the interface
> for so many qdiscs consists of passing a big options struct as a
> single netlink attr, instead of a bunch of individual attrs? this kind
> of seems contrary to the extensibility / flexibility spirit of
> netlink, and seems to be getting in the way of changing the interface.
> maybe i should RFC about this instead ;)
Sure, you can (I'm not the netlink expert).
Jarek P.
^ permalink raw reply
* Re: tbf/htb qdisc limitations
From: Rick Jones @ 2010-10-12 22:17 UTC (permalink / raw)
To: Jarek Poplawski; +Cc: Steven Brudenell, netdev
In-Reply-To: <20101012215932.GA1945@del.dom.local>
>>my burst problem is the only semi-legitimate motivation i can think
>>of. the only other possible motivations i can imagine are setting
>>"limit" to buffer more than 4GB of packets and setting "rate" to
>>something more than 32 gigabit; both of these seem kind of dubious. is
>>there something else you had in mind?
>
>
> No, mainly 10 gigabit rates and additionally 64-bit stats.
Any issue for bonded 10 GbE interfaces? Now that the IEEE have ratified (June)
how far out are 40 GbE interfaces? Or 100 GbE for that matter.
rick jones
^ permalink raw reply
* [PATCH] bonding: various fixes for bonding, netpoll & netconsole
From: nhorman @ 2010-10-12 20:29 UTC (permalink / raw)
To: netdev; +Cc: bonding-devel, fubar, davem, andy, amwang, nhorman
Sorry if this is a resend for some, a bad git-send-email config caused some
rejections, so I'm resending.
A few months back, an attempt was made to enable netpoll over bonding, so that
netconsole could be used over bonded interfaces. This worked in the steady
state, but had several deadlocks in various failover conditions. I've gone
through the bonding code, and fixed up those deadlocks, along with several other
problems noted along the way, which caused other issues with netpoll+bonding.
With this patch series, netpoll works with bonding in all modes across all
slaves during failover conditions. It also fixes some deadlock
conditions in the netconsole code itself.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
^ permalink raw reply
* [PATCH v2] xps-mp: Transmit Packet Steering for multiqueue
From: Tom Herbert @ 2010-10-13 0:20 UTC (permalink / raw)
To: davem, netdev; +Cc: eric.dumazet
This patch implements transmit packet steering (XPS) for multiqueue
devices. XPS selects a transmit queue during packet transmission based
on configuration. This is done by mapping the CPU transmitting the
packet to a queue. This is the transmit side analogue to RPS-- where
RPS is selecting a CPU based on receive queue, XPS selects a queue
based on the CPU (previously there was an XPS patch from Eric
Dumazet, but that might more appropriately be called transmit completion
steering).
Each transmit queue can be associated with a number of CPUs which will
use the queue to send packets. This is configured as a CPU mask on a
per queue basis in:
/sys/class/net/eth<n>/queues/tx-<n>/xps_cpus
The mappings are stored per device in an inverted data structure that
maps CPUs to queues. In the netdevice structure this is an array of
num_possible_cpu structures where each array entry contains a bit map
of queues which that CPU can use.
We also allow the mapping of a socket to queue to be modified, for
instance if a thread is scheduled on a different CPU the desired queue
for transmitting packets would likely change. To maintain in-order
packet transmission a flag (ooo_okay) has been added to the sk_buff
structure. If a transport layer sets this flag on a packet, the
transmit queue can be changed for this socket. Presumably, the
transport would set this if there was no possibility of creating ooo
packets (for instance there are no packets in flight for the socket).
This patch includes the modification in TCP output for setting this
flag.
The allocation of the netdev_queues was modified to be symmetric
with how the rx queues are allocated, including the ability to
change the number of real queues.
In dev_pick_tx, don't do work in calculating queue index or setting
the index in the sock unless the device has more than one queue. This
allows the sock to be set only with a queue index of a multi-queue
device which is desirable if devices are stacked like in a tunnel.
The benefits of XPS are improved locality in the per queue data
structures. Also, transmit completions are more likely to be done
nearer to the sending thread so this should promote locality back
to the socket (e.g. UDP). The benefits of XPS are dependent on
cache hierarchy, application load, and other factors. XPS would
nominally be configured so that a queue would only be shared by CPUs
which are sharing a cache, the degenerate configuration would be that
each CPU has its own queue.
Below are some benchmark results which show the potential benefit of
this patch. The netperf test has 500 instances of netperf TCP_RR test
with 1 byte req. and resp.
bnx2x on 16 core AMD
XPS (16 queues, 1 TX queue per CPU) 1135K at 99% CPU
No XPS (16 queues) 992K at 100% CPU
Signed-off-by: Tom Herbert <therbert@google.com>
---
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 14fbb04..4a944a7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -503,6 +503,13 @@ struct netdev_queue {
struct Qdisc *qdisc;
unsigned long state;
struct Qdisc *qdisc_sleeping;
+#ifdef CONFIG_RPS
+ struct kobject kobj;
+ struct netdev_queue *first;
+ atomic_t count;
+ struct xps_map *xps_maps;
+#endif
+
/*
* write mostly part
*/
@@ -530,6 +537,23 @@ struct rps_map {
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
/*
+ * This structure holds an XPS map which can be of variable length. queues
+ * is an array of num_possible_cpus entries, where each entry is a mask of
+ * queues for that CPU (up to num_tx_queues bits for device).
+ */
+struct xps_map {
+ struct rcu_head rcu;
+ unsigned long queues[0];
+};
+
+#define QUEUE_MASK_SIZE(dev) (BITS_TO_LONGS(dev->num_tx_queues))
+#define XPS_MAP_SIZE(dev) (sizeof(struct xps_map) + (num_possible_cpus() * \
+ QUEUE_MASK_SIZE(dev) * sizeof(unsigned long)))
+#define XPS_ENTRY(map, offset, dev) \
+ (&map->queues[offset * QUEUE_MASK_SIZE(dev)])
+#define netdev_get_xps_maps(dev) ((dev)->_tx[0].xps_maps)
+
+/*
* The rps_dev_flow structure contains the mapping of a flow to a CPU and the
* tail pointer for that CPU's input queue at the time of last enqueue.
*/
@@ -1696,8 +1720,8 @@ static inline int netif_is_multiqueue(const struct net_device *dev)
return dev->num_tx_queues > 1;
}
-extern void netif_set_real_num_tx_queues(struct net_device *dev,
- unsigned int txq);
+extern int netif_set_real_num_tx_queues(struct net_device *dev,
+ unsigned int txq);
#ifdef CONFIG_RPS
extern int netif_set_real_num_rx_queues(struct net_device *dev,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0b53c43..2f28b1f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -386,6 +386,7 @@ struct sk_buff {
#else
__u8 deliver_no_wcard:1;
#endif
+ __u8 ooo_okay:1;
kmemcheck_bitfield_end(flags2);
/* 0/14 bit hole */
diff --git a/net/core/dev.c b/net/core/dev.c
index 04972a4..9f19545 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1553,18 +1553,31 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
* greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
-void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
unsigned int real_num = dev->real_num_tx_queues;
- if (unlikely(txq > dev->num_tx_queues))
- ;
- else if (txq > real_num)
- dev->real_num_tx_queues = txq;
- else if (txq < real_num) {
- dev->real_num_tx_queues = txq;
- qdisc_reset_all_tx_gt(dev, txq);
- }
+ if (dev->reg_state == NETREG_REGISTERED) {
+ ASSERT_RTNL();
+ if (unlikely(txq > dev->num_tx_queues))
+ return -EINVAL;
+#ifdef CONFIG_RPS
+ {
+ int rc;
+ rc = netdev_queue_update_kobjects(dev,
+ dev->real_num_tx_queues, txq);
+
+ if (rc)
+ return rc;
+ }
+#endif
+ if (txq < real_num)
+ qdisc_reset_all_tx_gt(dev, txq);
+ } else
+ dev->num_tx_queues = txq;
+
+ dev->real_num_tx_queues = txq;
+ return 0;
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);
@@ -2087,32 +2100,93 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
return queue_index;
}
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb,
+ int queue_index)
+{
+ struct xps_map *maps;
+ int cpu = smp_processor_id();
+ u32 hash;
+ unsigned long *queues;
+ int weight, select;
+
+ rcu_read_lock();
+ maps = rcu_dereference(netdev_get_xps_maps(dev));
+
+ if (!maps) {
+ rcu_read_unlock();
+ return queue_index;
+ }
+
+ queues = XPS_ENTRY(maps, cpu, dev);
+
+ if (queue_index >= 0) {
+ if (test_bit(queue_index, queues)) {
+ rcu_read_unlock();
+ return queue_index;
+ }
+ }
+
+ weight = bitmap_weight(queues, dev->real_num_tx_queues);
+ switch (weight) {
+ case 0:
+ break;
+ case 1:
+ queue_index =
+ find_first_bit(queues, dev->real_num_tx_queues);
+ break;
+ default:
+ if (skb->sk && skb->sk->sk_hash)
+ hash = skb->sk->sk_hash;
+ else
+ hash = (__force u16) skb->protocol ^ skb->rxhash;
+ hash = jhash_1word(hash, hashrnd);
+
+ select = ((u64) hash * weight) >> 32;
+ queue_index =
+ find_first_bit(queues, dev->real_num_tx_queues);
+ while (select--)
+ queue_index = find_next_bit(queues,
+ dev->real_num_tx_queues, queue_index + 1);
+ break;
+ }
+
+ rcu_read_unlock();
+ return queue_index;
+}
+
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
int queue_index;
- const struct net_device_ops *ops = dev->netdev_ops;
- if (ops->ndo_select_queue) {
- queue_index = ops->ndo_select_queue(dev, skb);
- queue_index = dev_cap_txqueue(dev, queue_index);
- } else {
+ if (dev->real_num_tx_queues > 1) {
struct sock *sk = skb->sk;
+
queue_index = sk_tx_queue_get(sk);
- if (queue_index < 0) {
+ if (queue_index < 0 || skb->ooo_okay) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+ int old_index = queue_index;
- queue_index = 0;
- if (dev->real_num_tx_queues > 1)
- queue_index = skb_tx_hash(dev, skb);
+ if (ops->ndo_select_queue) {
+ queue_index = ops->ndo_select_queue(dev, skb);
+ queue_index = dev_cap_txqueue(dev, queue_index);
+ } else {
+ queue_index = get_xps_queue(dev,
+ skb, queue_index);
+ if (queue_index < 0)
+ queue_index = skb_tx_hash(dev, skb);
+ }
- if (sk) {
- struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+ if ((queue_index != old_index) && sk) {
+ struct dst_entry *dst =
+ rcu_dereference_check(sk->sk_dst_cache, 1);
if (dst && skb_dst(skb) == dst)
sk_tx_queue_set(sk, queue_index);
}
}
- }
+ } else
+ queue_index = 0;
skb_set_queue_mapping(skb, queue_index);
return netdev_get_tx_queue(dev, queue_index);
@@ -5036,6 +5110,42 @@ static int netif_alloc_rx_queues(struct net_device *dev)
return 0;
}
+static int netif_alloc_netdev_queues(struct net_device *dev)
+{
+ unsigned int i, count = dev->num_tx_queues;
+ struct netdev_queue *tx;
+
+ tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
+ if (!tx) {
+ pr_err("netdev: Unable to allocate %u tx queues.\n",
+ count);
+ return -ENOMEM;
+ }
+ dev->_tx = tx;
+#ifdef CONFIG_RPS
+ /*
+ * Set a pointer to first element in the array which holds the
+ * reference count.
+ */
+ for (i = 0; i < count; i++)
+ tx[i].first = tx;
+#endif
+ return 0;
+}
+
+static void netdev_init_one_queue(struct net_device *dev,
+ struct netdev_queue *queue,
+ void *_unused)
+{
+ queue->dev = dev;
+}
+
+static void netdev_init_queues(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+ spin_lock_init(&dev->tx_global_lock);
+}
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -5069,7 +5179,6 @@ int register_netdevice(struct net_device *dev)
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
- netdev_init_queue_locks(dev);
dev->iflink = -1;
@@ -5077,6 +5186,13 @@ int register_netdevice(struct net_device *dev)
if (ret)
goto out;
+ ret = netif_alloc_netdev_queues(dev);
+ if (ret)
+ goto out;
+
+ netdev_init_queues(dev);
+ netdev_init_queue_locks(dev);
+
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -5458,19 +5574,6 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
}
EXPORT_SYMBOL(dev_get_stats);
-static void netdev_init_one_queue(struct net_device *dev,
- struct netdev_queue *queue,
- void *_unused)
-{
- queue->dev = dev;
-}
-
-static void netdev_init_queues(struct net_device *dev)
-{
- netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
- spin_lock_init(&dev->tx_global_lock);
-}
-
struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
{
struct netdev_queue *queue = dev_ingress_queue(dev);
@@ -5504,7 +5607,6 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
- struct netdev_queue *tx;
struct net_device *dev;
size_t alloc_size;
struct net_device *p;
@@ -5526,20 +5628,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
return NULL;
}
- tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
- if (!tx) {
- printk(KERN_ERR "alloc_netdev: Unable to allocate "
- "tx qdiscs.\n");
- goto free_p;
- }
-
-
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
- goto free_tx;
+ goto free_p;
if (dev_addr_init(dev))
goto free_pcpu;
@@ -5549,7 +5643,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- dev->_tx = tx;
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
@@ -5560,8 +5653,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_MAX_SIZE;
- netdev_init_queues(dev);
-
INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
dev->ethtool_ntuple_list.count = 0;
INIT_LIST_HEAD(&dev->napi_list);
@@ -5572,8 +5663,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
strcpy(dev->name, name);
return dev;
-free_tx:
- kfree(tx);
free_pcpu:
free_percpu(dev->pcpu_refcnt);
free_p:
@@ -5596,7 +5685,9 @@ void free_netdev(struct net_device *dev)
release_net(dev_net(dev));
+#ifndef CONFIG_RPS
kfree(dev->_tx);
+#endif
kfree(rcu_dereference_raw(dev->ingress_queue));
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b143173..2f1f09a 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -764,18 +764,307 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
return error;
}
-static int rx_queue_register_kobjects(struct net_device *net)
+/*
+ * netdev_queue sysfs structures and functions.
+ */
+struct netdev_queue_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attr, char *buf);
+ ssize_t (*store)(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_netdev_queue_attr(_attr) container_of(_attr, \
+ struct netdev_queue_attribute, attr)
+
+#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
+
+static ssize_t netdev_queue_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+ struct netdev_queue *queue = to_netdev_queue(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t netdev_queue_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+ struct netdev_queue *queue = to_netdev_queue(kobj);
+
+ if (!attribute->store)
+ return -EIO;
+
+ return attribute->store(queue, attribute, buf, count);
+}
+
+static struct sysfs_ops netdev_queue_sysfs_ops = {
+ .show = netdev_queue_attr_show,
+ .store = netdev_queue_attr_store,
+};
+
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
- net->queues_kset = kset_create_and_add("queues",
- NULL, &net->dev.kobj);
- if (!net->queues_kset)
+ struct net_device *dev = queue->dev;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++)
+ if (queue == &dev->_tx[i])
+ break;
+
+ BUG_ON(i >= dev->num_tx_queues);
+
+ return i;
+}
+
+static ssize_t show_xps_map(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute, char *buf)
+{
+ struct net_device *dev = queue->dev;
+ struct netdev_queue *first = queue->first;
+ struct xps_map *maps;
+ cpumask_var_t mask;
+ unsigned long *qmask, index;
+ size_t len = 0;
+ int i;
+ unsigned int qmask_size = QUEUE_MASK_SIZE(dev);
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
- return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+
+ index = get_netdev_queue_index(queue);
+
+ rcu_read_lock();
+ maps = rcu_dereference(first->xps_maps);
+ if (maps) {
+ qmask = maps->queues;
+ for (i = 0; i < num_possible_cpus(); i++) {
+ if (test_bit(index, qmask))
+ cpumask_set_cpu(i, mask);
+ qmask += qmask_size;
+ }
+ }
+ len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+ if (PAGE_SIZE - len < 3) {
+ rcu_read_unlock();
+ free_cpumask_var(mask);
+ return -EINVAL;
+ }
+ rcu_read_unlock();
+
+ free_cpumask_var(mask);
+ len += sprintf(buf + len, "\n");
+ return len;
+}
+
+static void xps_map_release(struct rcu_head *rcu)
+{
+ struct xps_map *map = container_of(rcu, struct xps_map, rcu);
+
+ kfree(map);
}
-static void rx_queue_remove_kobjects(struct net_device *net)
+static DEFINE_MUTEX(xps_map_lock);
+
+static ssize_t store_xps_map(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ const char *buf, size_t len)
+{
+ struct net_device *dev = queue->dev;
+ struct netdev_queue *first = queue->first;
+ struct xps_map *maps;
+ cpumask_var_t mask;
+ int err, i, nonempty = 0;
+ unsigned long *qmask, index;
+ unsigned int qmask_size = QUEUE_MASK_SIZE(dev);
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+ if (err) {
+ free_cpumask_var(mask);
+ return err;
+ }
+
+ mutex_lock(&xps_map_lock);
+
+ maps = first->xps_maps;
+ if (!maps) {
+ if (!cpumask_weight(mask)) {
+ mutex_unlock(&xps_map_lock);
+ free_cpumask_var(mask);
+ return 0;
+ }
+ maps = kzalloc(XPS_MAP_SIZE(dev), GFP_KERNEL);
+ if (!maps) {
+ mutex_unlock(&xps_map_lock);
+ free_cpumask_var(mask);
+ return -ENOMEM;
+ }
+ rcu_assign_pointer(first->xps_maps, maps);
+ }
+
+ index = get_netdev_queue_index(queue);
+
+ qmask = maps->queues;
+ for (i = 0; i < num_possible_cpus(); i++) {
+ if (cpu_isset(i, *mask) && cpu_online(i)) {
+ set_bit(index, qmask);
+ nonempty = 1;
+ } else
+ clear_bit(index, qmask);
+ if (!nonempty &&
+ bitmap_weight(qmask, dev->real_num_tx_queues))
+ nonempty = 1;
+ qmask += qmask_size;
+ }
+
+ if (!nonempty) {
+ rcu_assign_pointer(first->xps_maps, NULL);
+ call_rcu(&maps->rcu, xps_map_release);
+ }
+
+ mutex_unlock(&xps_map_lock);
+
+ free_cpumask_var(mask);
+ return len;
+}
+
+static struct netdev_queue_attribute xps_cpus_attribute =
+ __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+
+static struct attribute *netdev_queue_default_attrs[] = {
+ &xps_cpus_attribute.attr,
+ NULL
+};
+
+static void netdev_queue_release(struct kobject *kobj)
+{
+ struct netdev_queue *queue = to_netdev_queue(kobj);
+ struct net_device *dev = queue->dev;
+ struct netdev_queue *first = queue->first;
+ struct xps_map *maps;
+ unsigned long *qmask, index;
+ int i, nonempty = 0;
+ unsigned int qmask_size = QUEUE_MASK_SIZE(dev);
+
+ index = get_netdev_queue_index(queue);
+
+ mutex_lock(&xps_map_lock);
+
+ maps = first->xps_maps;
+
+ if (maps) {
+ qmask = maps->queues;
+ for (i = 0; i < num_possible_cpus(); i++) {
+ clear_bit(index, qmask);
+ if (!nonempty &&
+ bitmap_weight(qmask, dev->real_num_tx_queues))
+ nonempty = 1;
+ qmask += qmask_size;
+ }
+
+ if (!nonempty) {
+ rcu_assign_pointer(first->xps_maps, NULL);
+ call_rcu(&maps->rcu, xps_map_release);
+ }
+ }
+ mutex_unlock(&xps_map_lock);
+
+ if (atomic_dec_and_test(&first->count))
+ kfree(first);
+}
+
+static struct kobj_type netdev_queue_ktype = {
+ .sysfs_ops = &netdev_queue_sysfs_ops,
+ .release = netdev_queue_release,
+ .default_attrs = netdev_queue_default_attrs,
+};
+
+static int netdev_queue_add_kobject(struct net_device *net, int index)
+{
+ struct netdev_queue *queue = net->_tx + index;
+ struct netdev_queue *first = queue->first;
+ struct kobject *kobj = &queue->kobj;
+ int error = 0;
+
+ kobj->kset = net->queues_kset;
+ error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
+ "tx-%u", index);
+ if (error) {
+ kobject_put(kobj);
+ return error;
+ }
+
+ kobject_uevent(kobj, KOBJ_ADD);
+ atomic_inc(&first->count);
+
+ return error;
+}
+
+int
+netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
+{
+ int i;
+ int error = 0;
+
+ for (i = old_num; i < new_num; i++) {
+ error = netdev_queue_add_kobject(net, i);
+ if (error) {
+ new_num = old_num;
+ break;
+ }
+ }
+
+ while (--i >= new_num)
+ kobject_put(&net->_rx[i].kobj);
+
+ return error;
+}
+
+static int register_queue_kobjects(struct net_device *net)
+{
+ int error = 0, txq = 0, rxq = 0;
+
+ net->queues_kset = kset_create_and_add("queues",
+ NULL, &net->dev.kobj);
+ if (!net->queues_kset)
+ return -ENOMEM;
+
+ error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+ if (error)
+ goto error;
+ rxq = net->real_num_rx_queues;
+
+ error = netdev_queue_update_kobjects(net, 0,
+ net->real_num_tx_queues);
+ if (error)
+ goto error;
+ txq = net->real_num_tx_queues;
+
+ return 0;
+
+error:
+ netdev_queue_update_kobjects(net, txq, 0);
+ net_rx_queue_update_kobjects(net, rxq, 0);
+ return error;
+
+ }
+
+static void remove_queue_kobjects(struct net_device *net)
{
net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
+ netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0);
kset_unregister(net->queues_kset);
}
#endif /* CONFIG_RPS */
@@ -878,7 +1167,7 @@ void netdev_unregister_kobject(struct net_device * net)
kobject_get(&dev->kobj);
#ifdef CONFIG_RPS
- rx_queue_remove_kobjects(net);
+ remove_queue_kobjects(net);
#endif
device_del(dev);
@@ -919,7 +1208,7 @@ int netdev_register_kobject(struct net_device *net)
return error;
#ifdef CONFIG_RPS
- error = rx_queue_register_kobjects(net);
+ error = register_queue_kobjects(net);
if (error) {
device_del(dev);
return error;
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 778e157..25ec2ee 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -6,6 +6,9 @@ int netdev_register_kobject(struct net_device *);
void netdev_unregister_kobject(struct net_device *);
#ifdef CONFIG_RPS
int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
+int netdev_queue_update_kobjects(struct net_device *net,
+ int old_num, int new_num);
+
#endif
#endif
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 05b1ecf..67b9c9e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -822,8 +822,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
&md5);
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
- if (tcp_packets_in_flight(tp) == 0)
+ if (tcp_packets_in_flight(tp) == 0) {
tcp_ca_event(sk, CA_EVENT_TX_START);
+ skb->ooo_okay = 1;
+ }
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
^ permalink raw reply related
* [PATCH net-next 2/5] tipc: Simplify bearer shutdown logic
From: Paul Gortmaker @ 2010-10-13 0:25 UTC (permalink / raw)
To: davem; +Cc: netdev, allan.stephens
In-Reply-To: <1286929558-2954-1-git-send-email-paul.gortmaker@windriver.com>
From: Allan Stephens <allan.stephens@windriver.com>
Disable all active bearers when TIPC is shut down without having to do
a name-based search to locate each bearer object.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
net/tipc/bearer.c | 61 ++++++++++++++++++++--------------------------------
1 files changed, 24 insertions(+), 37 deletions(-)
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 9c10c6b..9969ec6 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -280,39 +280,39 @@ static int bearer_name_validate(const char *name,
}
/**
- * bearer_find - locates bearer object with matching bearer name
+ * tipc_bearer_find_interface - locates bearer object with matching interface name
*/
-static struct bearer *bearer_find(const char *name)
+struct bearer *tipc_bearer_find_interface(const char *if_name)
{
struct bearer *b_ptr;
+ char *b_if_name;
u32 i;
- if (tipc_mode != TIPC_NET_MODE)
- return NULL;
-
for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
- if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
+ if (!b_ptr->active)
+ continue;
+ b_if_name = strchr(b_ptr->publ.name, ':') + 1;
+ if (!strcmp(b_if_name, if_name))
return b_ptr;
}
return NULL;
}
/**
- * tipc_bearer_find_interface - locates bearer object with matching interface name
+ * bearer_find - locates bearer object with matching bearer name
*/
-struct bearer *tipc_bearer_find_interface(const char *if_name)
+static struct bearer *bearer_find(const char *name)
{
struct bearer *b_ptr;
- char *b_if_name;
u32 i;
+ if (tipc_mode != TIPC_NET_MODE)
+ return NULL;
+
for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
- if (!b_ptr->active)
- continue;
- b_if_name = strchr(b_ptr->publ.name, ':') + 1;
- if (!strcmp(b_if_name, if_name))
+ if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
return b_ptr;
}
return NULL;
@@ -630,30 +630,17 @@ int tipc_block_bearer(const char *name)
* Note: This routine assumes caller holds tipc_net_lock.
*/
-static int bearer_disable(const char *name)
+static int bearer_disable(struct bearer *b_ptr)
{
- struct bearer *b_ptr;
struct link *l_ptr;
struct link *temp_l_ptr;
- b_ptr = bearer_find(name);
- if (!b_ptr) {
- warn("Attempt to disable unknown bearer <%s>\n", name);
- return -EINVAL;
- }
-
- info("Disabling bearer <%s>\n", name);
+ info("Disabling bearer <%s>\n", b_ptr->publ.name);
tipc_disc_stop_link_req(b_ptr->link_req);
spin_lock_bh(&b_ptr->publ.lock);
b_ptr->link_req = NULL;
b_ptr->publ.blocked = 1;
- if (b_ptr->media->disable_bearer) {
- spin_unlock_bh(&b_ptr->publ.lock);
- write_unlock_bh(&tipc_net_lock);
- b_ptr->media->disable_bearer(&b_ptr->publ);
- write_lock_bh(&tipc_net_lock);
- spin_lock_bh(&b_ptr->publ.lock);
- }
+ b_ptr->media->disable_bearer(&b_ptr->publ);
list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
tipc_link_delete(l_ptr);
}
@@ -664,10 +651,16 @@ static int bearer_disable(const char *name)
int tipc_disable_bearer(const char *name)
{
+ struct bearer *b_ptr;
int res;
write_lock_bh(&tipc_net_lock);
- res = bearer_disable(name);
+ b_ptr = bearer_find(name);
+ if (b_ptr == NULL) {
+ warn("Attempt to disable unknown bearer <%s>\n", name);
+ res = -EINVAL;
+ } else
+ res = bearer_disable(b_ptr);
write_unlock_bh(&tipc_net_lock);
return res;
}
@@ -680,13 +673,7 @@ void tipc_bearer_stop(void)
for (i = 0; i < MAX_BEARERS; i++) {
if (tipc_bearers[i].active)
- tipc_bearers[i].publ.blocked = 1;
- }
- for (i = 0; i < MAX_BEARERS; i++) {
- if (tipc_bearers[i].active)
- bearer_disable(tipc_bearers[i].publ.name);
+ bearer_disable(&tipc_bearers[i]);
}
media_count = 0;
}
-
-
--
1.7.0.4
^ permalink raw reply related
* [PATCH net-next 3/5] tipc: Optimizations to bearer enabling logic
From: Paul Gortmaker @ 2010-10-13 0:25 UTC (permalink / raw)
To: davem; +Cc: netdev, allan.stephens
In-Reply-To: <1286929558-2954-1-git-send-email-paul.gortmaker@windriver.com>
From: Allan Stephens <allan.stephens@windriver.com>
Introduces "enabling" state during activation of a new TIPC bearer,
which supplements the existing "disabled" and "enabled" states.
This change allows the new bearer to be added without having to
temporarily block the processing of incoming packets on existing
bearers during the binding of the new bearer to its associated
interface. It also makes it unnecessary to zero out the entire
bearer structure at the start of activation.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
net/tipc/bcast.c | 2 +-
net/tipc/bearer.c | 27 +++++++++++++++++----------
net/tipc/bearer.h | 8 ++++++--
net/tipc/link.c | 2 +-
4 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index ecfaac1..ba6dcb2 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -645,7 +645,7 @@ void tipc_bcbearer_sort(void)
for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
struct bearer *b = &tipc_bearers[b_index];
- if (!b->active || !b->nodes.count)
+ if ((b->state != BEARER_ENABLED) || !b->nodes.count)
continue;
if (!bp_temp[b->priority].primary)
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 9969ec6..379338f 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -290,7 +290,7 @@ struct bearer *tipc_bearer_find_interface(const char *if_name)
u32 i;
for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
- if (!b_ptr->active)
+ if (b_ptr->state != BEARER_ENABLED)
continue;
b_if_name = strchr(b_ptr->publ.name, ':') + 1;
if (!strcmp(b_if_name, if_name))
@@ -312,7 +312,8 @@ static struct bearer *bearer_find(const char *name)
return NULL;
for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
- if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
+ if ((b_ptr->state == BEARER_ENABLED) &&
+ (!strcmp(b_ptr->publ.name, name)))
return b_ptr;
}
return NULL;
@@ -337,7 +338,8 @@ struct sk_buff *tipc_bearer_get_names(void)
for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) {
for (j = 0; j < MAX_BEARERS; j++) {
b_ptr = &tipc_bearers[j];
- if (b_ptr->active && (b_ptr->media == m_ptr)) {
+ if ((b_ptr->state == BEARER_ENABLED) &&
+ (b_ptr->media == m_ptr)) {
tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME,
b_ptr->publ.name,
strlen(b_ptr->publ.name) + 1);
@@ -532,7 +534,7 @@ restart:
bearer_id = MAX_BEARERS;
with_this_prio = 1;
for (i = MAX_BEARERS; i-- != 0; ) {
- if (!tipc_bearers[i].active) {
+ if (tipc_bearers[i].state != BEARER_ENABLED) {
bearer_id = i;
continue;
}
@@ -559,21 +561,23 @@ restart:
}
b_ptr = &tipc_bearers[bearer_id];
- memset(b_ptr, 0, sizeof(struct bearer));
-
+ b_ptr->state = BEARER_ENABLING;
strcpy(b_ptr->publ.name, name);
+ b_ptr->priority = priority;
+
+ write_unlock_bh(&tipc_net_lock);
res = m_ptr->enable_bearer(&b_ptr->publ);
if (res) {
+ b_ptr->state = BEARER_DISABLED;
warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res);
- goto failed;
+ return res;
}
+ write_lock_bh(&tipc_net_lock);
b_ptr->identity = bearer_id;
b_ptr->media = m_ptr;
b_ptr->net_plane = bearer_id + 'A';
- b_ptr->active = 1;
b_ptr->detect_scope = bcast_scope;
- b_ptr->priority = priority;
INIT_LIST_HEAD(&b_ptr->cong_links);
INIT_LIST_HEAD(&b_ptr->links);
if (m_ptr->bcast) {
@@ -581,7 +585,10 @@ restart:
bcast_scope, 2);
}
spin_lock_init(&b_ptr->publ.lock);
+ b_ptr->state = BEARER_ENABLED;
+
write_unlock_bh(&tipc_net_lock);
+
info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name, tipc_addr_string_fill(addr_string, bcast_scope), priority);
return 0;
@@ -672,7 +679,7 @@ void tipc_bearer_stop(void)
u32 i;
for (i = 0; i < MAX_BEARERS; i++) {
- if (tipc_bearers[i].active)
+ if (tipc_bearers[i].state == BEARER_ENABLED)
bearer_disable(&tipc_bearers[i]);
}
media_count = 0;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index a850b38..134c6cb 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -43,6 +43,10 @@
#define MAX_BEARERS 8
#define MAX_MEDIA 4
+/* Bearer state */
+#define BEARER_DISABLED 0
+#define BEARER_ENABLING 1
+#define BEARER_ENABLED 2
/**
* struct media - TIPC media information available to internal users
@@ -87,7 +91,7 @@ struct media {
* @links: list of non-congested links associated with bearer
* @cong_links: list of congested links associated with bearer
* @continue_count: # of times bearer has resumed after congestion or blocking
- * @active: non-zero if bearer structure is represents a bearer
+ * @state: bearer state (disabled, enabling, enabled)
* @net_plane: network plane ('A' through 'H') currently associated with bearer
* @nodes: indicates which nodes in cluster can be reached through bearer
*/
@@ -102,7 +106,7 @@ struct bearer {
struct list_head links;
struct list_head cong_links;
u32 continue_count;
- int active;
+ unsigned char state;
char net_plane;
struct tipc_node_map nodes;
};
diff --git a/net/tipc/link.c b/net/tipc/link.c
index b8cf1e9..54bd99d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1830,7 +1830,7 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
/* Ensure bearer is still enabled */
- if (unlikely(!b_ptr->active))
+ if (unlikely(b_ptr->state != BEARER_ENABLED))
goto cont;
/* Ensure message is well-formed */
--
1.7.0.4
^ permalink raw reply related
* [PATCH net-next 1/5] tipc: Enhance enabling and disabling of Ethernet bearers
From: Paul Gortmaker @ 2010-10-13 0:25 UTC (permalink / raw)
To: davem; +Cc: netdev, allan.stephens
From: Allan Stephens <allan.stephens@windriver.com>
Use work queue to eliminate release of TIPC's configuration lock when
registering for device notifications while activating Ethernet media
support. Optimize code that locates an unused bearer entry when enabling
an Ethernet bearer. Use work queue to break the association between a
newly disabled Ethernet bearer and its device driver, thereby allowing
quicker reuse of the bearer entry.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
net/tipc/config.c | 13 +------
net/tipc/eth_media.c | 93 ++++++++++++++++++++++++++++++-------------------
2 files changed, 58 insertions(+), 48 deletions(-)
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 961d1b0..a43450c 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -332,19 +332,8 @@ static struct sk_buff *cfg_set_own_addr(void)
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (cannot change node address once assigned)");
- /*
- * Must release all spinlocks before calling start_net() because
- * Linux version of TIPC calls eth_media_start() which calls
- * register_netdevice_notifier() which may block!
- *
- * Temporarily releasing the lock should be harmless for non-Linux TIPC,
- * but Linux version of eth_media_start() should really be reworked
- * so that it can be called with spinlocks held.
- */
-
- spin_unlock_bh(&config_lock);
tipc_core_start_net(addr);
- spin_lock_bh(&config_lock);
+
return tipc_cfg_reply_none();
}
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 6e988ba..479dbc0 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -51,17 +51,20 @@
* @bearer: ptr to associated "generic" bearer structure
* @dev: ptr to associated Ethernet network device
* @tipc_packet_type: used in binding TIPC to Ethernet driver
+ * @cleanup: work item used when disabling bearer
*/
struct eth_bearer {
struct tipc_bearer *bearer;
struct net_device *dev;
struct packet_type tipc_packet_type;
+ struct work_struct cleanup;
};
static struct eth_bearer eth_bearers[MAX_ETH_BEARERS];
static int eth_started = 0;
static struct notifier_block notifier;
+static struct work_struct reg_notifier;
/**
* send_msg - send a TIPC message out over an Ethernet interface
@@ -157,22 +160,22 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
if (!dev)
return -ENODEV;
- /* Find Ethernet bearer for device (or create one) */
-
- for (;(eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev); eb_ptr++);
- if (eb_ptr == stop)
- return -EDQUOT;
- if (!eb_ptr->dev) {
- eb_ptr->dev = dev;
- eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
- eb_ptr->tipc_packet_type.dev = dev;
- eb_ptr->tipc_packet_type.func = recv_msg;
- eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
- INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
- dev_hold(dev);
- dev_add_pack(&eb_ptr->tipc_packet_type);
+ /* Create Ethernet bearer for device */
+
+ while (eb_ptr->dev != NULL) {
+ if (++eb_ptr == stop)
+ return -EDQUOT;
}
+ eb_ptr->dev = dev;
+ eb_ptr->tipc_packet_type.type = __constant_htons(ETH_P_TIPC);
+ eb_ptr->tipc_packet_type.dev = dev;
+ eb_ptr->tipc_packet_type.func = recv_msg;
+ eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
+ INIT_LIST_HEAD(&eb_ptr->tipc_packet_type.list);
+ dev_hold(dev);
+ dev_add_pack(&eb_ptr->tipc_packet_type);
+
/* Associate TIPC bearer with Ethernet bearer */
eb_ptr->bearer = tb_ptr;
@@ -185,16 +188,36 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
}
/**
+ * cleanup_bearer - break association between Ethernet bearer and interface
+ *
+ * This routine must be invoked from a work queue because it can sleep.
+ */
+
+static void cleanup_bearer(struct work_struct *work)
+{
+ struct eth_bearer *eb_ptr =
+ container_of(work, struct eth_bearer, cleanup);
+
+ dev_remove_pack(&eb_ptr->tipc_packet_type);
+ dev_put(eb_ptr->dev);
+ eb_ptr->dev = NULL;
+}
+
+/**
* disable_bearer - detach TIPC bearer from an Ethernet interface
*
- * We really should do dev_remove_pack() here, but this function can not be
- * called at tasklet level. => Use eth_bearer->bearer as a flag to throw away
- * incoming buffers, & postpone dev_remove_pack() to eth_media_stop() on exit.
+ * Mark Ethernet bearer as inactive so that incoming buffers are thrown away,
+ * then get worker thread to complete bearer cleanup. (Can't do cleanup
+ * here because cleanup code needs to sleep and caller holds spinlocks.)
*/
static void disable_bearer(struct tipc_bearer *tb_ptr)
{
- ((struct eth_bearer *)tb_ptr->usr_handle)->bearer = NULL;
+ struct eth_bearer *eb_ptr = (struct eth_bearer *)tb_ptr->usr_handle;
+
+ eb_ptr->bearer = NULL;
+ INIT_WORK(&eb_ptr->cleanup, cleanup_bearer);
+ schedule_work(&eb_ptr->cleanup);
}
/**
@@ -265,6 +288,19 @@ static char *eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size
}
/**
+ * do_registration - register TIPC to receive device notifications
+ *
+ * This routine must be invoked from a work queue because it can sleep.
+ */
+
+static void do_registration(struct work_struct *dummy)
+{
+ notifier.notifier_call = &recv_notification;
+ notifier.priority = 0;
+ register_netdevice_notifier(&notifier);
+}
+
+/**
* tipc_eth_media_start - activate Ethernet bearer support
*
* Register Ethernet media type with TIPC bearer code. Also register
@@ -291,11 +327,9 @@ int tipc_eth_media_start(void)
if (res)
return res;
- notifier.notifier_call = &recv_notification;
- notifier.priority = 0;
- res = register_netdevice_notifier(&notifier);
- if (!res)
- eth_started = 1;
+ INIT_WORK(&reg_notifier, do_registration);
+ schedule_work(&reg_notifier);
+ eth_started = 1;
return res;
}
@@ -305,22 +339,9 @@ int tipc_eth_media_start(void)
void tipc_eth_media_stop(void)
{
- int i;
-
if (!eth_started)
return;
unregister_netdevice_notifier(&notifier);
- for (i = 0; i < MAX_ETH_BEARERS ; i++) {
- if (eth_bearers[i].bearer) {
- eth_bearers[i].bearer->blocked = 1;
- eth_bearers[i].bearer = NULL;
- }
- if (eth_bearers[i].dev) {
- dev_remove_pack(&eth_bearers[i].tipc_packet_type);
- dev_put(eth_bearers[i].dev);
- }
- }
- memset(&eth_bearers, 0, sizeof(eth_bearers));
eth_started = 0;
}
--
1.7.0.4
^ permalink raw reply related
* [PATCH net-next 4/5] tipc: Rework data structures that track neighboring nodes and links
From: Paul Gortmaker @ 2010-10-13 0:25 UTC (permalink / raw)
To: davem; +Cc: netdev, allan.stephens
In-Reply-To: <1286929558-2954-1-git-send-email-paul.gortmaker@windriver.com>
From: Allan Stephens <allan.stephens@windriver.com>
Convert existing linked list of neighboring nodes to a standard
doubly-linked list. Add counters that track total number of nodes
in list and total number of links to these nodes, thereby allowing
configuration message replies to allocate only space based on
the actual number of nodes and links rather than the worst case.
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
net/tipc/bcast.c | 5 ++-
net/tipc/link.c | 3 +-
net/tipc/node.c | 60 ++++++++++++++++++++++++++---------------------------
net/tipc/node.h | 5 +--
4 files changed, 36 insertions(+), 37 deletions(-)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index ba6dcb2..e006678 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -454,10 +454,11 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
tipc_node_unlock(node);
spin_lock_bh(&bc_lock);
bcl->stats.recv_nacks++;
- bcl->owner->next = node; /* remember requestor */
+ /* remember retransmit requester */
+ bcl->owner->node_list.next =
+ (struct list_head *)node;
bclink_retransmit_pkt(msg_bcgap_after(msg),
msg_bcgap_to(msg));
- bcl->owner->next = NULL;
spin_unlock_bh(&bc_lock);
} else {
tipc_bclink_peek_nack(msg_destnode(msg),
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 54bd99d..13bc0b6 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1652,7 +1652,8 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
tipc_printf(TIPC_OUTPUT, "Outstanding acks: %lu\n",
(unsigned long) TIPC_SKB_CB(buf)->handle);
- n_ptr = l_ptr->owner->next;
+ /* recover retransmit requester */
+ n_ptr = (struct tipc_node *)l_ptr->owner->node_list.next;
tipc_node_lock(n_ptr);
tipc_addr_string_fill(addr_string, n_ptr->addr);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 7c49cd0..944b480 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -50,7 +50,9 @@ void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str);
static void node_lost_contact(struct tipc_node *n_ptr);
static void node_established_contact(struct tipc_node *n_ptr);
-struct tipc_node *tipc_nodes = NULL; /* sorted list of nodes within cluster */
+static LIST_HEAD(nodes_list); /* sorted list of neighboring nodes */
+static int node_count; /* number of neighboring nodes that exist */
+static int link_count; /* number of unicast links node currently has */
static DEFINE_SPINLOCK(node_create_lock);
@@ -70,11 +72,11 @@ struct tipc_node *tipc_node_create(u32 addr)
{
struct cluster *c_ptr;
struct tipc_node *n_ptr;
- struct tipc_node **curr_node;
+ struct tipc_node *new_n_ptr;
spin_lock_bh(&node_create_lock);
- for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
+ list_for_each_entry(n_ptr, &nodes_list, node_list) {
if (addr < n_ptr->addr)
break;
if (addr == n_ptr->addr) {
@@ -83,8 +85,8 @@ struct tipc_node *tipc_node_create(u32 addr)
}
}
- n_ptr = kzalloc(sizeof(*n_ptr),GFP_ATOMIC);
- if (!n_ptr) {
+ new_n_ptr = kzalloc(sizeof(*new_n_ptr), GFP_ATOMIC);
+ if (!new_n_ptr) {
spin_unlock_bh(&node_create_lock);
warn("Node creation failed, no memory\n");
return NULL;
@@ -96,28 +98,22 @@ struct tipc_node *tipc_node_create(u32 addr)
}
if (!c_ptr) {
spin_unlock_bh(&node_create_lock);
- kfree(n_ptr);
+ kfree(new_n_ptr);
return NULL;
}
- n_ptr->addr = addr;
- spin_lock_init(&n_ptr->lock);
- INIT_LIST_HEAD(&n_ptr->nsub);
- n_ptr->owner = c_ptr;
- tipc_cltr_attach_node(c_ptr, n_ptr);
- n_ptr->last_router = -1;
+ new_n_ptr->addr = addr;
+ spin_lock_init(&new_n_ptr->lock);
+ INIT_LIST_HEAD(&new_n_ptr->nsub);
+ new_n_ptr->owner = c_ptr;
+ tipc_cltr_attach_node(c_ptr, new_n_ptr);
+ new_n_ptr->last_router = -1;
+
+ list_add_tail(&new_n_ptr->node_list, &n_ptr->node_list);
+ node_count++;
- /* Insert node into ordered list */
- for (curr_node = &tipc_nodes; *curr_node;
- curr_node = &(*curr_node)->next) {
- if (addr < (*curr_node)->addr) {
- n_ptr->next = *curr_node;
- break;
- }
- }
- (*curr_node) = n_ptr;
spin_unlock_bh(&node_create_lock);
- return n_ptr;
+ return new_n_ptr;
}
void tipc_node_delete(struct tipc_node *n_ptr)
@@ -136,6 +132,8 @@ void tipc_node_delete(struct tipc_node *n_ptr)
#endif
dbg("node %x deleted\n", n_ptr->addr);
+ node_count--;
+ list_del(&n_ptr->node_list);
kfree(n_ptr);
}
@@ -275,6 +273,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
n_ptr->links[bearer_id] = l_ptr;
tipc_net.zones[tipc_zone(l_ptr->addr)]->links++;
n_ptr->link_cnt++;
+ link_count++;
return n_ptr;
}
err("Attempt to establish second link on <%s> to %s\n",
@@ -289,6 +288,7 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
n_ptr->links[l_ptr->b_ptr->identity] = NULL;
tipc_net.zones[tipc_zone(l_ptr->addr)]->links--;
n_ptr->link_cnt--;
+ link_count--;
}
/*
@@ -619,7 +619,7 @@ u32 tipc_available_nodes(const u32 domain)
u32 cnt = 0;
read_lock_bh(&tipc_net_lock);
- for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
+ list_for_each_entry(n_ptr, &nodes_list, node_list) {
if (!tipc_in_scope(domain, n_ptr->addr))
continue;
if (tipc_node_is_up(n_ptr))
@@ -646,15 +646,14 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
" (network address)");
read_lock_bh(&tipc_net_lock);
- if (!tipc_nodes) {
+ if (!node_count) {
read_unlock_bh(&tipc_net_lock);
return tipc_cfg_reply_none();
}
- /* For now, get space for all other nodes
- (will need to modify this when slave nodes are supported */
+ /* Get space for all neighboring nodes */
- payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1);
+ payload_size = TLV_SPACE(sizeof(node_info)) * node_count;
if (payload_size > 32768u) {
read_unlock_bh(&tipc_net_lock);
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
@@ -668,7 +667,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
/* Add TLVs for all nodes in scope */
- for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
+ list_for_each_entry(n_ptr, &nodes_list, node_list) {
if (!tipc_in_scope(domain, n_ptr->addr))
continue;
node_info.addr = htonl(n_ptr->addr);
@@ -704,8 +703,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
/* Get space for all unicast links + multicast link */
- payload_size = TLV_SPACE(sizeof(link_info)) *
- (tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1);
+ payload_size = TLV_SPACE(sizeof(link_info)) * (link_count + 1);
if (payload_size > 32768u) {
read_unlock_bh(&tipc_net_lock);
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
@@ -726,7 +724,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
/* Add TLVs for any other links in scope */
- for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
+ list_for_each_entry(n_ptr, &nodes_list, node_list) {
u32 i;
if (!tipc_in_scope(domain, n_ptr->addr))
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 45f3db3..26715dc 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -47,7 +47,7 @@
* @addr: network address of node
* @lock: spinlock governing access to structure
* @owner: pointer to cluster that node belongs to
- * @next: pointer to next node in sorted list of cluster's nodes
+ * @node_list: adjacent entries in sorted list of nodes
* @nsub: list of "node down" subscriptions monitoring node
* @active_links: pointers to active links to node
* @links: pointers to all links to node
@@ -73,7 +73,7 @@ struct tipc_node {
u32 addr;
spinlock_t lock;
struct cluster *owner;
- struct tipc_node *next;
+ struct list_head node_list;
struct list_head nsub;
struct link *active_links[2];
struct link *links[MAX_BEARERS];
@@ -96,7 +96,6 @@ struct tipc_node {
} bclink;
};
-extern struct tipc_node *tipc_nodes;
extern u32 tipc_own_tag;
struct tipc_node *tipc_node_create(u32 addr);
--
1.7.0.4
^ permalink raw reply related
* [PATCH net-next 5/5] tipc: clean out all instances of #if 0'd unused code
From: Paul Gortmaker @ 2010-10-13 0:25 UTC (permalink / raw)
To: davem; +Cc: netdev, allan.stephens
In-Reply-To: <1286929558-2954-1-git-send-email-paul.gortmaker@windriver.com>
Remove all instances of legacy or not-yet-implemented code
that currently live within an #if 0 ... #endif block.
In the rare event that some of it is needed in the future,
it can still be retrieved from the git history, but there is no need
for it to sit in mainline.
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
net/tipc/config.c | 141 -------------------------------------------------
net/tipc/discover.c | 20 -------
net/tipc/discover.h | 3 -
net/tipc/link.c | 112 +--------------------------------------
net/tipc/link.h | 4 --
net/tipc/name_table.c | 17 ------
net/tipc/net.c | 9 ---
net/tipc/node.c | 26 ---------
net/tipc/port.c | 44 ---------------
9 files changed, 1 insertions(+), 375 deletions(-)
diff --git a/net/tipc/config.c b/net/tipc/config.c
index a43450c..1e2fd41 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -120,139 +120,6 @@ struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string)
return buf;
}
-
-#if 0
-
-/* Now obsolete code for handling commands not yet implemented the new way */
-
-/*
- * Some of this code assumed that the manager structure contains two added
- * fields:
- * u32 link_subscriptions;
- * struct list_head link_subscribers;
- * which are currently not present. These fields may need to be re-introduced
- * if and when support for link subscriptions is added.
- */
-
-void tipc_cfg_link_event(u32 addr, char *name, int up)
-{
- /* TIPC DOESN'T HANDLE LINK EVENT SUBSCRIPTIONS AT THE MOMENT */
-}
-
-int tipc_cfg_cmd(const struct tipc_cmd_msg * msg,
- char *data,
- u32 sz,
- u32 *ret_size,
- struct tipc_portid *orig)
-{
- int rv = -EINVAL;
- u32 cmd = msg->cmd;
-
- *ret_size = 0;
- switch (cmd) {
- case TIPC_REMOVE_LINK:
- case TIPC_CMD_BLOCK_LINK:
- case TIPC_CMD_UNBLOCK_LINK:
- if (!cfg_check_connection(orig))
- rv = link_control(msg->argv.link_name, msg->cmd, 0);
- break;
- case TIPC_ESTABLISH:
- {
- int connected;
-
- tipc_isconnected(mng.conn_port_ref, &connected);
- if (connected || !orig) {
- rv = TIPC_FAILURE;
- break;
- }
- rv = tipc_connect2port(mng.conn_port_ref, orig);
- if (rv == TIPC_OK)
- orig = 0;
- break;
- }
- case TIPC_GET_PEER_ADDRESS:
- *ret_size = link_peer_addr(msg->argv.link_name, data, sz);
- break;
- case TIPC_GET_ROUTES:
- rv = TIPC_OK;
- break;
- default: {}
- }
- if (*ret_size)
- rv = TIPC_OK;
- return rv;
-}
-
-static void cfg_cmd_event(struct tipc_cmd_msg *msg,
- char *data,
- u32 sz,
- struct tipc_portid const *orig)
-{
- int rv = -EINVAL;
- struct tipc_cmd_result_msg rmsg;
- struct iovec msg_sect[2];
- int *arg;
-
- msg->cmd = ntohl(msg->cmd);
-
- cfg_prepare_res_msg(msg->cmd, msg->usr_handle, rv, &rmsg, msg_sect,
- data, 0);
- if (ntohl(msg->magic) != TIPC_MAGIC)
- goto exit;
-
- switch (msg->cmd) {
- case TIPC_CREATE_LINK:
- if (!cfg_check_connection(orig))
- rv = disc_create_link(&msg->argv.create_link);
- break;
- case TIPC_LINK_SUBSCRIBE:
- {
- struct subscr_data *sub;
-
- if (mng.link_subscriptions > 64)
- break;
- sub = kmalloc(sizeof(*sub),
- GFP_ATOMIC);
- if (sub == NULL) {
- warn("Memory squeeze; dropped remote link subscription\n");
- break;
- }
- INIT_LIST_HEAD(&sub->subd_list);
- tipc_createport(mng.user_ref,
- (void *)sub,
- TIPC_HIGH_IMPORTANCE,
- 0,
- 0,
- (tipc_conn_shutdown_event)cfg_linksubscr_cancel,
- 0,
- 0,
- (tipc_conn_msg_event)cfg_linksubscr_cancel,
- 0,
- &sub->port_ref);
- if (!sub->port_ref) {
- kfree(sub);
- break;
- }
- memcpy(sub->usr_handle,msg->usr_handle,
- sizeof(sub->usr_handle));
- sub->domain = msg->argv.domain;
- list_add_tail(&sub->subd_list, &mng.link_subscribers);
- tipc_connect2port(sub->port_ref, orig);
- rmsg.retval = TIPC_OK;
- tipc_send(sub->port_ref, 2u, msg_sect);
- mng.link_subscriptions++;
- return;
- }
- default:
- rv = tipc_cfg_cmd(msg, data, sz, (u32 *)&msg_sect[1].iov_len, orig);
- }
-exit:
- rmsg.result_len = htonl(msg_sect[1].iov_len);
- rmsg.retval = htonl(rv);
- tipc_cfg_respond(msg_sect, 2u, orig);
-}
-#endif
-
#define MAX_STATS_INFO 2000
static struct sk_buff *tipc_show_stats(void)
@@ -546,14 +413,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
case TIPC_CMD_SHOW_PORTS:
rep_tlv_buf = tipc_port_get_ports();
break;
-#if 0
- case TIPC_CMD_SHOW_PORT_STATS:
- rep_tlv_buf = port_show_stats(req_tlv_area, req_tlv_space);
- break;
- case TIPC_CMD_RESET_PORT_STATS:
- rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED);
- break;
-#endif
case TIPC_CMD_SET_LOG_SIZE:
rep_tlv_buf = tipc_log_resize_cmd(req_tlv_area, req_tlv_space);
break;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index f28d1ae..dbd79c6 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -46,16 +46,6 @@
#define TIPC_LINK_REQ_FAST 2000 /* normal delay if bearer has no links */
#define TIPC_LINK_REQ_SLOW 600000 /* normal delay if bearer has links */
-#if 0
-#define GET_NODE_INFO 300
-#define GET_NODE_INFO_RESULT 301
-#define FORWARD_LINK_PROBE 302
-#define LINK_REQUEST_REJECTED 303
-#define LINK_REQUEST_ACCEPTED 304
-#define DROP_LINK_REQUEST 305
-#define CHECK_LINK_COUNT 306
-#endif
-
/*
* TODO: Most of the inter-cluster setup stuff should be
* rewritten, and be made conformant with specification.
@@ -79,16 +69,6 @@ struct link_req {
};
-#if 0
-int disc_create_link(const struct tipc_link_create *argv)
-{
- /*
- * Code for inter cluster link setup here
- */
- return TIPC_OK;
-}
-#endif
-
/*
* disc_lost_link(): A link has lost contact
*/
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index c36eaeb..9d064c3 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -51,8 +51,5 @@ void tipc_disc_stop_link_req(struct link_req *req);
void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr);
void tipc_disc_link_event(u32 addr, char *name, int up);
-#if 0
-int disc_create_link(const struct tipc_link_create *argv);
-#endif
#endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 13bc0b6..4284351 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -99,23 +99,6 @@ struct link_name {
char if_peer[TIPC_MAX_IF_NAME];
};
-#if 0
-
-/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */
-
-/**
- * struct link_event - link up/down event notification
- */
-
-struct link_event {
- u32 addr;
- int up;
- void (*fcn)(u32, char *, int);
- char name[TIPC_MAX_LINK_NAME];
-};
-
-#endif
-
static void link_handle_out_of_seq_msg(struct link *l_ptr,
struct sk_buff *buf);
static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf);
@@ -634,39 +617,9 @@ void tipc_link_stop(struct link *l_ptr)
l_ptr->proto_msg_queue = NULL;
}
-#if 0
-
/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */
-
-static void link_recv_event(struct link_event *ev)
-{
- ev->fcn(ev->addr, ev->name, ev->up);
- kfree(ev);
-}
-
-static void link_send_event(void (*fcn)(u32 a, char *n, int up),
- struct link *l_ptr, int up)
-{
- struct link_event *ev;
-
- ev = kmalloc(sizeof(*ev), GFP_ATOMIC);
- if (!ev) {
- warn("Link event allocation failure\n");
- return;
- }
- ev->addr = l_ptr->addr;
- ev->up = up;
- ev->fcn = fcn;
- memcpy(ev->name, l_ptr->name, TIPC_MAX_LINK_NAME);
- tipc_k_signal((Handler)link_recv_event, (unsigned long)ev);
-}
-
-#else
-
#define link_send_event(fcn, l_ptr, up) do { } while (0)
-#endif
-
void tipc_link_reset(struct link *l_ptr)
{
struct sk_buff *buf;
@@ -690,10 +643,7 @@ void tipc_link_reset(struct link *l_ptr)
tipc_node_link_down(l_ptr->owner, l_ptr);
tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr);
-#if 0
- tipc_printf(TIPC_CONS, "\nReset link <%s>\n", l_ptr->name);
- dbg_link_dump();
-#endif
+
if (was_active_link && tipc_node_has_active_links(l_ptr->owner) &&
l_ptr->owner->permit_changeover) {
l_ptr->reset_checkpoint = checkpoint;
@@ -3198,44 +3148,6 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s
return buf;
}
-#if 0
-int link_control(const char *name, u32 op, u32 val)
-{
- int res = -EINVAL;
- struct link *l_ptr;
- u32 bearer_id;
- struct tipc_node * node;
- u32 a;
-
- a = link_name2addr(name, &bearer_id);
- read_lock_bh(&tipc_net_lock);
- node = tipc_node_find(a);
- if (node) {
- tipc_node_lock(node);
- l_ptr = node->links[bearer_id];
- if (l_ptr) {
- if (op == TIPC_REMOVE_LINK) {
- struct bearer *b_ptr = l_ptr->b_ptr;
- spin_lock_bh(&b_ptr->publ.lock);
- tipc_link_delete(l_ptr);
- spin_unlock_bh(&b_ptr->publ.lock);
- }
- if (op == TIPC_CMD_BLOCK_LINK) {
- tipc_link_reset(l_ptr);
- l_ptr->blocked = 1;
- }
- if (op == TIPC_CMD_UNBLOCK_LINK) {
- l_ptr->blocked = 0;
- }
- res = 0;
- }
- tipc_node_unlock(node);
- }
- read_unlock_bh(&tipc_net_lock);
- return res;
-}
-#endif
-
/**
* tipc_link_get_max_pkt - get maximum packet size to use when sending to destination
* @dest: network address of destination node
@@ -3266,28 +3178,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
return res;
}
-#if 0
-static void link_dump_rec_queue(struct link *l_ptr)
-{
- struct sk_buff *crs;
-
- if (!l_ptr->oldest_deferred_in) {
- info("Reception queue empty\n");
- return;
- }
- info("Contents of Reception queue:\n");
- crs = l_ptr->oldest_deferred_in;
- while (crs) {
- if (crs->data == (void *)0x0000a3a3) {
- info("buffer %x invalid\n", crs);
- return;
- }
- msg_dbg(buf_msg(crs), "In rec queue:\n");
- crs = crs->next;
- }
-}
-#endif
-
static void link_dump_send_queue(struct link *l_ptr)
{
if (l_ptr->next_out) {
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 26151d3..4e944ef 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -210,10 +210,6 @@ struct link {
u32 msg_length_counts;
u32 msg_lengths_total;
u32 msg_length_profile[7];
-#if 0
- u32 sent_tunneled;
- u32 recv_tunneled;
-#endif
} stats;
struct print_buf print_buf;
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 9ca4b06..3a8de43 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1009,16 +1009,6 @@ static void nametbl_list(struct print_buf *buf, u32 depth_info,
}
}
-#if 0
-void tipc_nametbl_print(struct print_buf *buf, const char *str)
-{
- tipc_printf(buf, str);
- read_lock_bh(&tipc_nametbl_lock);
- nametbl_list(buf, 0, 0, 0, 0);
- read_unlock_bh(&tipc_nametbl_lock);
-}
-#endif
-
#define MAX_NAME_TBL_QUERY 32768
struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
@@ -1051,13 +1041,6 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
return buf;
}
-#if 0
-void tipc_nametbl_dump(void)
-{
- nametbl_list(TIPC_CONS, 0, 0, 0, 0);
-}
-#endif
-
int tipc_nametbl_init(void)
{
table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head),
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 7e05af4..1a621cf 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -129,15 +129,6 @@ u32 tipc_net_select_router(u32 addr, u32 ref)
return tipc_zone_select_router(tipc_net.zones[tipc_zone(addr)], addr, ref);
}
-#if 0
-u32 tipc_net_next_node(u32 a)
-{
- if (tipc_net.zones[tipc_zone(a)])
- return tipc_zone_next_node(a);
- return 0;
-}
-#endif
-
void tipc_net_remove_as_router(u32 router)
{
u32 z_num;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 944b480..fab80b4 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -121,16 +121,6 @@ void tipc_node_delete(struct tipc_node *n_ptr)
if (!n_ptr)
return;
-#if 0
- /* Not needed because links are already deleted via tipc_bearer_stop() */
-
- u32 l_num;
-
- for (l_num = 0; l_num < MAX_BEARERS; l_num++) {
- link_delete(n_ptr->links[l_num]);
- }
-#endif
-
dbg("node %x deleted\n", n_ptr->addr);
node_count--;
list_del(&n_ptr->node_list);
@@ -597,22 +587,6 @@ void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router)
node_lost_contact(n_ptr);
}
-#if 0
-void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str)
-{
- u32 i;
-
- tipc_printf(buf, "\n\n%s", str);
- for (i = 0; i < MAX_BEARERS; i++) {
- if (!n_ptr->links[i])
- continue;
- tipc_printf(buf, "Links[%u]: %x, ", i, n_ptr->links[i]);
- }
- tipc_printf(buf, "Active links: [%x,%x]\n",
- n_ptr->active_links[0], n_ptr->active_links[1]);
-}
-#endif
-
u32 tipc_available_nodes(const u32 domain)
{
struct tipc_node *n_ptr;
diff --git a/net/tipc/port.c b/net/tipc/port.c
index d760336..5c4285b 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -710,50 +710,6 @@ struct sk_buff *tipc_port_get_ports(void)
return buf;
}
-#if 0
-
-#define MAX_PORT_STATS 2000
-
-struct sk_buff *port_show_stats(const void *req_tlv_area, int req_tlv_space)
-{
- u32 ref;
- struct port *p_ptr;
- struct sk_buff *buf;
- struct tlv_desc *rep_tlv;
- struct print_buf pb;
- int str_len;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_PORT_REF))
- return cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- ref = *(u32 *)TLV_DATA(req_tlv_area);
- ref = ntohl(ref);
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return cfg_reply_error_string("port not found");
-
- buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_STATS));
- if (!buf) {
- tipc_port_unlock(p_ptr);
- return NULL;
- }
- rep_tlv = (struct tlv_desc *)buf->data;
-
- tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_STATS);
- port_print(p_ptr, &pb, 1);
- /* NEED TO FILL IN ADDITIONAL PORT STATISTICS HERE */
- tipc_port_unlock(p_ptr);
- str_len = tipc_printbuf_validate(&pb);
-
- skb_put(buf, TLV_SPACE(str_len));
- TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
-
- return buf;
-}
-
-#endif
-
void tipc_port_reinit(void)
{
struct port *p_ptr;
--
1.7.0.4
^ permalink raw reply related
* [PATCH] via-velocity: forced 1000 Mbps mode support.
From: David Lv @ 2010-10-13 1:31 UTC (permalink / raw)
To: netdev; +Cc: romieu, DavidLv, ShirleyHu, AndersMa
I am sending this patch for forced 1000 Mbps mode support.
Full duplex only. Half duplex 1000 Mbps is not supported.
Thanks!
Signed-off-by: David Lv <DavidLv@viatech.com.cn>
Acked-by: Francois Romieu <romieu@fr.zoreil.com>
---
drivers/net/via-velocity.c | 82 ++++++++++++++++++++++++++++++++++++++++---
drivers/net/via-velocity.h | 5 ++-
2 files changed, 79 insertions(+), 8 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index f534123..b21a3d9 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -312,13 +312,14 @@ VELOCITY_PARAM(flow_control, "Enable flow
control ability");
#define MED_LNK_DEF 0
#define MED_LNK_MIN 0
-#define MED_LNK_MAX 4
+#define MED_LNK_MAX 5
/* speed_duplex[] is used for setting the speed and duplex mode of NIC.
0: indicate autonegotiation for both speed and duplex mode
1: indicate 100Mbps half duplex mode
2: indicate 100Mbps full duplex mode
3: indicate 10Mbps half duplex mode
4: indicate 10Mbps full duplex mode
+ 5: indicate 1000Mbps full duplex mode
Note:
if EEPROM have been set to the force mode, this option is ignored
@@ -617,6 +618,9 @@ static u32 velocity_get_opt_media_mode(struct
velocity_info *vptr)
case SPD_DPX_10_HALF:
status = VELOCITY_SPEED_10;
break;
+ case SPD_DPX_1000_FULL:
+ status = VELOCITY_SPEED_1000 | VELOCITY_DUPLEX_FULL;
+ break;
}
vptr->mii_status = status;
return status;
@@ -922,6 +926,7 @@ static int velocity_set_media_mode(struct
velocity_info *vptr, u32 mii_status)
/* enable AUTO-NEGO mode */
mii_set_auto_on(vptr);
} else {
+ u16 CTRL1000;
u16 ANAR;
u8 CHIPGCR;
@@ -936,7 +941,11 @@ static int velocity_set_media_mode(struct
velocity_info *vptr, u32 mii_status)
BYTE_REG_BITS_ON(CHIPGCR_FCMODE, &regs->CHIPGCR);
CHIPGCR = readb(&regs->CHIPGCR);
- CHIPGCR &= ~CHIPGCR_FCGMII;
+
+ if (mii_status & VELOCITY_SPEED_1000)
+ CHIPGCR |= CHIPGCR_FCGMII;
+ else
+ CHIPGCR &= ~CHIPGCR_FCGMII;
if (mii_status & VELOCITY_DUPLEX_FULL) {
CHIPGCR |= CHIPGCR_FCFDX;
@@ -952,7 +961,13 @@ static int velocity_set_media_mode(struct
velocity_info *vptr, u32 mii_status)
BYTE_REG_BITS_ON(TCR_TB2BDIS, &regs->TCR);
}
- MII_REG_BITS_OFF(ADVERTISE_1000FULL | ADVERTISE_1000HALF,
MII_CTRL1000, vptr->mac_regs);
+ velocity_mii_read(vptr->mac_regs, MII_CTRL1000, &CTRL1000);
+ CTRL1000 &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
+ if ((mii_status & VELOCITY_SPEED_1000) &&
+ (mii_status & VELOCITY_DUPLEX_FULL)) {
+ CTRL1000 |= ADVERTISE_1000FULL;
+ }
+ velocity_mii_write(vptr->mac_regs, MII_CTRL1000, CTRL1000);
if (!(mii_status & VELOCITY_DUPLEX_FULL) && (mii_status & VELOCITY_SPEED_10))
BYTE_REG_BITS_OFF(TESTCFG_HBDIS, &regs->TESTCFG);
@@ -967,7 +982,7 @@ static int velocity_set_media_mode(struct velocity_info *vptr, u32
mii_status)
ANAR |= ADVERTISE_100FULL;
else
ANAR |= ADVERTISE_100HALF;
- } else {
+ } else if (mii_status & VELOCITY_SPEED_10) {
if (mii_status & VELOCITY_DUPLEX_FULL)
ANAR |= ADVERTISE_10FULL;
else
@@ -1013,6 +1028,9 @@ static void velocity_print_link_status(struct
velocity_info *vptr)
} else {
VELOCITY_PRT(MSG_LEVEL_INFO, KERN_NOTICE "%s: Link forced", vptr->dev->name);
switch (vptr->options.spd_dpx) {
+ case SPD_DPX_1000_FULL:
+ VELOCITY_PRT(MSG_LEVEL_INFO, " speed 1000M bps full duplex\n");
+ break;
case SPD_DPX_100_HALF:
VELOCITY_PRT(MSG_LEVEL_INFO, " speed 100M bps half duplex\n");
break;
@@ -3170,6 +3188,37 @@ static int velocity_get_settings(struct
net_device *dev, struct ethtool_cmd *cmd
SUPPORTED_100baseT_Full |
SUPPORTED_1000baseT_Half |
SUPPORTED_1000baseT_Full;
+
+ cmd->advertising = ADVERTISED_TP | ADVERTISED_Autoneg;
+ if (vptr->options.spd_dpx == SPD_DPX_AUTO) {
+ cmd->advertising |=
+ ADVERTISED_10baseT_Half |
+ ADVERTISED_10baseT_Full |
+ ADVERTISED_100baseT_Half |
+ ADVERTISED_100baseT_Full |
+ ADVERTISED_1000baseT_Half |
+ ADVERTISED_1000baseT_Full;
+ } else {
+ switch (vptr->options.spd_dpx) {
+ case SPD_DPX_1000_FULL:
+ cmd->advertising |= ADVERTISED_1000baseT_Full;
+ break;
+ case SPD_DPX_100_HALF:
+ cmd->advertising |= ADVERTISED_100baseT_Half;
+ break;
+ case SPD_DPX_100_FULL:
+ cmd->advertising |= ADVERTISED_100baseT_Full;
+ break;
+ case SPD_DPX_10_HALF:
+ cmd->advertising |= ADVERTISED_10baseT_Half;
+ break;
+ case SPD_DPX_10_FULL:
+ cmd->advertising |= ADVERTISED_10baseT_Full;
+ break;
+ default:
+ break;
+ }
+ }
if (status & VELOCITY_SPEED_1000)
cmd->speed = SPEED_1000;
else if (status & VELOCITY_SPEED_100)
@@ -3200,14 +3249,35 @@ static int velocity_set_settings(struct
net_device *dev, struct ethtool_cmd *cmd
curr_status &= (~VELOCITY_LINK_FAIL);
new_status |= ((cmd->autoneg) ? VELOCITY_AUTONEG_ENABLE : 0);
+ new_status |= ((cmd->speed == SPEED_1000) ? VELOCITY_SPEED_1000 : 0);
new_status |= ((cmd->speed == SPEED_100) ? VELOCITY_SPEED_100 : 0);
new_status |= ((cmd->speed == SPEED_10) ? VELOCITY_SPEED_10 : 0);
new_status |= ((cmd->duplex == DUPLEX_FULL) ? VELOCITY_DUPLEX_FULL : 0);
- if ((new_status & VELOCITY_AUTONEG_ENABLE) && (new_status !=
(curr_status | VELOCITY_AUTONEG_ENABLE)))
+ if ((new_status & VELOCITY_AUTONEG_ENABLE) &&
+ (new_status != (curr_status | VELOCITY_AUTONEG_ENABLE))) {
ret = -EINVAL;
- else
+ } else {
+ enum speed_opt spd_dpx;
+
+ if (new_status & VELOCITY_AUTONEG_ENABLE)
+ spd_dpx = SPD_DPX_AUTO;
+ else if ((new_status & VELOCITY_SPEED_1000) &&
+ (new_status & VELOCITY_DUPLEX_FULL)) {
+ spd_dpx = SPD_DPX_1000_FULL;
+ } else if (new_status & VELOCITY_SPEED_100)
+ spd_dpx = (new_status & VELOCITY_DUPLEX_FULL) ?
+ SPD_DPX_100_FULL : SPD_DPX_100_HALF;
+ else if (new_status & VELOCITY_SPEED_10)
+ spd_dpx = (new_status & VELOCITY_DUPLEX_FULL) ?
+ SPD_DPX_10_FULL : SPD_DPX_10_HALF;
+ else
+ return -EOPNOTSUPP;
+
+ vptr->options.spd_dpx = spd_dpx;
+
velocity_set_media_mode(vptr, new_status);
+ }
return ret;
}
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index f7b33ae..df55f6c 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -848,7 +848,7 @@ enum velocity_owner {
* Bits in CHIPGCR register
*/
-#define CHIPGCR_FCGMII 0x80
+#define CHIPGCR_FCGMII 0x80 /* enable GMII mode */
#define CHIPGCR_FCFDX 0x40
#define CHIPGCR_FCRESV 0x20
#define CHIPGCR_FCMODE 0x10
@@ -1390,7 +1390,8 @@ enum speed_opt {
SPD_DPX_100_HALF = 1,
SPD_DPX_100_FULL = 2,
SPD_DPX_10_HALF = 3,
- SPD_DPX_10_FULL = 4
+ SPD_DPX_10_FULL = 4,
+ SPD_DPX_1000_FULL = 5
};
enum velocity_init_type {
--
1.7.2.3
^ permalink raw reply related
* Question on DHCP
From: Sri Ram Vemulpali @ 2010-10-13 1:33 UTC (permalink / raw)
To: linux-netdev
Hi All,
I have a very basic, perhaps stupid, question. Can anyone help me resolve it?
DHCP is used for dynamically configuring the IP address, mask, MTU, DNS
domain, ... for a host. DHCP is an application-layer protocol, but the
values it configures belong to the IP layer. How can DHCP configure
them when its data packets are themselves embedded in IP datagrams, given
that these are disparate layers? I am a little confused. Please clarify. Thanks.
--
Regards,
Sri.
^ permalink raw reply
* Re: Question on DHCP
From: Stephen Hemminger @ 2010-10-13 1:45 UTC (permalink / raw)
To: Sri Ram Vemulpali; +Cc: linux-netdev
In-Reply-To: <AANLkTikTKL2JzmHKUrraOoNdqV1=0e_r+EuvBTgURjUz@mail.gmail.com>
On Tue, 12 Oct 2010 21:33:25 -0400
Sri Ram Vemulpali <sri.ram.gmu06@gmail.com> wrote:
> Hi All,
>
> I have very basic stupid doubt. Can anyone help to resolve this.
>
> DHCP, is used for dynamically configuring IP address, mask, MTU, DNS
> domain, .... for a host. DHCP is a application layer protocol, but the
> values it configures are related to IP layer. How come DHCP can
> configure when its data packet in embedded in to IP datagram, which
> are disparate layers. I am little confused. Please clarify. Thanks.
>
The DHCP client uses UDP to send a broadcast packet and find the nearest
server. The Linux DHCP client ends up using an AF_PACKET socket because
it wants to see the MAC address of the server.
--
^ permalink raw reply
* Re: [PATCH 2/5] Fix deadlock in bonding driver resulting from internal locking when using netpoll
From: Cong Wang @ 2010-10-13 2:44 UTC (permalink / raw)
To: nhorman; +Cc: netdev, bonding-devel, fubar, davem, andy
In-Reply-To: <1286920552-2173-3-git-send-email-nhorman@tuxdriver.com>
On 10/13/10 05:55, nhorman@tuxdriver.com wrote:
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -76,6 +76,7 @@
> #include<linux/if_vlan.h>
> #include<linux/if_bonding.h>
> #include<linux/jiffies.h>
> +#include<linux/preempt.h>
> #include<net/route.h>
> #include<net/net_namespace.h>
> #include<net/netns/generic.h>
> @@ -169,6 +170,35 @@ MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link
>
> /*----------------------------- Global variables ----------------------------*/
>
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> +static cpumask_var_t netpoll_block_tx;
> +
> +static inline void block_netpoll_tx(void)
> +{
> + preempt_disable();
> + BUG_ON(cpumask_test_and_set_cpu(smp_processor_id(),
> + netpoll_block_tx));
> +}
> +
> +static inline void unblock_netpoll_tx(void)
> +{
> + BUG_ON(!cpumask_test_and_clear_cpu(smp_processor_id(),
> + netpoll_block_tx));
> + preempt_enable();
> +}
> +
> +static inline int is_netpoll_tx_blocked(struct net_device *dev)
> +{
> + if (unlikely(dev->priv_flags& IFF_IN_NETPOLL))
> + return cpumask_test_cpu(smp_processor_id(), netpoll_block_tx);
> + return 0;
> +}
> +#else
> +#define block_netpoll_tx()
> +#define unblock_netpoll_tx()
> +#define is_netpoll_tx_blocked(dev)
> +#endif
> +
These should go to netpoll.h, IMHO.
^ permalink raw reply
* Re: [MeeGo-Dev][PATCH v3] Topcliff: Update PCH_CAN driver to 2.6.35
From: Masayuki Ohtake @ 2010-10-13 4:23 UTC (permalink / raw)
To: Marc Kleine-Budde
Cc: andrew.chih.howe.khor-ral2JQCrhuEAvxtiuMwx3w,
qi.wang-ral2JQCrhuEAvxtiuMwx3w,
margie.foster-ral2JQCrhuEAvxtiuMwx3w,
netdev-u79uwXL29TY76Z2rM5mHXA, Wolfgang Grandegger,
yong.y.wang-ral2JQCrhuEAvxtiuMwx3w,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
socketcan-core-0fE9KPoRgkgATYTw5x5z8w,
kok.howg.ewe-ral2JQCrhuEAvxtiuMwx3w, chripell-VaTbYqLCNhc,
morinaga526-ECg8zkTtlr0C6LszWs/t0g, David Miller,
joel.clark-ral2JQCrhuEAvxtiuMwx3w, sameo-VuQAYsv1563Yd54FQh9/CA
In-Reply-To: <4CB414A2.8020202@pengutronix.de>
On Tuesday, October 12, 2010 4:56 PM, Marc Kleine-Budde wrote :
> I just had a small peek at the datasheet. It seems you can implement the
> same scheme for rx as on the at91, if you have enough rx buffers. In the at91
> driver I use 8+4 = 12 buffers for rx. Have a look at the driver, the rx
> path is documented.
Thank you for your suggestion.
This is valuable for us.
It seems the at91's HW behavior is similar to ours.
We will implement and test using this method.
Thanks, Ohtake(OKISemi)
^ permalink raw reply
* Re: [PATCH] Phonet: 'connect' socket implementation for Pipe controller
From: Kumar SANGHVI @ 2010-10-13 4:32 UTC (permalink / raw)
To: Rémi Denis-Courmont
Cc: remi.denis-courmont@nokia.com, davem@davemloft.net,
netdev@vger.kernel.org, Linus WALLEIJ, Gulshan KARMANI,
Sudeep DIVAKARAN
In-Reply-To: <201010121930.32116.remi@remlab.net>
Hi Rémi Denis-Courmont,
On Tue, Oct 12, 2010 at 18:30:30 +0200, Rémi Denis-Courmont wrote:
> > + static u8 host_pref_rx_fc[3] = {3, 2, 1}, host_req_tx_fc[3] = {3, 2, 1};
>
> Why is this 'static' ? Doesn't that break concurrent uses?
Yes. The 'static' is not needed here. I will fix this.
Thanks.
> +static int pn_socket_connect(struct socket *sock, struct sockaddr *addr,
> > + int len, int flags)
> > +{
> > + struct sock *sk = sock->sk;
> > + struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
> > + long timeo;
> > + int err;
> > +
> > + lock_sock(sk);
> > +
> > + if (len < sizeof(struct sockaddr_pn))
> > + return -EINVAL;
> > + if (spn->spn_family != AF_PHONET)
> > + return -EAFNOSUPPORT;
>
> You should move lock_sock(sk); here, I think.
Yes. I will fix this locking.
Thanks.
>
> > +
> > + switch (sock->state) {
> > + case SS_UNCONNECTED:
> > + sk->sk_state = TCP_CLOSE;
> > + break;
> > + case SS_CONNECTING:
> > + switch (sk->sk_state) {
> > + case TCP_SYN_RECV:
> > + sock->state = SS_CONNECTED;
> > + err = -EISCONN;
> > + goto out;
> > + case TCP_CLOSE:
> > + err = -EALREADY;
> > + if (flags & O_NONBLOCK)
> > + goto out;
> > + goto wait_connect;
> > + break;
>
> I think the kernel policy is against redundant break statements.
Yes, the break is redundant here. I will fix this.
Thanks.
>
> > + }
> > + break;
> > + case SS_CONNECTED:
> > + switch (sk->sk_state) {
> > + case TCP_SYN_RECV:
> > + err = -EISCONN;
> > + goto out;
> > + case TCP_CLOSE:
> > + sock->state = SS_UNCONNECTED;
> > + break;
> > + }
> > + break;
> > + case SS_DISCONNECTING:
> > + case SS_FREE:
> > + break;
> > + }
> > + sk->sk_state = TCP_CLOSE;
> > + sock->state = SS_UNCONNECTED;
>
> This is dead code...
Yes. I will remove the above assignment to sock->state which is not
required at all.
Thanks.
>
> > + sk_stream_kill_queues(sk);
> > +
> > +
> > + sock->state = SS_CONNECTING;
>
> ...because of this ^ .
>
> > + err = sk->sk_prot->connect(sk, addr, len);
Thanks for the review. I will fix and upload v2 version of the patch.
I will also upload a Documentation patch describing the 'connect' logic
for the Pipe controller.
Thanks & regards,
Kumar.
^ permalink raw reply
* Re: [RFC PATCH 4/9] ipvs network name space aware
From: Hans Schillstrom @ 2010-10-13 5:07 UTC (permalink / raw)
To: Daniel Lezcano
Cc: lvs-devel@vger.kernel.org, netdev@vger.kernel.org,
netfilter-devel@vger.kernel.org, horms@verge.net.au, ja@ssi.bg,
wensong@linux-vs.org
In-Reply-To: <4CB4869D.9050605@free.fr>
On Tuesday 12 October 2010 18:02:37 Daniel Lezcano wrote:
> On 10/08/2010 01:16 PM, Hans Schillstrom wrote:
> > This patch just contains ip_vs_core.c
> >
> > Signed-off-by:Hans Schillstrom<hans.schillstrom@ericsson.com>
> >
> > diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
> > index 0c043b6..4fdc5cb 100644
> > --- a/net/netfilter/ipvs/ip_vs_core.c
> > +++ b/net/netfilter/ipvs/ip_vs_core.c
> > @@ -52,7 +52,6 @@
> >
> > #include<net/ip_vs.h>
> >
> > -
> > EXPORT_SYMBOL(register_ip_vs_scheduler);
> > EXPORT_SYMBOL(unregister_ip_vs_scheduler);
> > EXPORT_SYMBOL(ip_vs_proto_name);
> > @@ -67,6 +66,8 @@ EXPORT_SYMBOL(ip_vs_conn_put);
> > EXPORT_SYMBOL(ip_vs_get_debug_level);
> > #endif
> >
> > +/* netns cnt used for uniqueness */
> > +static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
> >
>
> Why is this counter needed ?
>
It's an incarnation counter that's used globally to name
- cachemem
- master/backup thread
> [ cut ]
>
> > + * Initialize IP Virtual Server netns mem.
> > + */
> > +static int __net_init __ip_vs_init(struct net *net)
> > +{
> > + struct netns_ipvs *ipvs = 0;
> >
> > + ipvs = kzalloc(sizeof(struct netns_ipvs), GFP_ATOMIC);
> > + if( ipvs == NULL ) {
> > + pr_err("%s(): no memory.\n", __func__);
> > + return -ENOMEM;
> > + }
> > + ipvs->inc = atomic_read(&ipvs_netns_cnt);
> >
>
> AFAICS, this counter is never used. Is it really needed ?
>
> > + atomic_inc(&ipvs_netns_cnt);
> > + IP_VS_DBG(10, "Creating new netns *net=%p *ipvs=%p size=%lu\n",
> > + net, ipvs, sizeof(struct netns_ipvs));
> > + net->ipvs = ipvs;
> > +
> > + return 0;
> > +}
> >
>
>
--
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>
^ permalink raw reply
* Re: Documentation for IP_TRANSPARENT?
From: Michael Kerrisk @ 2010-10-13 5:16 UTC (permalink / raw)
To: Balazs Scheidler; +Cc: KOVACS Krisztian, David Miller, linux-net, netdev
In-Reply-To: <1228828648.7631.75.camel@bzorp.balabit>
Hello Bazsi
A long time after your mail, I've now added the text you supplied
below. The changes are now available via git, and will be in
man-pages-3.29. However, I have a few small questions that you can
maybe clarify. See below.
On Tue, Dec 9, 2008 at 2:17 PM, Balazs Scheidler <bazsi@balabit.hu> wrote:
> Hi,
>
> On Mon, 2008-11-24 at 11:06 -0500, Michael Kerrisk wrote:
>> Hello Krisztian,
>>
>> Your 2.6.27 patch adds the IP_TRANSPARENT socket option. Could you
>> please supply some documentation (plain text is fine) suitable for
>> inclusion in the ip(7) man page.
>
> Although it was Krisztian who originally submitted the patches, I
> also did some work on them, so here I gave the documentation part a
> spin.
>
> There are some other related options, that are undocumented right now,
> so I tried to give documentation on those as well.
>
> IP_FREEBIND
> Enable binding to IP addresses that do not currently exist. When
> enabled on a TCP or UDP socket, the bind(2) call referencing a
> non-existing IP address will succeed. This functionality is useful
> if the specified IP address is dynamic (e.g. assigned to a PPP device)
> and is down at the time when the application is trying to bind to it.
> This setsockopt is the per-socket equivalent to ip_nonlocal_bind sysctl.
>
> IP_TRANSPARENT
> Enable transparent proxying on this socket. This socket option allows
> the calling application to bind to a non-local IP address and operate
> both as a client and a server with the foreign address as the local
> endpoint. NOTE: this requires that routing be set up in a way that
> packets going to the foreign address are routed through the tproxy box.
Can you say a few words explaining what is meant by "tproxy box"?
Also, should one write "TProxy" here, analogous with "TProxy" below?
> Enabling this socket option requires superuser privileges
> (more specifically the CAP_NET_ADMIN capability).
>
> TProxy redirection with the iptables TPROXY target also requires that
> this option be set on the redirected socket.
In iptables(8), I find no mention of TPROXY. Is it just that the
iptables(8) man page is not up to date?
> IP_RECVORIGDSTADDR
> Enables the IP_ORIGDSTADDR ancillary message in recvmsg(2) in which
> the kernel returns the original destination address of a datagram. The
> ancillary message contains a "struct sockaddr_in" instance.
Do you happen to know of any userspace test code that demonstrates the
use of IP_RECVORIGDSTADDR?
Thanks for this documentation, and sorry it took so long to make its
way into man-pages.
Cheers,
Michael
--
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Author of "The Linux Programming Interface"; http://man7.org/tlpi/
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox