* Re: [PATCH net-next-2.6] bonding: introduce primary_reselect option
From: Jiri Pirko @ 2009-09-18 19:52 UTC (permalink / raw)
To: Nicolas de Pesloüan; +Cc: netdev, davem, fubar, bonding-devel
In-Reply-To: <4AB3E03B.3070205@free.fr>
Fri, Sep 18, 2009 at 09:32:11PM CEST, nicolas.2p.debian@free.fr wrote:
> Jiri Pirko a écrit :
>> (updated 3)
>>
>> In some cases it is not desirable to switch back to the primary interface when
>> its link recovers; it is preferable to stay with the currently active one. We need to avoid
>> packetloss as much as we can in some cases. This is solved by introducing
>> primary_reselect option. Note that enslaved primary slave is set as current
>> active no matter what.
>>
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>>
>> diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
>> index d5181ce..fd650e0 100644
>> --- a/Documentation/networking/bonding.txt
>> +++ b/Documentation/networking/bonding.txt
>> @@ -614,6 +614,32 @@ primary
>> The primary option is only valid for active-backup mode.
>> +primary_reselect
>> +
>> + Specifies the behavior of the current active slave when the primary was
>> + down and comes back up. This option is designed to prevent
>> + flip-flopping between the primary slave and other slaves. The possible
>> + values and their respective effects are:
>> +
>> + always or 0 (default)
>> +
>> + The primary slave becomes the active slave whenever it comes
>> + back up.
>> +
>> + better or 1
>> +
>> + The primary slave becomes the active slave when it comes back
>> + up, if the speed and duplex of the primary slave is better
>> + than the speed and duplex of the current active slave.
>> +
>> + failure or 2
>> +
>> + The primary slave becomes the active slave only if the current
>> + active slave fails and the primary slave is up.
>> +
>> + When no slaves are active, if the primary comes back up, it becomes the
>> + active slave, regardless of the value of primary_reselect.
>> +
>> updelay
>> Specifies the time, in milliseconds, to wait before enabling a
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index 699bfdd..1127361 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -94,6 +94,7 @@ static int downdelay;
>> static int use_carrier = 1;
>> static char *mode;
>> static char *primary;
>> +static char *primary_reselect;
>> static char *lacp_rate;
>> static char *ad_select;
>> static char *xmit_hash_policy;
>> @@ -126,6 +127,13 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
>> "6 for balance-alb");
>> module_param(primary, charp, 0);
>> MODULE_PARM_DESC(primary, "Primary network device to use");
>> +module_param(primary_reselect, charp, 0);
>> +MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "
>> + "once it comes up; "
>> + "0 for always (default), "
>> + "1 for only if speed of primary is not "
>> + "better, "
>> + "2 for never");
>
> You should remove "not" for option value 1 and use the word failure for
> option value 2.
>
> MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "
> "once it comes up; "
> "0 for always (default), "
> "1 for only if speed of primary is "
> "better, "
> "2 for only on active slave "
> "failure");
Okay, I wasn't sure how to put it here. This sounds good, going to resend.
Thanks Nicolas.
>
> Apart from this small detail, this sounds good to me.
>
>> module_param(lacp_rate, charp, 0);
>> MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
>> "(slow/fast)");
>> @@ -200,6 +208,13 @@ const struct bond_parm_tbl fail_over_mac_tbl[] = {
>> { NULL, -1},
>> };
>> +const struct bond_parm_tbl pri_reselect_tbl[] = {
>> +{ "always", BOND_PRI_RESELECT_ALWAYS},
>> +{ "better", BOND_PRI_RESELECT_BETTER},
>> +{ "failure", BOND_PRI_RESELECT_FAILURE},
>> +{ NULL, -1},
>> +};
>> +
>> struct bond_parm_tbl ad_select_tbl[] = {
>> { "stable", BOND_AD_STABLE},
>> { "bandwidth", BOND_AD_BANDWIDTH},
>> @@ -1070,6 +1085,25 @@ out:
>> }
>> +static bool bond_should_change_active(struct bonding *bond)
>> +{
>> + struct slave *prim = bond->primary_slave;
>> + struct slave *curr = bond->curr_active_slave;
>> +
>> + if (!prim || !curr || curr->link != BOND_LINK_UP)
>> + return true;
>> + if (bond->force_primary) {
>> + bond->force_primary = false;
>> + return true;
>> + }
>> + if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER &&
>> + (prim->speed < curr->speed ||
>> + (prim->speed == curr->speed && prim->duplex <= curr->duplex)))
>> + return false;
>> + if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE)
>> + return false;
>> + return true;
>> +}
>> /**
>> * find_best_interface - select the best available slave to be the active one
>> @@ -1094,7 +1128,8 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
>> }
>> if ((bond->primary_slave) &&
>> - bond->primary_slave->link == BOND_LINK_UP) {
>> + bond->primary_slave->link == BOND_LINK_UP &&
>> + bond_should_change_active(bond)) {
>> new_active = bond->primary_slave;
>> }
>> @@ -1675,8 +1710,10 @@ int bond_enslave(struct net_device *bond_dev,
>> struct net_device *slave_dev)
>> if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
>> /* if there is a primary slave, remember it */
>> - if (strcmp(bond->params.primary, new_slave->dev->name) == 0)
>> + if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
>> bond->primary_slave = new_slave;
>> + bond->force_primary = true;
>> + }
>> }
>> write_lock_bh(&bond->curr_slave_lock);
>> @@ -4643,7 +4680,7 @@ int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl)
>> static int bond_check_params(struct bond_params *params)
>> {
>> - int arp_validate_value, fail_over_mac_value;
>> + int arp_validate_value, fail_over_mac_value, primary_reselect_value;
>> /*
>> * Convert string parameters.
>> @@ -4942,6 +4979,20 @@ static int bond_check_params(struct bond_params *params)
>> primary = NULL;
>> }
>> + if (primary && primary_reselect) {
>> + primary_reselect_value = bond_parse_parm(primary_reselect,
>> + pri_reselect_tbl);
>> + if (primary_reselect_value == -1) {
>> + pr_err(DRV_NAME
>> + ": Error: Invalid primary_reselect \"%s\"\n",
>> + primary_reselect ==
>> + NULL ? "NULL" : primary_reselect);
>> + return -EINVAL;
>> + }
>> + } else {
>> + primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;
>> + }
>> +
>> if (fail_over_mac) {
>> fail_over_mac_value = bond_parse_parm(fail_over_mac,
>> fail_over_mac_tbl);
>> @@ -4973,6 +5024,7 @@ static int bond_check_params(struct bond_params *params)
>> params->use_carrier = use_carrier;
>> params->lacp_fast = lacp_fast;
>> params->primary[0] = 0;
>> + params->primary_reselect = primary_reselect_value;
>> params->fail_over_mac = fail_over_mac_value;
>> if (primary) {
>> diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
>> index 6044e12..42c44f2 100644
>> --- a/drivers/net/bonding/bond_sysfs.c
>> +++ b/drivers/net/bonding/bond_sysfs.c
>> @@ -1212,6 +1212,61 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
>> bonding_show_primary, bonding_store_primary);
>> /*
>> + * Show and set the primary_reselect flag.
>> + */
>> +static ssize_t bonding_show_primary_reselect(struct device *d,
>> + struct device_attribute *attr,
>> + char *buf)
>> +{
>> + struct bonding *bond = to_bond(d);
>> +
>> + return sprintf(buf, "%s %d\n",
>> + pri_reselect_tbl[bond->params.primary_reselect].modename,
>> + bond->params.primary_reselect);
>> +}
>> +
>> +static ssize_t bonding_store_primary_reselect(struct device *d,
>> + struct device_attribute *attr,
>> + const char *buf, size_t count)
>> +{
>> + int new_value, ret = count;
>> + struct bonding *bond = to_bond(d);
>> +
>> + if (!rtnl_trylock())
>> + return restart_syscall();
>> +
>> + new_value = bond_parse_parm(buf, pri_reselect_tbl);
>> + if (new_value < 0) {
>> + pr_err(DRV_NAME
>> + ": %s: Ignoring invalid primary_reselect value %.*s.\n",
>> + bond->dev->name,
>> + (int) strlen(buf) - 1, buf);
>> + ret = -EINVAL;
>> + goto out;
>> + } else {
>> + bond->params.primary_reselect = new_value;
>> + pr_info(DRV_NAME ": %s: setting primary_reselect to %s (%d).\n",
>> + bond->dev->name, pri_reselect_tbl[new_value].modename,
>> + new_value);
>> + if (new_value == BOND_PRI_RESELECT_ALWAYS ||
>> + new_value == BOND_PRI_RESELECT_BETTER) {
>> + bond->force_primary = true;
>> + read_lock(&bond->lock);
>> + write_lock_bh(&bond->curr_slave_lock);
>> + bond_select_active_slave(bond);
>> + write_unlock_bh(&bond->curr_slave_lock);
>> + read_unlock(&bond->lock);
>> + }
>> + }
>> +out:
>> + rtnl_unlock();
>> + return ret;
>> +}
>> +static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR,
>> + bonding_show_primary_reselect,
>> + bonding_store_primary_reselect);
>> +
>> +/*
>> * Show and set the use_carrier flag.
>> */
>> static ssize_t bonding_show_carrier(struct device *d,
>> @@ -1500,6 +1555,7 @@ static struct attribute *per_bond_attrs[] = {
>> &dev_attr_num_unsol_na.attr,
>> &dev_attr_miimon.attr,
>> &dev_attr_primary.attr,
>> + &dev_attr_primary_reselect.attr,
>> &dev_attr_use_carrier.attr,
>> &dev_attr_active_slave.attr,
>> &dev_attr_mii_status.attr,
>> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>> index 6824771..b5b1530 100644
>> --- a/drivers/net/bonding/bonding.h
>> +++ b/drivers/net/bonding/bonding.h
>> @@ -131,6 +131,7 @@ struct bond_params {
>> int lacp_fast;
>> int ad_select;
>> char primary[IFNAMSIZ];
>> + int primary_reselect;
>> __be32 arp_targets[BOND_MAX_ARP_TARGETS];
>> };
>> @@ -190,6 +191,7 @@ struct bonding {
>> struct slave *curr_active_slave;
>> struct slave *current_arp_slave;
>> struct slave *primary_slave;
>> + bool force_primary;
>> s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
>> rwlock_t lock;
>> rwlock_t curr_slave_lock;
>> @@ -258,6 +260,10 @@ static inline bool bond_is_lb(const struct bonding *bond)
>> || bond->params.mode == BOND_MODE_ALB;
>> }
>> +#define BOND_PRI_RESELECT_ALWAYS 0
>> +#define BOND_PRI_RESELECT_BETTER 1
>> +#define BOND_PRI_RESELECT_FAILURE 2
>> +
>> #define BOND_FOM_NONE 0
>> #define BOND_FOM_ACTIVE 1
>> #define BOND_FOM_FOLLOW 2
>> @@ -348,6 +354,7 @@ extern const struct bond_parm_tbl bond_mode_tbl[];
>> extern const struct bond_parm_tbl xmit_hashtype_tbl[];
>> extern const struct bond_parm_tbl arp_validate_tbl[];
>> extern const struct bond_parm_tbl fail_over_mac_tbl[];
>> +extern const struct bond_parm_tbl pri_reselect_tbl[];
>> extern struct bond_parm_tbl ad_select_tbl[];
>> #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
>>
>
> Nicolas.
^ permalink raw reply
* [net-2.6 PATCH 3/3] ixgbe: move rx queue RSC configuration to a separate function
From: Jeff Kirsher @ 2009-09-18 19:46 UTC (permalink / raw)
To: davem
Cc: netdev, gospo, Shannon Nelson, Peter P Waskiewicz Jr,
Don Skidmore, Jeff Kirsher
In-Reply-To: <20090918194533.28898.49436.stgit@localhost.localdomain>
From: Nelson, Shannon <shannon.nelson@intel.com>
Shorten ixgbe_configure_rx() and lessen indent depth.
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe_main.c | 78 ++++++++++++++++++++++++----------------
1 files changed, 47 insertions(+), 31 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 1aa9f6a..c407bd9 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1970,6 +1970,50 @@ static u32 ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
}
/**
+ * ixgbe_configure_rscctl - enable RSC for the indicated ring
+ * @adapter: address of board private structure
+ * @index: index of ring to set
+ * @rx_buf_len: rx buffer length
+ **/
+static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, int index,
+ int rx_buf_len)
+{
+ struct ixgbe_ring *rx_ring;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int j;
+ u32 rscctrl;
+
+ rx_ring = &adapter->rx_ring[index];
+ j = rx_ring->reg_idx;
+ rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(j));
+ rscctrl |= IXGBE_RSCCTL_RSCEN;
+ /*
+ * we must limit the number of descriptors so that the
+ * total size of max desc * buf_len is not greater
+ * than 65535
+ */
+ if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) {
+#if (MAX_SKB_FRAGS > 16)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
+#elif (MAX_SKB_FRAGS > 8)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
+#elif (MAX_SKB_FRAGS > 4)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
+#else
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
+#endif
+ } else {
+ if (rx_buf_len < IXGBE_RXBUFFER_4096)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
+ else if (rx_buf_len < IXGBE_RXBUFFER_8192)
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
+ else
+ rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(j), rscctrl);
+}
+
+/**
* ixgbe_configure_rx - Configure 8259x Receive Unit after Reset
* @adapter: board private structure
*
@@ -1990,7 +2034,6 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
u32 fctrl, hlreg0;
u32 reta = 0, mrqc = 0;
u32 rdrxctl;
- u32 rscctrl;
int rx_buf_len;
/* Decide whether to use packet split mode or not */
@@ -2148,36 +2191,9 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
/* Enable 82599 HW-RSC */
- for (i = 0; i < adapter->num_rx_queues; i++) {
- rx_ring = &adapter->rx_ring[i];
- j = rx_ring->reg_idx;
- rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(j));
- rscctrl |= IXGBE_RSCCTL_RSCEN;
- /*
- * we must limit the number of descriptors so that the
- * total size of max desc * buf_len is not greater
- * than 65535
- */
- if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) {
-#if (MAX_SKB_FRAGS > 16)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
-#elif (MAX_SKB_FRAGS > 8)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
-#elif (MAX_SKB_FRAGS > 4)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
-#else
- rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
-#endif
- } else {
- if (rx_buf_len < IXGBE_RXBUFFER_4096)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
- else if (rx_buf_len < IXGBE_RXBUFFER_8192)
- rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
- else
- rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
- }
- IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(j), rscctrl);
- }
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ ixgbe_configure_rscctl(adapter, i, rx_buf_len);
+
/* Disable RSC for ACK packets */
IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
(IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
^ permalink raw reply related
* [net-2.6 PATCH 2/3] ixgbe: Allow tx itr specific settings
From: Jeff Kirsher @ 2009-09-18 19:46 UTC (permalink / raw)
To: davem; +Cc: netdev, gospo, Shannon Nelson, Peter P Waskiewicz Jr,
Jeff Kirsher
In-Reply-To: <20090918194533.28898.49436.stgit@localhost.localdomain>
From: Nelson, Shannon <shannon.nelson@intel.com>
Allow the user to set Tx specific itr values. This only makes sense
when there are separate vectors for Tx and Rx. When the queues are
doubled up RxTx on the vectors, we still only use the rx itr value.
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe.h | 6 ++-
drivers/net/ixgbe/ixgbe_ethtool.c | 75 ++++++++++++++++++++++++++++++-------
drivers/net/ixgbe/ixgbe_main.c | 31 +++++++++------
3 files changed, 83 insertions(+), 29 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index dd688d4..385be60 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -267,7 +267,8 @@ struct ixgbe_adapter {
enum ixgbe_fc_mode last_lfc_mode;
/* Interrupt Throttle Rate */
- u32 itr_setting;
+ u32 rx_itr_setting;
+ u32 tx_itr_setting;
u16 eitr_low;
u16 eitr_high;
@@ -351,7 +352,8 @@ struct ixgbe_adapter {
struct ixgbe_hw_stats stats;
/* Interrupt Throttle Rate */
- u32 eitr_param;
+ u32 rx_eitr_param;
+ u32 tx_eitr_param;
unsigned long state;
u64 tx_busy;
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 026e94a..53b0a66 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -1929,7 +1929,7 @@ static int ixgbe_get_coalesce(struct net_device *netdev,
ec->tx_max_coalesced_frames_irq = adapter->tx_ring[0].work_limit;
/* only valid if in constant ITR mode */
- switch (adapter->itr_setting) {
+ switch (adapter->rx_itr_setting) {
case 0:
/* throttling disabled */
ec->rx_coalesce_usecs = 0;
@@ -1940,9 +1940,25 @@ static int ixgbe_get_coalesce(struct net_device *netdev,
break;
default:
/* fixed interrupt rate mode */
- ec->rx_coalesce_usecs = 1000000/adapter->eitr_param;
+ ec->rx_coalesce_usecs = 1000000/adapter->rx_eitr_param;
break;
}
+
+ /* only valid if in constant ITR mode */
+ switch (adapter->tx_itr_setting) {
+ case 0:
+ /* throttling disabled */
+ ec->tx_coalesce_usecs = 0;
+ break;
+ case 1:
+ /* dynamic ITR mode */
+ ec->tx_coalesce_usecs = 1;
+ break;
+ default:
+ ec->tx_coalesce_usecs = 1000000/adapter->tx_eitr_param;
+ break;
+ }
+
return 0;
}
@@ -1953,6 +1969,14 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
struct ixgbe_q_vector *q_vector;
int i;
+ /*
+ * don't accept tx specific changes if we've got mixed RxTx vectors
+ * test and jump out here if needed before changing the rx numbers
+ */
+ if ((1000000/ec->tx_coalesce_usecs) != adapter->tx_eitr_param &&
+ adapter->q_vector[0]->txr_count && adapter->q_vector[0]->rxr_count)
+ return -EINVAL;
+
if (ec->tx_max_coalesced_frames_irq)
adapter->tx_ring[0].work_limit = ec->tx_max_coalesced_frames_irq;
@@ -1963,26 +1987,49 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
return -EINVAL;
/* store the value in ints/second */
- adapter->eitr_param = 1000000/ec->rx_coalesce_usecs;
+ adapter->rx_eitr_param = 1000000/ec->rx_coalesce_usecs;
/* static value of interrupt rate */
- adapter->itr_setting = adapter->eitr_param;
+ adapter->rx_itr_setting = adapter->rx_eitr_param;
/* clear the lower bit as its used for dynamic state */
- adapter->itr_setting &= ~1;
+ adapter->rx_itr_setting &= ~1;
} else if (ec->rx_coalesce_usecs == 1) {
/* 1 means dynamic mode */
- adapter->eitr_param = 20000;
- adapter->itr_setting = 1;
+ adapter->rx_eitr_param = 20000;
+ adapter->rx_itr_setting = 1;
} else {
/*
* any other value means disable eitr, which is best
* served by setting the interrupt rate very high
*/
if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
- adapter->eitr_param = IXGBE_MAX_RSC_INT_RATE;
+ adapter->rx_eitr_param = IXGBE_MAX_RSC_INT_RATE;
else
- adapter->eitr_param = IXGBE_MAX_INT_RATE;
- adapter->itr_setting = 0;
+ adapter->rx_eitr_param = IXGBE_MAX_INT_RATE;
+ adapter->rx_itr_setting = 0;
+ }
+
+ if (ec->tx_coalesce_usecs > 1) {
+ /* check the limits */
+ if ((1000000/ec->tx_coalesce_usecs > IXGBE_MAX_INT_RATE) ||
+ (1000000/ec->tx_coalesce_usecs < IXGBE_MIN_INT_RATE))
+ return -EINVAL;
+
+ /* store the value in ints/second */
+ adapter->tx_eitr_param = 1000000/ec->tx_coalesce_usecs;
+
+ /* static value of interrupt rate */
+ adapter->tx_itr_setting = adapter->tx_eitr_param;
+
+ /* clear the lower bit as its used for dynamic state */
+ adapter->tx_itr_setting &= ~1;
+ } else if (ec->tx_coalesce_usecs == 1) {
+ /* 1 means dynamic mode */
+ adapter->tx_eitr_param = 10000;
+ adapter->tx_itr_setting = 1;
+ } else {
+ adapter->tx_eitr_param = IXGBE_MAX_INT_RATE;
+ adapter->tx_itr_setting = 0;
}
/* MSI/MSIx Interrupt Mode */
@@ -1992,17 +2039,17 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
for (i = 0; i < num_vectors; i++) {
q_vector = adapter->q_vector[i];
if (q_vector->txr_count && !q_vector->rxr_count)
- /* tx vector gets half the rate */
- q_vector->eitr = (adapter->eitr_param >> 1);
+ /* tx only */
+ q_vector->eitr = adapter->tx_eitr_param;
else
/* rx only or mixed */
- q_vector->eitr = adapter->eitr_param;
+ q_vector->eitr = adapter->rx_eitr_param;
ixgbe_write_eitr(q_vector);
}
/* Legacy Interrupt Mode */
} else {
q_vector = adapter->q_vector[0];
- q_vector->eitr = adapter->eitr_param;
+ q_vector->eitr = adapter->rx_eitr_param;
ixgbe_write_eitr(q_vector);
}
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 056434c..1aa9f6a 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -926,12 +926,12 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
r_idx + 1);
}
- /* if this is a tx only vector halve the interrupt rate */
if (q_vector->txr_count && !q_vector->rxr_count)
- q_vector->eitr = (adapter->eitr_param >> 1);
+ /* tx only */
+ q_vector->eitr = adapter->tx_eitr_param;
else if (q_vector->rxr_count)
- /* rx only */
- q_vector->eitr = adapter->eitr_param;
+ /* rx or mixed */
+ q_vector->eitr = adapter->rx_eitr_param;
ixgbe_write_eitr(q_vector);
}
@@ -1359,7 +1359,7 @@ static int ixgbe_clean_rxonly(struct napi_struct *napi, int budget)
/* If all Rx work done, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->rx_itr_setting & 1)
ixgbe_set_itr_msix(q_vector);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter,
@@ -1420,7 +1420,7 @@ static int ixgbe_clean_rxtx_many(struct napi_struct *napi, int budget)
/* If all Rx work done, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->rx_itr_setting & 1)
ixgbe_set_itr_msix(q_vector);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter,
@@ -1458,10 +1458,10 @@ static int ixgbe_clean_txonly(struct napi_struct *napi, int budget)
if (!ixgbe_clean_tx_irq(q_vector, tx_ring))
work_done = budget;
- /* If all Rx work done, exit the polling mode */
+ /* If all Tx work done, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->tx_itr_setting & 1)
ixgbe_set_itr_msix(q_vector);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx));
@@ -1848,7 +1848,7 @@ static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter)
struct ixgbe_hw *hw = &adapter->hw;
IXGBE_WRITE_REG(hw, IXGBE_EITR(0),
- EITR_INTS_PER_SEC_TO_REG(adapter->eitr_param));
+ EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr_param));
ixgbe_set_ivar(adapter, 0, 0, 0);
ixgbe_set_ivar(adapter, 1, 0, 0);
@@ -2991,7 +2991,7 @@ static int ixgbe_poll(struct napi_struct *napi, int budget)
/* If budget not fully consumed, exit the polling mode */
if (work_done < budget) {
napi_complete(napi);
- if (adapter->itr_setting & 1)
+ if (adapter->rx_itr_setting & 1)
ixgbe_set_itr(adapter);
if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter, IXGBE_EIMS_RTX_QUEUE);
@@ -3601,7 +3601,10 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
if (!q_vector)
goto err_out;
q_vector->adapter = adapter;
- q_vector->eitr = adapter->eitr_param;
+ if (q_vector->txr_count && !q_vector->rxr_count)
+ q_vector->eitr = adapter->tx_eitr_param;
+ else
+ q_vector->eitr = adapter->rx_eitr_param;
q_vector->v_idx = q_idx;
netif_napi_add(adapter->netdev, &q_vector->napi, (*poll), 64);
adapter->q_vector[q_idx] = q_vector;
@@ -3870,8 +3873,10 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
hw->fc.disable_fc_autoneg = false;
/* enable itr by default in dynamic mode */
- adapter->itr_setting = 1;
- adapter->eitr_param = 20000;
+ adapter->rx_itr_setting = 1;
+ adapter->rx_eitr_param = 20000;
+ adapter->tx_itr_setting = 1;
+ adapter->tx_eitr_param = 10000;
/* set defaults for eitr in MegaBytes */
adapter->eitr_low = 10;
^ permalink raw reply related
* [net-2.6 PATCH 1/3] ixgbe: fix sfp_timer clean up in ixgbe_down
From: Jeff Kirsher @ 2009-09-18 19:45 UTC (permalink / raw)
To: davem
Cc: netdev, gospo, Shannon Nelson, Don Skidmore,
Peter P Waskiewicz Jr, Jeff Kirsher
From: Don Skidmore <donald.c.skidmore@intel.com>
We weren't stopping the sfp_timer after the device was brought down.
This patch properly cleans up.
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ixgbe/ixgbe_main.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 59ad959..056434c 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2926,6 +2926,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
ixgbe_napi_disable_all(adapter);
+ clear_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state);
+ del_timer_sync(&adapter->sfp_timer);
del_timer_sync(&adapter->watchdog_timer);
cancel_work_sync(&adapter->watchdog_task);
^ permalink raw reply related
* Re: [PATCH net-next-2.6] bonding: introduce primary_reselect option
From: Nicolas de Pesloüan @ 2009-09-18 19:32 UTC (permalink / raw)
To: Jiri Pirko; +Cc: netdev, davem, fubar, bonding-devel
In-Reply-To: <20090918153006.GC2801@psychotron.redhat.com>
Jiri Pirko a écrit :
> (updated 3)
>
> In some cases it is not desirable to switch back to the primary interface when
> its link recovers; it is preferable to stay with the currently active one. We need to avoid
> packetloss as much as we can in some cases. This is solved by introducing
> primary_reselect option. Note that enslaved primary slave is set as current
> active no matter what.
>
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>
> diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
> index d5181ce..fd650e0 100644
> --- a/Documentation/networking/bonding.txt
> +++ b/Documentation/networking/bonding.txt
> @@ -614,6 +614,32 @@ primary
>
> The primary option is only valid for active-backup mode.
>
> +primary_reselect
> +
> + Specifies the behavior of the current active slave when the primary was
> + down and comes back up. This option is designed to prevent
> + flip-flopping between the primary slave and other slaves. The possible
> + values and their respective effects are:
> +
> + always or 0 (default)
> +
> + The primary slave becomes the active slave whenever it comes
> + back up.
> +
> + better or 1
> +
> + The primary slave becomes the active slave when it comes back
> + up, if the speed and duplex of the primary slave is better
> + than the speed and duplex of the current active slave.
> +
> + failure or 2
> +
> + The primary slave becomes the active slave only if the current
> + active slave fails and the primary slave is up.
> +
> + When no slaves are active, if the primary comes back up, it becomes the
> + active slave, regardless of the value of primary_reselect.
> +
> updelay
>
> Specifies the time, in milliseconds, to wait before enabling a
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 699bfdd..1127361 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -94,6 +94,7 @@ static int downdelay;
> static int use_carrier = 1;
> static char *mode;
> static char *primary;
> +static char *primary_reselect;
> static char *lacp_rate;
> static char *ad_select;
> static char *xmit_hash_policy;
> @@ -126,6 +127,13 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
> "6 for balance-alb");
> module_param(primary, charp, 0);
> MODULE_PARM_DESC(primary, "Primary network device to use");
> +module_param(primary_reselect, charp, 0);
> +MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "
> + "once it comes up; "
> + "0 for always (default), "
> + "1 for only if speed of primary is not "
> + "better, "
> + "2 for never");
You should remove "not" for option value 1 and use the word failure for option
value 2.
MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "
"once it comes up; "
"0 for always (default), "
"1 for only if speed of primary is "
"better, "
"2 for only on active slave "
"failure");
Apart from this small detail, this sounds good to me.
> module_param(lacp_rate, charp, 0);
> MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
> "(slow/fast)");
> @@ -200,6 +208,13 @@ const struct bond_parm_tbl fail_over_mac_tbl[] = {
> { NULL, -1},
> };
>
> +const struct bond_parm_tbl pri_reselect_tbl[] = {
> +{ "always", BOND_PRI_RESELECT_ALWAYS},
> +{ "better", BOND_PRI_RESELECT_BETTER},
> +{ "failure", BOND_PRI_RESELECT_FAILURE},
> +{ NULL, -1},
> +};
> +
> struct bond_parm_tbl ad_select_tbl[] = {
> { "stable", BOND_AD_STABLE},
> { "bandwidth", BOND_AD_BANDWIDTH},
> @@ -1070,6 +1085,25 @@ out:
>
> }
>
> +static bool bond_should_change_active(struct bonding *bond)
> +{
> + struct slave *prim = bond->primary_slave;
> + struct slave *curr = bond->curr_active_slave;
> +
> + if (!prim || !curr || curr->link != BOND_LINK_UP)
> + return true;
> + if (bond->force_primary) {
> + bond->force_primary = false;
> + return true;
> + }
> + if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER &&
> + (prim->speed < curr->speed ||
> + (prim->speed == curr->speed && prim->duplex <= curr->duplex)))
> + return false;
> + if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE)
> + return false;
> + return true;
> +}
>
> /**
> * find_best_interface - select the best available slave to be the active one
> @@ -1094,7 +1128,8 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
> }
>
> if ((bond->primary_slave) &&
> - bond->primary_slave->link == BOND_LINK_UP) {
> + bond->primary_slave->link == BOND_LINK_UP &&
> + bond_should_change_active(bond)) {
> new_active = bond->primary_slave;
> }
>
> @@ -1675,8 +1710,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
>
> if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
> /* if there is a primary slave, remember it */
> - if (strcmp(bond->params.primary, new_slave->dev->name) == 0)
> + if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
> bond->primary_slave = new_slave;
> + bond->force_primary = true;
> + }
> }
>
> write_lock_bh(&bond->curr_slave_lock);
> @@ -4643,7 +4680,7 @@ int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl)
>
> static int bond_check_params(struct bond_params *params)
> {
> - int arp_validate_value, fail_over_mac_value;
> + int arp_validate_value, fail_over_mac_value, primary_reselect_value;
>
> /*
> * Convert string parameters.
> @@ -4942,6 +4979,20 @@ static int bond_check_params(struct bond_params *params)
> primary = NULL;
> }
>
> + if (primary && primary_reselect) {
> + primary_reselect_value = bond_parse_parm(primary_reselect,
> + pri_reselect_tbl);
> + if (primary_reselect_value == -1) {
> + pr_err(DRV_NAME
> + ": Error: Invalid primary_reselect \"%s\"\n",
> + primary_reselect ==
> + NULL ? "NULL" : primary_reselect);
> + return -EINVAL;
> + }
> + } else {
> + primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;
> + }
> +
> if (fail_over_mac) {
> fail_over_mac_value = bond_parse_parm(fail_over_mac,
> fail_over_mac_tbl);
> @@ -4973,6 +5024,7 @@ static int bond_check_params(struct bond_params *params)
> params->use_carrier = use_carrier;
> params->lacp_fast = lacp_fast;
> params->primary[0] = 0;
> + params->primary_reselect = primary_reselect_value;
> params->fail_over_mac = fail_over_mac_value;
>
> if (primary) {
> diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
> index 6044e12..42c44f2 100644
> --- a/drivers/net/bonding/bond_sysfs.c
> +++ b/drivers/net/bonding/bond_sysfs.c
> @@ -1212,6 +1212,61 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
> bonding_show_primary, bonding_store_primary);
>
> /*
> + * Show and set the primary_reselect flag.
> + */
> +static ssize_t bonding_show_primary_reselect(struct device *d,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct bonding *bond = to_bond(d);
> +
> + return sprintf(buf, "%s %d\n",
> + pri_reselect_tbl[bond->params.primary_reselect].modename,
> + bond->params.primary_reselect);
> +}
> +
> +static ssize_t bonding_store_primary_reselect(struct device *d,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + int new_value, ret = count;
> + struct bonding *bond = to_bond(d);
> +
> + if (!rtnl_trylock())
> + return restart_syscall();
> +
> + new_value = bond_parse_parm(buf, pri_reselect_tbl);
> + if (new_value < 0) {
> + pr_err(DRV_NAME
> + ": %s: Ignoring invalid primary_reselect value %.*s.\n",
> + bond->dev->name,
> + (int) strlen(buf) - 1, buf);
> + ret = -EINVAL;
> + goto out;
> + } else {
> + bond->params.primary_reselect = new_value;
> + pr_info(DRV_NAME ": %s: setting primary_reselect to %s (%d).\n",
> + bond->dev->name, pri_reselect_tbl[new_value].modename,
> + new_value);
> + if (new_value == BOND_PRI_RESELECT_ALWAYS ||
> + new_value == BOND_PRI_RESELECT_BETTER) {
> + bond->force_primary = true;
> + read_lock(&bond->lock);
> + write_lock_bh(&bond->curr_slave_lock);
> + bond_select_active_slave(bond);
> + write_unlock_bh(&bond->curr_slave_lock);
> + read_unlock(&bond->lock);
> + }
> + }
> +out:
> + rtnl_unlock();
> + return ret;
> +}
> +static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR,
> + bonding_show_primary_reselect,
> + bonding_store_primary_reselect);
> +
> +/*
> * Show and set the use_carrier flag.
> */
> static ssize_t bonding_show_carrier(struct device *d,
> @@ -1500,6 +1555,7 @@ static struct attribute *per_bond_attrs[] = {
> &dev_attr_num_unsol_na.attr,
> &dev_attr_miimon.attr,
> &dev_attr_primary.attr,
> + &dev_attr_primary_reselect.attr,
> &dev_attr_use_carrier.attr,
> &dev_attr_active_slave.attr,
> &dev_attr_mii_status.attr,
> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
> index 6824771..b5b1530 100644
> --- a/drivers/net/bonding/bonding.h
> +++ b/drivers/net/bonding/bonding.h
> @@ -131,6 +131,7 @@ struct bond_params {
> int lacp_fast;
> int ad_select;
> char primary[IFNAMSIZ];
> + int primary_reselect;
> __be32 arp_targets[BOND_MAX_ARP_TARGETS];
> };
>
> @@ -190,6 +191,7 @@ struct bonding {
> struct slave *curr_active_slave;
> struct slave *current_arp_slave;
> struct slave *primary_slave;
> + bool force_primary;
> s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
> rwlock_t lock;
> rwlock_t curr_slave_lock;
> @@ -258,6 +260,10 @@ static inline bool bond_is_lb(const struct bonding *bond)
> || bond->params.mode == BOND_MODE_ALB;
> }
>
> +#define BOND_PRI_RESELECT_ALWAYS 0
> +#define BOND_PRI_RESELECT_BETTER 1
> +#define BOND_PRI_RESELECT_FAILURE 2
> +
> #define BOND_FOM_NONE 0
> #define BOND_FOM_ACTIVE 1
> #define BOND_FOM_FOLLOW 2
> @@ -348,6 +354,7 @@ extern const struct bond_parm_tbl bond_mode_tbl[];
> extern const struct bond_parm_tbl xmit_hashtype_tbl[];
> extern const struct bond_parm_tbl arp_validate_tbl[];
> extern const struct bond_parm_tbl fail_over_mac_tbl[];
> +extern const struct bond_parm_tbl pri_reselect_tbl[];
> extern struct bond_parm_tbl ad_select_tbl[];
>
> #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
>
Nicolas.
^ permalink raw reply
* Re: [origin tree build failure] [PATCH] Fix: (.text+0x22ec88): undefined reference to `ieee80211_unregister_hw'
From: Kalle Valo @ 2009-09-18 18:09 UTC (permalink / raw)
To: Ingo Molnar
Cc: David Miller, Bob Copeland, Coelho Luciano (Nokia-D/Helsinki),
Oikarinen Juuso (Nokia-D/Tampere), torvalds@linux-foundation.org,
akpm@linux-foundation.org, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, John W. Linville,
linux-wireless-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20090915114958.GA26902-X9Un+BFzKDI@public.gmane.org>
Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org> writes:
> FYI, -tip testing found that something in this lot broke the build with
> certain configs (attached):
>
> drivers/built-in.o: In function `wl1251_free_hw':
> (.text+0x22ec88): undefined reference to `ieee80211_unregister_hw'
> drivers/built-in.o: In function `wl1251_free_hw':
> (.text+0x22ecf5): undefined reference to `ieee80211_free_hw'
> drivers/built-in.o: In function `wl1251_op_bss_info_changed':
> wl1251_main.c:(.text+0x22f161): undefined reference to `ieee80211_beacon_get'
> drivers/built-in.o: In function `wl1251_op_config':
> wl1251_main.c:(.text+0x22f2f8): undefined reference to `ieee80211_frequency_to_channel'
> drivers/built-in.o: In function `wl1251_op_stop':
> wl1251_main.c:(.text+0x22f554): undefined reference to `ieee80211_scan_completed'
> drivers/built-in.o: In function `wl1251_op_tx':
> wl1251_main.c:(.text+0x22f6a5): undefined reference to `ieee80211_queue_work'
> wl1251_main.c:(.text+0x22f6b6): undefined reference to `ieee80211_stop_queues'
> drivers/built-in.o: In function `wl1251_alloc_hw':
> (.text+0x22f710): undefined reference to `ieee80211_alloc_hw'
> drivers/built-in.o: In function `wl1251_alloc_hw':
> (.text+0x22f9e4): undefined reference to `ieee80211_free_hw'
> drivers/built-in.o: In function `wl1251_init_ieee80211':
> (.text+0x2305df): undefined reference to `ieee80211_register_hw'
> drivers/built-in.o: In function `wl1251_event_handle':
> (.text+0x2306c4): undefined reference to `ieee80211_scan_completed'
> drivers/built-in.o: In function `wl1251_tx_flush':
> (.text+0x230810): undefined reference to `ieee80211_tx_status'
> drivers/built-in.o: In function `wl1251_tx_flush':
> (.text+0x230846): undefined reference to `ieee80211_tx_status'
> drivers/built-in.o: In function `wl1251_tx_frame':
> wl1251_tx.c:(.text+0x230a97): undefined reference to `ieee80211_hdrlen'
> drivers/built-in.o: In function `wl1251_tx_complete':
> (.text+0x230d30): undefined reference to `ieee80211_get_hdrlen_from_skb'
> drivers/built-in.o: In function `wl1251_tx_complete':
> (.text+0x230d58): undefined reference to `ieee80211_tx_status'
> drivers/built-in.o: In function `wl1251_tx_complete':
> (.text+0x230dc0): undefined reference to `ieee80211_wake_queues'
> drivers/built-in.o: In function `wl1251_tx_work':
> (.text+0x230f57): undefined reference to `ieee80211_stop_queues'
> drivers/built-in.o: In function `wl1251_rx':
> (.text+0x231187): undefined reference to `ieee80211_channel_to_frequency'
> drivers/built-in.o: In function `wl1251_rx':
> (.text+0x2311e4): undefined reference to `ieee80211_rx'
>
> Turning CONFIG_WL1251 off makes it build.
>
> A (very) quick first look suggests that not all prior dependencies were
> carried over to the new drivers in drivers/net/wireless/wl12xx/Kconfig:
>
> -config WL12XX
> - tristate "TI wl1251/wl1271 support"
> - depends on MAC80211 && WLAN_80211 && SPI_MASTER &&
> GENERIC_HARDIRQS && EXPERIMENTAL
> +menuconfig WL12XX
> + boolean "TI wl12xx driver support"
> + depends on MAC80211 && WLAN_80211 && EXPERIMENTAL
> + ---help---
> + This will enable TI wl12xx driver support. The drivers make
> + use of the mac80211 stack.
> +
> +config WL1251
> + tristate "TI wl1251 support"
> + depends on WL12XX && GENERIC_HARDIRQS
>
> the friction is between modular/built-in mode:
>
> CONFIG_WL1251=y
> CONFIG_MAC80211=m
>
> Kconfig does not carry over the modular dependency from WL12XX to
> WL1251. An explicit rule via the patch below turns CONFIG_WL1251 into a
> modular entry as well:
>
> CONFIG_WL12XX=y
> CONFIG_WL1251=m
>
> ( Note: i have tested this patch with this particular config and it
> solves the problem there but have not investigated any deeper. )
>
> Ingo
>
> Signed-off-by: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
I missed this state entirely, thanks for fixing this.
Acked-by: Kalle Valo <kalle.valo-xNZwKgViW5gAvxtiuMwx3w@public.gmane.org>
"wl1251:" prefix is just missing from the commit summary.
Who is going to take the patch? Should I send this to John?
--
Kalle Valo
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [RFC] defer skb allocation in virtio_net -- mergable buff part
From: Shirley Ma @ 2009-09-18 17:04 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, kvm, linux-kernel
In-Reply-To: <20090825114143.GA13884@redhat.com>
Hello Michael,
I am working on the patch to address the question you raised below. I am
adding one more function -- destroy_buf in virtqueue_ops -- so we don't
need to maintain the list of pending buffers in the upper layer (like
virtio_net). When the device is shut down or removed, this buffer-free
function will be called to release all pending buffers in virtio_ring on
behalf of virtio_net.
The rest of the comments are minor. The new patch will defer skb allocation
for both mergeable and non-mergeable buffers.
Thanks
Shirley
On Tue, 2009-08-25 at 14:41 +0300, Michael S. Tsirkin wrote:
> > #define VIRTNET_SEND_COMMAND_SG_MAX 2
> >
> > +struct page_list
> > +{
>
> Kernel style is "struct page_list {".
> Also, prefix with virtnet_?
>
> > + struct page *page;
> > + struct list_head list;
> > +};
> > +
> > struct virtnet_info
> > {
> > struct virtio_device *vdev;
> > @@ -72,6 +79,8 @@ struct virtnet_info
> >
> > /* Chain pages by the private ptr. */
> > struct page *pages;
>
> Do we need the pages list now? Can we do without?
>
> Pls document fields below.
>
> > + struct list_head used_pages;
>
> Seems a waste to have this list just for dev down.
> Extend virtio to give us all buffers from vq
> on shutdown?
>
^ permalink raw reply
* Re: [PATCH 2/4 v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
From: Andy Gospodarek @ 2009-09-18 15:56 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: netdev, bonding-devel
In-Reply-To: <20090918153622.GX8515@gospo.rdu.redhat.com>
On Fri, Sep 18, 2009 at 11:36:22AM -0400, Andy Gospodarek wrote:
> On Wed, Sep 16, 2009 at 04:36:09PM -0700, Jay Vosburgh wrote:
> > Andy Gospodarek <andy@greyhouse.net> wrote:
> >
> > >
> > >Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
> >
> > When testing this, I'm getting a lockdep warning. It appears to
> > be unhappy that tlb_choose_channel acquires the tx / rx hash table locks
> > in the order tx then rx, but rlb_choose_channel -> alb_get_best_slave
> > acquires the locks in the other order. I applied all four patches, but
> > it looks like the change that trips lockdep is in this patch (#2).
> >
> > I haven't gotten an actual deadlock from this, although it seems
> > plausible if there are two cpus in bond_alb_xmit at the same time, and
> > one of them is sending an ARP.
> >
> > One fairly straightforward fix would be to combine the rx and tx
> > hash table locks into a single lock. I suspect that wouldn't have any
> > real performance penalty, since the rx hash table lock is generally not
> > acquired very often (unlike the tx lock, which is taken for every packet
> > that goes out).
> >
> > Also, FYI, two of the four patches had trailing whitespace. I
> > believe it was #2 and #4.
> >
> > Thoughts?
>
> Jay,
>
> This patch should address both the deadlock and whitespace concerns.
> I ran a kernel with LOCKDEP enabled and saw no warnings while passing
> traffic on the bond while pulling cables and while removing the module.
> Here it is....
>
Adding the version and signed-off-by lines might be nice, eh?
[PATCH v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
I noticed that it was easy for alb (mode 6) bonding to get into a state
where the tx hash-table and rx hash-table are out of sync (there is
really nothing to keep them synchronized), and we will transmit traffic
destined for a host on one slave and send ARP frames to the same slave
from another interface using a different source MAC.
There is no compelling reason to do this, so this patch makes sure the
rx hash-table changes whenever the tx hash-table is updated based on
device load. This patch also drops the code that does rlb re-balancing
since the balancing will now be controlled by the tx hash-table based on
transmit load. In order to address an issue found with the initial
patch, I have also combined the rx and tx hash table lock into a single
lock. This will facilitate moving these into a single table at some
point.
Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
---
drivers/net/bonding/bond_alb.c | 203 +++++++++++++++-------------------------
drivers/net/bonding/bond_alb.h | 3 +-
2 files changed, 75 insertions(+), 131 deletions(-)
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index bcf25c6..04b7055 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -111,6 +111,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
+static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index);
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
@@ -124,18 +125,18 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
return hash;
}
-/*********************** tlb specific functions ***************************/
-
-static inline void _lock_tx_hashtbl(struct bonding *bond)
+/********************* hash table lock functions *************************/
+static inline void _lock_hashtbl(struct bonding *bond)
{
- spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+ spin_lock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
}
-static inline void _unlock_tx_hashtbl(struct bonding *bond)
+static inline void _unlock_hashtbl(struct bonding *bond)
{
- spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+ spin_unlock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
}
+/*********************** tlb specific functions ***************************/
/* Caller must hold tx_hashtbl lock */
static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
{
@@ -163,7 +164,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
struct tlb_client_info *tx_hash_table;
u32 index;
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
/* clear slave from tx_hashtbl */
tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
@@ -180,7 +181,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
tlb_init_slave(slave);
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Must be called before starting the monitor timer */
@@ -191,7 +192,7 @@ static int tlb_initialize(struct bonding *bond)
struct tlb_client_info *new_hashtbl;
int i;
- spin_lock_init(&(bond_info->tx_hashtbl_lock));
+ spin_lock_init(&(bond_info->hashtbl_lock));
new_hashtbl = kzalloc(size, GFP_KERNEL);
if (!new_hashtbl) {
@@ -200,7 +201,7 @@ static int tlb_initialize(struct bonding *bond)
bond->dev->name);
return -1;
}
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
bond_info->tx_hashtbl = new_hashtbl;
@@ -208,7 +209,7 @@ static int tlb_initialize(struct bonding *bond)
tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1);
}
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return 0;
}
@@ -218,12 +219,12 @@ static void tlb_deinitialize(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
kfree(bond_info->tx_hashtbl);
bond_info->tx_hashtbl = NULL;
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Caller must hold bond lock for read */
@@ -264,24 +265,6 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
return least_loaded;
}
-/* Caller must hold bond lock for read and hashtbl lock */
-static struct slave *tlb_get_best_slave(struct bonding *bond, u32 hash_index)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
- struct slave *last_slave = tx_hash_table[hash_index].last_slave;
- struct slave *next_slave = NULL;
-
- if (last_slave && SLAVE_IS_OK(last_slave)) {
- /* Use the last slave listed in the tx hashtbl if:
- the last slave currently is essentially unloaded. */
- if (SLAVE_TLB_INFO(last_slave).load < 10)
- next_slave = last_slave;
- }
-
- return next_slave ? next_slave : tlb_get_least_loaded_slave(bond);
-}
-
/* Caller must hold bond lock for read */
static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
{
@@ -289,13 +272,12 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
struct tlb_client_info *hash_table;
struct slave *assigned_slave;
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_table = bond_info->tx_hashtbl;
assigned_slave = hash_table[hash_index].tx_slave;
if (!assigned_slave) {
- assigned_slave = tlb_get_best_slave(bond, hash_index);
-
+ assigned_slave = alb_get_best_slave(bond, hash_index);
if (assigned_slave) {
struct tlb_slave_info *slave_info =
&(SLAVE_TLB_INFO(assigned_slave));
@@ -319,20 +301,52 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
hash_table[hash_index].tx_bytes += skb_len;
}
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
/*********************** rlb specific functions ***************************/
-static inline void _lock_rx_hashtbl(struct bonding *bond)
+
+/* Caller must hold bond lock for read and hashtbl lock */
+static struct slave *rlb_update_rx_table(struct bonding *bond, struct slave *next_slave, u32 hash_index)
{
- spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+
+ /* check rlb table and correct it if wrong */
+ if (bond_info->rlb_enabled) {
+ struct rlb_client_info *rx_client_info = &(bond_info->rx_hashtbl[hash_index]);
+
+ /* if the new slave computed by tlb checks doesn't match rlb, stop rlb from using it */
+ if (next_slave && (next_slave != rx_client_info->slave))
+ rx_client_info->slave = next_slave;
+ }
+ return next_slave;
}
-static inline void _unlock_rx_hashtbl(struct bonding *bond)
+/* Caller must hold bond lock for read and hashtbl lock */
+static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index)
{
- spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
+ struct slave *last_slave = tx_hash_table[hash_index].last_slave;
+ struct slave *next_slave = NULL;
+
+ /* presume the next slave will be the least loaded one */
+ next_slave = tlb_get_least_loaded_slave(bond);
+
+ if (last_slave && SLAVE_IS_OK(last_slave)) {
+ /* Use the last slave listed in the tx hashtbl if:
+ the last slave currently is essentially unloaded. */
+ if (SLAVE_TLB_INFO(last_slave).load < 10)
+ next_slave = last_slave;
+ }
+
+ /* update the rlb hashtbl if there was a previous entry */
+ if (bond_info->rlb_enabled)
+ rlb_update_rx_table(bond, next_slave, hash_index);
+
+ return next_slave;
}
/* when an ARP REPLY is received from a client update its info
@@ -344,7 +358,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -358,7 +372,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
bond_info->rx_ntt = 1;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev)
@@ -402,38 +416,6 @@ out:
return res;
}
-/* Caller must hold bond lock for read */
-static struct slave *rlb_next_rx_slave(struct bonding *bond)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct slave *rx_slave, *slave, *start_at;
- int i = 0;
-
- if (bond_info->next_rx_slave) {
- start_at = bond_info->next_rx_slave;
- } else {
- start_at = bond->first_slave;
- }
-
- rx_slave = NULL;
-
- bond_for_each_slave_from(bond, slave, i, start_at) {
- if (SLAVE_IS_OK(slave)) {
- if (!rx_slave) {
- rx_slave = slave;
- } else if (slave->speed > rx_slave->speed) {
- rx_slave = slave;
- }
- }
- }
-
- if (rx_slave) {
- bond_info->next_rx_slave = rx_slave->next;
- }
-
- return rx_slave;
-}
-
/* teach the switch the mac of a disabled slave
* on the primary for fault tolerance
*
@@ -468,14 +450,14 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
u32 index, next_index;
/* clear slave from rx_hashtbl */
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
rx_hash_table = bond_info->rx_hashtbl;
index = bond_info->rx_hashtbl_head;
for (; index != RLB_NULL_INDEX; index = next_index) {
next_index = rx_hash_table[index].next;
if (rx_hash_table[index].slave == slave) {
- struct slave *assigned_slave = rlb_next_rx_slave(bond);
+ struct slave *assigned_slave = alb_get_best_slave(bond, index);
if (assigned_slave) {
rx_hash_table[index].slave = assigned_slave;
@@ -499,7 +481,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
write_lock_bh(&bond->curr_slave_lock);
@@ -558,7 +540,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -576,7 +558,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
*/
bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* The slave was assigned a new mac address - update the clients */
@@ -587,7 +569,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
int ntt = 0;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -607,7 +589,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* mark all clients using src_ip to be updated */
@@ -617,7 +599,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -643,7 +625,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Caller must hold both bond and ptr locks for read */
@@ -655,7 +637,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
struct rlb_client_info *client_info;
u32 hash_index = 0;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src));
client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -671,7 +653,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
assigned_slave = client_info->slave;
if (assigned_slave) {
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
} else {
@@ -687,7 +669,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
}
}
/* assign a new slave */
- assigned_slave = rlb_next_rx_slave(bond);
+ assigned_slave = alb_get_best_slave(bond, hash_index);
if (assigned_slave) {
client_info->ip_src = arp->ip_src;
@@ -723,7 +705,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
@@ -771,36 +753,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
return tx_slave;
}
-/* Caller must hold bond lock for read */
-static void rlb_rebalance(struct bonding *bond)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct slave *assigned_slave;
- struct rlb_client_info *client_info;
- int ntt;
- u32 hash_index;
-
- _lock_rx_hashtbl(bond);
-
- ntt = 0;
- hash_index = bond_info->rx_hashtbl_head;
- for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
- client_info = &(bond_info->rx_hashtbl[hash_index]);
- assigned_slave = rlb_next_rx_slave(bond);
- if (assigned_slave && (client_info->slave != assigned_slave)) {
- client_info->slave = assigned_slave;
- client_info->ntt = 1;
- ntt = 1;
- }
- }
-
- /* update the team's flag only after the whole iteration */
- if (ntt) {
- bond_info->rx_ntt = 1;
- }
- _unlock_rx_hashtbl(bond);
-}
-
/* Caller must hold rx_hashtbl lock */
static void rlb_init_table_entry(struct rlb_client_info *entry)
{
@@ -817,8 +769,6 @@ static int rlb_initialize(struct bonding *bond)
int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
int i;
- spin_lock_init(&(bond_info->rx_hashtbl_lock));
-
new_hashtbl = kmalloc(size, GFP_KERNEL);
if (!new_hashtbl) {
printk(KERN_ERR DRV_NAME
@@ -826,7 +776,7 @@ static int rlb_initialize(struct bonding *bond)
bond->dev->name);
return -1;
}
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
bond_info->rx_hashtbl = new_hashtbl;
@@ -836,7 +786,7 @@ static int rlb_initialize(struct bonding *bond)
rlb_init_table_entry(bond_info->rx_hashtbl + i);
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
/*initialize packet type*/
pk_type->type = cpu_to_be16(ETH_P_ARP);
@@ -855,13 +805,13 @@ static void rlb_deinitialize(struct bonding *bond)
dev_remove_pack(&(bond_info->rlb_pkt_type));
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
kfree(bond_info->rx_hashtbl);
bond_info->rx_hashtbl = NULL;
bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
@@ -869,7 +819,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
u32 curr_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
curr_index = bond_info->rx_hashtbl_head;
while (curr_index != RLB_NULL_INDEX) {
@@ -894,7 +844,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
curr_index = next_index;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/*********************** tlb/rlb shared functions *********************/
@@ -1521,11 +1471,6 @@ void bond_alb_monitor(struct work_struct *work)
read_lock(&bond->lock);
}
- if (bond_info->rlb_rebalance) {
- bond_info->rlb_rebalance = 0;
- rlb_rebalance(bond);
- }
-
/* check if clients need updating */
if (bond_info->rx_ntt) {
if (bond_info->rlb_update_delay_counter) {
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index b65fd29..09d755a 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -90,7 +90,7 @@ struct tlb_slave_info {
struct alb_bond_info {
struct timer_list alb_timer;
struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
- spinlock_t tx_hashtbl_lock;
+ spinlock_t hashtbl_lock; /* lock for both tables */
u32 unbalanced_load;
int tx_rebalance_counter;
int lp_counter;
@@ -98,7 +98,6 @@ struct alb_bond_info {
int rlb_enabled;
struct packet_type rlb_pkt_type;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */
- spinlock_t rx_hashtbl_lock;
u32 rx_hashtbl_head;
u8 rx_ntt; /* flag - need to transmit
* to all rx clients
--
1.5.5.6
^ permalink raw reply related
* Re: [PATCH 4/4 v2] bonding: add sysfs files to display tlb and alb hash table contents
From: Andy Gospodarek @ 2009-09-18 15:53 UTC (permalink / raw)
To: netdev, fubar, bonding-devel
In-Reply-To: <20090911211317.GT8515@gospo.rdu.redhat.com>
On Fri, Sep 11, 2009 at 05:13:17PM -0400, Andy Gospodarek wrote:
>
> bonding: add sysfs files to display tlb and alb hash table contents
>
> While debugging some problems with alb (mode 6) bonding I realized that
> being able to output the contents of both hash tables would be helpful.
> This is what the output looks like for the two files:
>
> device load
> eth1 491
> eth2 491
> hash device last device tx bytes load next previous
> 2 eth1 eth1 2254 491 0 0
> 3 eth2 eth2 2744 491 0 0
> 6 eth2 0 488 0 0
> 8 eth2 0 461698 0 0
> 1b eth2 0 249 0 0
> eb eth2 0 21 0 0
> ff eth2 0 22 0 0
>
> hash ip_src ip_dst mac_dst slave assign ntt
> 2 10.0.3.2 10.0.3.11 00:e0:81:71:ee:a9 eth1 1 0
> 3 10.0.3.2 10.0.3.10 00:e0:81:71:ee:a9 eth2 1 0
> 8 10.0.3.2 10.0.3.1 00:e0:81:71:ee:a9 eth2 1 0
>
> These were a great help debugging the fixes I have just posted and they
> might be helpful for others, so I decided to include them in my
> patchset.
>
> Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
>
Needed to repost since patch 2/4 changed and first patch had whitespace
issues:
[PATCH v2] bonding: add sysfs files to display tlb and alb hash table contents
While debugging some problems with alb (mode 6) bonding I realized that
being able to output the contents of both hash tables would be helpful.
This is what the output looks like for the two files:
device load
eth1 491
eth2 491
hash device last device tx bytes load next previous
2 eth1 eth1 2254 491 0 0
3 eth2 eth2 2744 491 0 0
6 eth2 0 488 0 0
8 eth2 0 461698 0 0
1b eth2 0 249 0 0
eb eth2 0 21 0 0
ff eth2 0 22 0 0
hash ip_src ip_dst mac_dst slave assign ntt
2 10.0.3.2 10.0.3.11 00:e0:81:71:ee:a9 eth1 1 0
3 10.0.3.2 10.0.3.10 00:e0:81:71:ee:a9 eth2 1 0
8 10.0.3.2 10.0.3.1 00:e0:81:71:ee:a9 eth2 1 0
These were a great help debugging the fixes I have just posted and they
might be helpful for others, so I decided to include them in my post.
Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
---
drivers/net/bonding/bond_alb.c | 61 ++++++++++++++++++++++++++++++++++++++
drivers/net/bonding/bond_alb.h | 2 +
drivers/net/bonding/bond_sysfs.c | 40 +++++++++++++++++++++++++
3 files changed, 103 insertions(+), 0 deletions(-)
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 5d51489..adc5acd 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -750,6 +750,67 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
return tx_slave;
}
+int rlb_print_rx_hashtbl(struct bonding *bond, char *buf)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *client_info;
+ u32 hash_index;
+ u32 count = 0;
+
+ _lock_hashtbl(bond);
+
+ count = sprintf(buf, "hash ip_src ip_dst mac_dst slave assign ntt\n");
+ hash_index = bond_info->rx_hashtbl_head;
+ for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
+ client_info = &(bond_info->rx_hashtbl[hash_index]);
+ count += sprintf(buf + count,"%-4x %-15pi4 %-15pi4 %pM %-5s %-6d %d\n",
+ hash_index,
+ &client_info->ip_src,
+ &client_info->ip_dst,
+ client_info->mac_dst,
+ client_info->slave->dev->name,
+ client_info->assigned,
+ client_info->ntt);
+ }
+
+ _unlock_hashtbl(bond);
+ return count;
+}
+
+int tlb_print_tx_hashtbl(struct bonding *bond, char *buf)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 hash_index;
+ u32 count = 0;
+ struct slave *slave;
+ int i;
+
+ _lock_hashtbl(bond);
+
+ count += sprintf(buf, "device load\n");
+ bond_for_each_slave(bond, slave, i) {
+ struct tlb_slave_info *slave_info = &(SLAVE_TLB_INFO(slave));
+ count += sprintf(buf + count,"%-7s %d\n",slave->dev->name,slave_info->load);
+ }
+ count += sprintf(buf + count, "hash device last device tx bytes load next previous\n");
+ for (hash_index = 0; hash_index < TLB_HASH_TABLE_SIZE; hash_index++) {
+ struct tlb_client_info *client_info = &(bond_info->tx_hashtbl[hash_index]);
+ if (client_info->tx_slave || client_info->last_slave) {
+ count += sprintf(buf + count,"%-4x %-8s %-13s %-14d %-11d %-4x %d\n",
+ hash_index,
+ (client_info->tx_slave) ? client_info->tx_slave->dev->name : "",
+ (client_info->last_slave) ? client_info->last_slave->dev->name : "",
+ client_info->tx_bytes,
+ client_info->load_history,
+ (client_info->next != TLB_NULL_INDEX) ? client_info->next : 0,
+ (client_info->prev != TLB_NULL_INDEX) ? client_info->prev : 0);
+ }
+ }
+
+ _unlock_hashtbl(bond);
+ return count;
+}
+
/* Caller must hold rx_hashtbl lock */
static void rlb_init_table_entry(struct rlb_client_info *entry)
{
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index 09d755a..57e761b 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -131,5 +131,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
void bond_alb_monitor(struct work_struct *);
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
+int rlb_print_rx_hashtbl(struct bonding *bond, char *buf);
+int tlb_print_tx_hashtbl(struct bonding *bond, char *buf);
#endif /* __BOND_ALB_H__ */
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 55bf34f..1123e1f 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1480,6 +1480,44 @@ static ssize_t bonding_show_ad_partner_mac(struct device *d,
static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL);
+/*
+ * Show current tlb/alb tx hash table.
+ */
+static ssize_t bonding_show_tlb_tx_hash(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int count = 0;
+ struct bonding *bond = to_bond(d);
+
+ if (bond->params.mode == BOND_MODE_ALB ||
+ bond->params.mode == BOND_MODE_TLB) {
+ count = tlb_print_tx_hashtbl(bond, buf);
+ }
+
+ return count;
+}
+static DEVICE_ATTR(tlb_tx_hash, S_IRUGO, bonding_show_tlb_tx_hash, NULL);
+
+
+/*
+ * Show current alb rx hash table.
+ */
+static ssize_t bonding_show_alb_rx_hash(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int count = 0;
+ struct bonding *bond = to_bond(d);
+
+ if (bond->params.mode == BOND_MODE_ALB) {
+ count = rlb_print_rx_hashtbl(bond, buf);
+ }
+
+ return count;
+}
+static DEVICE_ATTR(alb_rx_hash, S_IRUGO, bonding_show_alb_rx_hash, NULL);
+
static struct attribute *per_bond_attrs[] = {
&dev_attr_slaves.attr,
@@ -1505,6 +1543,8 @@ static struct attribute *per_bond_attrs[] = {
&dev_attr_ad_actor_key.attr,
&dev_attr_ad_partner_key.attr,
&dev_attr_ad_partner_mac.attr,
+ &dev_attr_alb_rx_hash.attr,
+ &dev_attr_tlb_tx_hash.attr,
NULL,
};
--
1.5.5.6
^ permalink raw reply related
* Re: [PATCH 2/4] bonding: make sure tx and rx hash tables stay in sync when using alb mode
From: Andy Gospodarek @ 2009-09-18 15:36 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: netdev, bonding-devel
In-Reply-To: <27763.1253144169@death.nxdomain.ibm.com>
On Wed, Sep 16, 2009 at 04:36:09PM -0700, Jay Vosburgh wrote:
> Andy Gospodarek <andy@greyhouse.net> wrote:
>
> >
> >Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
>
> When testing this, I'm getting a lockdep warning. It appears to
> be unhappy that tlb_choose_channel acquires the tx / rx hash table locks
> in the order tx then rx, but rlb_choose_channel -> alb_get_best_slave
> acquires the locks in the other order. I applied all four patches, but
> it looks like the change that trips lockdep is in this patch (#2).
>
> I haven't gotten an actual deadlock from this, although it seems
> plausible if there are two cpus in bond_alb_xmit at the same time, and
> one of them is sending an ARP.
>
> One fairly straightforward fix would be to combine the rx and tx
> hash table locks into a single lock. I suspect that wouldn't have any
> real performance penalty, since the rx hash table lock is generally not
> acquired very often (unlike the tx lock, which is taken for every packet
> that goes out).
>
> Also, FYI, two of the four patches had trailing whitespace. I
> believe it was #2 and #4.
>
> Thoughts?
Jay,
This patch should address both the deadlock and whitespace concerns.
I ran a kernel with LOCKDEP enabled and saw no warnings while passing
traffic on the bond while pulling cables and while removing the module.
Here it is....
[PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
I noticed that it was easy for alb (mode 6) bonding to get into a state
where the tx hash-table and rx hash-table are out of sync (there is
really nothing to keep them synchronized), and we will transmit traffic
destined for a host on one slave and send ARP frames to the same host
from another interface using a different source MAC.
There is no compelling reason to do this, so this patch makes sure the
rx hash-table changes whenever the tx hash-table is updated based on
device load. This patch also drops the code that does rlb re-balancing
since the balancing will now be controlled by the tx hash-table based on
transmit load. In order to address an issue found with the initial
patch, I have also combined the rx and tx hash table lock into a single
lock. This will facilitate moving these into a single table at some
point.
---
drivers/net/bonding/bond_alb.c | 203 +++++++++++++++-------------------------
drivers/net/bonding/bond_alb.h | 3 +-
2 files changed, 75 insertions(+), 131 deletions(-)
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index bcf25c6..04b7055 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -111,6 +111,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
+static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index);
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
@@ -124,18 +125,18 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
return hash;
}
-/*********************** tlb specific functions ***************************/
-
-static inline void _lock_tx_hashtbl(struct bonding *bond)
+/********************* hash table lock functions *************************/
+static inline void _lock_hashtbl(struct bonding *bond)
{
- spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+ spin_lock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
}
-static inline void _unlock_tx_hashtbl(struct bonding *bond)
+static inline void _unlock_hashtbl(struct bonding *bond)
{
- spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+ spin_unlock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
}
+/*********************** tlb specific functions ***************************/
/* Caller must hold tx_hashtbl lock */
static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
{
@@ -163,7 +164,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
struct tlb_client_info *tx_hash_table;
u32 index;
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
/* clear slave from tx_hashtbl */
tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
@@ -180,7 +181,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
tlb_init_slave(slave);
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Must be called before starting the monitor timer */
@@ -191,7 +192,7 @@ static int tlb_initialize(struct bonding *bond)
struct tlb_client_info *new_hashtbl;
int i;
- spin_lock_init(&(bond_info->tx_hashtbl_lock));
+ spin_lock_init(&(bond_info->hashtbl_lock));
new_hashtbl = kzalloc(size, GFP_KERNEL);
if (!new_hashtbl) {
@@ -200,7 +201,7 @@ static int tlb_initialize(struct bonding *bond)
bond->dev->name);
return -1;
}
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
bond_info->tx_hashtbl = new_hashtbl;
@@ -208,7 +209,7 @@ static int tlb_initialize(struct bonding *bond)
tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1);
}
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return 0;
}
@@ -218,12 +219,12 @@ static void tlb_deinitialize(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
kfree(bond_info->tx_hashtbl);
bond_info->tx_hashtbl = NULL;
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Caller must hold bond lock for read */
@@ -264,24 +265,6 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
return least_loaded;
}
-/* Caller must hold bond lock for read and hashtbl lock */
-static struct slave *tlb_get_best_slave(struct bonding *bond, u32 hash_index)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
- struct slave *last_slave = tx_hash_table[hash_index].last_slave;
- struct slave *next_slave = NULL;
-
- if (last_slave && SLAVE_IS_OK(last_slave)) {
- /* Use the last slave listed in the tx hashtbl if:
- the last slave currently is essentially unloaded. */
- if (SLAVE_TLB_INFO(last_slave).load < 10)
- next_slave = last_slave;
- }
-
- return next_slave ? next_slave : tlb_get_least_loaded_slave(bond);
-}
-
/* Caller must hold bond lock for read */
static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
{
@@ -289,13 +272,12 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
struct tlb_client_info *hash_table;
struct slave *assigned_slave;
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_table = bond_info->tx_hashtbl;
assigned_slave = hash_table[hash_index].tx_slave;
if (!assigned_slave) {
- assigned_slave = tlb_get_best_slave(bond, hash_index);
-
+ assigned_slave = alb_get_best_slave(bond, hash_index);
if (assigned_slave) {
struct tlb_slave_info *slave_info =
&(SLAVE_TLB_INFO(assigned_slave));
@@ -319,20 +301,52 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
hash_table[hash_index].tx_bytes += skb_len;
}
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
/*********************** rlb specific functions ***************************/
-static inline void _lock_rx_hashtbl(struct bonding *bond)
+
+/* Caller must hold bond lock for read and hashtbl lock */
+static struct slave *rlb_update_rx_table(struct bonding *bond, struct slave *next_slave, u32 hash_index)
{
- spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+
+ /* check rlb table and correct it if wrong */
+ if (bond_info->rlb_enabled) {
+ struct rlb_client_info *rx_client_info = &(bond_info->rx_hashtbl[hash_index]);
+
+ /* if the new slave computed by tlb checks doesn't match rlb, stop rlb from using it */
+ if (next_slave && (next_slave != rx_client_info->slave))
+ rx_client_info->slave = next_slave;
+ }
+ return next_slave;
}
-static inline void _unlock_rx_hashtbl(struct bonding *bond)
+/* Caller must hold bond lock for read and hashtbl lock */
+static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index)
{
- spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
+ struct slave *last_slave = tx_hash_table[hash_index].last_slave;
+ struct slave *next_slave = NULL;
+
+ /* presume the next slave will be the least loaded one */
+ next_slave = tlb_get_least_loaded_slave(bond);
+
+ if (last_slave && SLAVE_IS_OK(last_slave)) {
+ /* Use the last slave listed in the tx hashtbl if:
+ the last slave currently is essentially unloaded. */
+ if (SLAVE_TLB_INFO(last_slave).load < 10)
+ next_slave = last_slave;
+ }
+
+ /* update the rlb hashtbl if there was a previous entry */
+ if (bond_info->rlb_enabled)
+ rlb_update_rx_table(bond, next_slave, hash_index);
+
+ return next_slave;
}
/* when an ARP REPLY is received from a client update its info
@@ -344,7 +358,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -358,7 +372,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
bond_info->rx_ntt = 1;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev)
@@ -402,38 +416,6 @@ out:
return res;
}
-/* Caller must hold bond lock for read */
-static struct slave *rlb_next_rx_slave(struct bonding *bond)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct slave *rx_slave, *slave, *start_at;
- int i = 0;
-
- if (bond_info->next_rx_slave) {
- start_at = bond_info->next_rx_slave;
- } else {
- start_at = bond->first_slave;
- }
-
- rx_slave = NULL;
-
- bond_for_each_slave_from(bond, slave, i, start_at) {
- if (SLAVE_IS_OK(slave)) {
- if (!rx_slave) {
- rx_slave = slave;
- } else if (slave->speed > rx_slave->speed) {
- rx_slave = slave;
- }
- }
- }
-
- if (rx_slave) {
- bond_info->next_rx_slave = rx_slave->next;
- }
-
- return rx_slave;
-}
-
/* teach the switch the mac of a disabled slave
* on the primary for fault tolerance
*
@@ -468,14 +450,14 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
u32 index, next_index;
/* clear slave from rx_hashtbl */
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
rx_hash_table = bond_info->rx_hashtbl;
index = bond_info->rx_hashtbl_head;
for (; index != RLB_NULL_INDEX; index = next_index) {
next_index = rx_hash_table[index].next;
if (rx_hash_table[index].slave == slave) {
- struct slave *assigned_slave = rlb_next_rx_slave(bond);
+ struct slave *assigned_slave = alb_get_best_slave(bond, index);
if (assigned_slave) {
rx_hash_table[index].slave = assigned_slave;
@@ -499,7 +481,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
write_lock_bh(&bond->curr_slave_lock);
@@ -558,7 +540,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -576,7 +558,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
*/
bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* The slave was assigned a new mac address - update the clients */
@@ -587,7 +569,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
int ntt = 0;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -607,7 +589,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* mark all clients using src_ip to be updated */
@@ -617,7 +599,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -643,7 +625,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Caller must hold both bond and ptr locks for read */
@@ -655,7 +637,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
struct rlb_client_info *client_info;
u32 hash_index = 0;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src));
client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -671,7 +653,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
assigned_slave = client_info->slave;
if (assigned_slave) {
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
} else {
@@ -687,7 +669,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
}
}
/* assign a new slave */
- assigned_slave = rlb_next_rx_slave(bond);
+ assigned_slave = alb_get_best_slave(bond, hash_index);
if (assigned_slave) {
client_info->ip_src = arp->ip_src;
@@ -723,7 +705,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
@@ -771,36 +753,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
return tx_slave;
}
-/* Caller must hold bond lock for read */
-static void rlb_rebalance(struct bonding *bond)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct slave *assigned_slave;
- struct rlb_client_info *client_info;
- int ntt;
- u32 hash_index;
-
- _lock_rx_hashtbl(bond);
-
- ntt = 0;
- hash_index = bond_info->rx_hashtbl_head;
- for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
- client_info = &(bond_info->rx_hashtbl[hash_index]);
- assigned_slave = rlb_next_rx_slave(bond);
- if (assigned_slave && (client_info->slave != assigned_slave)) {
- client_info->slave = assigned_slave;
- client_info->ntt = 1;
- ntt = 1;
- }
- }
-
- /* update the team's flag only after the whole iteration */
- if (ntt) {
- bond_info->rx_ntt = 1;
- }
- _unlock_rx_hashtbl(bond);
-}
-
/* Caller must hold rx_hashtbl lock */
static void rlb_init_table_entry(struct rlb_client_info *entry)
{
@@ -817,8 +769,6 @@ static int rlb_initialize(struct bonding *bond)
int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
int i;
- spin_lock_init(&(bond_info->rx_hashtbl_lock));
-
new_hashtbl = kmalloc(size, GFP_KERNEL);
if (!new_hashtbl) {
printk(KERN_ERR DRV_NAME
@@ -826,7 +776,7 @@ static int rlb_initialize(struct bonding *bond)
bond->dev->name);
return -1;
}
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
bond_info->rx_hashtbl = new_hashtbl;
@@ -836,7 +786,7 @@ static int rlb_initialize(struct bonding *bond)
rlb_init_table_entry(bond_info->rx_hashtbl + i);
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
/*initialize packet type*/
pk_type->type = cpu_to_be16(ETH_P_ARP);
@@ -855,13 +805,13 @@ static void rlb_deinitialize(struct bonding *bond)
dev_remove_pack(&(bond_info->rlb_pkt_type));
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
kfree(bond_info->rx_hashtbl);
bond_info->rx_hashtbl = NULL;
bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
@@ -869,7 +819,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
u32 curr_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
curr_index = bond_info->rx_hashtbl_head;
while (curr_index != RLB_NULL_INDEX) {
@@ -894,7 +844,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
curr_index = next_index;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/*********************** tlb/rlb shared functions *********************/
@@ -1521,11 +1471,6 @@ void bond_alb_monitor(struct work_struct *work)
read_lock(&bond->lock);
}
- if (bond_info->rlb_rebalance) {
- bond_info->rlb_rebalance = 0;
- rlb_rebalance(bond);
- }
-
/* check if clients need updating */
if (bond_info->rx_ntt) {
if (bond_info->rlb_update_delay_counter) {
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index b65fd29..09d755a 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -90,7 +90,7 @@ struct tlb_slave_info {
struct alb_bond_info {
struct timer_list alb_timer;
struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
- spinlock_t tx_hashtbl_lock;
+ spinlock_t hashtbl_lock; /* lock for both tables */
u32 unbalanced_load;
int tx_rebalance_counter;
int lp_counter;
@@ -98,7 +98,6 @@ struct alb_bond_info {
int rlb_enabled;
struct packet_type rlb_pkt_type;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */
- spinlock_t rx_hashtbl_lock;
u32 rx_hashtbl_head;
u8 rx_ntt; /* flag - need to transmit
* to all rx clients
--
1.5.5.6
^ permalink raw reply related
* [PATCH net-next-2.6] bonding: introduce primary_reselect option
From: Jiri Pirko @ 2009-09-18 15:30 UTC (permalink / raw)
To: netdev; +Cc: davem, fubar, bonding-devel, nicolas.2p.debian
(updated 3)
In some cases it is not desirable to switch back to the primary interface when
its link recovers; it is better to stay with the currently active one. We need to avoid
packet loss as much as we can in some cases. This is solved by introducing
the primary_reselect option. Note that a newly enslaved primary slave is set as the
current active slave no matter what.
Signed-off-by: Jiri Pirko <jpirko@redhat.com>
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index d5181ce..fd650e0 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -614,6 +614,32 @@ primary
The primary option is only valid for active-backup mode.
+primary_reselect
+
+ Specifies the behavior of the current active slave when the primary was
+ down and comes back up. This option is designed to prevent
+ flip-flopping between the primary slave and other slaves. The possible
+ values and their respective effects are:
+
+ always or 0 (default)
+
+ The primary slave becomes the active slave whenever it comes
+ back up.
+
+ better or 1
+
+ The primary slave becomes the active slave when it comes back
+ up, if the speed and duplex of the primary slave is better
+ than the speed and duplex of the current active slave.
+
+ failure or 2
+
+ The primary slave becomes the active slave only if the current
+ active slave fails and the primary slave is up.
+
+ When no slave is active, if the primary comes back up, it becomes the
+ active slave, regardless of the value of primary_reselect.
+
updelay
Specifies the time, in milliseconds, to wait before enabling a
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 699bfdd..1127361 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -94,6 +94,7 @@ static int downdelay;
static int use_carrier = 1;
static char *mode;
static char *primary;
+static char *primary_reselect;
static char *lacp_rate;
static char *ad_select;
static char *xmit_hash_policy;
@@ -126,6 +127,13 @@ MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
"6 for balance-alb");
module_param(primary, charp, 0);
MODULE_PARM_DESC(primary, "Primary network device to use");
+module_param(primary_reselect, charp, 0);
+MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "
+ "once it comes up; "
+ "0 for always (default), "
+ "1 for only if speed of primary is not "
+ "better, "
+ "2 for never");
module_param(lacp_rate, charp, 0);
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner "
"(slow/fast)");
@@ -200,6 +208,13 @@ const struct bond_parm_tbl fail_over_mac_tbl[] = {
{ NULL, -1},
};
+const struct bond_parm_tbl pri_reselect_tbl[] = {
+{ "always", BOND_PRI_RESELECT_ALWAYS},
+{ "better", BOND_PRI_RESELECT_BETTER},
+{ "failure", BOND_PRI_RESELECT_FAILURE},
+{ NULL, -1},
+};
+
struct bond_parm_tbl ad_select_tbl[] = {
{ "stable", BOND_AD_STABLE},
{ "bandwidth", BOND_AD_BANDWIDTH},
@@ -1070,6 +1085,25 @@ out:
}
+static bool bond_should_change_active(struct bonding *bond)
+{
+ struct slave *prim = bond->primary_slave;
+ struct slave *curr = bond->curr_active_slave;
+
+ if (!prim || !curr || curr->link != BOND_LINK_UP)
+ return true;
+ if (bond->force_primary) {
+ bond->force_primary = false;
+ return true;
+ }
+ if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER &&
+ (prim->speed < curr->speed ||
+ (prim->speed == curr->speed && prim->duplex <= curr->duplex)))
+ return false;
+ if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE)
+ return false;
+ return true;
+}
/**
* find_best_interface - select the best available slave to be the active one
@@ -1094,7 +1128,8 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
}
if ((bond->primary_slave) &&
- bond->primary_slave->link == BOND_LINK_UP) {
+ bond->primary_slave->link == BOND_LINK_UP &&
+ bond_should_change_active(bond)) {
new_active = bond->primary_slave;
}
@@ -1675,8 +1710,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
/* if there is a primary slave, remember it */
- if (strcmp(bond->params.primary, new_slave->dev->name) == 0)
+ if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
bond->primary_slave = new_slave;
+ bond->force_primary = true;
+ }
}
write_lock_bh(&bond->curr_slave_lock);
@@ -4643,7 +4680,7 @@ int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl)
static int bond_check_params(struct bond_params *params)
{
- int arp_validate_value, fail_over_mac_value;
+ int arp_validate_value, fail_over_mac_value, primary_reselect_value;
/*
* Convert string parameters.
@@ -4942,6 +4979,20 @@ static int bond_check_params(struct bond_params *params)
primary = NULL;
}
+ if (primary && primary_reselect) {
+ primary_reselect_value = bond_parse_parm(primary_reselect,
+ pri_reselect_tbl);
+ if (primary_reselect_value == -1) {
+ pr_err(DRV_NAME
+ ": Error: Invalid primary_reselect \"%s\"\n",
+ primary_reselect ==
+ NULL ? "NULL" : primary_reselect);
+ return -EINVAL;
+ }
+ } else {
+ primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;
+ }
+
if (fail_over_mac) {
fail_over_mac_value = bond_parse_parm(fail_over_mac,
fail_over_mac_tbl);
@@ -4973,6 +5024,7 @@ static int bond_check_params(struct bond_params *params)
params->use_carrier = use_carrier;
params->lacp_fast = lacp_fast;
params->primary[0] = 0;
+ params->primary_reselect = primary_reselect_value;
params->fail_over_mac = fail_over_mac_value;
if (primary) {
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 6044e12..42c44f2 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1212,6 +1212,61 @@ static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
bonding_show_primary, bonding_store_primary);
/*
+ * Show and set the primary_reselect flag.
+ */
+static ssize_t bonding_show_primary_reselect(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct bonding *bond = to_bond(d);
+
+ return sprintf(buf, "%s %d\n",
+ pri_reselect_tbl[bond->params.primary_reselect].modename,
+ bond->params.primary_reselect);
+}
+
+static ssize_t bonding_store_primary_reselect(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int new_value, ret = count;
+ struct bonding *bond = to_bond(d);
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ new_value = bond_parse_parm(buf, pri_reselect_tbl);
+ if (new_value < 0) {
+ pr_err(DRV_NAME
+ ": %s: Ignoring invalid primary_reselect value %.*s.\n",
+ bond->dev->name,
+ (int) strlen(buf) - 1, buf);
+ ret = -EINVAL;
+ goto out;
+ } else {
+ bond->params.primary_reselect = new_value;
+ pr_info(DRV_NAME ": %s: setting primary_reselect to %s (%d).\n",
+ bond->dev->name, pri_reselect_tbl[new_value].modename,
+ new_value);
+ if (new_value == BOND_PRI_RESELECT_ALWAYS ||
+ new_value == BOND_PRI_RESELECT_BETTER) {
+ bond->force_primary = true;
+ read_lock(&bond->lock);
+ write_lock_bh(&bond->curr_slave_lock);
+ bond_select_active_slave(bond);
+ write_unlock_bh(&bond->curr_slave_lock);
+ read_unlock(&bond->lock);
+ }
+ }
+out:
+ rtnl_unlock();
+ return ret;
+}
+static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR,
+ bonding_show_primary_reselect,
+ bonding_store_primary_reselect);
+
+/*
* Show and set the use_carrier flag.
*/
static ssize_t bonding_show_carrier(struct device *d,
@@ -1500,6 +1555,7 @@ static struct attribute *per_bond_attrs[] = {
&dev_attr_num_unsol_na.attr,
&dev_attr_miimon.attr,
&dev_attr_primary.attr,
+ &dev_attr_primary_reselect.attr,
&dev_attr_use_carrier.attr,
&dev_attr_active_slave.attr,
&dev_attr_mii_status.attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 6824771..b5b1530 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -131,6 +131,7 @@ struct bond_params {
int lacp_fast;
int ad_select;
char primary[IFNAMSIZ];
+ int primary_reselect;
__be32 arp_targets[BOND_MAX_ARP_TARGETS];
};
@@ -190,6 +191,7 @@ struct bonding {
struct slave *curr_active_slave;
struct slave *current_arp_slave;
struct slave *primary_slave;
+ bool force_primary;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
rwlock_t lock;
rwlock_t curr_slave_lock;
@@ -258,6 +260,10 @@ static inline bool bond_is_lb(const struct bonding *bond)
|| bond->params.mode == BOND_MODE_ALB;
}
+#define BOND_PRI_RESELECT_ALWAYS 0
+#define BOND_PRI_RESELECT_BETTER 1
+#define BOND_PRI_RESELECT_FAILURE 2
+
#define BOND_FOM_NONE 0
#define BOND_FOM_ACTIVE 1
#define BOND_FOM_FOLLOW 2
@@ -348,6 +354,7 @@ extern const struct bond_parm_tbl bond_mode_tbl[];
extern const struct bond_parm_tbl xmit_hashtype_tbl[];
extern const struct bond_parm_tbl arp_validate_tbl[];
extern const struct bond_parm_tbl fail_over_mac_tbl[];
+extern const struct bond_parm_tbl pri_reselect_tbl[];
extern struct bond_parm_tbl ad_select_tbl[];
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
^ permalink raw reply related
* RE: [RFC] CAIF Protocol Stack
From: Sjur Brændeland @ 2009-09-18 13:38 UTC (permalink / raw)
To: Rémi Denis-Courmont, netdev
In-Reply-To: <0f510ae3e0a78a2c1345d8e08bdafb0e@chewa.net>
> -----Original Message-----
> From: Rémi Denis-Courmont
> Sent: 18. september 2009 14:32
> Hello,
>
> On Wed, 16 Sep 2009 14:30:34 +0200, Sjur Brændeland
> <sjur.brandeland@stericsson.com> wrote:
> > The Implementation of CAIF is divided into:
> > * CAIF Devices: Character Device, Net Device and Kernel API.
> > * CAIF Protocol Implementation
> > * CAIF Link Layer
>
> I'm a bit confused here. What do you call a CAIF Device?
>
> Do you mean a GPRS context is a network device, and an AT
> command interface is a character device? Or is the CAIF modem
> a device? or what?
What I meant was:
* "Net Device" - a "struct net_device" with one instance for each GPRS PDP context.
* "Character Device" - a chr device, with one instance for each AT channel towards the modem.
BR/Sjur Brændeland
^ permalink raw reply
* Re: [RFC] CAIF Protocol Stack
From: Rémi Denis-Courmont @ 2009-09-18 12:31 UTC (permalink / raw)
To: netdev
In-Reply-To: <61D8D34BB13CFE408D154529C120E07902DF9076@eseldmw101.eemea.ericsson.se>
Hello,
On Wed, 16 Sep 2009 14:30:34 +0200, Sjur Brændeland
<sjur.brandeland@stericsson.com> wrote:
> The Implementation of CAIF is divided into:
> * CAIF Devices: Character Device, Net Device and Kernel API.
> * CAIF Protocol Implementation
> * CAIF Link Layer
I'm a bit confused here. What do you call a CAIF Device?
Do you mean a GPRS context is a network device, and an AT command interface
is a character device? Or is the CAIF modem a device? or what?
--
Rémi Denis-Courmont
^ permalink raw reply
* RE: [RFC] CAIF Protocol Stack
From: Sjur Brændeland @ 2009-09-18 12:01 UTC (permalink / raw)
To: David Miller; +Cc: netdev
Hi David.
I understand that you are one of the main Maintainers of netdev.
As explained below we have a largeish driver we would like to contribute.
I realize we should have started contributing this at an earlier stage..., but
What is the preferred way of doing this, i.e. how should we split it up?
Submit the whole shebang,
Or
Split Horizontally e.g. a) CAIF-Protocol, b) GPRS-Net-Device c) CAIF-Link Layer
Or
Split Vertically e.g. a) Payload Path Net-Device, b) Payload Path AT-channel, c) Configuration
Which kernel GIT should we base the patch set on?
Any hints on this would be greatly appreciated.
Best Regards
Sjur Brændeland
ST-Ericsson
> -----Original Message-----
> From: Sjur Brændeland
> Sent: 16. september 2009 14:31
> To: 'netdev@vger.kernel.org'
> Subject: [RFC] CAIF Protocol Stack
>
> Hello,
>
> We are currently working on a patch set in order to introduce
> the CAIF protocol in Linux. CAIF (Communication CPU to
> Application CPU Interface) is the primary protocol used to
> communicate between the ST-Ericsson modem and an external host system.
>
> The host processes can use CAIF to open virtual AT channels,
> initiate GPRS Data connections, Video channels and Utility Channels.
> The Utility Channels are general purpose pipes between modem and host.
>
> ST-Ericsson modems support a number of Link Layers between
> modem and host, currently Uart and Shared Memory are
> available for Linux.
>
> Architecture:
> ------------
> The Implementation of CAIF is divided into:
> * CAIF Devices: Character Device, Net Device and Kernel API.
> * CAIF Protocol Implementation
> * CAIF Link Layer
>
> In order to configure the devices a set of IOCTLs is used.
>
>
>
> IOCTL
> !
> ! +------+ +------+ +------+
> ! +------+! +------+! +------+!
> ! ! Chr !! !Kernel!! ! Net !!
> ! ! Dev !+ ! API !+ ! Dev !+ <- CAIF Devices
> ! +------+ +------! +------+
> ! ! ! !
> ! +----------!----------+
> ! +------+ <- CAIF Protocol
> Implementation
> +-------> ! CAIF ! /dev/caifconfig
> +------+
> +--------!--------+
> ! !
> +------+ +-----+
> !ShMem ! ! TTY ! <- Link Layer
> +------+ +-----+
>
> Any comments welcome.
>
>
>
> Files:
> -----
>
> net/caif/Kconfig | 61 +
> net/caif/Makefile | 62 +
> net/caif/caif_chnlif.c | 209 ++++
> net/caif/caif_chr.c | 392 +++++++
> net/caif/caif_config_util.c | 279 +++++
> net/caif/chnl_chr.c | 1161
> ++++++++++++++++++++
> net/caif/chnl_net.c | 338 ++++++
> net/caif/generic/cfcnfg.c | 722
> ++++++++++++
> net/caif/generic/cfctrl.c | 640 +++++++++++
> net/caif/generic/cfdgml.c | 119 ++
> net/caif/generic/cffrml.c | 144 +++
> net/caif/generic/cflist.c | 99 ++
> net/caif/generic/cfloopcfg.c | 93 ++
> net/caif/generic/cflooplayer.c | 113 ++
> net/caif/generic/cfmsll.c | 55 +
> net/caif/generic/cfmuxl.c | 270 +++++
> net/caif/generic/cfpkt_skbuff.c | 545 +++++++++
> net/caif/generic/cfrfml.c | 112 ++
> net/caif/generic/cfserl.c | 297 +++++
> net/caif/generic/cfshml.c | 67 ++
> net/caif/generic/cfspil.c | 245 ++++
> net/caif/generic/cfsrvl.c | 177 +++
> net/caif/generic/cfutill.c | 115 ++
> net/caif/generic/cfveil.c | 118 ++
> net/caif/generic/cfvidl.c | 68 ++
> net/caif/generic/fcs.c | 58 +
>
> drivers/net/caif/Kconfig | 58 +
> drivers/net/caif/Makefile | 29 +
> drivers/net/caif/chnl_tty.c | 217 ++++
> drivers/net/caif/phyif_loop.c | 418 +++++++
> drivers/net/caif/phyif_ser.c | 182 +++
> drivers/net/caif/phyif_shm.c | 838
> ++++++++++++++
> drivers/net/caif/shm.h | 95 ++
> drivers/net/caif/shm_cfgifc.c | 63 ++
> drivers/net/caif/shm_mbxifc.c | 104 ++
> drivers/net/caif/shm_smbx.c | 78 ++
>
> include/linux/caif/caif_config.h | 231 ++++
> include/linux/caif/caif_ioctl.h | 106 ++
> include/net/caif/caif_actions.h | 81 ++
> include/net/caif/caif_chr.h | 46 +
> include/net/caif/caif_config_util.h | 27 +
> include/net/caif/caif_kernel.h | 324 ++++++
> include/net/caif/caif_log.h | 57 +
> include/net/caif/generic/caif_layer.h | 476 ++++++++
> include/net/caif/generic/cfcnfg.h | 223 ++++
> include/net/caif/generic/cfctrl.h | 139 +++
> include/net/caif/generic/cffrml.h | 29 +
> include/net/caif/generic/cfglue.h | 387 +++++++
> include/net/caif/generic/cfloopcfg.h | 28 +
> include/net/caif/generic/cflst.h | 27 +
> include/net/caif/generic/cfmsll.h | 22 +
> include/net/caif/generic/cfmuxl.h | 30 +
> include/net/caif/generic/cfpkt.h | 246 +++++
> include/net/caif/generic/cfserl.h | 22 +
> include/net/caif/generic/cfshml.h | 21 +
> include/net/caif/generic/cfspil.h | 80 ++
> include/net/caif/generic/cfsrvl.h | 48 +
> include/net/caif/generic/fcs.h | 22 +
>
>
>
> Regards
> Sjur Brandeland
> ST-Ericsson
^ permalink raw reply
* [PATCH net-next-2.6] bonding: set primary param via sysfs
From: Jiri Pirko @ 2009-09-18 12:13 UTC (permalink / raw)
To: netdev; +Cc: davem, fubar, bonding-devel
Primary module parameter passed to bonding is permanent. That means if you
release the primary slave and enslave it again, it becomes the primary slave
again. But if you set primary slave via sysfs, the primary slave is only set
once and it's not remembered in bond->params structure. Therefore the setting is
lost after releasing the primary slave. This simple one-liner fixes this.
Signed-off-by: Jiri Pirko <jpirko@redhat.com>
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 6044e12..ff449de 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1182,6 +1182,7 @@ static ssize_t bonding_store_primary(struct device *d,
": %s: Setting %s as primary slave.\n",
bond->dev->name, slave->dev->name);
bond->primary_slave = slave;
+ strcpy(bond->params.primary, slave->dev->name);
bond_select_active_slave(bond);
goto out;
}
^ permalink raw reply related
* Re: tun: Return -EINVAL if neither IFF_TUN nor IFF_TAP is set.
From: Paul Moore @ 2009-09-18 11:54 UTC (permalink / raw)
To: Kusanagi Kouichi; +Cc: netdev, linux-kernel
In-Reply-To: <20090917073614.15217260031@msa105lp.auone-net.jp>
On Thursday 17 September 2009 03:36:13 am Kusanagi Kouichi wrote:
> After commit 2b980dbd77d229eb60588802162c9659726b11f4
> ("lsm: Add hooks to the TUN driver") tun_set_iff doesn't
> return -EINVAL though neither IFF_TUN nor IFF_TAP is set.
>
> Signed-off-by: Kusanagi Kouichi <slash@ma.neweb.ne.jp>
Sorry about that, my mistake, thanks for finding and fixing this.
Reviewed-by: Paul Moore <paul.moore@hp.com>
> ---
> drivers/net/tun.c | 4 +---
> 1 files changed, 1 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 3f5d288..e091756 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -946,8 +946,6 @@ static int tun_set_iff(struct net *net, struct file
> *file, struct ifreq *ifr) char *name;
> unsigned long flags = 0;
>
> - err = -EINVAL;
> -
> if (!capable(CAP_NET_ADMIN))
> return -EPERM;
> err = security_tun_dev_create();
> @@ -964,7 +962,7 @@ static int tun_set_iff(struct net *net, struct file
> *file, struct ifreq *ifr) flags |= TUN_TAP_DEV;
> name = "tap%d";
> } else
> - goto failed;
> + return -EINVAL;
>
> if (*ifr->ifr_name)
> name = ifr->ifr_name;
>
--
paul moore
linux @ hp
^ permalink raw reply
* tcp_sock variable initialization
From: Armin Abfalterer @ 2009-09-18 8:50 UTC (permalink / raw)
To: netdev
Hi!
I need a control variable (ecnn_flags) in tcp_sock that should be set
properly after the 3-way-handshake in tcp_create_openreq_child(). If I
set the variable there, its value is always 0 afterwards.
struct sock *tcp_create_openreq_child( ... )
{
struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
if (newsk != NULL) {
struct tcp_sock *newtp;
newtp = tcp_sk(newsk);
newtp->ecnn_flags |= TCP_ECN_NONCE_OK;
}
}
When I read the variable for the next outgoing segment the value is not
set.
static int tcp_transmit_skb( ... )
{
struct tcp_sock *tp;
if (tp->ecnn_flags & TCP_ECN_NONCE_OK) {
/*
* never entered!!!!
*/
}
}
I'm quite sure that it has to do with the creation of the big socket
when the connection enters TCP_ESTABLISHED but searching for hours
didn't help to find the right place where my variable is re-initialized.
Any hint in the right direction would be greatly appreciated!!! Thanks!
Armin
^ permalink raw reply
* Re: ipv4 regression in 2.6.31 ?
From: Stephan von Krawczynski @ 2009-09-18 8:30 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Jarek Poplawski, David Miller, Eric Dumazet, linux-kernel,
Linux Netdev List
In-Reply-To: <20090916100028.654f7893@nehalam>
On Wed, 16 Sep 2009 10:00:28 -0700
Stephen Hemminger <shemminger@vyatta.com> wrote:
> On Wed, 16 Sep 2009 05:23:04 +0000
> Jarek Poplawski <jarkao2@gmail.com> wrote:
>
> > On Tue, Sep 15, 2009 at 03:57:19PM -0700, Stephen Hemminger wrote:
> > > On Tue, 15 Sep 2009 08:13:55 +0000
> > > Jarek Poplawski <jarkao2@gmail.com> wrote:
> > >
> > > > On 14-09-2009 18:31, Stephen Hemminger wrote:
> > > > > On Mon, 14 Sep 2009 17:55:05 +0200
> > > > > Stephan von Krawczynski <skraw@ithnet.com> wrote:
> > > > >
> > > > >> On Mon, 14 Sep 2009 15:57:03 +0200
> > > > >> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > > > >>
> > > > > >>> Stephan von Krawczynski a écrit :
> > > > >>>> Hello all,
> > > > ...
> > > > >>> rp_filter - INTEGER
> > > > >>> 0 - No source validation.
> > > > >>> 1 - Strict mode as defined in RFC3704 Strict Reverse Path
> > > > >>> Each incoming packet is tested against the FIB and if the interface
> > > > >>> is not the best reverse path the packet check will fail.
> > > > >>> By default failed packets are discarded.
> > > > >>> 2 - Loose mode as defined in RFC3704 Loose Reverse Path
> > > > >>> Each incoming packet's source address is also tested against the FIB
> > > > >>> and if the source address is not reachable via any interface
> > > > >>> the packet check will fail.
> > > > ...
> > > > > RP filter did not work correctly in 2.6.30. The code added to the loose
> > > > > mode caused a bug; the rp_filter value was being computed as:
> > > > > rp_filter = interface_value & all_value;
> > > > > So in order to get reverse path filter both would have to be set.
> > > > >
> > > > > In 2.6.31 this was changed to:
> > > > > rp_filter = max(interface_value, all_value);
> > > > >
> > > > > This was the intended behaviour, if user asks all interfaces to have rp
> > > > > filtering turned on, then set /proc/sys/net/ipv4/conf/all/rp_filter = 1
> > > > > or to turn on just one interface, set it for just that interface.
> > > >
> > > > Alas this max() formula handles also cases where both values are set
> > > > and it doesn't look very natural/"user friendly" to me. Especially
> > > > with something like this: all_value = 2; interface_value = 1
> > > > Why would anybody care to bother with interface_value in such a case?
> > > >
> > > > "All" suggests "default" in this context, so I'd rather expect
> > > > something like:
> > > > rp_filter = interface_value ? : all_value;
> > > > which gives "the intended behaviour" too, plus more...
> > > >
> > > > We'd only need to add e.g.:
> > > > 0 - Default ("all") validation. (No source validation if "all" is 0).
> > > > 3 - No source validation on this interface.
> > >
> > > More values == more confusion.
> > > I chose the maxconf() method to make rp_filter consistent with other
> > > multi valued variables (arp_announce and arp_ignore).
> >
> > This additional value is not necessary (it'd give us superpowers).
> > Max seems logical to me only when values are sorted (especially if
> > max is the strictest).
>
> The values had to be unsorted because of the requirement to retain
> interface compatibility with older releases.
The parameters are the same (I guess this is what you call interface
compatibility), but the function came out different, meaning you broke
functional compatibility with 2.6.31 instead. Just to mention that - though
the argument is lightweight for the compatibility broke because the whole
thing was broken somehow before the bugfix.
--
Regards,
Stephan
^ permalink raw reply
* Re: [PATCH] ks8851_ml ethernet network driver
From: Greg KH @ 2009-09-18 5:27 UTC (permalink / raw)
To: David Miller; +Cc: David.Choi, netdev, Charles.Li, Choi, jgarzik, shemminger
In-Reply-To: <20090917.164952.33104590.davem@davemloft.net>
On Thu, Sep 17, 2009 at 04:49:52PM -0700, David Miller wrote:
> From: "Choi, David" <David.Choi@Micrel.Com>
> Date: Thu, 17 Sep 2009 12:30:27 -0700
>
> > --- linux-2.6.31-rc3/drivers/net/ks8851_mll.c.orig 2009-09-17
> > 10:18:56.000000000 -0700
> > +++ linux-2.6.31-rc3/drivers/net/ks8851_mll.c 2009-09-17
> > 10:09:37.000000000 -0700
> > @@ -21,8 +21,6 @@
> > * KS8851 16bit MLL chip from Micrel Inc.
>
> I can't use this patch or even test it, as your email client
> has corrupted it by, for example, breaking up long lines.
Yeah, that's why I had to post the original patch for David :(
I'm going to be away from email for the next 10 days due to conferences,
so hopefully David can fix the email issue so he can repost it
himself...
thanks,
greg k-h
^ permalink raw reply
* Re: Netlink API for bonding ?
From: Stephen Hemminger @ 2009-09-18 4:00 UTC (permalink / raw)
To: Nicolas de Pesloüan; +Cc: Jay Vosburgh, bonding-devel, netdev, Jiri Pirko
In-Reply-To: <4AB2B3EF.50307@free.fr>
On Fri, 18 Sep 2009 00:10:55 +0200
Nicolas de Pesloüan <nicolas.2p.debian@free.fr> wrote:
> Stephen Hemminger wrote:
> > On Thu, 17 Sep 2009 23:44:30 +0200
> > Nicolas de Pesloüan <nicolas.2p.debian@free.fr> wrote:
> >
> >> Stephen Hemminger wrote:
> >>> On Mon, 31 Aug 2009 22:34:50 +0200
> >>> Nicolas de Pesloüan <nicolas.2p.debian@free.fr> wrote:
> >>>
> >>>> Stephen,
> >>>>
> >>>> Can you please describe the netlink API you plan to implement for bonding ?
> >>>>
> >>>> Both Jiri Pirko and I plan to add some advanced active slave selection rules,
> >>>> for more-than-two-slaves bonding configuration.
> >>>>
> >>>> Jay suggested that such advanced features be implemented in user space, using
> >>>> netlink to notify a daemon when slaves come up or fall down. I agree with Jay,
> >>>> but don't want to design something without having first a view at your proposed
> >>>> API for bonding.
> >>>>
> >>>> Do you plan to have some notification to user space, or only the ability to read
> >>>> and set bonding configuration using netlink ?
> >>>>
> >>>> Thanks,
> >>>>
> >>>> Nicolas.
> >>> No paper spec, but was looking to add interface similar to vlan and macvlan.
> >>> Just use (and extend if needed) existing rtnl_link_ops.
> >>>
> >>>
> >>> Was not planning on adding a notification interface, thats good idea but
> >>> really not what I was looking at.
> >> What kind of notification system would you suggest to notify userland that a
> >> given bond device just lose the current active slave ?
> >
> > First why should user land care? Unless all slaves are gone maybe it
> > should just be transparent.
>
> Because we try to design a notification from kernel to userland when current
> active slave fail, to give an opportunity to userland to decide which non-failed
> slave should become the new active one. This is in order to try and move complex
> decisions to userland, only keeping very simple "two slaves" decisions into the
> kernel.
>
> Think of it as the bonding counter part of moving STP to userland for bridge.
> Userland should be able to decide which slave should be the active one for the
> same reasons userland is able to decide which bridge port should be forwarding
> and which should be blocked.
>
> I assume that we cannot just try to make the current bridge userland
> notification system more generic. May be I'm wrong. May be the ability to notify
> port failure, port coming back and BPDU for bridge is a superset of what we need
> to notify port failure and port coming back for bonding.
>
> > Use existing link ops mechanism (see vlan and macvlan). You may need
> > to add new operations, but these should be generic enough so that bonding and bridging
> > have same operations.
> >
> > .newlink => create bond device
> > .dellink => remove bond device
> > .newport => add slave
> > .delport => remove slave
> >
> > Also, dellink should always work (even if slaves are present).
>
> This sounds perfect for setup, but might not be good to elect the "best" port
> (active slave). Also, I assume a new RTNETLINK operation needs to be added for
> that. I thought that this was what you were working on. Do I miss something ?
> Does brctl use RTNETLINK for port setup ? Or do you plan to use iproute2 to
> replace brctl in the future ?
I got too busy to get past making bonding amenable to using newlink/dellink.
One common way to handle changes is to send another NEWXXX message with
different parameters (TLV values).
> > The terminology slave is not widely used outside of bonding, and so probably
> > shouldn't be buried in the API.
>
> Yes, you are definitely right with this point.
>
> Nicolas.
^ permalink raw reply
* Re: [net-2.6 PATCH 1/6] net: initialize rmem_alloc and omem_alloc to 0 in netlink socket
From: David Miller @ 2009-09-18 1:29 UTC (permalink / raw)
To: jeffrey.t.kirsher; +Cc: netdev, gospo, linux-scsi, john.r.fastabend
In-Reply-To: <20090918005708.25594.52575.stgit@localhost.localdomain>
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 17 Sep 2009 17:57:09 -0700
> From: John Fastabend <john.r.fastabend@intel.com>
>
> The rmem_alloc and omem_alloc socket fields are not
> initialized. This sets each variable to zero when a socket
> is created. Note the sk_wmem_alloc is already initialized
> in sock_init_data.
>
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
It's set to zero implicitly by the memset() done at sock_alloc()
time.
Re-setting it again here explicitly will just add unnecessary
memory traffic.
^ permalink raw reply
* Re: [net-2.6 PATCH 5/6] net: fix sock locking for sk_err field in netlink.
From: David Miller @ 2009-09-18 1:27 UTC (permalink / raw)
To: jeffrey.t.kirsher; +Cc: netdev, gospo, linux-scsi, john.r.fastabend
In-Reply-To: <20090918005832.25594.45086.stgit@localhost.localdomain>
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 17 Sep 2009 17:58:32 -0700
> From: John Fastabend <john.r.fastabend@intel.com>
>
> This adds the sock lock around setting the sk_err field
> in sock struct. Without the lock multiple threads may
> write to this field.
>
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
This isn't right.
Writes to sk->sk_err can occur asynchronously just fine and
without any locking.
The only requirement is that consumers of the sk_err value
use sock_error() which uses xchg() to get and clear the
value atomically.
^ permalink raw reply
* Re: [net-2.6 PATCH 2/6] net: remove kfree_skb on a NULL pointer in af_netlink.c
From: David Miller @ 2009-09-18 1:24 UTC (permalink / raw)
To: jeffrey.t.kirsher; +Cc: netdev, gospo, linux-scsi, john.r.fastabend
In-Reply-To: <20090918005729.25594.14261.stgit@localhost.localdomain>
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 17 Sep 2009 17:57:29 -0700
> From: John Fastabend <john.r.fastabend@intel.com>
>
> This removes a kfree_skb that is being called on a NULL pointer when
> do_one_broadcast() is successful. And moves the kfree_skb into
> do_one_broadcast() for the error case.
>
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
kfree_skb() on a NULL pointer is completely legal.
^ permalink raw reply
* [net-2.6 PATCH 6/6] net: fix double skb free in dcbnl
From: Jeff Kirsher @ 2009-09-18 0:58 UTC (permalink / raw)
To: davem; +Cc: netdev, gospo, linux-scsi, John Fastabend, Jeff Kirsher
In-Reply-To: <20090918005708.25594.52575.stgit@localhost.localdomain>
From: John Fastabend <john.r.fastabend@intel.com>
netlink_unicast() calls kfree_skb even in the error case.
dcbnl calls netlink_unicast() which, when it fails, frees the
skb and returns an error value. dcbnl is freeing the skb
again when this error occurs. This patch removes the double
free.
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
net/dcb/dcbnl.c | 15 +++++++--------
1 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index e0879bf..ac1205d 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -194,7 +194,7 @@ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
nlmsg_end(dcbnl_skb, nlh);
ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
if (ret)
- goto err;
+ return -EINVAL;
return 0;
nlmsg_failure:
@@ -275,7 +275,7 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb,
ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
if (ret)
- goto err;
+ goto err_out;
return 0;
nlmsg_failure:
@@ -316,12 +316,11 @@ static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb,
ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
if (ret)
- goto err;
+ goto err_out;
return 0;
nlmsg_failure:
-err:
kfree_skb(dcbnl_skb);
err_out:
return -EINVAL;
@@ -383,7 +382,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb,
ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
if (ret)
- goto err;
+ goto err_out;
return 0;
nlmsg_failure:
@@ -460,7 +459,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb,
ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
if (ret) {
ret = -EINVAL;
- goto err;
+ goto err_out;
}
return 0;
@@ -799,7 +798,7 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
if (ret)
- goto err;
+ goto err_out;
return 0;
@@ -1063,7 +1062,7 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlattr **tb,
ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
if (ret)
- goto err;
+ goto err_out;
return 0;
^ permalink raw reply related
* [net-2.6 PATCH 5/6] net: fix sock locking for sk_err field in netlink.
From: Jeff Kirsher @ 2009-09-18 0:58 UTC (permalink / raw)
To: davem; +Cc: netdev, gospo, linux-scsi, John Fastabend, Jeff Kirsher
In-Reply-To: <20090918005708.25594.52575.stgit@localhost.localdomain>
From: John Fastabend <john.r.fastabend@intel.com>
This adds the sock lock around setting the sk_err field
in sock struct. Without the lock multiple threads may
write to this field.
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
net/netlink/af_netlink.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index aa74011..1669dfc 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -732,7 +732,9 @@ static void netlink_overrun(struct sock *sk)
if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
+ lock_sock(sk);
sk->sk_err = ENOBUFS;
+ release_sock(sk);
sk->sk_error_report(sk);
}
}
@@ -1101,7 +1103,9 @@ static inline int do_one_set_err(struct sock *sk,
!test_bit(p->group - 1, nlk->groups))
goto out;
+ lock_sock(sk);
sk->sk_err = p->code;
+ release_sock(sk);
sk->sk_error_report(sk);
out:
return 0;
@@ -1780,7 +1784,9 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
in_skb->sk->sk_protocol,
NETLINK_CB(in_skb).pid);
if (sk) {
+ lock_sock(sk);
sk->sk_err = ENOBUFS;
+ release_sock(sk);
sk->sk_error_report(sk);
sock_put(sk);
}
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox