* [PATCH v2 02/14] vlan: Rename VLAN_GROUP_ARRAY_LEN to VLAN_N_VID.
From: Jesse Gross @ 2010-10-20 23:56 UTC (permalink / raw)
To: David Miller; +Cc: netdev
In-Reply-To: <1287618974-4714-1-git-send-email-jesse@nicira.com>
VLAN_GROUP_ARRAY_LEN is simply the number of possible vlan VIDs.
Since vlan groups will soon be more of an implementation detail
for vlan devices, rename the constant to be descriptive of its
actual purpose.
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
drivers/net/benet/be.h | 2 +-
drivers/net/benet/be_main.c | 2 +-
drivers/net/e1000/e1000_main.c | 2 +-
drivers/net/e1000e/netdev.c | 2 +-
drivers/net/igb/igb_main.c | 2 +-
drivers/net/igbvf/netdev.c | 2 +-
drivers/net/ixgb/ixgb_main.c | 2 +-
drivers/net/ixgbe/ixgbe_main.c | 2 +-
drivers/net/ixgbevf/ixgbevf_main.c | 2 +-
drivers/net/qlcnic/qlcnic_main.c | 2 +-
drivers/net/vmxnet3/vmxnet3_drv.c | 2 +-
drivers/net/vxge/vxge-main.c | 2 +-
drivers/s390/net/qeth_l3_main.c | 6 +++---
include/linux/if_vlan.h | 4 ++--
net/8021q/vlan.c | 16 ++++++++--------
net/bridge/netfilter/ebt_vlan.c | 4 ++--
16 files changed, 27 insertions(+), 27 deletions(-)
diff --git a/drivers/net/benet/be.h b/drivers/net/benet/be.h
index 1afabb1..59a17b5 100644
--- a/drivers/net/benet/be.h
+++ b/drivers/net/benet/be.h
@@ -263,7 +263,7 @@ struct be_adapter {
struct vlan_group *vlan_grp;
u16 vlans_added;
u16 max_vlans; /* Number of vlans supported */
- u8 vlan_tag[VLAN_GROUP_ARRAY_LEN];
+ u8 vlan_tag[VLAN_N_VID];
struct be_dma_mem mc_cmd_mem;
struct be_dma_mem stats_cmd;
diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 9a1cd28..4b59e53 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -626,7 +626,7 @@ static int be_vid_config(struct be_adapter *adapter, bool vf, u32 vf_num)
if (adapter->vlans_added <= adapter->max_vlans) {
/* Construct VLAN Table to give to HW */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
if (adapter->vlan_tag[i]) {
vtag[ntags] = cpu_to_le16(i);
ntags++;
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index cb3f84b..232ac2d 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -4541,7 +4541,7 @@ static void e1000_restore_vlan(struct e1000_adapter *adapter)
if (adapter->vlgrp) {
u16 vid;
- for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
if (!vlan_group_get_device(adapter->vlgrp, vid))
continue;
e1000_vlan_rx_add_vid(adapter->netdev, vid);
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 992b622..5d6cdea 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -2545,7 +2545,7 @@ static void e1000_restore_vlan(struct e1000_adapter *adapter)
if (!adapter->vlgrp)
return;
- for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
if (!vlan_group_get_device(adapter->vlgrp, vid))
continue;
e1000_vlan_rx_add_vid(adapter->netdev, vid);
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index b8dccc0..0f0939c 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -6153,7 +6153,7 @@ static void igb_restore_vlan(struct igb_adapter *adapter)
if (adapter->vlgrp) {
u16 vid;
- for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
if (!vlan_group_get_device(adapter->vlgrp, vid))
continue;
igb_vlan_rx_add_vid(adapter->netdev, vid);
diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c
index 6693323..ebfaa68 100644
--- a/drivers/net/igbvf/netdev.c
+++ b/drivers/net/igbvf/netdev.c
@@ -1254,7 +1254,7 @@ static void igbvf_restore_vlan(struct igbvf_adapter *adapter)
if (!adapter->vlgrp)
return;
- for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
if (!vlan_group_get_device(adapter->vlgrp, vid))
continue;
igbvf_vlan_rx_add_vid(adapter->netdev, vid);
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 80e6257..666207a 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -2223,7 +2223,7 @@ ixgb_restore_vlan(struct ixgb_adapter *adapter)
if (adapter->vlgrp) {
u16 vid;
- for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
if (!vlan_group_get_device(adapter->vlgrp, vid))
continue;
ixgb_vlan_rx_add_vid(adapter->netdev, vid);
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 790a0da..1d42442 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3185,7 +3185,7 @@ static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter)
if (adapter->vlgrp) {
u16 vid;
- for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
if (!vlan_group_get_device(adapter->vlgrp, vid))
continue;
ixgbe_vlan_rx_add_vid(adapter->netdev, vid);
diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c
index 0866a1c..78bfbe4 100644
--- a/drivers/net/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ixgbevf/ixgbevf_main.c
@@ -1495,7 +1495,7 @@ static void ixgbevf_restore_vlan(struct ixgbevf_adapter *adapter)
if (adapter->vlgrp) {
u16 vid;
- for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
if (!vlan_group_get_device(adapter->vlgrp, vid))
continue;
ixgbevf_vlan_rx_add_vid(adapter->netdev, vid);
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 4aada0b..f047c7c 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -4093,7 +4093,7 @@ qlcnic_restore_indev_addr(struct net_device *netdev, unsigned long event)
if (!adapter->vlgrp)
return;
- for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
dev = vlan_group_get_device(adapter->vlgrp, vid);
if (!dev)
continue;
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 198ce92..b1de73b 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1634,7 +1634,7 @@ vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
bool activeVlan = false;
- for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
if (vlan_group_get_device(adapter->vlan_grp, vid)) {
VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
activeVlan = true;
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index 5378b84..0bda7fe 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -1862,7 +1862,7 @@ enum vxge_hw_status vxge_restore_vpath_vid_table(struct vxge_vpath *vpath)
if (vdev->vlgrp && vpath->is_open) {
- for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
+ for (vid = 0; vid < VLAN_N_VID; vid++) {
if (!vlan_group_get_device(vdev->vlgrp, vid))
continue;
/* Add these vlan to the vid table */
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index c094707..74d1401 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1820,7 +1820,7 @@ static void qeth_l3_add_vlan_mc(struct qeth_card *card)
return;
vg = card->vlangrp;
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
struct net_device *netdev = vlan_group_get_device(vg, i);
if (netdev == NULL ||
!(netdev->flags & IFF_UP))
@@ -1883,7 +1883,7 @@ static void qeth_l3_add_vlan_mc6(struct qeth_card *card)
return;
vg = card->vlangrp;
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
struct net_device *netdev = vlan_group_get_device(vg, i);
if (netdev == NULL ||
!(netdev->flags & IFF_UP))
@@ -2247,7 +2247,7 @@ static int qeth_l3_verify_vlan_dev(struct net_device *dev,
if (!vg)
return rc;
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
if (vlan_group_get_device(vg, i) == dev) {
rc = QETH_VLAN_CARD;
break;
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index a523207..494cce8 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -68,6 +68,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */
#define VLAN_TAG_PRESENT VLAN_CFI_MASK
#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */
+#define VLAN_N_VID 4096
/* found in socket.c */
extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
@@ -76,9 +77,8 @@ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
* depends on completely exhausting the VLAN identifier space. Thus
* it gives constant time look-up, but in many cases it wastes memory.
*/
-#define VLAN_GROUP_ARRAY_LEN 4096
#define VLAN_GROUP_ARRAY_SPLIT_PARTS 8
-#define VLAN_GROUP_ARRAY_PART_LEN (VLAN_GROUP_ARRAY_LEN/VLAN_GROUP_ARRAY_SPLIT_PARTS)
+#define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
struct vlan_group {
struct net_device *real_dev; /* The ethernet(like) device
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 25c2133..54f22d8 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -439,7 +439,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
switch (event) {
case NETDEV_CHANGE:
/* Propagate real device state to vlan devices */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -450,7 +450,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_CHANGEADDR:
/* Adjust unicast filters on underlying device */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -464,7 +464,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
break;
case NETDEV_CHANGEMTU:
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -478,7 +478,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_FEAT_CHANGE:
/* Propagate device features to underlying device */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -490,7 +490,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_DOWN:
/* Put all VLANs for this dev in the down state too. */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -508,7 +508,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_UP:
/* Put all VLANs for this dev in the up state too. */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -532,7 +532,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
/* Delete all VLANs for this dev. */
grp->killall = 1;
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -540,7 +540,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
/* unregistration of last vlan destroys group, abort
* afterwards */
if (grp->nr_vlans == 1)
- i = VLAN_GROUP_ARRAY_LEN;
+ i = VLAN_N_VID;
unregister_vlan_dev(vlandev, &list);
}
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index cc11d6b..eae67bf 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -118,10 +118,10 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
* 0 - The null VLAN ID.
* 1 - The default Port VID (PVID)
* 0x0FFF - Reserved for implementation use.
- * if_vlan.h: VLAN_GROUP_ARRAY_LEN 4096. */
+ * if_vlan.h: VLAN_N_VID 4096. */
if (GET_BITMASK(EBT_VLAN_ID)) {
if (!!info->id) { /* if id!=0 => check vid range */
- if (info->id > VLAN_GROUP_ARRAY_LEN) {
+ if (info->id > VLAN_N_VID) {
pr_debug("id %d is out of range (1-4096)\n",
info->id);
return -EINVAL;
--
1.7.1
^ permalink raw reply related
* [PATCH v2 01/14] ebtables: Allow filtering of hardware accelerated vlan frames.
From: Jesse Gross @ 2010-10-20 23:56 UTC (permalink / raw)
To: David Miller; +Cc: netdev
In-Reply-To: <1287618974-4714-1-git-send-email-jesse@nicira.com>
An upcoming commit will allow packets with hardware vlan acceleration
information to be passed through more parts of the network stack, including
packets trunked through the bridge. This adds support for matching and
filtering those packets through ebtables.
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
net/bridge/br_netfilter.c | 16 +++++++++-------
net/bridge/netfilter/ebt_vlan.c | 21 ++++++++++++++-------
net/bridge/netfilter/ebtables.c | 15 +++++++++++----
3 files changed, 34 insertions(+), 18 deletions(-)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 7f9ce96..47c2dab 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -64,22 +64,24 @@ static int brnf_filter_pppoe_tagged __read_mostly = 0;
static inline __be16 vlan_proto(const struct sk_buff *skb)
{
- return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+ if (vlan_tx_tag_present(skb))
+ return skb->protocol;
+ else if (skb->protocol == htons(ETH_P_8021Q))
+ return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+ else
+ return 0;
}
#define IS_VLAN_IP(skb) \
- (skb->protocol == htons(ETH_P_8021Q) && \
- vlan_proto(skb) == htons(ETH_P_IP) && \
+ (vlan_proto(skb) == htons(ETH_P_IP) && \
brnf_filter_vlan_tagged)
#define IS_VLAN_IPV6(skb) \
- (skb->protocol == htons(ETH_P_8021Q) && \
- vlan_proto(skb) == htons(ETH_P_IPV6) &&\
+ (vlan_proto(skb) == htons(ETH_P_IPV6) && \
brnf_filter_vlan_tagged)
#define IS_VLAN_ARP(skb) \
- (skb->protocol == htons(ETH_P_8021Q) && \
- vlan_proto(skb) == htons(ETH_P_ARP) && \
+ (vlan_proto(skb) == htons(ETH_P_ARP) && \
brnf_filter_vlan_tagged)
static inline __be16 pppoe_proto(const struct sk_buff *skb)
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 87b53b3..cc11d6b 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -39,8 +39,6 @@ static bool
ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct ebt_vlan_info *info = par->matchinfo;
- const struct vlan_hdr *fp;
- struct vlan_hdr _frame;
unsigned short TCI; /* Whole TCI, given from parsed frame */
unsigned short id; /* VLAN ID, given from frame TCI */
@@ -48,9 +46,20 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
/* VLAN encapsulated Type/Length field, given from orig frame */
__be16 encap;
- fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
- if (fp == NULL)
- return false;
+ if (vlan_tx_tag_present(skb)) {
+ TCI = vlan_tx_tag_get(skb);
+ encap = skb->protocol;
+ } else {
+ const struct vlan_hdr *fp;
+ struct vlan_hdr _frame;
+
+ fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
+ if (fp == NULL)
+ return false;
+
+ TCI = ntohs(fp->h_vlan_TCI);
+ encap = fp->h_vlan_encapsulated_proto;
+ }
/* Tag Control Information (TCI) consists of the following elements:
* - User_priority. The user_priority field is three bits in length,
@@ -59,10 +68,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
* (CFI) is a single bit flag value. Currently ignored.
* - VLAN Identifier (VID). The VID is encoded as
* an unsigned binary number. */
- TCI = ntohs(fp->h_vlan_TCI);
id = TCI & VLAN_VID_MASK;
prio = (TCI >> 13) & 0x7;
- encap = fp->h_vlan_encapsulated_proto;
/* Checking VLAN Identifier (VID) */
if (GET_BITMASK(EBT_VLAN_ID))
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index bcc102e..a1dcf83 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -124,16 +124,23 @@ ebt_dev_check(const char *entry, const struct net_device *device)
#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg))
/* process standard matches */
static inline int
-ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h,
+ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
const struct net_device *in, const struct net_device *out)
{
+ const struct ethhdr *h = eth_hdr(skb);
+ __be16 ethproto;
int verdict, i;
+ if (vlan_tx_tag_present(skb))
+ ethproto = htons(ETH_P_8021Q);
+ else
+ ethproto = h->h_proto;
+
if (e->bitmask & EBT_802_3) {
- if (FWINV2(ntohs(h->h_proto) >= 1536, EBT_IPROTO))
+ if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO))
return 1;
} else if (!(e->bitmask & EBT_NOPROTO) &&
- FWINV2(e->ethproto != h->h_proto, EBT_IPROTO))
+ FWINV2(e->ethproto != ethproto, EBT_IPROTO))
return 1;
if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN))
@@ -213,7 +220,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
base = private->entries;
i = 0;
while (i < nentries) {
- if (ebt_basic_match(point, eth_hdr(skb), in, out))
+ if (ebt_basic_match(point, skb, in, out))
goto letscontinue;
if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
--
1.7.1
^ permalink raw reply related
* [PATCH v2 00/14] Move vlan acceleration into networking core.
From: Jesse Gross @ 2010-10-20 23:56 UTC (permalink / raw)
To: David Miller; +Cc: netdev
Hardware vlan acceleration behaves fairly differently from other types of
offloading, which limits its usefulness. This patch series aims to bring
it more in line with other common forms of acceleration, such as checksum
offloading and TSO. In doing this it eliminates common driver bugs, increases
flexibility, and improves performance, while reducing the number of lines of
code.
The first eleven patches can be applied immediately, while the last three need
to wait until all drivers that support vlan acceleration are updated. If
people agree that this patch set makes sense I will go ahead and switch over
the dozen or so drivers that would need to change.
Changes since v1:
* Break apart patches and use temporary variables for better readability.
* Use rcu_dereference_rtnl() in vlan_find_dev().
* Restructure netif_needs_gso() for better common case performance.
* Make ebtables consistently use the outer vlan tag if two are present.
* Drop check for vlan group on transmit in all drivers.
* Send vlan traffic through __netif_receive_skb both tagged and untagged for
consistent results between accelerated and non-accelerated traffic.
* Add support for Ethtool.
* Enable vlan acceleration on bridge devices.
* Convert bnx2x driver.
Hao Zheng (1):
bnx2x: Update bnx2x to use new vlan accleration.
Jesse Gross (13):
ebtables: Allow filtering of hardware accelerated vlan frames.
vlan: Rename VLAN_GROUP_ARRAY_LEN to VLAN_N_VID.
vlan: Don't check for vlan group before vlan_tx_tag_present.
vlan: Enable software emulation for vlan accleration.
vlan: Avoid hash table lookup to find group.
vlan: Centralize handling of hardware acceleration.
ethtool: Add support for vlan accleration.
bridge: Add support for TX vlan offload.
bnx2: Update bnx2 to use new vlan accleration.
ixgbe: Update ixgbe to use new vlan accleration.
lro: Remove explicit vlan support.
bonding: Update bonding for new vlan model.
vlan: Remove accleration legacy functions.
drivers/net/8139cp.c | 2 +-
drivers/net/amd8111e.c | 2 +-
drivers/net/atl1c/atl1c_main.c | 2 +-
drivers/net/atl1e/atl1e_main.c | 2 +-
drivers/net/atlx/atl1.c | 2 +-
drivers/net/atlx/atl2.c | 2 +-
drivers/net/benet/be.h | 2 +-
drivers/net/benet/be_main.c | 9 +-
drivers/net/bna/bnad.c | 2 +-
drivers/net/bnx2.c | 99 +++++++------------------
drivers/net/bnx2.h | 4 -
drivers/net/bnx2x/bnx2x.h | 10 ---
drivers/net/bnx2x/bnx2x_cmn.c | 63 +++-------------
drivers/net/bnx2x/bnx2x_ethtool.c | 33 ++++----
drivers/net/bnx2x/bnx2x_main.c | 8 --
drivers/net/bonding/bond_alb.c | 8 +-
drivers/net/bonding/bond_ipv6.c | 5 +-
drivers/net/bonding/bond_main.c | 143 +++++++-----------------------------
drivers/net/bonding/bonding.h | 1 -
drivers/net/chelsio/sge.c | 2 +-
drivers/net/cxgb3/sge.c | 4 +-
drivers/net/e1000/e1000_main.c | 4 +-
drivers/net/e1000e/netdev.c | 4 +-
drivers/net/ehea/ehea_main.c | 2 +-
drivers/net/enic/enic_main.c | 2 +-
drivers/net/forcedeth.c | 11 +--
drivers/net/gianfar.c | 4 +-
drivers/net/igb/igb_main.c | 4 +-
drivers/net/igbvf/netdev.c | 2 +-
drivers/net/ixgb/ixgb_main.c | 2 +-
drivers/net/ixgbe/ixgbe.h | 4 +-
drivers/net/ixgbe/ixgbe_ethtool.c | 12 +++-
drivers/net/ixgbe/ixgbe_main.c | 139 +++++++++++++++-------------------
drivers/net/ixgbevf/ixgbevf_main.c | 4 +-
drivers/net/mlx4/en_tx.c | 4 +-
drivers/net/qlcnic/qlcnic_main.c | 2 +-
drivers/net/qlge/qlge_main.c | 2 +-
drivers/net/r8169.c | 2 +-
drivers/net/s2io.c | 2 +-
drivers/net/sky2.c | 2 +-
drivers/net/tg3.c | 4 +-
drivers/net/via-velocity.c | 2 +-
drivers/net/vmxnet3/vmxnet3_drv.c | 2 +-
drivers/net/vxge/vxge-main.c | 4 +-
drivers/s390/net/qeth_l3_main.c | 6 +-
include/linux/ethtool.h | 2 +
include/linux/if_vlan.h | 88 +++++++---------------
include/linux/inet_lro.h | 20 -----
include/linux/netdevice.h | 28 ++++---
net/8021q/vlan.c | 84 ++++-----------------
net/8021q/vlan.h | 17 ----
net/8021q/vlan_core.c | 132 +++++----------------------------
net/8021q/vlan_dev.c | 2 +-
net/bridge/br_device.c | 8 ++-
net/bridge/br_netfilter.c | 16 ++--
net/bridge/netfilter/ebt_vlan.c | 25 ++++--
net/bridge/netfilter/ebtables.c | 15 +++-
net/core/dev.c | 83 ++++++++++++---------
net/core/ethtool.c | 3 +-
net/ipv4/inet_lro.c | 74 +++----------------
60 files changed, 395 insertions(+), 833 deletions(-)
^ permalink raw reply
* [BUG] problems with "ip xfrm" on 32-bit userspace with 64-bit kernel
From: Chris Friesen @ 2010-10-20 23:18 UTC (permalink / raw)
To: netdev, Linux Kernel Mailing List
We've run into a 32/64 compatibility problem with iproute2. The "ip
xfrm monitor acquire" command doesn't work properly due to struct size
mismatches between kernel and userspace.
If I modify include/linux/xfrm.h to pack all the structures and rebuild
the kernel and userspace, this message is displayed properly. However,
this shouldn't be necessary and might not work on all architectures.
Anyone got any ideas that are less drastic?
Thanks,
Chris
Details:
iproute2-2.6.35.tar.bz2 package (the "ip" binary reports a version of
iproute2-ss100804)
2.6.27.18 kernel, ARCH is x86, kernel is 64-bit, userspace is 32-bit.
To reproduce:
1. Find a src and dst IP address that normally passes a ping test
ping -I 172.25.0.4 172.25.132.1
2. Setup a single outgoing IPsec policy that will require an IPsec SA
on the next ping packet.
setkey -c << EOF
spdadd 172.24.132.4/32[any] 172.24.136.0/32[any] any -P out ipsec
esp/transport//unique:1;
EOF
3. In a separate window/terminal, launch the following command to
monitor Netlink messages from the kernel
ip xfrm monitor acquire
4. Send a ping packet (this command will block, or fail depending on
your kernel config)
ping -I 172.25.0.4 172.25.132.1
5. The "ip xfrm monitor acquire" command displays something similar to
this:
!!!Deficit 72, rta_len=1
acquire proto esp
sel src 172.25.0.4/32 dst 172.25.132.1/32 proto udp sport 44136 dport 1025
policy src 172.25.0.4/32 dst 172.25.132.1/32
dir out priority 2147483648 ptype main
6. The "!!!Deficit 72, rta_len=1" string at the beginning of the
message is complaining about mismatches between the total reported
length of the Netlink message and the useable length detected. Also,
the ACQUIRE message is incomplete as shown--there are attributes such as
the reqId value that are not displayed.
7. Now clean up after yourself and take down the ipsec policy:
setkey -c << EOF
spddelete 172.24.132.4/32[any] 172.24.136.0/32[any] any -P out ipsec
esp/transport//unique:1;
EOF
--
Chris Friesen
Software Developer
GENBAND
chris.friesen@genband.com
www.genband.com
^ permalink raw reply
* [patch 1/1] net: avoid limits overflow
From: akpm @ 2010-10-20 22:59 UTC (permalink / raw)
To: davem; +Cc: netdev, akpm, eric.dumazet, holt
From: Eric Dumazet <eric.dumazet@gmail.com>
Robin Holt tried to boot a 16TB machine and found some limits were reached:
sysctl_tcp_mem[2], sysctl_udp_mem[2]
We can switch infrastructure to use "long" instead of "int", now that
atomic_long_t primitives are available for free.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reported-by: Robin Holt <holt@sgi.com>
Reviewed-by: Robin Holt <holt@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/net/dn.h | 2 +-
include/net/sock.h | 4 ++--
include/net/tcp.h | 6 +++---
include/net/udp.h | 4 ++--
net/core/sock.c | 14 +++++++-------
net/decnet/af_decnet.c | 2 +-
net/decnet/sysctl_net_decnet.c | 4 ++--
net/ipv4/proc.c | 8 ++++----
net/ipv4/sysctl_net_ipv4.c | 5 ++---
net/ipv4/tcp.c | 4 ++--
net/ipv4/tcp_input.c | 11 +++++++----
net/ipv4/udp.c | 4 ++--
net/sctp/protocol.c | 2 +-
net/sctp/socket.c | 4 ++--
net/sctp/sysctl.c | 4 ++--
15 files changed, 40 insertions(+), 38 deletions(-)
diff -puN include/net/dn.h~net-avoid-limits-overflow include/net/dn.h
--- a/include/net/dn.h~net-avoid-limits-overflow
+++ a/include/net/dn.h
@@ -225,7 +225,7 @@ extern int decnet_di_count;
extern int decnet_dr_count;
extern int decnet_no_fc_max_cwnd;
-extern int sysctl_decnet_mem[3];
+extern long sysctl_decnet_mem[3];
extern int sysctl_decnet_wmem[3];
extern int sysctl_decnet_rmem[3];
diff -puN include/net/sock.h~net-avoid-limits-overflow include/net/sock.h
--- a/include/net/sock.h~net-avoid-limits-overflow
+++ a/include/net/sock.h
@@ -762,7 +762,7 @@ struct proto {
/* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk);
- atomic_t *memory_allocated; /* Current allocated memory. */
+ atomic_long_t *memory_allocated; /* Current allocated memory. */
struct percpu_counter *sockets_allocated; /* Current number of sockets. */
/*
* Pressure flag: try to collapse.
@@ -771,7 +771,7 @@ struct proto {
* is strict, actions are advisory and have some latency.
*/
int *memory_pressure;
- int *sysctl_mem;
+ long *sysctl_mem;
int *sysctl_wmem;
int *sysctl_rmem;
int max_header;
diff -puN include/net/tcp.h~net-avoid-limits-overflow include/net/tcp.h
--- a/include/net/tcp.h~net-avoid-limits-overflow
+++ a/include/net/tcp.h
@@ -224,7 +224,7 @@ extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack;
-extern int sysctl_tcp_mem[3];
+extern long sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
@@ -247,7 +247,7 @@ extern int sysctl_tcp_cookie_size;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
-extern atomic_t tcp_memory_allocated;
+extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
extern int tcp_memory_pressure;
@@ -280,7 +280,7 @@ static inline bool tcp_too_many_orphans(
}
if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])
+ atomic_long_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])
return true;
return false;
}
diff -puN include/net/udp.h~net-avoid-limits-overflow include/net/udp.h
--- a/include/net/udp.h~net-avoid-limits-overflow
+++ a/include/net/udp.h
@@ -105,10 +105,10 @@ static inline struct udp_hslot *udp_hash
extern struct proto udp_prot;
-extern atomic_t udp_memory_allocated;
+extern atomic_long_t udp_memory_allocated;
/* sysctl variables for udp */
-extern int sysctl_udp_mem[3];
+extern long sysctl_udp_mem[3];
extern int sysctl_udp_rmem_min;
extern int sysctl_udp_wmem_min;
diff -puN net/core/sock.c~net-avoid-limits-overflow net/core/sock.c
--- a/net/core/sock.c~net-avoid-limits-overflow
+++ a/net/core/sock.c
@@ -1653,10 +1653,10 @@ int __sk_mem_schedule(struct sock *sk, i
{
struct proto *prot = sk->sk_prot;
int amt = sk_mem_pages(size);
- int allocated;
+ long allocated;
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- allocated = atomic_add_return(amt, prot->memory_allocated);
+ allocated = atomic_long_add_return(amt, prot->memory_allocated);
/* Under limit. */
if (allocated <= prot->sysctl_mem[0]) {
@@ -1714,7 +1714,7 @@ suppress_allocation:
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
- atomic_sub(amt, prot->memory_allocated);
+ atomic_long_sub(amt, prot->memory_allocated);
return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1727,12 +1727,12 @@ void __sk_mem_reclaim(struct sock *sk)
{
struct proto *prot = sk->sk_prot;
- atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+ atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
prot->memory_allocated);
sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
if (prot->memory_pressure && *prot->memory_pressure &&
- (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+ (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
*prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -2452,12 +2452,12 @@ static char proto_method_implemented(con
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
- seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
+ seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
sock_prot_inuse_get(seq_file_net(seq), proto),
- proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+ proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
proto->max_header,
proto->slab == NULL ? "no" : "yes",
diff -puN net/decnet/af_decnet.c~net-avoid-limits-overflow net/decnet/af_decnet.c
--- a/net/decnet/af_decnet.c~net-avoid-limits-overflow
+++ a/net/decnet/af_decnet.c
@@ -155,7 +155,7 @@ static const struct proto_ops dn_proto_o
static DEFINE_RWLOCK(dn_hash_lock);
static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
static struct hlist_head dn_wild_sk;
-static atomic_t decnet_memory_allocated;
+static atomic_long_t decnet_memory_allocated;
static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
diff -puN net/decnet/sysctl_net_decnet.c~net-avoid-limits-overflow net/decnet/sysctl_net_decnet.c
--- a/net/decnet/sysctl_net_decnet.c~net-avoid-limits-overflow
+++ a/net/decnet/sysctl_net_decnet.c
@@ -38,7 +38,7 @@ int decnet_log_martians = 1;
int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
/* Reasonable defaults, I hope, based on tcp's defaults */
-int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
@@ -324,7 +324,7 @@ static ctl_table dn_table[] = {
.data = &sysctl_decnet_mem,
.maxlen = sizeof(sysctl_decnet_mem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "decnet_rmem",
diff -puN net/ipv4/proc.c~net-avoid-limits-overflow net/ipv4/proc.c
--- a/net/ipv4/proc.c~net-avoid-limits-overflow
+++ a/net/ipv4/proc.c
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_
local_bh_enable();
socket_seq_show(seq);
- seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
+ seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
tcp_death_row.tw_count, sockets,
- atomic_read(&tcp_memory_allocated));
- seq_printf(seq, "UDP: inuse %d mem %d\n",
+ atomic_long_read(&tcp_memory_allocated));
+ seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
- atomic_read(&udp_memory_allocated));
+ atomic_long_read(&udp_memory_allocated));
seq_printf(seq, "UDPLITE: inuse %d\n",
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
diff -puN net/ipv4/sysctl_net_ipv4.c~net-avoid-limits-overflow net/ipv4/sysctl_net_ipv4.c
--- a/net/ipv4/sysctl_net_ipv4.c~net-avoid-limits-overflow
+++ a/net/ipv4/sysctl_net_ipv4.c
@@ -398,7 +398,7 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_tcp_mem,
.maxlen = sizeof(sysctl_tcp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "tcp_wmem",
@@ -602,8 +602,7 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .proc_handler = proc_doulongvec_minmax,
},
{
.procname = "udp_rmem_min",
diff -puN net/ipv4/tcp.c~net-avoid-limits-overflow net/ipv4/tcp.c
--- a/net/ipv4/tcp.c~net-avoid-limits-overflow
+++ a/net/ipv4/tcp.c
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
-int sysctl_tcp_mem[3] __read_mostly;
+long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
-atomic_t tcp_memory_allocated; /* Current allocated memory. */
+atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
/*
diff -puN net/ipv4/tcp_input.c~net-avoid-limits-overflow net/ipv4/tcp_input.c
--- a/net/ipv4/tcp_input.c~net-avoid-limits-overflow
+++ a/net/ipv4/tcp_input.c
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock
int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
sizeof(struct sk_buff);
- if (sk->sk_sndbuf < 3 * sndmem)
- sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+ if (sk->sk_sndbuf < 3 * sndmem) {
+ sk->sk_sndbuf = 3 * sndmem;
+ if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
+ sk->sk_sndbuf = sysctl_tcp_wmem[2];
+ }
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_memory_pressure &&
- atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
}
@@ -4861,7 +4864,7 @@ static int tcp_should_expand_sndbuf(stru
return 0;
/* If we are under soft global TCP memory pressure, do not expand. */
- if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+ if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
return 0;
/* If we filled the congestion window, do not expand. */
diff -puN net/ipv4/udp.c~net-avoid-limits-overflow net/ipv4/udp.c
--- a/net/ipv4/udp.c~net-avoid-limits-overflow
+++ a/net/ipv4/udp.c
@@ -110,7 +110,7 @@
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
-int sysctl_udp_mem[3] __read_mostly;
+long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
int sysctl_udp_rmem_min __read_mostly;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min);
int sysctl_udp_wmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_wmem_min);
-atomic_t udp_memory_allocated;
+atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
#define MAX_UDP_PORTS 65536
diff -puN net/sctp/protocol.c~net-avoid-limits-overflow net/sctp/protocol.c
--- a/net/sctp/protocol.c~net-avoid-limits-overflow
+++ a/net/sctp/protocol.c
@@ -92,7 +92,7 @@ static struct sctp_af *sctp_af_v6_specif
struct kmem_cache *sctp_chunk_cachep __read_mostly;
struct kmem_cache *sctp_bucket_cachep __read_mostly;
-int sysctl_sctp_mem[3];
+long sysctl_sctp_mem[3];
int sysctl_sctp_rmem[3];
int sysctl_sctp_wmem[3];
diff -puN net/sctp/socket.c~net-avoid-limits-overflow net/sctp/socket.c
--- a/net/sctp/socket.c~net-avoid-limits-overflow
+++ a/net/sctp/socket.c
@@ -111,12 +111,12 @@ static void sctp_sock_migrate(struct soc
static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
extern struct kmem_cache *sctp_bucket_cachep;
-extern int sysctl_sctp_mem[3];
+extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
extern int sysctl_sctp_wmem[3];
static int sctp_memory_pressure;
-static atomic_t sctp_memory_allocated;
+static atomic_long_t sctp_memory_allocated;
struct percpu_counter sctp_sockets_allocated;
static void sctp_enter_memory_pressure(struct sock *sk)
diff -puN net/sctp/sysctl.c~net-avoid-limits-overflow net/sctp/sysctl.c
--- a/net/sctp/sysctl.c~net-avoid-limits-overflow
+++ a/net/sctp/sysctl.c
@@ -54,7 +54,7 @@ static int sack_timer_max = 500;
static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
static int rwnd_scale_max = 16;
-extern int sysctl_sctp_mem[3];
+extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
extern int sysctl_sctp_wmem[3];
@@ -203,7 +203,7 @@ static ctl_table sctp_table[] = {
.data = &sysctl_sctp_mem,
.maxlen = sizeof(sysctl_sctp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "sctp_rmem",
_
^ permalink raw reply
* Re: Linux 2.6.35/TIPC 2.0 ABI breaking changes
From: Leandro Lucarella @ 2010-10-20 22:59 UTC (permalink / raw)
To: Jon Maloy
Cc: netdev@vger.kernel.org, tipc-discussion@lists.sourceforge.net,
linux-kernel@vger.kernel.org, Neil Horman, David Miller
In-Reply-To: <20101020192350.GR8781@llucax.com.ar>
Leandro Lucarella, el 20 de octubre a las 16:23 me escribiste:
> Leandro Lucarella, el 20 de octubre a las 15:28 me escribiste:
> > Jon Maloy, el 20 de octubre a las 14:10 me escribiste:
> > > <...>
> > > > >
> > > > > Remember, permitting both is a superset of the current one
> > > > (NBO only)
> > > > > so it is fully backwards compatible. We break absolutely nothing by
> > > > > permitting this.
> > > > >
> > > > That's effectively reverting both our patches though, isn't it
> > > > (not that I'm disagreeing with it, just looking for
> > > > clarification). If we revert my patch and reintroduce the
> > > > htohl mechanism which tracks endianness, we might as well
> > > > revert the TIPC_SUB_SERVICE flag as well, yeah?
> > >
> > > Absolutely. I think it was a mistake to change that value.
> > > But I don't think we need to reintroduce the htohl(). That
> > > was just one way of doing it. If I understood your suggestion
> > > from yesterday correctly you converted the whole message within
> > > one if() clause, without any htohl(). I have no problem with
> > > that approach.
> >
> > There is a difference between both solutions, the htohl() version
> > tracked the need for swap as a struct subscription member (which was
> > used when sending back events). Neil's patch doesn't do that tracking.
> > I don't really know the implications of this, but maybe it would be
> > a wise idea to stay on the safe side and revert both patches for now.
>
> BTW, I tried 2.6.37 reverting both offending patches and everything
> seems to work well.
I meant 2.6.35.7.
--
Leandro Lucarella (AKA luca) http://llucax.com.ar/
----------------------------------------------------------------------
GPG Key: 5F5A8D05 (F8CD F9A7 BF00 5431 4145 104C 949E BFB6 5F5A 8D05)
----------------------------------------------------------------------
Karma police
arrest this girl,
her Hitler hairdo
is making me feel ill
and we have crashed her party.
------------------------------------------------------------------------------
Nokia and AT&T present the 2010 Calling All Innovators-North America contest
Create new apps & games for the Nokia N8 for consumers in U.S. and Canada
$10 million total in prizes - $4M cash, 500 devices, nearly $6M in marketing
Develop with Nokia Qt SDK, Web Runtime, or Java and Publish to Ovi Store
http://p.sf.net/sfu/nokia-dev2dev
^ permalink raw reply
* Re: [GIT PULL net-2.6] vhost-net: access_ok fix
From: Michael S. Tsirkin @ 2010-10-20 22:43 UTC (permalink / raw)
To: David Miller; +Cc: kvm, virtualization, netdev, linux-kernel
In-Reply-To: <20101019145901.GA16025@redhat.com>
On Tue, Oct 19, 2010 at 04:59:01PM +0200, Michael S. Tsirkin wrote:
> David,
> Not sure if it's too late for 2.6.36 - in case it's not, the following tree
> includes a last minute bugfix for vhost-net, found by code inspection.
Um, I see it was too late :) Never mind ...
--
MST
^ permalink raw reply
* Re: kernel panic in fib_rules_lookup [2.6.27.7 vendor-patched]
From: Eric Dumazet @ 2010-10-20 22:05 UTC (permalink / raw)
To: Joe Buehler; +Cc: netdev
In-Reply-To: <4CBF2A3F.2070108@cox.net>
Le mercredi 20 octobre 2010 à 13:43 -0400, Joe Buehler a écrit :
> Eric Dumazet wrote:
>
> > 2.6.27 is a bit old, you might try :
> >
> > commit 7fa7cb7109d07c29ab28bb877bc7049a0150dbe5
> > Author: Eric Dumazet <eric.dumazet@gmail.com>
> > Date: Mon Sep 27 04:18:27 2010 +0000
>
> Alas, after more load testing I find that the panic still occurs:
>
> CPU 1 Unable to handle kernel paging request at virtual address
> 0000000000000000, epc == ffffffff8146728c, ra == ffffffff81467258
> Oops[#1]:
> Cpu 1
> $ 0 : 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> $ 4 : ffffffffffffffff a80000008c9d78f0 a80000009acd9880 000000000a205a7c
> $ 8 : 0000000000000000 0000000000000020 a80000009a9c49d0 0000000000000000
> $12 : ffffffff8155de00 0000000000000004 0000000000000001 0000000000000000
> $16 : 0000000000000000 a80000008c9d78f0 0000000000000002 a80000009b94ed80
> $20 : a80000009b94edf8 0000000000000000 0000000000000003 a80000008c9d78a0
> $24 : 0000000000000000 ffffffff812df388
> $28 : a80000008c9d4000 a80000008c9d7840 fffffffffffffff5 ffffffff81467258
> Hi : 0000000000000000
> Lo : 0000000000000000
> epc : ffffffff8146728c fib_rules_lookup+0x11c/0x260
> Not tainted
> ra : ffffffff81467258 fib_rules_lookup+0xe8/0x260
> Status: 1010cce3 KX SX UX KERNEL EXL IE
> Cause : 00800008
> BadVA : 0000000000000000
> PrId : 000d0409 (Cavium Octeon)
> Modules linked in: x_tables ip_tables iptable_filter nf_conntrack
> nf_conntrack_ipv4 nf_nat iptable_nat tun xt_tcpudp xt_state ipt_REJECT
> ipv6 ip6_tables ip6table_filter ip6t_ipv6header ip6t_REJECT
> Process qscope7500 (pid: 1343, threadinfo=a80000008c9d4000,
> task=a80000008c068ac0, tls=000000002d51e920)
> Stack : ffffffffffffffff 0000000000000003 a80000008c9d78d8 a80000008c9d79d8
> a80000008c9d78f0 0000000000000000 ffffffff816c39c0 ffffffffffffffff
> 0000000000000003 00000000000004a6 0000000000000000 ffffffff814bd2a4
> 0000000000000000 a80000008c9d78d8 0000000000000000 ffffffffc001aa24
> a80000008c9d78d8 ffffffff81478a38 0000000000000003 0000000000000001
> 0000000000000000 0000000000000000 0000000000000001 000000000a205a7c
> 0a2059bf00000000 0000000000000000 0000000000000000 0000000000000000
> 0000000000000000 0000000000000000 a80000008c9d79d0 0000000000000000
> a80000008c9d79d8 a8000000994ff380 0000000000000000 0000000000000000
> a80000008c9d79d0 ffffffff816c39c0 0000000000000003 00000000000004a6
> ...
> Call Trace:
> [<ffffffff8146728c>] fib_rules_lookup+0x11c/0x260
> [<ffffffff814bd2a4>] fib_lookup+0x2c/0x48
> [<ffffffff81478a38>] __ip_route_output_key+0x918/0xf38
> [<ffffffff81479090>] ip_route_output_flow+0x38/0x2e8
> [<ffffffff81482c44>] ip_queue_xmit+0x38c/0x3a8
> [<ffffffff81497f5c>] tcp_transmit_skb+0x3f4/0x7d0
> [<ffffffff8149af2c>] __tcp_push_pending_frames+0x1fc/0x9e0
> [<ffffffff8148cf50>] tcp_sendmsg+0x900/0xe00
> [<ffffffff81441f7c>] sock_aio_write+0x16c/0x190
> [<ffffffff811d0424>] do_sync_write+0xbc/0x130
> [<ffffffff811d10a8>] vfs_write+0x150/0x158
> [<ffffffff811d124c>] sys_write+0x5c/0x118
> [<ffffffff8114532c>] handle_sys+0x12c/0x148
>
>
> Code: 0040282d 00000000 de100000 <de020000> cc400000 1614ffce
> 00000000 2405fffd dfbf0058
> Fatal exception: panic in 5 seconds
> Kernel panic - not syncing: Fatal exception
> Rebooting in 1 seconds..
Thanks Joe
Could you provide a disassembly of function fib_rules_lookup ?
^ permalink raw reply
* [PATCH 2/2] r6040: bump to version 0.27 and release date of 20Oct2010
From: Florian Fainelli @ 2010-10-20 21:09 UTC (permalink / raw)
To: netdev, David Miller
Signed-off-by: Florian Fainelli <florian@openwrt.org>
---
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 3843363..7294cec 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -49,8 +49,8 @@
#include <asm/processor.h>
#define DRV_NAME "r6040"
-#define DRV_VERSION "0.26"
-#define DRV_RELDATE "30May2010"
+#define DRV_VERSION "0.27"
+#define DRV_RELDATE "20Oct2010"
/* PHY CHIP Address */
#define PHY1_ADDR 1 /* For MAC1 */
^ permalink raw reply related
* [PATCH 1/2] r6040: fix multicast operations
From: Florian Fainelli @ 2010-10-20 21:09 UTC (permalink / raw)
To: netdev, Shawn Lin, Marc Leclerc, Albert Chen, David Miller
This patch fixes the following issues with the r6040 NIC operating in
multicast:
1) When the IFF_ALLMULTI flag is set, we should write 0xffff to the NIC hash
table registers to make it process multicast traffic
2) When the number of multicast addresses to handle is not greater than MCAST_MAX
we should use the NIC multicast registers MID1_{L,M,H}.
3) The hashing of the address was not correct, due to an invalid subtraction
(15 - (crc & 0x0f)) instead of (crc & 0x0f)
Reported-by: Marc Leclerc <marc-leclerc@signaturealpha.com>
Tested-by: Marc Leclerc <marc-leclerc@signaturealpha.com>
Signed-off-by: Shawn Lin <shawn@dmp.com.tw>
Signed-off-by: Albert Chen <albert.chen@rdc.com.tw>
Signed-off-by: Florian Fainelli <florian@openwrt.org>
CC: stable@kernel.org
---
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 68a8419..3843363 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -852,74 +852,90 @@ static void r6040_multicast_list(struct net_device *dev)
struct r6040_private *lp = netdev_priv(dev);
void __iomem *ioaddr = lp->base;
u16 *adrp;
- u16 reg;
unsigned long flags;
struct netdev_hw_addr *ha;
int i;
- /* MAC Address */
- adrp = (u16 *)dev->dev_addr;
- iowrite16(adrp[0], ioaddr + MID_0L);
- iowrite16(adrp[1], ioaddr + MID_0M);
- iowrite16(adrp[2], ioaddr + MID_0H);
-
- /* Promiscous Mode */
spin_lock_irqsave(&lp->lock, flags);
/* Clear AMCP & PROM bits */
- reg = ioread16(ioaddr) & ~0x0120;
- if (dev->flags & IFF_PROMISC) {
- reg |= 0x0020;
+ lp->mcr0 = ioread16(ioaddr) & ~0x0120;
+
+ /* Promiscuous Mode */
+ if (dev->flags & IFF_PROMISC)
lp->mcr0 |= 0x0020;
- }
- /* Too many multicast addresses
- * accept all traffic */
- else if ((netdev_mc_count(dev) > MCAST_MAX) ||
- (dev->flags & IFF_ALLMULTI))
- reg |= 0x0020;
- iowrite16(reg, ioaddr);
- spin_unlock_irqrestore(&lp->lock, flags);
+ /* Enable multicast hash table function to
+ * receive all multicast packets.
+ */
+ else if (dev->flags & IFF_ALLMULTI) {
+ lp->mcr0 |= 0x0100;
- /* Build the hash table */
- if (netdev_mc_count(dev) > MCAST_MAX) {
- u16 hash_table[4];
+ for (i = 0; i < MCAST_MAX ; i++) {
+ iowrite16(0, ioaddr + MID_1L + 8 * i);
+ iowrite16(0, ioaddr + MID_1M + 8 * i);
+ iowrite16(0, ioaddr + MID_1H + 8 * i);
+ }
+
+ iowrite16(0xffff, ioaddr + MAR0);
+ iowrite16(0xffff, ioaddr + MAR1);
+ iowrite16(0xffff, ioaddr + MAR2);
+ iowrite16(0xffff, ioaddr + MAR3);
+ }
+
+ /* Use internal multicast address registers
+ * if the number of multicast addresses is not greater than MCAST_MAX.
+ */
+ else if (netdev_mc_empty(dev)) {
+ for (i = 0; i < MCAST_MAX ; i++) {
+ iowrite16(0, ioaddr + MID_1L + 8 * i);
+ iowrite16(0, ioaddr + MID_1M + 8 * i);
+ iowrite16(0, ioaddr + MID_1H + 8 * i);
+ }
+ } else if (netdev_mc_count(dev) <= MCAST_MAX) {
+ i = 0;
+ netdev_for_each_mc_addr(ha, dev) {
+ adrp = (u16 *) ha->addr;
+ iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
+ iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
+ iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
+ i++;
+ }
+ }
+ /* Otherwise, Enable multicast hash table function. */
+ else {
+ u16 hash_table[4] = { 0, };
u32 crc;
- for (i = 0; i < 4; i++)
- hash_table[i] = 0;
+ lp->mcr0 |= 0x0100;
+ for (i = 0; i < MCAST_MAX ; i++) {
+ iowrite16(0, ioaddr + MID_1L + 8 * i);
+ iowrite16(0, ioaddr + MID_1M + 8 * i);
+ iowrite16(0, ioaddr + MID_1H + 8 * i);
+ }
+
+ /* Build multicast hash table */
netdev_for_each_mc_addr(ha, dev) {
char *addrs = ha->addr;
if (!(*addrs & 1))
continue;
- crc = ether_crc_le(6, addrs);
+ crc = ether_crc(ETH_ALEN, addrs);
crc >>= 26;
- hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf));
+ hash_table[crc >> 4] |= 1 << (crc & 0xf);
}
+
/* Fill the MAC hash tables with their values */
iowrite16(hash_table[0], ioaddr + MAR0);
iowrite16(hash_table[1], ioaddr + MAR1);
iowrite16(hash_table[2], ioaddr + MAR2);
iowrite16(hash_table[3], ioaddr + MAR3);
}
- /* Multicast Address 1~4 case */
- i = 0;
- netdev_for_each_mc_addr(ha, dev) {
- if (i < MCAST_MAX) {
- adrp = (u16 *) ha->addr;
- iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
- iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
- iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
- } else {
- iowrite16(0xffff, ioaddr + MID_1L + 8 * i);
- iowrite16(0xffff, ioaddr + MID_1M + 8 * i);
- iowrite16(0xffff, ioaddr + MID_1H + 8 * i);
- }
- i++;
- }
+ iowrite16(lp->mcr0, ioaddr);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
}
static void netdev_get_drvinfo(struct net_device *dev,
^ permalink raw reply related
* [net-next-2.6 PATCH 5/5] enic: Fix log message
From: Vasanthy Kolluri @ 2010-10-20 20:17 UTC (permalink / raw)
To: davem; +Cc: netdev, roprabhu, dwang2
In-Reply-To: <20101020201609.26870.80308.stgit@savbu-pc100.cisco.com>
From: Vasanthy Kolluri <vkolluri@cisco.com>
Fix a log message: report the failure to set the ingress vlan rewrite mode with dev_err() instead of netdev_err().
Signed-off-by: Vasanthy Kolluri <vkolluri@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David Wang <dwang2@cisco.com>
---
drivers/net/enic/enic_main.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index a63d2ba..f47fbb6 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -2428,7 +2428,7 @@ static int enic_dev_init(struct enic *enic)
err = enic_dev_set_ig_vlan_rewrite_mode(enic);
if (err) {
- netdev_err(netdev,
+ dev_err(dev,
"Failed to set ingress vlan rewrite mode, aborting.\n");
goto err_out_free_vnic_resources;
}
^ permalink raw reply related
* [net-next-2.6 PATCH 4/5] enic: Change min MTU
From: Vasanthy Kolluri @ 2010-10-20 20:17 UTC (permalink / raw)
To: davem; +Cc: netdev, roprabhu, dwang2
In-Reply-To: <20101020201609.26870.80308.stgit@savbu-pc100.cisco.com>
From: Vasanthy Kolluri <vkolluri@cisco.com>
Change min MTU to 68.
Signed-off-by: Vasanthy Kolluri <vkolluri@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David Wang <dwang2@cisco.com>
---
drivers/net/enic/enic_res.h | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/enic/enic_res.h b/drivers/net/enic/enic_res.h
index 83bd172..9a103d9 100644
--- a/drivers/net/enic/enic_res.h
+++ b/drivers/net/enic/enic_res.h
@@ -30,7 +30,7 @@
#define ENIC_MIN_RQ_DESCS 64
#define ENIC_MAX_RQ_DESCS 4096
-#define ENIC_MIN_MTU 576 /* minimum for IPv4 */
+#define ENIC_MIN_MTU 68
#define ENIC_MAX_MTU 9000
#define ENIC_MULTICAST_PERFECT_FILTERS 32
^ permalink raw reply related
* [net-next-2.6 PATCH 3/5] enic: Replace firmware devcmd CMD_ENABLE with CMD_ENABLE_WAIT
From: Vasanthy Kolluri @ 2010-10-20 20:17 UTC (permalink / raw)
To: davem; +Cc: netdev, roprabhu, dwang2
In-Reply-To: <20101020201609.26870.80308.stgit@savbu-pc100.cisco.com>
From: Vasanthy Kolluri <vkolluri@cisco.com>
Replace the no-wait CMD_ENABLE firmware devcmd with CMD_ENABLE_WAIT, falling back to CMD_ENABLE when the firmware reports the new command as unknown.
Signed-off-by: Vasanthy Kolluri <vkolluri@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David Wang <dwang2@cisco.com>
---
drivers/net/enic/enic_main.c | 2 +-
drivers/net/enic/vnic_dev.c | 10 ++++++++--
drivers/net/enic/vnic_dev.h | 2 +-
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 622106d..a63d2ba 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -1787,7 +1787,7 @@ static int enic_dev_enable(struct enic *enic)
int err;
spin_lock(&enic->devcmd_lock);
- err = vnic_dev_enable(enic->vdev);
+ err = vnic_dev_enable_wait(enic->vdev);
spin_unlock(&enic->devcmd_lock);
return err;
diff --git a/drivers/net/enic/vnic_dev.c b/drivers/net/enic/vnic_dev.c
index 11dc8f7..fb35d8b 100644
--- a/drivers/net/enic/vnic_dev.c
+++ b/drivers/net/enic/vnic_dev.c
@@ -487,11 +487,17 @@ int vnic_dev_close(struct vnic_dev *vdev)
return vnic_dev_cmd(vdev, CMD_CLOSE, &a0, &a1, wait);
}
-int vnic_dev_enable(struct vnic_dev *vdev)
+int vnic_dev_enable_wait(struct vnic_dev *vdev)
{
u64 a0 = 0, a1 = 0;
int wait = 1000;
- return vnic_dev_cmd(vdev, CMD_ENABLE, &a0, &a1, wait);
+ int err;
+
+ err = vnic_dev_cmd(vdev, CMD_ENABLE_WAIT, &a0, &a1, wait);
+ if (err == ERR_ECMDUNKNOWN)
+ return vnic_dev_cmd(vdev, CMD_ENABLE, &a0, &a1, wait);
+
+ return err;
}
int vnic_dev_disable(struct vnic_dev *vdev)
diff --git a/drivers/net/enic/vnic_dev.h b/drivers/net/enic/vnic_dev.h
index 3f00143..05f9a24 100644
--- a/drivers/net/enic/vnic_dev.h
+++ b/drivers/net/enic/vnic_dev.h
@@ -111,7 +111,7 @@ u32 vnic_dev_port_speed(struct vnic_dev *vdev);
u32 vnic_dev_msg_lvl(struct vnic_dev *vdev);
u32 vnic_dev_mtu(struct vnic_dev *vdev);
int vnic_dev_close(struct vnic_dev *vdev);
-int vnic_dev_enable(struct vnic_dev *vdev);
+int vnic_dev_enable_wait(struct vnic_dev *vdev);
int vnic_dev_disable(struct vnic_dev *vdev);
int vnic_dev_open(struct vnic_dev *vdev, int arg);
int vnic_dev_open_done(struct vnic_dev *vdev, int *done);
^ permalink raw reply related
* [net-next-2.6 PATCH 2/5] enic: Make firmware cognizant of the user set mac address
From: Vasanthy Kolluri @ 2010-10-20 20:17 UTC (permalink / raw)
To: davem; +Cc: netdev, roprabhu, dwang2
In-Reply-To: <20101020201609.26870.80308.stgit@savbu-pc100.cisco.com>
From: Vasanthy Kolluri <vkolluri@cisco.com>
Let the firmware know about the MAC address set by the user via ndo_set_mac_address.
Signed-off-by: Vasanthy Kolluri <vkolluri@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David Wang <dwang2@cisco.com>
---
drivers/net/enic/enic_main.c | 13 ++++++++++++-
1 files changed, 12 insertions(+), 1 deletions(-)
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index dcfc541..622106d 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -974,8 +974,19 @@ static int enic_set_mac_address_dynamic(struct net_device *netdev, void *p)
static int enic_set_mac_address(struct net_device *netdev, void *p)
{
struct sockaddr *saddr = p;
+ char *addr = saddr->sa_data;
+ struct enic *enic = netdev_priv(netdev);
+ int err;
+
+ err = enic_dev_del_station_addr(enic);
+ if (err)
+ return err;
+
+ err = enic_set_mac_addr(netdev, addr);
+ if (err)
+ return err;
- return enic_set_mac_addr(netdev, (char *)saddr->sa_data);
+ return enic_dev_add_station_addr(enic);
}
static int enic_dev_packet_filter(struct enic *enic, int directed,
^ permalink raw reply related
* [net-next-2.6 PATCH 1/5] enic: Add support for multiple hardware receive queues
From: Vasanthy Kolluri @ 2010-10-20 20:16 UTC (permalink / raw)
To: davem; +Cc: netdev, roprabhu, dwang2
In-Reply-To: <20101020201609.26870.80308.stgit@savbu-pc100.cisco.com>
From: Vasanthy Kolluri <vkolluri@cisco.com>
Add support for multiple hardware receive queues. The ingress traffic is hashed into one of the receive queues based on the IP header, the TCP header, or both. The maximum number of receive queues supported is 8.
Signed-off-by: Vasanthy Kolluri <vkolluri@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David Wang <dwang2@cisco.com>
---
drivers/net/enic/enic.h | 28 +--
drivers/net/enic/enic_main.c | 373 +++++++++++++++++++++++++++++++++---------
drivers/net/enic/enic_res.c | 49 +++---
drivers/net/enic/enic_res.h | 2
drivers/net/enic/vnic_rss.h | 40 +++++
5 files changed, 368 insertions(+), 124 deletions(-)
create mode 100644 drivers/net/enic/vnic_rss.h
diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h
index ae62320..c91d364 100644
--- a/drivers/net/enic/enic.h
+++ b/drivers/net/enic/enic.h
@@ -28,10 +28,11 @@
#include "vnic_intr.h"
#include "vnic_stats.h"
#include "vnic_nic.h"
+#include "vnic_rss.h"
#define DRV_NAME "enic"
#define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION "1.4.1.2a"
+#define DRV_VERSION "1.4.1.6"
#define DRV_COPYRIGHT "Copyright 2008-2010 Cisco Systems, Inc"
#define ENIC_BARS_MAX 6
@@ -41,25 +42,6 @@
#define ENIC_CQ_MAX (ENIC_WQ_MAX + ENIC_RQ_MAX)
#define ENIC_INTR_MAX (ENIC_CQ_MAX + 2)
-enum enic_cq_index {
- ENIC_CQ_RQ,
- ENIC_CQ_WQ,
-};
-
-enum enic_intx_intr_index {
- ENIC_INTX_WQ_RQ,
- ENIC_INTX_ERR,
- ENIC_INTX_NOTIFY,
-};
-
-enum enic_msix_intr_index {
- ENIC_MSIX_RQ,
- ENIC_MSIX_WQ,
- ENIC_MSIX_ERR,
- ENIC_MSIX_NOTIFY,
- ENIC_MSIX_MAX,
-};
-
struct enic_msix_entry {
int requested;
char devname[IFNAMSIZ];
@@ -90,8 +72,8 @@ struct enic {
struct vnic_dev *vdev;
struct timer_list notify_timer;
struct work_struct reset;
- struct msix_entry msix_entry[ENIC_MSIX_MAX];
- struct enic_msix_entry msix[ENIC_MSIX_MAX];
+ struct msix_entry msix_entry[ENIC_INTR_MAX];
+ struct enic_msix_entry msix[ENIC_INTR_MAX];
u32 msg_enable;
spinlock_t devcmd_lock;
u8 mac_addr[ETH_ALEN];
@@ -118,7 +100,7 @@ struct enic {
int (*rq_alloc_buf)(struct vnic_rq *rq);
u64 rq_truncated_pkts;
u64 rq_bad_fcs;
- struct napi_struct napi;
+ struct napi_struct napi[ENIC_RQ_MAX];
/* interrupt resource cache line section */
____cacheline_aligned struct vnic_intr intr[ENIC_INTR_MAX];
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index a1f92f1..dcfc541 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -122,6 +122,51 @@ static int enic_is_dynamic(struct enic *enic)
return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
}
+static inline unsigned int enic_cq_rq(struct enic *enic, unsigned int rq)
+{
+ return rq;
+}
+
+static inline unsigned int enic_cq_wq(struct enic *enic, unsigned int wq)
+{
+ return enic->rq_count + wq;
+}
+
+static inline unsigned int enic_legacy_io_intr(void)
+{
+ return 0;
+}
+
+static inline unsigned int enic_legacy_err_intr(void)
+{
+ return 1;
+}
+
+static inline unsigned int enic_legacy_notify_intr(void)
+{
+ return 2;
+}
+
+static inline unsigned int enic_msix_rq_intr(struct enic *enic, unsigned int rq)
+{
+ return rq;
+}
+
+static inline unsigned int enic_msix_wq_intr(struct enic *enic, unsigned int wq)
+{
+ return enic->rq_count + wq;
+}
+
+static inline unsigned int enic_msix_err_intr(struct enic *enic)
+{
+ return enic->rq_count + enic->wq_count;
+}
+
+static inline unsigned int enic_msix_notify_intr(struct enic *enic)
+{
+ return enic->rq_count + enic->wq_count + 1;
+}
+
static int enic_get_settings(struct net_device *netdev,
struct ethtool_cmd *ecmd)
{
@@ -306,6 +351,7 @@ static int enic_set_coalesce(struct net_device *netdev,
struct enic *enic = netdev_priv(netdev);
u32 tx_coalesce_usecs;
u32 rx_coalesce_usecs;
+ unsigned int i, intr;
tx_coalesce_usecs = min_t(u32,
INTR_COALESCE_HW_TO_USEC(VNIC_INTR_TIMER_MAX),
@@ -319,7 +365,8 @@ static int enic_set_coalesce(struct net_device *netdev,
if (tx_coalesce_usecs != rx_coalesce_usecs)
return -EINVAL;
- vnic_intr_coalescing_timer_set(&enic->intr[ENIC_INTX_WQ_RQ],
+ intr = enic_legacy_io_intr();
+ vnic_intr_coalescing_timer_set(&enic->intr[intr],
INTR_COALESCE_USEC_TO_HW(tx_coalesce_usecs));
break;
case VNIC_DEV_INTR_MODE_MSI:
@@ -330,10 +377,18 @@ static int enic_set_coalesce(struct net_device *netdev,
INTR_COALESCE_USEC_TO_HW(tx_coalesce_usecs));
break;
case VNIC_DEV_INTR_MODE_MSIX:
- vnic_intr_coalescing_timer_set(&enic->intr[ENIC_MSIX_WQ],
- INTR_COALESCE_USEC_TO_HW(tx_coalesce_usecs));
- vnic_intr_coalescing_timer_set(&enic->intr[ENIC_MSIX_RQ],
- INTR_COALESCE_USEC_TO_HW(rx_coalesce_usecs));
+ for (i = 0; i < enic->wq_count; i++) {
+ intr = enic_msix_wq_intr(enic, i);
+ vnic_intr_coalescing_timer_set(&enic->intr[intr],
+ INTR_COALESCE_USEC_TO_HW(tx_coalesce_usecs));
+ }
+
+ for (i = 0; i < enic->rq_count; i++) {
+ intr = enic_msix_rq_intr(enic, i);
+ vnic_intr_coalescing_timer_set(&enic->intr[intr],
+ INTR_COALESCE_USEC_TO_HW(rx_coalesce_usecs));
+ }
+
break;
default:
break;
@@ -482,34 +537,37 @@ static irqreturn_t enic_isr_legacy(int irq, void *data)
{
struct net_device *netdev = data;
struct enic *enic = netdev_priv(netdev);
+ unsigned int io_intr = enic_legacy_io_intr();
+ unsigned int err_intr = enic_legacy_err_intr();
+ unsigned int notify_intr = enic_legacy_notify_intr();
u32 pba;
- vnic_intr_mask(&enic->intr[ENIC_INTX_WQ_RQ]);
+ vnic_intr_mask(&enic->intr[io_intr]);
pba = vnic_intr_legacy_pba(enic->legacy_pba);
if (!pba) {
- vnic_intr_unmask(&enic->intr[ENIC_INTX_WQ_RQ]);
+ vnic_intr_unmask(&enic->intr[io_intr]);
return IRQ_NONE; /* not our interrupt */
}
- if (ENIC_TEST_INTR(pba, ENIC_INTX_NOTIFY)) {
- vnic_intr_return_all_credits(&enic->intr[ENIC_INTX_NOTIFY]);
+ if (ENIC_TEST_INTR(pba, notify_intr)) {
+ vnic_intr_return_all_credits(&enic->intr[notify_intr]);
enic_notify_check(enic);
}
- if (ENIC_TEST_INTR(pba, ENIC_INTX_ERR)) {
- vnic_intr_return_all_credits(&enic->intr[ENIC_INTX_ERR]);
+ if (ENIC_TEST_INTR(pba, err_intr)) {
+ vnic_intr_return_all_credits(&enic->intr[err_intr]);
enic_log_q_error(enic);
/* schedule recovery from WQ/RQ error */
schedule_work(&enic->reset);
return IRQ_HANDLED;
}
- if (ENIC_TEST_INTR(pba, ENIC_INTX_WQ_RQ)) {
- if (napi_schedule_prep(&enic->napi))
- __napi_schedule(&enic->napi);
+ if (ENIC_TEST_INTR(pba, io_intr)) {
+ if (napi_schedule_prep(&enic->napi[0]))
+ __napi_schedule(&enic->napi[0]);
} else {
- vnic_intr_unmask(&enic->intr[ENIC_INTX_WQ_RQ]);
+ vnic_intr_unmask(&enic->intr[io_intr]);
}
return IRQ_HANDLED;
@@ -535,17 +593,17 @@ static irqreturn_t enic_isr_msi(int irq, void *data)
* writes).
*/
- napi_schedule(&enic->napi);
+ napi_schedule(&enic->napi[0]);
return IRQ_HANDLED;
}
static irqreturn_t enic_isr_msix_rq(int irq, void *data)
{
- struct enic *enic = data;
+ struct napi_struct *napi = data;
/* schedule NAPI polling for RQ cleanup */
- napi_schedule(&enic->napi);
+ napi_schedule(napi);
return IRQ_HANDLED;
}
@@ -553,13 +611,15 @@ static irqreturn_t enic_isr_msix_rq(int irq, void *data)
static irqreturn_t enic_isr_msix_wq(int irq, void *data)
{
struct enic *enic = data;
+ unsigned int cq = enic_cq_wq(enic, 0);
+ unsigned int intr = enic_msix_wq_intr(enic, 0);
unsigned int wq_work_to_do = -1; /* no limit */
unsigned int wq_work_done;
- wq_work_done = vnic_cq_service(&enic->cq[ENIC_CQ_WQ],
+ wq_work_done = vnic_cq_service(&enic->cq[cq],
wq_work_to_do, enic_wq_service, NULL);
- vnic_intr_return_credits(&enic->intr[ENIC_MSIX_WQ],
+ vnic_intr_return_credits(&enic->intr[intr],
wq_work_done,
1 /* unmask intr */,
1 /* reset intr timer */);
@@ -570,8 +630,9 @@ static irqreturn_t enic_isr_msix_wq(int irq, void *data)
static irqreturn_t enic_isr_msix_err(int irq, void *data)
{
struct enic *enic = data;
+ unsigned int intr = enic_msix_err_intr(enic);
- vnic_intr_return_all_credits(&enic->intr[ENIC_MSIX_ERR]);
+ vnic_intr_return_all_credits(&enic->intr[intr]);
enic_log_q_error(enic);
@@ -584,8 +645,9 @@ static irqreturn_t enic_isr_msix_err(int irq, void *data)
static irqreturn_t enic_isr_msix_notify(int irq, void *data)
{
struct enic *enic = data;
+ unsigned int intr = enic_msix_notify_intr(enic);
- vnic_intr_return_all_credits(&enic->intr[ENIC_MSIX_NOTIFY]);
+ vnic_intr_return_all_credits(&enic->intr[intr]);
enic_notify_check(enic);
return IRQ_HANDLED;
@@ -1409,8 +1471,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
(vlan_tci & CQ_ENET_RQ_DESC_VLAN_TCI_VLAN_MASK)) {
if (netdev->features & NETIF_F_GRO)
- vlan_gro_receive(&enic->napi, enic->vlan_group,
- vlan_tci, skb);
+ vlan_gro_receive(&enic->napi[q_number],
+ enic->vlan_group, vlan_tci, skb);
else
vlan_hwaccel_receive_skb(skb,
enic->vlan_group, vlan_tci);
@@ -1418,12 +1480,11 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
} else {
if (netdev->features & NETIF_F_GRO)
- napi_gro_receive(&enic->napi, skb);
+ napi_gro_receive(&enic->napi[q_number], skb);
else
netif_receive_skb(skb);
}
-
} else {
/* Buffer overflow
@@ -1447,7 +1508,11 @@ static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
static int enic_poll(struct napi_struct *napi, int budget)
{
- struct enic *enic = container_of(napi, struct enic, napi);
+ struct net_device *netdev = napi->dev;
+ struct enic *enic = netdev_priv(netdev);
+ unsigned int cq_rq = enic_cq_rq(enic, 0);
+ unsigned int cq_wq = enic_cq_wq(enic, 0);
+ unsigned int intr = enic_legacy_io_intr();
unsigned int rq_work_to_do = budget;
unsigned int wq_work_to_do = -1; /* no limit */
unsigned int work_done, rq_work_done, wq_work_done;
@@ -1456,10 +1521,10 @@ static int enic_poll(struct napi_struct *napi, int budget)
/* Service RQ (first) and WQ
*/
- rq_work_done = vnic_cq_service(&enic->cq[ENIC_CQ_RQ],
+ rq_work_done = vnic_cq_service(&enic->cq[cq_rq],
rq_work_to_do, enic_rq_service, NULL);
- wq_work_done = vnic_cq_service(&enic->cq[ENIC_CQ_WQ],
+ wq_work_done = vnic_cq_service(&enic->cq[cq_wq],
wq_work_to_do, enic_wq_service, NULL);
/* Accumulate intr event credits for this polling
@@ -1470,7 +1535,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
work_done = rq_work_done + wq_work_done;
if (work_done > 0)
- vnic_intr_return_credits(&enic->intr[ENIC_INTX_WQ_RQ],
+ vnic_intr_return_credits(&enic->intr[intr],
work_done,
0 /* don't unmask intr */,
0 /* don't reset intr timer */);
@@ -1491,7 +1556,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
*/
napi_complete(napi);
- vnic_intr_unmask(&enic->intr[ENIC_INTX_WQ_RQ]);
+ vnic_intr_unmask(&enic->intr[intr]);
}
return rq_work_done;
@@ -1499,7 +1564,11 @@ static int enic_poll(struct napi_struct *napi, int budget)
static int enic_poll_msix(struct napi_struct *napi, int budget)
{
- struct enic *enic = container_of(napi, struct enic, napi);
+ struct net_device *netdev = napi->dev;
+ struct enic *enic = netdev_priv(netdev);
+ unsigned int rq = (napi - &enic->napi[0]);
+ unsigned int cq = enic_cq_rq(enic, rq);
+ unsigned int intr = enic_msix_rq_intr(enic, rq);
unsigned int work_to_do = budget;
unsigned int work_done;
int err;
@@ -1507,7 +1576,7 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
/* Service RQ
*/
- work_done = vnic_cq_service(&enic->cq[ENIC_CQ_RQ],
+ work_done = vnic_cq_service(&enic->cq[cq],
work_to_do, enic_rq_service, NULL);
/* Return intr event credits for this polling
@@ -1516,12 +1585,12 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
*/
if (work_done > 0)
- vnic_intr_return_credits(&enic->intr[ENIC_MSIX_RQ],
+ vnic_intr_return_credits(&enic->intr[intr],
work_done,
0 /* don't unmask intr */,
0 /* don't reset intr timer */);
- err = vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
+ err = vnic_rq_fill(&enic->rq[rq], enic->rq_alloc_buf);
/* Buffer allocation failed. Stay in polling mode
* so we can try to fill the ring again.
@@ -1537,7 +1606,7 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
*/
napi_complete(napi);
- vnic_intr_unmask(&enic->intr[ENIC_MSIX_RQ]);
+ vnic_intr_unmask(&enic->intr[intr]);
}
return work_done;
@@ -1579,7 +1648,7 @@ static void enic_free_intr(struct enic *enic)
static int enic_request_intr(struct enic *enic)
{
struct net_device *netdev = enic->netdev;
- unsigned int i;
+ unsigned int i, intr;
int err = 0;
switch (vnic_dev_get_intr_mode(enic->vdev)) {
@@ -1598,27 +1667,38 @@ static int enic_request_intr(struct enic *enic)
case VNIC_DEV_INTR_MODE_MSIX:
- sprintf(enic->msix[ENIC_MSIX_RQ].devname,
- "%.11s-rx-0", netdev->name);
- enic->msix[ENIC_MSIX_RQ].isr = enic_isr_msix_rq;
- enic->msix[ENIC_MSIX_RQ].devid = enic;
+ for (i = 0; i < enic->rq_count; i++) {
+ intr = enic_msix_rq_intr(enic, i);
+ sprintf(enic->msix[intr].devname,
+ "%.11s-rx-%d", netdev->name, i);
+ enic->msix[intr].isr = enic_isr_msix_rq;
+ enic->msix[intr].devid = &enic->napi[i];
+ }
- sprintf(enic->msix[ENIC_MSIX_WQ].devname,
- "%.11s-tx-0", netdev->name);
- enic->msix[ENIC_MSIX_WQ].isr = enic_isr_msix_wq;
- enic->msix[ENIC_MSIX_WQ].devid = enic;
+ for (i = 0; i < enic->wq_count; i++) {
+ intr = enic_msix_wq_intr(enic, i);
+ sprintf(enic->msix[intr].devname,
+ "%.11s-tx-%d", netdev->name, i);
+ enic->msix[intr].isr = enic_isr_msix_wq;
+ enic->msix[intr].devid = enic;
+ }
- sprintf(enic->msix[ENIC_MSIX_ERR].devname,
+ intr = enic_msix_err_intr(enic);
+ sprintf(enic->msix[intr].devname,
"%.11s-err", netdev->name);
- enic->msix[ENIC_MSIX_ERR].isr = enic_isr_msix_err;
- enic->msix[ENIC_MSIX_ERR].devid = enic;
+ enic->msix[intr].isr = enic_isr_msix_err;
+ enic->msix[intr].devid = enic;
- sprintf(enic->msix[ENIC_MSIX_NOTIFY].devname,
+ intr = enic_msix_notify_intr(enic);
+ sprintf(enic->msix[intr].devname,
"%.11s-notify", netdev->name);
- enic->msix[ENIC_MSIX_NOTIFY].isr = enic_isr_msix_notify;
- enic->msix[ENIC_MSIX_NOTIFY].devid = enic;
+ enic->msix[intr].isr = enic_isr_msix_notify;
+ enic->msix[intr].devid = enic;
+
+ for (i = 0; i < ARRAY_SIZE(enic->msix); i++)
+ enic->msix[i].requested = 0;
- for (i = 0; i < ARRAY_SIZE(enic->msix); i++) {
+ for (i = 0; i < enic->intr_count; i++) {
err = request_irq(enic->msix_entry[i].vector,
enic->msix[i].isr, 0,
enic->msix[i].devname,
@@ -1664,10 +1744,12 @@ static int enic_dev_notify_set(struct enic *enic)
spin_lock(&enic->devcmd_lock);
switch (vnic_dev_get_intr_mode(enic->vdev)) {
case VNIC_DEV_INTR_MODE_INTX:
- err = vnic_dev_notify_set(enic->vdev, ENIC_INTX_NOTIFY);
+ err = vnic_dev_notify_set(enic->vdev,
+ enic_legacy_notify_intr());
break;
case VNIC_DEV_INTR_MODE_MSIX:
- err = vnic_dev_notify_set(enic->vdev, ENIC_MSIX_NOTIFY);
+ err = vnic_dev_notify_set(enic->vdev,
+ enic_msix_notify_intr(enic));
break;
default:
err = vnic_dev_notify_set(enic->vdev, -1 /* no intr */);
@@ -1762,7 +1844,10 @@ static int enic_open(struct net_device *netdev)
enic_set_multicast_list(netdev);
netif_wake_queue(netdev);
- napi_enable(&enic->napi);
+
+ for (i = 0; i < enic->rq_count; i++)
+ napi_enable(&enic->napi[i]);
+
enic_dev_enable(enic);
for (i = 0; i < enic->intr_count; i++)
@@ -1797,7 +1882,10 @@ static int enic_stop(struct net_device *netdev)
del_timer_sync(&enic->notify_timer);
enic_dev_disable(enic);
- napi_disable(&enic->napi);
+
+ for (i = 0; i < enic->rq_count; i++)
+ napi_disable(&enic->napi[i]);
+
netif_carrier_off(netdev);
netif_tx_disable(netdev);
enic_dev_del_station_addr(enic);
@@ -1857,11 +1945,16 @@ static void enic_poll_controller(struct net_device *netdev)
{
struct enic *enic = netdev_priv(netdev);
struct vnic_dev *vdev = enic->vdev;
+ unsigned int i, intr;
switch (vnic_dev_get_intr_mode(vdev)) {
case VNIC_DEV_INTR_MODE_MSIX:
- enic_isr_msix_rq(enic->pdev->irq, enic);
- enic_isr_msix_wq(enic->pdev->irq, enic);
+ for (i = 0; i < enic->rq_count; i++) {
+ intr = enic_msix_rq_intr(enic, i);
+ enic_isr_msix_rq(enic->msix_entry[intr].vector, enic);
+ }
+ intr = enic_msix_wq_intr(enic, i);
+ enic_isr_msix_wq(enic->msix_entry[intr].vector, enic);
break;
case VNIC_DEV_INTR_MODE_MSI:
enic_isr_msi(enic->pdev->irq, enic);
@@ -1936,19 +2029,73 @@ static int enic_dev_hang_reset(struct enic *enic)
return err;
}
-static int enic_set_niccfg(struct enic *enic)
+/* Program the NIC's RSS hash key; returns -ENOMEM or the devcmd result. */
+static int enic_set_rsskey(struct enic *enic)
+{
+	dma_addr_t rss_key_buf_pa;	/* type pci_alloc_consistent() writes */
+	union vnic_rss_key *rss_key_buf_va = NULL;
+	union vnic_rss_key rss_key = {	/* four 10-byte ASCII strings */
+		.key[0].b = {85, 67, 83, 97, 119, 101, 115, 111, 109, 101},
+		.key[1].b = {80, 65, 76, 79, 117, 110, 105, 113, 117, 101},
+		.key[2].b = {76, 73, 78, 85, 88, 114, 111, 99, 107, 115},
+		.key[3].b = {69, 78, 73, 67, 105, 115, 99, 111, 111, 108},
+	};
+	int err;
+
+	rss_key_buf_va = pci_alloc_consistent(enic->pdev,
+		sizeof(union vnic_rss_key), &rss_key_buf_pa);
+	if (!rss_key_buf_va)
+		return -ENOMEM;
+
+	/* Stage the key in the buffer whose DMA address the command is given */
+	memcpy(rss_key_buf_va, &rss_key, sizeof(union vnic_rss_key));
+	spin_lock(&enic->devcmd_lock);
+	err = enic_set_rss_key(enic, rss_key_buf_pa,
+		sizeof(union vnic_rss_key));
+	spin_unlock(&enic->devcmd_lock);
+
+	pci_free_consistent(enic->pdev, sizeof(union vnic_rss_key),
+		rss_key_buf_va, rss_key_buf_pa);
+
+	return err;
+}
+
+/* Fill the RSS cpu indirection table round-robin over the RQs and load it. */
+static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits)
+{
+	dma_addr_t rss_cpu_buf_pa;	/* type pci_alloc_consistent() writes */
+	union vnic_rss_cpu *rss_cpu_buf_va = NULL;
+	unsigned int i;
+	int err;
+
+	rss_cpu_buf_va = pci_alloc_consistent(enic->pdev,
+		sizeof(union vnic_rss_cpu), &rss_cpu_buf_pa);
+	if (!rss_cpu_buf_va)
+		return -ENOMEM;
+
+	/* Entry i = i % rq_count; 32*4 slots exist, so bits must be <= 7 */
+	for (i = 0; i < (1 << rss_hash_bits); i++)
+		rss_cpu_buf_va->cpu[i / 4].b[i % 4] = i % enic->rq_count;
+	spin_lock(&enic->devcmd_lock);
+	err = enic_set_rss_cpu(enic, rss_cpu_buf_pa,
+		sizeof(union vnic_rss_cpu));
+	spin_unlock(&enic->devcmd_lock);
+
+	pci_free_consistent(enic->pdev, sizeof(union vnic_rss_cpu),
+		rss_cpu_buf_va, rss_cpu_buf_pa);
+
+	return err;
+}
+
+static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
+ u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
{
- const u8 rss_default_cpu = 0;
- const u8 rss_hash_type = 0;
- const u8 rss_hash_bits = 0;
- const u8 rss_base_cpu = 0;
- const u8 rss_enable = 0;
const u8 tso_ipid_split_en = 0;
const u8 ig_vlan_strip_en = 1;
int err;
- /* Enable VLAN tag stripping. RSS not enabled (yet).
- */
+ /* Enable VLAN tag stripping.
+ */
spin_lock(&enic->devcmd_lock);
err = enic_set_nic_cfg(enic,
@@ -1961,6 +2108,35 @@ static int enic_set_niccfg(struct enic *enic)
return err;
}
+/* Try to set up RSS, then apply the NIC config with RSS on or off. */
+static int enic_set_rss_nic_cfg(struct enic *enic)
+{
+	struct device *dev = enic_get_dev(enic);
+	const u8 rss_default_cpu = 0;
+	const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
+		NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 | NIC_CFG_RSS_HASH_TYPE_IPV6 |
+		NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
+	const u8 rss_hash_bits = 7;	/* 1 << 7 = 128 table entries filled */
+	const u8 rss_base_cpu = 0;
+	u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);
+
+	if (rss_enable) {	/* any setup failure falls back to RSS off */
+		if (!enic_set_rsskey(enic)) {
+			if (enic_set_rsscpu(enic, rss_hash_bits)) {
+				rss_enable = 0;
+				dev_warn(dev, "RSS disabled, "
+					"Failed to set RSS cpu indirection table.\n");
+			}
+		} else {
+			rss_enable = 0;
+			dev_warn(dev, "RSS disabled, Failed to set RSS key.\n");
+		}
+	}
+
+	return enic_set_niccfg(enic, rss_default_cpu, rss_hash_type,
+		rss_hash_bits, rss_base_cpu, rss_enable);
+}
+
static int enic_dev_hang_notify(struct enic *enic)
{
int err;
@@ -1998,7 +2174,7 @@ static void enic_reset(struct work_struct *work)
enic_dev_hang_reset(enic);
enic_reset_multicast_list(enic);
enic_init_vnic_resources(enic);
- enic_set_niccfg(enic);
+ enic_set_rss_nic_cfg(enic);
enic_dev_set_ig_vlan_rewrite_mode(enic);
enic_open(enic->netdev);
@@ -2007,12 +2183,12 @@ static void enic_reset(struct work_struct *work)
static int enic_set_intr_mode(struct enic *enic)
{
- unsigned int n = 1;
+ unsigned int n = min_t(unsigned int, enic->rq_count, ENIC_RQ_MAX);
unsigned int m = 1;
unsigned int i;
/* Set interrupt mode (INTx, MSI, MSI-X) depending
- * system capabilities.
+ * on system capabilities.
*
* Try MSI-X first
*
@@ -2025,21 +2201,47 @@ static int enic_set_intr_mode(struct enic *enic)
for (i = 0; i < n + m + 2; i++)
enic->msix_entry[i].entry = i;
- if (enic->config.intr_mode < 1 &&
+ /* Use multiple RQs if RSS is enabled
+ */
+
+ if (ENIC_SETTING(enic, RSS) &&
+ enic->config.intr_mode < 1 &&
enic->rq_count >= n &&
enic->wq_count >= m &&
enic->cq_count >= n + m &&
- enic->intr_count >= n + m + 2 &&
- !pci_enable_msix(enic->pdev, enic->msix_entry, n + m + 2)) {
+ enic->intr_count >= n + m + 2) {
- enic->rq_count = n;
- enic->wq_count = m;
- enic->cq_count = n + m;
- enic->intr_count = n + m + 2;
+ if (!pci_enable_msix(enic->pdev, enic->msix_entry, n + m + 2)) {
- vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSIX);
+ enic->rq_count = n;
+ enic->wq_count = m;
+ enic->cq_count = n + m;
+ enic->intr_count = n + m + 2;
- return 0;
+ vnic_dev_set_intr_mode(enic->vdev,
+ VNIC_DEV_INTR_MODE_MSIX);
+
+ return 0;
+ }
+ }
+
+ if (enic->config.intr_mode < 1 &&
+ enic->rq_count >= 1 &&
+ enic->wq_count >= m &&
+ enic->cq_count >= 1 + m &&
+ enic->intr_count >= 1 + m + 2) {
+ if (!pci_enable_msix(enic->pdev, enic->msix_entry, 1 + m + 2)) {
+
+ enic->rq_count = 1;
+ enic->wq_count = m;
+ enic->cq_count = 1 + m;
+ enic->intr_count = 1 + m + 2;
+
+ vnic_dev_set_intr_mode(enic->vdev,
+ VNIC_DEV_INTR_MODE_MSIX);
+
+ return 0;
+ }
}
/* Next try MSI
@@ -2149,7 +2351,11 @@ static const struct net_device_ops enic_netdev_ops = {
static void enic_dev_deinit(struct enic *enic)
{
- netif_napi_del(&enic->napi);
+ unsigned int i;
+
+ for (i = 0; i < enic->rq_count; i++)
+ netif_napi_del(&enic->napi[i]);
+
enic_free_vnic_resources(enic);
enic_clear_intr_mode(enic);
}
@@ -2158,6 +2364,7 @@ static int enic_dev_init(struct enic *enic)
{
struct device *dev = enic_get_dev(enic);
struct net_device *netdev = enic->netdev;
+ unsigned int i;
int err;
/* Get vNIC configuration
@@ -2202,7 +2409,7 @@ static int enic_dev_init(struct enic *enic)
goto err_out_free_vnic_resources;
}
- err = enic_set_niccfg(enic);
+ err = enic_set_rss_nic_cfg(enic);
if (err) {
dev_err(dev, "Failed to config nic, aborting\n");
goto err_out_free_vnic_resources;
@@ -2217,10 +2424,12 @@ static int enic_dev_init(struct enic *enic)
switch (vnic_dev_get_intr_mode(enic->vdev)) {
default:
- netif_napi_add(netdev, &enic->napi, enic_poll, 64);
+ netif_napi_add(netdev, &enic->napi[0], enic_poll, 64);
break;
case VNIC_DEV_INTR_MODE_MSIX:
- netif_napi_add(netdev, &enic->napi, enic_poll_msix, 64);
+ for (i = 0; i < enic->rq_count; i++)
+ netif_napi_add(netdev, &enic->napi[i],
+ enic_poll_msix, 64);
break;
}
diff --git a/drivers/net/enic/enic_res.c b/drivers/net/enic/enic_res.c
index 19a276c..f111a37 100644
--- a/drivers/net/enic/enic_res.c
+++ b/drivers/net/enic/enic_res.c
@@ -35,6 +35,7 @@
#include "vnic_intr.h"
#include "vnic_stats.h"
#include "vnic_nic.h"
+#include "vnic_rss.h"
#include "enic_res.h"
#include "enic.h"
@@ -93,13 +94,14 @@ int enic_get_vnic_config(struct enic *enic)
INTR_COALESCE_HW_TO_USEC(VNIC_INTR_TIMER_MAX),
c->intr_timer_usec);
- dev_info(enic_get_dev(enic), "vNIC MAC addr %pM wq/rq %d/%d\n",
- enic->mac_addr, c->wq_desc_count, c->rq_desc_count);
- dev_info(enic_get_dev(enic), "vNIC mtu %d csum tx/rx %d/%d "
- "tso/lro %d/%d intr timer %d usec\n",
- c->mtu, ENIC_SETTING(enic, TXCSUM),
- ENIC_SETTING(enic, RXCSUM), ENIC_SETTING(enic, TSO),
- ENIC_SETTING(enic, LRO), c->intr_timer_usec);
+ dev_info(enic_get_dev(enic),
+ "vNIC MAC addr %pM wq/rq %d/%d mtu %d\n",
+ enic->mac_addr, c->wq_desc_count, c->rq_desc_count, c->mtu);
+ dev_info(enic_get_dev(enic), "vNIC csum tx/rx %d/%d "
+ "tso/lro %d/%d intr timer %d usec rss %d\n",
+ ENIC_SETTING(enic, TXCSUM), ENIC_SETTING(enic, RXCSUM),
+ ENIC_SETTING(enic, TSO), ENIC_SETTING(enic, LRO),
+ c->intr_timer_usec, ENIC_SETTING(enic, RSS));
return 0;
}
@@ -148,6 +150,22 @@ int enic_set_nic_cfg(struct enic *enic, u8 rss_default_cpu, u8 rss_hash_type,
return vnic_dev_cmd(enic->vdev, CMD_NIC_CFG, &a0, &a1, wait);
}
+int enic_set_rss_key(struct enic *enic, dma_addr_t key_pa, u64 len)
+{
+ u64 a0 = (u64)key_pa, a1 = len;
+ int wait = 1000;
+ /* Issue CMD_RSS_KEY with a0 = key_pa and a1 = len; its result is returned */
+ return vnic_dev_cmd(enic->vdev, CMD_RSS_KEY, &a0, &a1, wait);
+}
+
+int enic_set_rss_cpu(struct enic *enic, dma_addr_t cpu_pa, u64 len)
+{
+ u64 a0 = (u64)cpu_pa, a1 = len;
+ int wait = 1000;
+ /* Issue CMD_RSS_CPU with a0 = cpu_pa and a1 = len; its result is returned */
+ return vnic_dev_cmd(enic->vdev, CMD_RSS_CPU, &a0, &a1, wait);
+}
+
void enic_free_vnic_resources(struct enic *enic)
{
unsigned int i;
@@ -164,18 +182,11 @@ void enic_free_vnic_resources(struct enic *enic)
void enic_get_res_counts(struct enic *enic)
{
- enic->wq_count = min_t(int,
- vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ),
- ENIC_WQ_MAX);
- enic->rq_count = min_t(int,
- vnic_dev_get_res_count(enic->vdev, RES_TYPE_RQ),
- ENIC_RQ_MAX);
- enic->cq_count = min_t(int,
- vnic_dev_get_res_count(enic->vdev, RES_TYPE_CQ),
- ENIC_CQ_MAX);
- enic->intr_count = min_t(int,
- vnic_dev_get_res_count(enic->vdev, RES_TYPE_INTR_CTRL),
- ENIC_INTR_MAX);
+ enic->wq_count = vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ);
+ enic->rq_count = vnic_dev_get_res_count(enic->vdev, RES_TYPE_RQ);
+ enic->cq_count = vnic_dev_get_res_count(enic->vdev, RES_TYPE_CQ);
+ enic->intr_count = vnic_dev_get_res_count(enic->vdev,
+ RES_TYPE_INTR_CTRL);
dev_info(enic_get_dev(enic),
"vNIC resources avail: wq %d rq %d cq %d intr %d\n",
diff --git a/drivers/net/enic/enic_res.h b/drivers/net/enic/enic_res.h
index 3c59f54..83bd172 100644
--- a/drivers/net/enic/enic_res.h
+++ b/drivers/net/enic/enic_res.h
@@ -137,6 +137,8 @@ int enic_del_vlan(struct enic *enic, u16 vlanid);
int enic_set_nic_cfg(struct enic *enic, u8 rss_default_cpu, u8 rss_hash_type,
u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable, u8 tso_ipid_split_en,
u8 ig_vlan_strip_en);
+int enic_set_rss_key(struct enic *enic, dma_addr_t key_pa, u64 len);
+int enic_set_rss_cpu(struct enic *enic, dma_addr_t cpu_pa, u64 len);
void enic_get_res_counts(struct enic *enic);
void enic_init_vnic_resources(struct enic *enic);
int enic_alloc_vnic_resources(struct enic *);
--git a/drivers/net/enic/vnic_rss.h b/drivers/net/enic/vnic_rss.h
new file mode 100644
index 0000000..fa421ba
--- /dev/null
+++ b/drivers/net/enic/vnic_rss.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2008-2010 Cisco Systems, Inc. All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _VNIC_RSS_H_
+#define _VNIC_RSS_H_
+
+/* RSS key array: 4 sub-keys of 10 key bytes + 6 pad bytes; raw[] overlays it */
+union vnic_rss_key {
+ struct {
+ u8 b[10];
+ u8 b_pad[6];
+ } key[4];
+ u64 raw[8];
+};
+
+/* RSS cpu array: 32 groups of 4 entry bytes + 4 pad bytes; raw[] overlays it */
+union vnic_rss_cpu {
+ struct {
+ u8 b[4] ;
+ u8 b_pad[4];
+ } cpu[32];
+ u64 raw[32];
+};
+
+#endif /* _VNIC_RSS_H_ */
^ permalink raw reply related
* [net-next-2.6 PATCH 0/5] enic: updates to version 1.4.1.6
From: Vasanthy Kolluri @ 2010-10-20 20:16 UTC (permalink / raw)
To: davem; +Cc: netdev, roprabhu, dwang2
The following patch series implements enic driver updates:
1/5 - Add support for multiple hardware receive queues
2/5 - Make firmware cognizant of the user set mac address
3/5 - Replace firmware devcmd CMD_ENABLE with CMD_ENABLE_WAIT
4/5 - Change min MTU
5/5 - Fix log message
Signed-off-by: Vasanthy Kolluri <vkolluri@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David Wang <dwang2@cisco.com>
^ permalink raw reply
* Re: Linux 2.6.35/TIPC 2.0 ABI breaking changes
From: Leandro Lucarella @ 2010-10-20 19:23 UTC (permalink / raw)
To: Jon Maloy
Cc: netdev@vger.kernel.org, tipc-discussion@lists.sourceforge.net,
linux-kernel@vger.kernel.org, Neil Horman, David Miller
In-Reply-To: <20101020182851.GP8781@llucax.com.ar>
Leandro Lucarella, el 20 de octubre a las 15:28 me escribiste:
> Jon Maloy, el 20 de octubre a las 14:10 me escribiste:
> > <...>
> > > >
> > > > Remember, permitting both is a superset of the current one
> > > (NBO only)
> > > > so it is fully backwards compatible. We break absolutely nothing by
> > > > permitting this.
> > > >
> > > Thats effectively reverting both our patches though, isn't it
> > > (not that I'm disagreeing with it, just looking for
> > > clarification). If we revert my patch and reintroduce the
> > > htohl mechanism which tracks endianess, we might as well
> > > revert the TIPC_SUB_SERVICE flag as well, yeah?
> >
> > Absolutely. I think it was a mistake to change that value.
> > But I don't think we need to reintroduce the htohl(). That
> > was just one way of doing it. If I understood your suggestion
> > from yesterday correctly you converted the whole message within
> > one if() clause, without any htohl(). I have no problem with
> > that approach.
>
> There is a difference between both solutions, the htohl() version
> tracked the need for swap as a struct subscription member (which was
> used when sending back events). Neil's patch doesn't do that tracking.
> I don't really know the implications of this, but maybe it would be
> a wise idea to stay in the safe side and revert both patches for now.
BTW, I tried 2.6.37 reverting both offending patches and everything
seems to work well.
--
Leandro Lucarella (AKA luca) http://llucax.com.ar/
----------------------------------------------------------------------
GPG Key: 5F5A8D05 (F8CD F9A7 BF00 5431 4145 104C 949E BFB6 5F5A 8D05)
----------------------------------------------------------------------
Vivimos en una época muy contemporánea, Don Inodoro...
-- Mendieta
------------------------------------------------------------------------------
Nokia and AT&T present the 2010 Calling All Innovators-North America contest
Create new apps & games for the Nokia N8 for consumers in U.S. and Canada
$10 million total in prizes - $4M cash, 500 devices, nearly $6M in marketing
Develop with Nokia Qt SDK, Web Runtime, or Java and Publish to Ovi Store
http://p.sf.net/sfu/nokia-dev2dev
_______________________________________________
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion
^ permalink raw reply
* Re: Future of the Wimedia LLC Protocol (WLP) subsystem/drivers
From: Greg KH @ 2010-10-20 19:22 UTC (permalink / raw)
To: Randy Dunlap; +Cc: David Vrabel, netdev
In-Reply-To: <20101020091541.ec00fe96.randy.dunlap@oracle.com>
On Wed, Oct 20, 2010 at 09:15:41AM -0700, Randy Dunlap wrote:
> On Tue, 19 Oct 2010 17:30:47 +0100 David Vrabel wrote:
>
> > Hi,
> >
> > I have been nominally the maintainer of the Wimedia LLC Protocol
> > (WLP) subsystem and driver since it was originally submitted. I am no
> > longer in a position to even pretend to be a maintainer.
> >
> > The only usable hardware was an Intel i1480 device with beta firmware
> > that was never released as a product. Intel have since sold all their
> > UWB/WLP IP and I see little prospect of there ever being hardware
> > commercially available for WLP.
> >
> > Here are a number of options:
> >
> > 1. Someone else maintains it. Any volunteers?
> >
> > 2. It gets labelled as Orphaned in MAINTAINERS.
> >
> > 3. It gets moved to staging.
> >
> > 4. It gets removed.
> >
> > If no one says anything I'll submit a patch to Linus to mark it as Orphaned.
>
> I'd say either 3 or 4.
>
> It could go to staging on its way to removal, but that's not really necessary.
>
>
> cc: gregkh
3 or 4 is fine with me, whichever David wants.
thanks,
greg k-h
^ permalink raw reply
* RE: [PATCH] PCI: MSI: Remove unsafe and unnecessary hardware access
From: Tantilov, Emil S @ 2010-10-20 19:05 UTC (permalink / raw)
To: Jesse Barnes, Emil S Tantilov
Cc: Ben Hutchings, Michael Chan, Matthew Wilcox,
linux-pci@vger.kernel.org, NetDev, Brandeburg, Jesse,
Kirsher, Jeffrey T
In-Reply-To: <20101015130629.046d3357@jbarnes-desktop>
>-----Original Message-----
>From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org]
>Sent: Friday, October 15, 2010 1:06 PM
>To: Emil S Tantilov
>Cc: Ben Hutchings; Michael Chan; Matthew Wilcox; linux-pci@vger.kernel.org;
>NetDev; Tantilov, Emil S; Brandeburg, Jesse; Kirsher, Jeffrey T
>Subject: Re: [PATCH] PCI: MSI: Remove unsafe and unnecessary hardware
>access
>
>On Fri, 15 Oct 2010 11:26:08 -0700
>Emil S Tantilov <emils.tantilov@gmail.com> wrote:
>
>> On Thu, Jun 17, 2010 at 12:16 PM, Ben Hutchings
>> <bhutchings@solarflare.com> wrote:
>> > During suspend on an SMP system, {read,write}_msi_msg_desc() may be
>> > called to mask and unmask interrupts on a device that is already in a
>> > reduced power state. At this point memory-mapped registers including
>> > MSI-X tables are not accessible, and config space may not be fully
>> > functional either.
>> >
>> > While a device is in a reduced power state its interrupts are
>> > effectively masked and its MSI(-X) state will be restored when it is
>> > brought back to D0. Therefore these functions can simply read and
>> > write msi_desc::msg for devices not in D0.
>> >
>> > Further, read_msi_msg_desc() should only ever be used to update a
>> > previously written message, so it can always read msi_desc::msg
>> > and never needs to touch the hardware.
>> >
>> > Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
>> > ---
>> > On Mon, 2010-06-14 at 18:13 -0700, Michael Chan wrote:
>> >> I'm debugging the bnx2 driver which doesn't work after suspend/resume
>if
>> >> it is running in MSI-X mode. The problem is that during suspend, the
>> >> MSI-X vectors are disabled by the following sequence on x86:
>> >>
>> >> take_cpu_down() -> cpu_disable_common() -> fixup_irqs()
>> >>
>> >> The MSI-X address/data used to disable the vectors are remembered in
>the
>> >> above sequence. During resume, these address/data are then programmed
>> >> back to the device during pci_restore_state(), causing all the vectors
>> >> to remain disabled.
>> >
>> > That's not quite what I see. What I see is that the message is read
>> > back from the table *after* the driver's suspend method has been
>called.
>> > At this point the device is already in D3 and memory-mapped registers
>> > are not accessible, so we get random bits as the message. At least,
>> > that's what I see happening with the sfc driver.
>> >
>> >> Some drivers call free_irq() during suspend and request_irq() during
>> >> resume, and that should avoid the problem. bnx2 and some other
>drivers
>> >> do not do that. These drivers rely on pci_restore_state() to restore
>> >> the MSI-X vectors to the same working state before suspend.
>> >>
>> >> What's the right way to fix this? Thanks.
>> >
>> > This is my attempt, which works for sfc. See if it works for bnx2.
>> >
>> > Ben.
>> >
>> > drivers/pci/msi.c | 34 +++++++++++-----------------------
>> > 1 files changed, 11 insertions(+), 23 deletions(-)
>> >
>> > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
>> > index 77b68ea..03f04dc 100644
>> > --- a/drivers/pci/msi.c
>> > +++ b/drivers/pci/msi.c
>> > @@ -196,30 +196,15 @@ void unmask_msi_irq(unsigned int irq)
>> > void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
>> > {
>> > struct msi_desc *entry = get_irq_desc_msi(desc);
>> > - if (entry->msi_attrib.is_msix) {
>> > - void __iomem *base = entry->mask_base +
>> > - entry->msi_attrib.entry_nr *
>PCI_MSIX_ENTRY_SIZE;
>> >
>> > - msg->address_lo = readl(base +
>PCI_MSIX_ENTRY_LOWER_ADDR);
>> > - msg->address_hi = readl(base +
>PCI_MSIX_ENTRY_UPPER_ADDR);
>> > - msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
>> > - } else {
>> > - struct pci_dev *dev = entry->dev;
>> > - int pos = entry->msi_attrib.pos;
>> > - u16 data;
>> > + /* We do not touch the hardware (which may not even be
>> > + * accessible at the moment) but return the last message
>> > + * written. Assert that this is valid, assuming that
>> > + * valid messages are not all-zeroes. */
>> > + BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
>> > + entry->msg.data));
>> >
>> > - pci_read_config_dword(dev, msi_lower_address_reg(pos),
>> > - &msg->address_lo);
>> > - if (entry->msi_attrib.is_64) {
>> > - pci_read_config_dword(dev,
>msi_upper_address_reg(pos),
>> > - &msg->address_hi);
>> > - pci_read_config_word(dev, msi_data_reg(pos, 1),
>&data);
>> > - } else {
>> > - msg->address_hi = 0;
>> > - pci_read_config_word(dev, msi_data_reg(pos, 0),
>&data);
>> > - }
>> > - msg->data = data;
>> > - }
>> > + *msg = entry->msg;
>> > }
>> >
>> > void read_msi_msg(unsigned int irq, struct msi_msg *msg)
>> > @@ -232,7 +217,10 @@ void read_msi_msg(unsigned int irq, struct msi_msg
>*msg)
>> > void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
>> > {
>> > struct msi_desc *entry = get_irq_desc_msi(desc);
>> > - if (entry->msi_attrib.is_msix) {
>> > +
>> > + if (entry->dev->current_state != PCI_D0) {
>>
>> This check exposed a problem in ixgb (patch is on the way) where
>> pci_disable_device() was not being called in ixgb_remove(). As a
>> result the current_state was set to PCI_UNKNOWN and the interface
>> failed to work on subsequent load of the driver.
>>
>> Even though the problem was in ixgb, it made me wonder about this
>> check as the presumption here (low power state) may not always be
>> true. Like in the case of unloading a driver, which sets
>> dev->current_state to PCI_UNKNOWN which is not a representation of the
>> _real_ state of the device (actual state could be D0).
>>
>> BTW - quick search shows other drivers that could potentially suffer
>> the fate of ixgb due to the lack of a pci_disable_device() call on removal.
>
>Yeah we just ran into this in the DRM layer as well; which does a
>pci_enable_device but never calls _disable, so we're stuck with
>potentially stale state.
>
>I came up with the below to address that, but really I don't like the
>idea of nested pci_enable_device() calls at all. But I haven't looked
>at the latest Wireless USB stuff to see if those drivers still rely on
>it.
>
>--
>Jesse Barnes, Intel Open Source Technology Center
>
>diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>index 7fa3cbd..37facc1 100644
>--- a/drivers/pci/pci.c
>+++ b/drivers/pci/pci.c
>@@ -994,6 +994,18 @@ static int __pci_enable_device_flags(struct pci_dev
>*dev,
> int err;
> int i, bars = 0;
>
>+ /*
>+ * Power state could be unknown at this point, either due to a fresh
>+ * boot or a device removal call. So get the current power state
>+ * so that things like MSI message writing will behave as expected
>+ * (e.g. if the device really is in D0 at enable time).
>+ */
>+ if (dev->pm_cap) {
>+ u16 pmcsr;
>+ pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
>+ dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
>+ }
>+
> if (atomic_add_return(1, &dev->enable_cnt) > 1)
> return 0; /* already enabled */
>
With this patch applied I was able to reload the driver and confirmed that current_state is set to the actual power state.
Thanks,
Emil
^ permalink raw reply
* Re: Linux 2.6.35/TIPC 2.0 ABI breaking changes
From: Leandro Lucarella @ 2010-10-20 18:44 UTC (permalink / raw)
To: Jon Maloy
Cc: Neil Horman, netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
tipc-discussion@lists.sourceforge.net, David Miller
In-Reply-To: <0434463FDA60A94FA978ACA44617682DEE8466821E@EUSAACMS0702.eamcs.ericsson.se>
Jon Maloy, el 20 de octubre a las 14:37 me escribiste:
> > I think if they really go through the wire, it should be in
> > NBO, and if tipc_subscr and tipc_event are used only
> > internally, we can still fix the userspace messages when
> > sending them through the wire.
>
> There are plenty of protocols around not using NBO over the wire.
> This is not a must.
Of course, but it is harder to sniff and debug if you don't have a fixed BO,
so if it's easy to adjust transparently to userspace, I think it could be
worth the trouble.
But my main concern is backwards compatibility, everything else is
secondary :)
--
Leandro Lucarella (AKA luca) http://llucax.com.ar/
----------------------------------------------------------------------
GPG Key: 5F5A8D05 (F8CD F9A7 BF00 5431 4145 104C 949E BFB6 5F5A 8D05)
----------------------------------------------------------------------
The Muppet show was banned from TV in Saudi Arabia
Because one of its stars was a pig
------------------------------------------------------------------------------
Nokia and AT&T present the 2010 Calling All Innovators-North America contest
Create new apps & games for the Nokia N8 for consumers in U.S. and Canada
$10 million total in prizes - $4M cash, 500 devices, nearly $6M in marketing
Develop with Nokia Qt SDK, Web Runtime, or Java and Publish to Ovi Store
http://p.sf.net/sfu/nokia-dev2dev
^ permalink raw reply
* Re: Linux 2.6.35/TIPC 2.0 ABI breaking changes
From: Jon Maloy @ 2010-10-20 18:37 UTC (permalink / raw)
To: Leandro Lucarella
Cc: Neil Horman, netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
tipc-discussion@lists.sourceforge.net, David Miller
In-Reply-To: <20101020182411.GO8781@llucax.com.ar>
<...>
>
> Just to try to understand better how things works, or are supposed to
> work: do the subscription and event messages (and I mean the
> struct tipc_subscr and tipc_event published in tipc.h) really
> go over the wire or are only used to communicate the stack to
> the application inside a node?
Both. And, given TIPC's fundamental "location transparency" principle,
the sender (or receiver) at user level does not need to know the
difference.
For a TIPC user, all messages are "local", insofar as they stay within
the same cluster.
>
> I think this is a crucial matter, since it defines if the
> changes cross kernel/userspace boundaries only or it also
> crosses the kernel/network boundaries.
>
> > Remember, permitting both is a superset of the current one
> (NBO only)
> > so it is fully backwards compatible. We break absolutely nothing by
> > permitting this.
>
> I think if they really go through the wire, it should be in
> NBO, and if tipc_subscr and tipc_event are used only
> internally, we can still fix the userspace messages when
> sending them through the wire.
There are plenty of protocols around not using NBO over the wire.
This is not a must.
>
> In any case, I agree that the patches should be reverted and
> a solution should be planned with more time and consensus.
>
> Thanks.
>
> --
> Leandro Lucarella (AKA luca) http://llucax.com.ar/
> ----------------------------------------------------------------------
> GPG Key: 5F5A8D05 (F8CD F9A7 BF00 5431 4145 104C 949E BFB6 5F5A 8D05)
> ----------------------------------------------------------------------
> The world's best known word is "okay"
> The second most well-known word is "Coca-Cola"
>
------------------------------------------------------------------------------
Nokia and AT&T present the 2010 Calling All Innovators-North America contest
Create new apps & games for the Nokia N8 for consumers in U.S. and Canada
$10 million total in prizes - $4M cash, 500 devices, nearly $6M in marketing
Develop with Nokia Qt SDK, Web Runtime, or Java and Publish to Ovi Store
http://p.sf.net/sfu/nokia-dev2dev
^ permalink raw reply
* Re: Linux 2.6.35/TIPC 2.0 ABI breaking changes
From: Leandro Lucarella @ 2010-10-20 18:28 UTC (permalink / raw)
To: Jon Maloy
Cc: Neil Horman, netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
tipc-discussion@lists.sourceforge.net, David Miller
In-Reply-To: <0434463FDA60A94FA978ACA44617682DEE846681B9@EUSAACMS0702.eamcs.ericsson.se>
Jon Maloy, el 20 de octubre a las 14:10 me escribiste:
> <...>
> > >
> > > Remember, permitting both is a superset of the current one
> > (NBO only)
> > > > so it is fully backwards compatible. We break absolutely nothing by
> > > permitting this.
> > >
> > That's effectively reverting both our patches though, isn't it
> > (not that I'm disagreeing with it, just looking for
> > clarification). If we revert my patch and reintroduce the
> > htohl mechanism which tracks endianness, we might as well
> > revert the TIPC_SUB_SERVICE flag as well, yeah?
>
> Absolutely. I think it was a mistake to change that value.
> But I don't think we need to reintroduce the htohl(). That
> was just one way of doing it. If I understood your suggestion
> from yesterday correctly you converted the whole message within
> one if() clause, without any htohl(). I have no problem with
> that approach.
There is a difference between both solutions, the htohl() version
tracked the need for swap as a struct subscription member (which was
used when sending back events). Neil's patch doesn't do that tracking.
I don't really know the implications of this, but maybe it would be
a wise idea to stay on the safe side and revert both patches for now.
--
Leandro Lucarella (AKA luca) http://llucax.com.ar/
----------------------------------------------------------------------
GPG Key: 5F5A8D05 (F8CD F9A7 BF00 5431 4145 104C 949E BFB6 5F5A 8D05)
----------------------------------------------------------------------
It's not a lie, if you believe it.
-- George Constanza
------------------------------------------------------------------------------
Nokia and AT&T present the 2010 Calling All Innovators-North America contest
Create new apps & games for the Nokia N8 for consumers in U.S. and Canada
$10 million total in prizes - $4M cash, 500 devices, nearly $6M in marketing
Develop with Nokia Qt SDK, Web Runtime, or Java and Publish to Ovi Store
http://p.sf.net/sfu/nokia-dev2dev
^ permalink raw reply
* Re: Linux 2.6.35/TIPC 2.0 ABI breaking changes
From: Leandro Lucarella @ 2010-10-20 18:24 UTC (permalink / raw)
To: Jon Maloy
Cc: Neil Horman, David Miller, paul.gortmaker@windriver.com,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
tipc-discussion@lists.sourceforge.net
In-Reply-To: <0434463FDA60A94FA978ACA44617682DEE84668199@EUSAACMS0702.eamcs.ericsson.se>
Jon Maloy, el 20 de octubre a las 13:57 me escribiste:
> > Another option is to change the TIPC 2.0 specification to use
> > the old format (use HBO in subscriptions and keep
> > TIPC_SUB_SERVICE as a separate flag with value 2) and forget
> > about all this. After all, I can't see what advantages gives
> > having to change the BO for internal messages between the
> > applications and the stack.
>
> I agree with this. I have no problems with changing the draft
> (which as Leandro already noted is "work-in-progress") to specify that
> both HBO and NBO are permitted over the wire, and that it is the
> topology server's task to keep track of which one is used.
Just to try to understand better how things work, or are supposed to
work: do the subscription and event messages (and I mean the struct
tipc_subscr and tipc_event published in tipc.h) really go over the wire
or are only used to communicate the stack to the application inside
a node?
I think this is a crucial matter, since it defines whether the changes cross
kernel/userspace boundaries only or also cross the kernel/network
boundaries.
> Remember, permitting both is a superset of the current one (NBO only)
> so it is fully backwards compatible. We break absolutely nothing by
> permitting this.
I think if they really go through the wire, it should be in NBO, and if
tipc_subscr and tipc_event are used only internally, we can still fix
the userspace messages when sending them through the wire.
In any case, I agree that the patches should be reverted and a solution
should be planned with more time and consensus.
Thanks.
--
Leandro Lucarella (AKA luca) http://llucax.com.ar/
----------------------------------------------------------------------
GPG Key: 5F5A8D05 (F8CD F9A7 BF00 5431 4145 104C 949E BFB6 5F5A 8D05)
----------------------------------------------------------------------
The world's best known word is "okay"
The second most well-known word is "Coca-Cola"
^ permalink raw reply
* Re: Linux 2.6.35/TIPC 2.0 ABI breaking changes
From: Jon Maloy @ 2010-10-20 18:10 UTC (permalink / raw)
To: Neil Horman
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
Leandro Lucarella, tipc-discussion@lists.sourceforge.net,
David Miller
In-Reply-To: <20101020180454.GC14407@hmsreliant.think-freely.org>
<...>
> >
> > Remember, permitting both is a superset of the current one
> (NBO only)
> > so it is fully backwards compatible. We break absolutely nothing by
> > permitting this.
> >
> That's effectively reverting both our patches though, isn't it
> (not that I'm disagreeing with it, just looking for
> clarification). If we revert my patch and reintroduce the
> htohl mechanism which tracks endianness, we might as well
> revert the TIPC_SUB_SERVICE flag as well, yeah?
Absolutely. I think it was a mistake to change that value.
But I don't think we need to reintroduce the htohl(). That
was just one way of doing it. If I understood your suggestion
from yesterday correctly, you converted the whole message within
one if() clause, without any htohl(). I have no problem with
that approach.
///jon
>
> Neil
>
> >
> > >
> > > [1] http://tipc.sourceforge.net/doc/draft-spec-tipc-06.html
> > > [2]
> http://tipc.sourceforge.net/doc/draft-spec-tipc-06.html#anchor92
> > >
> > > --
> > > Leandro Lucarella (AKA luca)
> http://llucax.com.ar/
> > >
> --------------------------------------------------------------------
> > > -- GPG Key: 5F5A8D05 (F8CD F9A7 BF00 5431 4145 104C 949E
> BFB6 5F5A
> > > 8D05)
> > >
> --------------------------------------------------------------------
> > > -- CARANCHO OBNUBILADO APARECE EN PARQUE CHACABUCO!
> > > -- Crónica TV
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe
> netdev" in
> > > the body of a message to majordomo@vger.kernel.org More majordomo
> > > info at http://vger.kernel.org/majordomo-info.html
> > > --
> > To unsubscribe from this list: send the line "unsubscribe
> netdev" in
> > the body of a message to majordomo@vger.kernel.org More
> majordomo info
> > at http://vger.kernel.org/majordomo-info.html
> >
>
------------------------------------------------------------------------------
Nokia and AT&T present the 2010 Calling All Innovators-North America contest
Create new apps & games for the Nokia N8 for consumers in U.S. and Canada
$10 million total in prizes - $4M cash, 500 devices, nearly $6M in marketing
Develop with Nokia Qt SDK, Web Runtime, or Java and Publish to Ovi Store
http://p.sf.net/sfu/nokia-dev2dev
^ permalink raw reply
* Re: Linux 2.6.35/TIPC 2.0 ABI breaking changes
From: Neil Horman @ 2010-10-20 18:04 UTC (permalink / raw)
To: Jon Maloy
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
Leandro Lucarella, tipc-discussion@lists.sourceforge.net,
David Miller
In-Reply-To: <0434463FDA60A94FA978ACA44617682DEE84668199@EUSAACMS0702.eamcs.ericsson.se>
On Wed, Oct 20, 2010 at 01:57:06PM -0400, Jon Maloy wrote:
> <...>
> > subscr struct because 0x0 is a valid filter in TIPC 2.0.
> >
>
> >
> > Another option is to change the TIPC 2.0 specification to use
> > the old format (use HBO in subscriptions and keep
> > TIPC_SUB_SERVICE as a separate flag with value 2) and forget
> > about all this. After all, I can't see what advantages gives
> > having to change the BO for internal messages between the
> > applications and the stack.
>
> I agree with this. I have no problems with changing the draft
> (which as Leandro already noted is "work-in-progress") to specify that
> both HBO and NBO are permitted over the wire, and that it is the
> topology server's task to keep track of which one is used.
>
> Remember, permitting both is a superset of the current one (NBO only)
> so it is fully backwards compatible. We break absolutely nothing by
> permitting this.
>
That's effectively reverting both our patches though, isn't it (not that I'm
disagreeing with it, just looking for clarification). If we revert my patch and
reintroduce the htohl mechanism which tracks endianness, we might as well revert
the TIPC_SUB_SERVICE flag as well, yeah?
Neil
>
> >
> > [1] http://tipc.sourceforge.net/doc/draft-spec-tipc-06.html
> > [2] http://tipc.sourceforge.net/doc/draft-spec-tipc-06.html#anchor92
> >
> > --
> > Leandro Lucarella (AKA luca) http://llucax.com.ar/
> > ----------------------------------------------------------------------
> > GPG Key: 5F5A8D05 (F8CD F9A7 BF00 5431 4145 104C 949E BFB6 5F5A 8D05)
> > ----------------------------------------------------------------------
> > CARANCHO OBNUBILADO APARECE EN PARQUE CHACABUCO!
> > -- Crónica TV
> > --
> > To unsubscribe from this list: send the line "unsubscribe
> > netdev" in the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
------------------------------------------------------------------------------
Nokia and AT&T present the 2010 Calling All Innovators-North America contest
Create new apps & games for the Nokia N8 for consumers in U.S. and Canada
$10 million total in prizes - $4M cash, 500 devices, nearly $6M in marketing
Develop with Nokia Qt SDK, Web Runtime, or Java and Publish to Ovi Store
http://p.sf.net/sfu/nokia-dev2dev
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox