Netdev List
 help / color / mirror / Atom feed
* [patch] qlge: fix an "&&" vs "||" bug
From: Dan Carpenter @ 2012-07-12 14:47 UTC (permalink / raw)
  To: Anirban Chakraborty
  Cc: Jitendra Kalsaria, Ron Mercer, linux-driver, netdev,
	kernel-janitors

The condition is always true so WOL will never work.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
index 3d4462b..6f316ab 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c
@@ -440,7 +440,7 @@ static int ql_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 	unsigned short ssys_dev = qdev->pdev->subsystem_device;
 
 	/* WOL is only supported for mezz card. */
-	if (ssys_dev != QLGE_MEZZ_SSYS_ID_068 ||
+	if (ssys_dev != QLGE_MEZZ_SSYS_ID_068 &&
 			ssys_dev != QLGE_MEZZ_SSYS_ID_180) {
 		netif_info(qdev, drv, qdev->ndev,
 				"WOL is only supported for mezz card\n");

^ permalink raw reply related

* Re: [net-next:master 90/102] net/ipv4/route.c:1283:9: warning: unused variable 'saddr'
From: David Miller @ 2012-07-12 14:40 UTC (permalink / raw)
  To: fengguang.wu; +Cc: kernel-janitors, netdev
In-Reply-To: <20120712143449.GA19890@localhost>


There's no need to report these to kernel-janitors if it's a
net-next specific issue and I'm going to fix it up 5 minutes
after you report it.

====================
[PATCH] ipv4: Fix warnings in ip_do_redirect() for some configurations.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c |   10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 23bbe29..9319bf1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1275,12 +1275,9 @@ static void rt_del(unsigned int hash, struct rtable *rt)
 
 static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
 {
-	const struct iphdr *iph = (const struct iphdr *) skb->data;
 	__be32 new_gw = icmp_hdr(skb)->un.gateway;
 	__be32 old_gw = ip_hdr(skb)->saddr;
 	struct net_device *dev = skb->dev;
-	__be32 daddr = iph->daddr;
-	__be32 saddr = iph->saddr;
 	struct in_device *in_dev;
 	struct neighbour *n;
 	struct rtable *rt;
@@ -1336,11 +1333,16 @@ static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
 
 reject_redirect:
 #ifdef CONFIG_IP_ROUTE_VERBOSE
-	if (IN_DEV_LOG_MARTIANS(in_dev))
+	if (IN_DEV_LOG_MARTIANS(in_dev)) {
+		const struct iphdr *iph = (const struct iphdr *) skb->data;
+		__be32 daddr = iph->daddr;
+		__be32 saddr = iph->saddr;
+
 		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
 				     "  Advised path = %pI4 -> %pI4\n",
 				     &old_gw, dev->name, &new_gw,
 				     &saddr, &daddr);
+	}
 #endif
 	;
 }
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH net-next] net: ftgmac100/ftmac100: dont pull too much data
From: David Miller @ 2012-07-12 14:38 UTC (permalink / raw)
  To: ratbert.chuang; +Cc: eric.dumazet, netdev, ratbert
In-Reply-To: <CANutaR-ER_+Y6FSSoA-31u0qLRBeU+ddrKwrTNujHsYeQhWVyQ@mail.gmail.com>

From: Po-Yu Chuang <ratbert.chuang@gmail.com>
Date: Thu, 12 Jul 2012 22:35:18 +0800

> Thank you Eric. :-)

You can thank him by providing an "Acked-by: ..." tag in your
reply.

^ permalink raw reply

* Re: Is TCP vulnerability patch (as in RFC 5961) done in linux?
From: Randy Dunlap @ 2012-07-12 14:37 UTC (permalink / raw)
  To: Kiran (Kiran Kumar) Kella; +Cc: linux-kernel@vger.kernel.org, netdev
In-Reply-To: <68700EDA775E5E47B5EBA9FF8AC0F15C07506A@SJEXCHMB09.corp.ad.broadcom.com>

On 07/12/2012 05:40 AM, Kiran (Kiran Kumar) Kella wrote:

> Hi,
> 
> I just now checked the kernel archives for the patch mentioned in section 3.2 of RFC 5961 for RST attacks with predictable sequence numbers.
> I see some discussion happened in 2004 timeframe.
> I was just wondering if in the latest linux source, the patch is made available.
> 
> Appreciate your quick response in this regard.
> 
> Thanks,
> Kiran



You should ask this question on the netdev mailing list (cc-ed).

-- 
~Randy

^ permalink raw reply

* Re: [PATCH net-next] net: ftgmac100/ftmac100: dont pull too much data
From: Po-Yu Chuang @ 2012-07-12 14:35 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, Po-Yu Chuang
In-Reply-To: <1342102778.3265.8272.camel@edumazet-glaptop>

Thank you Eric. :-)

regards,
Po-Yu Chuang

On Thu, Jul 12, 2012 at 10:19 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> From: Eric Dumazet <edumazet@google.com>
>
> Drivers should pull only ethernet header from page frag
> to skb->head.
>
> Pulling 64 bytes is too much for TCP (without options) on IPv4.
>
> However, it makes sense to pull all the frame if it fits the
> 128-byte block allocated for skb->head, to free one page per
> small incoming frame.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Po-Yu Chuang <ratbert@faraday-tech.com>
> ---
>  drivers/net/ethernet/faraday/ftgmac100.c |    9 +++++++--
>  drivers/net/ethernet/faraday/ftmac100.c  |   11 +++++++----
>  2 files changed, 14 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
> index 16b0704..74d749e 100644
> --- a/drivers/net/ethernet/faraday/ftgmac100.c
> +++ b/drivers/net/ethernet/faraday/ftgmac100.c
> @@ -479,9 +479,14 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed)
>                 rxdes = ftgmac100_current_rxdes(priv);
>         } while (!done);
>
> -       if (skb->len <= 64)
> +       /* Small frames are copied into linear part of skb to free one page */
> +       if (skb->len <= 128) {
>                 skb->truesize -= PAGE_SIZE;
> -       __pskb_pull_tail(skb, min(skb->len, 64U));
> +               __pskb_pull_tail(skb, skb->len);
> +       } else {
> +               /* We pull the minimum amount into linear part */
> +               __pskb_pull_tail(skb, ETH_HLEN);
> +       }
>         skb->protocol = eth_type_trans(skb, netdev);
>
>         netdev->stats.rx_packets++;
> diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
> index 829b109..b901a01 100644
> --- a/drivers/net/ethernet/faraday/ftmac100.c
> +++ b/drivers/net/ethernet/faraday/ftmac100.c
> @@ -441,11 +441,14 @@ static bool ftmac100_rx_packet(struct ftmac100 *priv, int *processed)
>         skb->len += length;
>         skb->data_len += length;
>
> -       /* page might be freed in __pskb_pull_tail() */
> -       if (length > 64)
> +       if (length > 128) {
>                 skb->truesize += PAGE_SIZE;
> -       __pskb_pull_tail(skb, min(length, 64));
> -
> +               /* We pull the minimum amount into linear part */
> +               __pskb_pull_tail(skb, ETH_HLEN);
> +       } else {
> +               /* Small frames are copied into linear part to free one page */
> +               __pskb_pull_tail(skb, length);
> +       }
>         ftmac100_alloc_rx_page(priv, rxdes, GFP_ATOMIC);
>
>         ftmac100_rx_pointer_advance(priv);
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [net-next:master 90/102] net/ipv4/route.c:1283:9: warning: unused variable 'saddr'
From: Fengguang Wu @ 2012-07-12 14:34 UTC (permalink / raw)
  To: David S. Miller; +Cc: kernel-janitors, netdev

Hi David,

There are new compile warnings showing up in

tree:   git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git master
head:   3ec5a261aef55a32664bffd335e5c32aeadf3215
commit: e47a185b31dd2acd424fac7dc0efb96fc5b31a33 [90/102] ipv4: Generalize ip_do_redirect() and hook into new dst_ops->redirect.

All warnings:

net/ipv4/route.c: In function 'ip_do_redirect':
net/ipv4/route.c:1283:9: warning: unused variable 'saddr' [-Wunused-variable]
net/ipv4/route.c:1282:9: warning: unused variable 'daddr' [-Wunused-variable]

vim +1283 net/ipv4/route.c
  1280		__be32 old_gw = ip_hdr(skb)->saddr;
  1281		struct net_device *dev = skb->dev;
  1282		__be32 daddr = iph->daddr;
> 1283		__be32 saddr = iph->saddr;
  1284		struct in_device *in_dev;
  1285		struct neighbour *n;
  1286		struct rtable *rt;

..because the saddr/daddr variables are used inside

#ifdef CONFIG_IP_ROUTE_VERBOSE
        if (IN_DEV_LOG_MARTIANS(in_dev))
                net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
                                     "  Advised path = %pI4 -> %pI4\n",
                                     &old_gw, dev->name, &new_gw,
                                     &saddr, &daddr);
#endif

---
0-DAY kernel build testing backend         Open Source Technology Centre
Fengguang Wu <wfg@linux.intel.com>                     Intel Corporation

^ permalink raw reply

* [PATCH net-next] net: ftgmac100/ftmac100: dont pull too much data
From: Eric Dumazet @ 2012-07-12 14:19 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Po-Yu Chuang

From: Eric Dumazet <edumazet@google.com>

Drivers should pull only ethernet header from page frag
to skb->head.

Pulling 64 bytes is too much for TCP (without options) on IPv4.

However, it makes sense to pull all the frame if it fits the
128-byte block allocated for skb->head, to free one page per
small incoming frame.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Po-Yu Chuang <ratbert@faraday-tech.com>
---
 drivers/net/ethernet/faraday/ftgmac100.c |    9 +++++++--
 drivers/net/ethernet/faraday/ftmac100.c  |   11 +++++++----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 16b0704..74d749e 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -479,9 +479,14 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed)
 		rxdes = ftgmac100_current_rxdes(priv);
 	} while (!done);
 
-	if (skb->len <= 64)
+	/* Small frames are copied into linear part of skb to free one page */
+	if (skb->len <= 128) {
 		skb->truesize -= PAGE_SIZE;
-	__pskb_pull_tail(skb, min(skb->len, 64U));
+		__pskb_pull_tail(skb, skb->len);
+	} else {
+		/* We pull the minimum amount into linear part */
+		__pskb_pull_tail(skb, ETH_HLEN);
+	}
 	skb->protocol = eth_type_trans(skb, netdev);
 
 	netdev->stats.rx_packets++;
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 829b109..b901a01 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -441,11 +441,14 @@ static bool ftmac100_rx_packet(struct ftmac100 *priv, int *processed)
 	skb->len += length;
 	skb->data_len += length;
 
-	/* page might be freed in __pskb_pull_tail() */
-	if (length > 64)
+	if (length > 128) {
 		skb->truesize += PAGE_SIZE;
-	__pskb_pull_tail(skb, min(length, 64));
-
+		/* We pull the minimum amount into linear part */
+		__pskb_pull_tail(skb, ETH_HLEN);
+	} else {
+		/* Small frames are copied into linear part to free one page */
+		__pskb_pull_tail(skb, length);
+	}
 	ftmac100_alloc_rx_page(priv, rxdes, GFP_ATOMIC);
 
 	ftmac100_rx_pointer_advance(priv);

^ permalink raw reply related

* Re: [PATCH net-next 7/7] be2net: Enable RSS UDP hashing for Lancer and Skyhawk
From: Eric Dumazet @ 2012-07-12 14:10 UTC (permalink / raw)
  To: Padmanabh Ratnakar; +Cc: netdev
In-Reply-To: <58046138-832a-49a2-84a8-fa682a74162e@exht1.ad.emulex.com>

On Thu, 2012-07-12 at 19:27 +0530, Padmanabh Ratnakar wrote:
> Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@emulex.com>
> ---
>  drivers/net/ethernet/emulex/benet/be.h      |    3 +++
>  drivers/net/ethernet/emulex/benet/be_cmds.c |    7 +++++++
>  drivers/net/ethernet/emulex/benet/be_cmds.h |    2 ++
>  3 files changed, 12 insertions(+), 0 deletions(-)

It would be nice to add a bit of documentation on this, and what
components are used from the tuple (dst addr, src addr, dst port, src
port)

^ permalink raw reply

* [PATCH net-next 7/7] be2net: Enable RSS UDP hashing for Lancer and Skyhawk
From: Padmanabh Ratnakar @ 2012-07-12 13:57 UTC (permalink / raw)
  To: netdev; +Cc: Padmanabh Ratnakar


Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be.h      |    3 +++
 drivers/net/ethernet/emulex/benet/be_cmds.c |    7 +++++++
 drivers/net/ethernet/emulex/benet/be_cmds.h |    2 ++
 3 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index e2dfe31..330d59a 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -457,6 +457,9 @@ struct be_adapter {
 #define lancer_chip(adapter)	((adapter->pdev->device == OC_DEVICE_ID3) || \
 				 (adapter->pdev->device == OC_DEVICE_ID4))
 
+#define skyhawk_chip(adapter)	(adapter->pdev->device == OC_DEVICE_ID5)
+
+
 #define be_roce_supported(adapter) ((adapter->if_type == SLI_INTF_TYPE_3 || \
 				adapter->sli_family == SKYHAWK_SLI_FAMILY) && \
 				(adapter->function_mode & RDMA_ENABLED))
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 8730f0e..ddfca65 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -1792,6 +1792,13 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, u16 table_size)
 	req->if_id = cpu_to_le32(adapter->if_handle);
 	req->enable_rss = cpu_to_le16(RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
 				      RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6);
+
+	if (lancer_chip(adapter) || skyhawk_chip(adapter)) {
+		req->hdr.version = 1;
+		req->enable_rss |= cpu_to_le16(RSS_ENABLE_UDP_IPV4 |
+					       RSS_ENABLE_UDP_IPV6);
+	}
+
 	req->cpu_table_size_log2 = cpu_to_le16(fls(table_size) - 1);
 	memcpy(req->cpu_table, rsstable, table_size);
 	memcpy(req->hash, myhash, sizeof(myhash));
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 88f7237..45d70de 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1089,6 +1089,8 @@ struct be_cmd_resp_query_fw_cfg {
 #define RSS_ENABLE_TCP_IPV4			0x2
 #define RSS_ENABLE_IPV6				0x4
 #define RSS_ENABLE_TCP_IPV6			0x8
+#define RSS_ENABLE_UDP_IPV4			0x10
+#define RSS_ENABLE_UDP_IPV6			0x20
 
 struct be_cmd_req_rss_config {
 	struct be_cmd_req_hdr hdr;
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH net-next 6/7] be2net: Fix port name in message during driver load
From: Padmanabh Ratnakar @ 2012-07-12 13:57 UTC (permalink / raw)
  To: netdev; +Cc: Padmanabh Ratnakar


Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c |   38 +++++++++++++++++++++++++++
 drivers/net/ethernet/emulex/benet/be_cmds.h |   13 +++++++++
 drivers/net/ethernet/emulex/benet/be_main.c |    8 ++++-
 3 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 34dfc0c..8730f0e 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -2716,6 +2716,44 @@ err:
 	return status;
 }
 
+int be_cmd_query_port_name(struct be_adapter *adapter, u8 *port_name)
+{
+	struct be_mcc_wrb *wrb;
+	struct be_cmd_req_get_port_name *req;
+	int status;
+
+	if (!lancer_chip(adapter)) {
+		*port_name = adapter->hba_port_num + '0';
+		return 0;
+	}
+
+	spin_lock_bh(&adapter->mcc_lock);
+
+	wrb = wrb_from_mccq(adapter);
+	if (!wrb) {
+		status = -EBUSY;
+		goto err;
+	}
+
+	req = embedded_payload(wrb);
+
+	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
+			       OPCODE_COMMON_GET_PORT_NAME, sizeof(*req), wrb,
+			       NULL);
+	req->hdr.version = 1;
+
+	status = be_mcc_notify_wait(adapter);
+	if (!status) {
+		struct be_cmd_resp_get_port_name *resp = embedded_payload(wrb);
+		*port_name = resp->port_name[adapter->hba_port_num];
+	} else {
+		*port_name = adapter->hba_port_num + '0';
+	}
+err:
+	spin_unlock_bh(&adapter->mcc_lock);
+	return status;
+}
+
 int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload,
 			int wrb_payload_size, u16 *cmd_status, u16 *ext_status)
 {
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index c1324e7..88f7237 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -186,6 +186,7 @@ struct be_mcc_mailbox {
 #define OPCODE_COMMON_ENABLE_DISABLE_BEACON		69
 #define OPCODE_COMMON_GET_BEACON_STATE			70
 #define OPCODE_COMMON_READ_TRANSRECV_DATA		73
+#define OPCODE_COMMON_GET_PORT_NAME			77
 #define OPCODE_COMMON_GET_PHY_DETAILS			102
 #define OPCODE_COMMON_SET_DRIVER_FUNCTION_CAP		103
 #define OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES	121
@@ -1506,6 +1507,17 @@ struct be_cmd_resp_get_hsw_config {
 	u32 rsvd;
 };
 
+/******************* get port names ***************/
+struct be_cmd_req_get_port_name {
+	struct be_cmd_req_hdr hdr;
+	u32 rsvd0;
+};
+
+struct be_cmd_resp_get_port_name {
+	struct be_cmd_req_hdr hdr;
+	u8 port_name[4];
+};
+
 /*************** HW Stats Get v1 **********************************/
 #define BE_TXP_SW_SZ			48
 struct be_port_rxf_stats_v1 {
@@ -1772,4 +1784,5 @@ extern int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter,
 					  struct be_fat_conf_params *cfgs);
 extern int lancer_wait_ready(struct be_adapter *adapter);
 extern int lancer_test_and_set_rdy_state(struct be_adapter *adapter);
+extern int be_cmd_query_port_name(struct be_adapter *adapter, u8 *port_name);
 
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index cf6ad1f..7e989d0 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3797,6 +3797,7 @@ static int __devinit be_probe(struct pci_dev *pdev,
 	int status = 0;
 	struct be_adapter *adapter;
 	struct net_device *netdev;
+	char port_name;
 
 	status = pci_enable_device(pdev);
 	if (status)
@@ -3887,8 +3888,11 @@ static int __devinit be_probe(struct pci_dev *pdev,
 
 	schedule_delayed_work(&adapter->func_recovery_work,
 			      msecs_to_jiffies(1000));
-	dev_info(&pdev->dev, "%s: %s port %d\n", netdev->name, nic_name(pdev),
-		adapter->port_num);
+
+	be_cmd_query_port_name(adapter, &port_name);
+
+	dev_info(&pdev->dev, "%s: %s port %c\n", netdev->name, nic_name(pdev),
+		 port_name);
 
 	return 0;
 
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH net-next 1/7] be2net: Fix error while toggling autoneg of pause parameters
From: Padmanabh Ratnakar @ 2012-07-12 13:56 UTC (permalink / raw)
  To: netdev; +Cc: Padmanabh Ratnakar

Autonegotiation of pause parameters is possible only on some PHYs.
Ability of autoneg of pause parameters is reported by adapter.
Autoneg of pause parameters cannot be changed from driver.
Fix driver to give error when autoneg mode is toggled by user.

Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_ethtool.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 63e51d4..e34be1c 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -648,7 +648,7 @@ be_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *ecmd)
 	struct be_adapter *adapter = netdev_priv(netdev);
 	int status;
 
-	if (ecmd->autoneg != 0)
+	if (ecmd->autoneg != adapter->phy.fc_autoneg)
 		return -EINVAL;
 	adapter->tx_fc = ecmd->tx_pause;
 	adapter->rx_fc = ecmd->rx_pause;
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH net-next 5/7] be2net: Fix cleanup path when EQ creation fails
From: Padmanabh Ratnakar @ 2012-07-12 13:57 UTC (permalink / raw)
  To: netdev; +Cc: Padmanabh Ratnakar


Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 9e7dbd5..cf6ad1f 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1734,9 +1734,10 @@ static void be_evt_queues_destroy(struct be_adapter *adapter)
 	int i;
 
 	for_all_evt_queues(adapter, eqo, i) {
-		be_eq_clean(eqo);
-		if (eqo->q.created)
+		if (eqo->q.created) {
+			be_eq_clean(eqo);
 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
+		}
 		be_queue_free(adapter, &eqo->q);
 	}
 }
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH net-next 0/7] be2net updates
From: Padmanabh Ratnakar @ 2012-07-12 13:55 UTC (permalink / raw)
  To: netdev; +Cc: Padmanabh Ratnakar

Please apply.
Thanks,
Padmanabh

Padmanabh Ratnakar (7):
  be2net: Fix error while toggling autoneg of pause parameters
  be2net : Fix die temperature stat for Lancer
  be2net: Fix initialization sequence for Lancer
  be2net: Activate new FW after FW download for Lancer
  be2net: Fix cleanup path when EQ creation fails
  be2net: Fix port name in message during driver load
  be2net: Enable RSS UDP hashing for Lancer and Skyhawk

 drivers/net/ethernet/emulex/benet/be.h         |   24 ++-
 drivers/net/ethernet/emulex/benet/be_cmds.c    |  142 +++++++++++-
 drivers/net/ethernet/emulex/benet/be_cmds.h    |   34 +++-
 drivers/net/ethernet/emulex/benet/be_ethtool.c |    2 +-
 drivers/net/ethernet/emulex/benet/be_hw.h      |    7 +-
 drivers/net/ethernet/emulex/benet/be_main.c    |  291 +++++++++++++-----------
 6 files changed, 345 insertions(+), 155 deletions(-)

^ permalink raw reply

* [PATCH net-next 4/7] be2net: Activate new FW after FW download for Lancer
From: Padmanabh Ratnakar @ 2012-07-12 13:57 UTC (permalink / raw)
  To: netdev; +Cc: Padmanabh Ratnakar

After FW download, activate new FW by invoking FW reset.
Recreate rings once new FW is operational.

Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be.h      |   20 +++-
 drivers/net/ethernet/emulex/benet/be_cmds.c |   13 +-
 drivers/net/ethernet/emulex/benet/be_cmds.h |   15 ++-
 drivers/net/ethernet/emulex/benet/be_hw.h   |    7 +-
 drivers/net/ethernet/emulex/benet/be_main.c |  203 +++++++++++++++++++--------
 5 files changed, 180 insertions(+), 78 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 748e870..e2dfe31 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -389,6 +389,7 @@ struct be_adapter {
 	struct delayed_work work;
 	u16 work_counter;
 
+	struct delayed_work func_recovery_work;
 	u32 flags;
 	/* Ethtool knobs and info */
 	char fw_ver[FW_VER_LEN];
@@ -396,9 +397,10 @@ struct be_adapter {
 	u32 *pmac_id;		/* MAC addr handle used by BE card */
 	u32 beacon_state;	/* for set_phys_id */
 
-	bool eeh_err;
-	bool ue_detected;
+	bool eeh_error;
 	bool fw_timeout;
+	bool hw_error;
+
 	u32 port_num;
 	bool promiscuous;
 	u32 function_mode;
@@ -599,7 +601,19 @@ static inline bool be_multi_rxq(const struct be_adapter *adapter)
 
 static inline bool be_error(struct be_adapter *adapter)
 {
-	return adapter->eeh_err || adapter->ue_detected || adapter->fw_timeout;
+	return adapter->eeh_error || adapter->hw_error || adapter->fw_timeout;
+}
+
+static inline bool be_crit_error(struct be_adapter *adapter)
+{
+	return adapter->eeh_error || adapter->hw_error;
+}
+
+static inline void  be_clear_all_error(struct be_adapter *adapter)
+{
+	adapter->eeh_error = false;
+	adapter->hw_error = false;
+	adapter->fw_timeout = false;
 }
 
 static inline bool be_is_wol_excluded(struct be_adapter *adapter)
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 431f774..34dfc0c 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -349,7 +349,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db)
 		if (msecs > 4000) {
 			dev_err(&adapter->pdev->dev, "FW not responding\n");
 			adapter->fw_timeout = true;
-			be_detect_dump_ue(adapter);
+			be_detect_error(adapter);
 			return -1;
 		}
 
@@ -1869,8 +1869,9 @@ err:
 }
 
 int lancer_cmd_write_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
-			u32 data_size, u32 data_offset, const char *obj_name,
-			u32 *data_written, u8 *addn_status)
+			    u32 data_size, u32 data_offset,
+			    const char *obj_name, u32 *data_written,
+			    u8 *change_status, u8 *addn_status)
 {
 	struct be_mcc_wrb *wrb;
 	struct lancer_cmd_req_write_object *req;
@@ -1926,10 +1927,12 @@ int lancer_cmd_write_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
 		status = adapter->flash_status;
 
 	resp = embedded_payload(wrb);
-	if (!status)
+	if (!status) {
 		*data_written = le32_to_cpu(resp->actual_write_len);
-	else
+		*change_status = resp->change_status;
+	} else {
 		*addn_status = resp->additional_status;
+	}
 
 	return status;
 
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index cfc0620..c1324e7 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1163,6 +1163,8 @@ struct lancer_cmd_req_write_object {
 	u32 addr_high;
 };
 
+#define LANCER_NO_RESET_NEEDED		0x00
+#define LANCER_FW_RESET_NEEDED		0x02
 struct lancer_cmd_resp_write_object {
 	u8 opcode;
 	u8 subsystem;
@@ -1173,6 +1175,8 @@ struct lancer_cmd_resp_write_object {
 	u32 resp_len;
 	u32 actual_resp_len;
 	u32 actual_write_len;
+	u8 change_status;
+	u8 rsvd3[3];
 };
 
 /************************ Lancer Read FW info **************/
@@ -1718,10 +1722,11 @@ extern int be_cmd_write_flashrom(struct be_adapter *adapter,
 			struct be_dma_mem *cmd, u32 flash_oper,
 			u32 flash_opcode, u32 buf_size);
 extern int lancer_cmd_write_object(struct be_adapter *adapter,
-				struct be_dma_mem *cmd,
-				u32 data_size, u32 data_offset,
-				const char *obj_name,
-				u32 *data_written, u8 *addn_status);
+				   struct be_dma_mem *cmd,
+				   u32 data_size, u32 data_offset,
+				   const char *obj_name,
+				   u32 *data_written, u8 *change_status,
+				   u8 *addn_status);
 int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
 		u32 data_size, u32 data_offset, const char *obj_name,
 		u32 *data_read, u32 *eof, u8 *addn_status);
@@ -1744,7 +1749,7 @@ extern int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num,
 				u8 loopback_type, u8 enable);
 extern int be_cmd_get_phy_info(struct be_adapter *adapter);
 extern int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain);
-extern void be_detect_dump_ue(struct be_adapter *adapter);
+extern void be_detect_error(struct be_adapter *adapter);
 extern int be_cmd_get_die_temperature(struct be_adapter *adapter);
 extern int be_cmd_get_cntl_attributes(struct be_adapter *adapter);
 extern int be_cmd_req_native_mode(struct be_adapter *adapter);
diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h
index 7c8a710..b755f70 100644
--- a/drivers/net/ethernet/emulex/benet/be_hw.h
+++ b/drivers/net/ethernet/emulex/benet/be_hw.h
@@ -45,18 +45,19 @@
 #define POST_STAGE_ARMFW_RDY		0xc000	/* FW is done with POST */
 
 
-/* Lancer SLIPORT_CONTROL SLIPORT_STATUS registers */
+/* Lancer SLIPORT registers */
 #define SLIPORT_STATUS_OFFSET		0x404
 #define SLIPORT_CONTROL_OFFSET		0x408
 #define SLIPORT_ERROR1_OFFSET		0x40C
 #define SLIPORT_ERROR2_OFFSET		0x410
+#define PHYSDEV_CONTROL_OFFSET		0x414
 
 #define SLIPORT_STATUS_ERR_MASK		0x80000000
 #define SLIPORT_STATUS_RN_MASK		0x01000000
 #define SLIPORT_STATUS_RDY_MASK		0x00800000
-
-
 #define SLI_PORT_CONTROL_IP_MASK	0x08000000
+#define PHYSDEV_CONTROL_FW_RESET_MASK	0x00000002
+#define PHYSDEV_CONTROL_INP_MASK	0x40000000
 
 /********* Memory BAR register ************/
 #define PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET 	0xfc
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index a3641ba..9e7dbd5 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -155,7 +155,7 @@ static void be_intr_set(struct be_adapter *adapter, bool enable)
 {
 	u32 reg, enabled;
 
-	if (adapter->eeh_err)
+	if (adapter->eeh_error)
 		return;
 
 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
@@ -201,7 +201,7 @@ static void be_eq_notify(struct be_adapter *adapter, u16 qid,
 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) <<
 			DB_EQ_RING_ID_EXT_MASK_SHIFT);
 
-	if (adapter->eeh_err)
+	if (adapter->eeh_error)
 		return;
 
 	if (arm)
@@ -220,7 +220,7 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
 
-	if (adapter->eeh_err)
+	if (adapter->eeh_error)
 		return;
 
 	if (arm)
@@ -2098,13 +2098,13 @@ int be_poll(struct napi_struct *napi, int budget)
 	return max_work;
 }
 
-void be_detect_dump_ue(struct be_adapter *adapter)
+void be_detect_error(struct be_adapter *adapter)
 {
 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
 	u32 i;
 
-	if (adapter->eeh_err || adapter->ue_detected)
+	if (be_crit_error(adapter))
 		return;
 
 	if (lancer_chip(adapter)) {
@@ -2125,16 +2125,24 @@ void be_detect_dump_ue(struct be_adapter *adapter)
 		pci_read_config_dword(adapter->pdev,
 				PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask);
 
-		ue_lo = (ue_lo & (~ue_lo_mask));
-		ue_hi = (ue_hi & (~ue_hi_mask));
+		ue_lo = (ue_lo & ~ue_lo_mask);
+		ue_hi = (ue_hi & ~ue_hi_mask);
 	}
 
 	if (ue_lo || ue_hi ||
 		sliport_status & SLIPORT_STATUS_ERR_MASK) {
-		adapter->ue_detected = true;
-		adapter->eeh_err = true;
+		adapter->hw_error = true;
 		dev_err(&adapter->pdev->dev,
-			"Unrecoverable error in the card\n");
+			"Error detected in the card\n");
+	}
+
+	if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
+		dev_err(&adapter->pdev->dev,
+			"ERR: sliport status 0x%x\n", sliport_status);
+		dev_err(&adapter->pdev->dev,
+			"ERR: sliport error1 0x%x\n", sliport_err1);
+		dev_err(&adapter->pdev->dev,
+			"ERR: sliport error2 0x%x\n", sliport_err2);
 	}
 
 	if (ue_lo) {
@@ -2144,6 +2152,7 @@ void be_detect_dump_ue(struct be_adapter *adapter)
 				"UE: %s bit set\n", ue_status_low_desc[i]);
 		}
 	}
+
 	if (ue_hi) {
 		for (i = 0; ue_hi; ue_hi >>= 1, i++) {
 			if (ue_hi & 1)
@@ -2152,14 +2161,6 @@ void be_detect_dump_ue(struct be_adapter *adapter)
 		}
 	}
 
-	if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
-		dev_err(&adapter->pdev->dev,
-			"sliport status 0x%x\n", sliport_status);
-		dev_err(&adapter->pdev->dev,
-			"sliport error1 0x%x\n", sliport_err1);
-		dev_err(&adapter->pdev->dev,
-			"sliport error2 0x%x\n", sliport_err2);
-	}
 }
 
 static void be_msix_disable(struct be_adapter *adapter)
@@ -3067,6 +3068,40 @@ static int get_ufigen_type(struct flash_file_hdr_g2 *fhdr)
 		return 0;
 }
 
+static int lancer_wait_idle(struct be_adapter *adapter)
+{
+#define SLIPORT_IDLE_TIMEOUT 30
+	u32 reg_val;
+	int status = 0, i;
+
+	for (i = 0; i < SLIPORT_IDLE_TIMEOUT; i++) {
+		reg_val = ioread32(adapter->db + PHYSDEV_CONTROL_OFFSET);
+		if ((reg_val & PHYSDEV_CONTROL_INP_MASK) == 0)
+			break;
+
+		ssleep(1);
+	}
+
+	if (i == SLIPORT_IDLE_TIMEOUT)
+		status = -1;
+
+	return status;
+}
+
+static int lancer_fw_reset(struct be_adapter *adapter)
+{
+	int status = 0;
+
+	status = lancer_wait_idle(adapter);
+	if (status)
+		return status;
+
+	iowrite32(PHYSDEV_CONTROL_FW_RESET_MASK, adapter->db +
+		  PHYSDEV_CONTROL_OFFSET);
+
+	return status;
+}
+
 static int lancer_fw_download(struct be_adapter *adapter,
 				const struct firmware *fw)
 {
@@ -3081,6 +3116,7 @@ static int lancer_fw_download(struct be_adapter *adapter,
 	u32 offset = 0;
 	int status = 0;
 	u8 add_status = 0;
+	u8 change_status;
 
 	if (!IS_ALIGNED(fw->size, sizeof(u32))) {
 		dev_err(&adapter->pdev->dev,
@@ -3113,9 +3149,10 @@ static int lancer_fw_download(struct be_adapter *adapter,
 		memcpy(dest_image_ptr, data_ptr, chunk_size);
 
 		status = lancer_cmd_write_object(adapter, &flash_cmd,
-				chunk_size, offset, LANCER_FW_DOWNLOAD_LOCATION,
-				&data_written, &add_status);
-
+						 chunk_size, offset,
+						 LANCER_FW_DOWNLOAD_LOCATION,
+						 &data_written, &change_status,
+						 &add_status);
 		if (status)
 			break;
 
@@ -3127,8 +3164,10 @@ static int lancer_fw_download(struct be_adapter *adapter,
 	if (!status) {
 		/* Commit the FW written */
 		status = lancer_cmd_write_object(adapter, &flash_cmd,
-					0, offset, LANCER_FW_DOWNLOAD_LOCATION,
-					&data_written, &add_status);
+						 0, offset,
+						 LANCER_FW_DOWNLOAD_LOCATION,
+						 &data_written, &change_status,
+						 &add_status);
 	}
 
 	dma_free_coherent(&adapter->pdev->dev, flash_cmd.size, flash_cmd.va,
@@ -3141,6 +3180,20 @@ static int lancer_fw_download(struct be_adapter *adapter,
 		goto lancer_fw_exit;
 	}
 
+	if (change_status == LANCER_FW_RESET_NEEDED) {
+		status = lancer_fw_reset(adapter);
+		if (status) {
+			dev_err(&adapter->pdev->dev,
+				"Adapter busy for FW reset.\n"
+				"New FW will not be active.\n");
+			goto lancer_fw_exit;
+		}
+	} else if (change_status != LANCER_NO_RESET_NEEDED) {
+			dev_err(&adapter->pdev->dev,
+				"System reboot required for new FW"
+				" to be active\n");
+	}
+
 	dev_info(&adapter->pdev->dev, "Firmware flashed successfully\n");
 lancer_fw_exit:
 	return status;
@@ -3469,6 +3522,8 @@ static void __devexit be_remove(struct pci_dev *pdev)
 
 	be_roce_dev_remove(adapter);
 
+	cancel_delayed_work_sync(&adapter->func_recovery_work);
+
 	unregister_netdev(adapter->netdev);
 
 	be_clear(adapter);
@@ -3625,53 +3680,68 @@ static int be_dev_type_check(struct be_adapter *adapter)
 	return 0;
 }
 
-static void lancer_test_and_recover_fn_err(struct be_adapter *adapter)
+static int lancer_recover_func(struct be_adapter *adapter)
 {
 	int status;
-	u32 sliport_status;
 
-	if (adapter->eeh_err || adapter->ue_detected)
-		return;
+	status = lancer_test_and_set_rdy_state(adapter);
+	if (status)
+		goto err;
 
-	sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
+	if (netif_running(adapter->netdev))
+		be_close(adapter->netdev);
 
-	if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
-		dev_err(&adapter->pdev->dev,
-				"Adapter in error state."
-				"Trying to recover.\n");
+	be_clear(adapter);
+
+	adapter->hw_error = false;
+	adapter->fw_timeout = false;
+
+	status = be_setup(adapter);
+	if (status)
+		goto err;
 
-		status = lancer_test_and_set_rdy_state(adapter);
+	if (netif_running(adapter->netdev)) {
+		status = be_open(adapter->netdev);
 		if (status)
 			goto err;
+	}
 
-		netif_device_detach(adapter->netdev);
+	dev_err(&adapter->pdev->dev,
+		"Adapter SLIPORT recovery succeeded\n");
+	return 0;
+err:
+	dev_err(&adapter->pdev->dev,
+		"Adapter SLIPORT recovery failed\n");
 
-		if (netif_running(adapter->netdev))
-			be_close(adapter->netdev);
+	return status;
+}
+
+static void be_func_recovery_task(struct work_struct *work)
+{
+	struct be_adapter *adapter =
+		container_of(work, struct be_adapter,  func_recovery_work.work);
+	int status;
 
-		be_clear(adapter);
+	be_detect_error(adapter);
 
-		adapter->fw_timeout = false;
+	if (adapter->hw_error && lancer_chip(adapter)) {
 
-		status = be_setup(adapter);
-		if (status)
-			goto err;
+		if (adapter->eeh_error)
+			goto out;
 
-		if (netif_running(adapter->netdev)) {
-			status = be_open(adapter->netdev);
-			if (status)
-				goto err;
-		}
+		rtnl_lock();
+		netif_device_detach(adapter->netdev);
+		rtnl_unlock();
 
-		netif_device_attach(adapter->netdev);
+		status = lancer_recover_func(adapter);
 
-		dev_err(&adapter->pdev->dev,
-				"Adapter error recovery succeeded\n");
+		if (!status)
+			netif_device_attach(adapter->netdev);
 	}
-	return;
-err:
-	dev_err(&adapter->pdev->dev,
-			"Adapter error recovery failed\n");
+
+out:
+	schedule_delayed_work(&adapter->func_recovery_work,
+			      msecs_to_jiffies(1000));
 }
 
 static void be_worker(struct work_struct *work)
@@ -3682,11 +3752,6 @@ static void be_worker(struct work_struct *work)
 	struct be_eq_obj *eqo;
 	int i;
 
-	if (lancer_chip(adapter))
-		lancer_test_and_recover_fn_err(adapter);
-
-	be_detect_dump_ue(adapter);
-
 	/* when interrupts are not yet enabled, just reap any pending
 	* mcc completions */
 	if (!netif_running(adapter->netdev)) {
@@ -3805,6 +3870,7 @@ static int __devinit be_probe(struct pci_dev *pdev,
 		goto stats_clean;
 
 	INIT_DELAYED_WORK(&adapter->work, be_worker);
+	INIT_DELAYED_WORK(&adapter->func_recovery_work, be_func_recovery_task);
 	adapter->rx_fc = adapter->tx_fc = true;
 
 	status = be_setup(adapter);
@@ -3818,6 +3884,8 @@ static int __devinit be_probe(struct pci_dev *pdev,
 
 	be_roce_dev_add(adapter);
 
+	schedule_delayed_work(&adapter->func_recovery_work,
+			      msecs_to_jiffies(1000));
 	dev_info(&pdev->dev, "%s: %s port %d\n", netdev->name, nic_name(pdev),
 		adapter->port_num);
 
@@ -3851,6 +3919,8 @@ static int be_suspend(struct pci_dev *pdev, pm_message_t state)
 	if (adapter->wol)
 		be_setup_wol(adapter, true);
 
+	cancel_delayed_work_sync(&adapter->func_recovery_work);
+
 	netif_device_detach(netdev);
 	if (netif_running(netdev)) {
 		rtnl_lock();
@@ -3891,6 +3961,9 @@ static int be_resume(struct pci_dev *pdev)
 		be_open(netdev);
 		rtnl_unlock();
 	}
+
+	schedule_delayed_work(&adapter->func_recovery_work,
+			      msecs_to_jiffies(1000));
 	netif_device_attach(netdev);
 
 	if (adapter->wol)
@@ -3910,6 +3983,7 @@ static void be_shutdown(struct pci_dev *pdev)
 		return;
 
 	cancel_delayed_work_sync(&adapter->work);
+	cancel_delayed_work_sync(&adapter->func_recovery_work);
 
 	netif_device_detach(adapter->netdev);
 
@@ -3929,9 +4003,13 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
 
 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
 
-	adapter->eeh_err = true;
+	adapter->eeh_error = true;
+
+	cancel_delayed_work_sync(&adapter->func_recovery_work);
 
+	rtnl_lock();
 	netif_device_detach(netdev);
+	rtnl_unlock();
 
 	if (netif_running(netdev)) {
 		rtnl_lock();
@@ -3959,9 +4037,7 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
 	int status;
 
 	dev_info(&adapter->pdev->dev, "EEH reset\n");
-	adapter->eeh_err = false;
-	adapter->ue_detected = false;
-	adapter->fw_timeout = false;
+	be_clear_all_error(adapter);
 
 	status = pci_enable_device(pdev);
 	if (status)
@@ -4007,6 +4083,9 @@ static void be_eeh_resume(struct pci_dev *pdev)
 		if (status)
 			goto err;
 	}
+
+	schedule_delayed_work(&adapter->func_recovery_work,
+			      msecs_to_jiffies(1000));
 	netif_device_attach(netdev);
 	return;
 err:
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH net-next 3/7] be2net: Fix initialization sequence for Lancer
From: Padmanabh Ratnakar @ 2012-07-12 13:56 UTC (permalink / raw)
  To: netdev; +Cc: Padmanabh Ratnakar

Invoke only required initialization routines for Lancer.
Remove invocation of unnecessary routines.

Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c |   76 ++++++++++++++++++++++++++-
 drivers/net/ethernet/emulex/benet/be_cmds.h |    4 +-
 drivers/net/ethernet/emulex/benet/be_main.c |   75 +++-----------------------
 3 files changed, 87 insertions(+), 68 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index d75c9fb..431f774 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -426,12 +426,65 @@ static int be_POST_stage_get(struct be_adapter *adapter, u16 *stage)
 		return 0;
 }
 
-int be_cmd_POST(struct be_adapter *adapter)
+int lancer_wait_ready(struct be_adapter *adapter)
+{
+#define SLIPORT_READY_TIMEOUT 30
+	u32 sliport_status;
+	int status = 0, i;
+
+	for (i = 0; i < SLIPORT_READY_TIMEOUT; i++) {
+		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
+		if (sliport_status & SLIPORT_STATUS_RDY_MASK)
+			break;
+
+		msleep(1000);
+	}
+
+	if (i == SLIPORT_READY_TIMEOUT)
+		status = -1;
+
+	return status;
+}
+
+int lancer_test_and_set_rdy_state(struct be_adapter *adapter)
+{
+	int status;
+	u32 sliport_status, err, reset_needed;
+	status = lancer_wait_ready(adapter);
+	if (!status) {
+		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
+		err = sliport_status & SLIPORT_STATUS_ERR_MASK;
+		reset_needed = sliport_status & SLIPORT_STATUS_RN_MASK;
+		if (err && reset_needed) {
+			iowrite32(SLI_PORT_CONTROL_IP_MASK,
+				  adapter->db + SLIPORT_CONTROL_OFFSET);
+
+			/* check adapter has corrected the error */
+			status = lancer_wait_ready(adapter);
+			sliport_status = ioread32(adapter->db +
+						  SLIPORT_STATUS_OFFSET);
+			sliport_status &= (SLIPORT_STATUS_ERR_MASK |
+						SLIPORT_STATUS_RN_MASK);
+			if (status || sliport_status)
+				status = -1;
+		} else if (err || reset_needed) {
+			status = -1;
+		}
+	}
+	return status;
+}
+
+int be_fw_wait_ready(struct be_adapter *adapter)
 {
 	u16 stage;
 	int status, timeout = 0;
 	struct device *dev = &adapter->pdev->dev;
 
+	if (lancer_chip(adapter)) {
+		status = lancer_wait_ready(adapter);
+		return status;
+	}
+
 	do {
 		status = be_POST_stage_get(adapter, &stage);
 		if (status) {
@@ -562,6 +615,9 @@ int be_cmd_fw_init(struct be_adapter *adapter)
 	u8 *wrb;
 	int status;
 
+	if (lancer_chip(adapter))
+		return 0;
+
 	if (mutex_lock_interruptible(&adapter->mbox_lock))
 		return -1;
 
@@ -589,6 +645,9 @@ int be_cmd_fw_clean(struct be_adapter *adapter)
 	u8 *wrb;
 	int status;
 
+	if (lancer_chip(adapter))
+		return 0;
+
 	if (mutex_lock_interruptible(&adapter->mbox_lock))
 		return -1;
 
@@ -607,6 +666,7 @@ int be_cmd_fw_clean(struct be_adapter *adapter)
 	mutex_unlock(&adapter->mbox_lock);
 	return status;
 }
+
 int be_cmd_eq_create(struct be_adapter *adapter,
 		struct be_queue_info *eq, int eq_delay)
 {
@@ -1682,6 +1742,20 @@ int be_cmd_reset_function(struct be_adapter *adapter)
 	struct be_cmd_req_hdr *req;
 	int status;
 
+	if (lancer_chip(adapter)) {
+		status = lancer_wait_ready(adapter);
+		if (!status) {
+			iowrite32(SLI_PORT_CONTROL_IP_MASK,
+				  adapter->db + SLIPORT_CONTROL_OFFSET);
+			status = lancer_test_and_set_rdy_state(adapter);
+		}
+		if (status) {
+			dev_err(&adapter->pdev->dev,
+				"Adapter in non recoverable error\n");
+		}
+		return status;
+	}
+
 	if (mutex_lock_interruptible(&adapter->mbox_lock))
 		return -1;
 
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 3c938f5..cfc0620 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1656,7 +1656,7 @@ struct be_cmd_req_set_ext_fat_caps {
 };
 
 extern int be_pci_fnum_get(struct be_adapter *adapter);
-extern int be_cmd_POST(struct be_adapter *adapter);
+extern int be_fw_wait_ready(struct be_adapter *adapter);
 extern int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
 			u8 type, bool permanent, u32 if_handle, u32 pmac_id);
 extern int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr,
@@ -1765,4 +1765,6 @@ extern int be_cmd_get_ext_fat_capabilites(struct be_adapter *adapter,
 extern int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter,
 					  struct be_dma_mem *cmd,
 					  struct be_fat_conf_params *cfgs);
+extern int lancer_wait_ready(struct be_adapter *adapter);
+extern int lancer_test_and_set_rdy_state(struct be_adapter *adapter);
 
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 2436c3a..a3641ba 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2573,9 +2573,6 @@ static int be_clear(struct be_adapter *adapter)
 	be_tx_queues_destroy(adapter);
 	be_evt_queues_destroy(adapter);
 
-	/* tell fw we're done with firing cmds */
-	be_cmd_fw_clean(adapter);
-
 	be_msix_disable(adapter);
 	return 0;
 }
@@ -3476,6 +3473,9 @@ static void __devexit be_remove(struct pci_dev *pdev)
 
 	be_clear(adapter);
 
+	/* tell fw we're done with firing cmds */
+	be_cmd_fw_clean(adapter);
+
 	be_stats_cleanup(adapter);
 
 	be_ctrl_cleanup(adapter);
@@ -3625,54 +3625,6 @@ static int be_dev_type_check(struct be_adapter *adapter)
 	return 0;
 }
 
-static int lancer_wait_ready(struct be_adapter *adapter)
-{
-#define SLIPORT_READY_TIMEOUT 30
-	u32 sliport_status;
-	int status = 0, i;
-
-	for (i = 0; i < SLIPORT_READY_TIMEOUT; i++) {
-		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
-		if (sliport_status & SLIPORT_STATUS_RDY_MASK)
-			break;
-
-		msleep(1000);
-	}
-
-	if (i == SLIPORT_READY_TIMEOUT)
-		status = -1;
-
-	return status;
-}
-
-static int lancer_test_and_set_rdy_state(struct be_adapter *adapter)
-{
-	int status;
-	u32 sliport_status, err, reset_needed;
-	status = lancer_wait_ready(adapter);
-	if (!status) {
-		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
-		err = sliport_status & SLIPORT_STATUS_ERR_MASK;
-		reset_needed = sliport_status & SLIPORT_STATUS_RN_MASK;
-		if (err && reset_needed) {
-			iowrite32(SLI_PORT_CONTROL_IP_MASK,
-					adapter->db + SLIPORT_CONTROL_OFFSET);
-
-			/* check adapter has corrected the error */
-			status = lancer_wait_ready(adapter);
-			sliport_status = ioread32(adapter->db +
-							SLIPORT_STATUS_OFFSET);
-			sliport_status &= (SLIPORT_STATUS_ERR_MASK |
-						SLIPORT_STATUS_RN_MASK);
-			if (status || sliport_status)
-				status = -1;
-		} else if (err || reset_needed) {
-			status = -1;
-		}
-	}
-	return status;
-}
-
 static void lancer_test_and_recover_fn_err(struct be_adapter *adapter)
 {
 	int status;
@@ -3820,22 +3772,9 @@ static int __devinit be_probe(struct pci_dev *pdev,
 	if (status)
 		goto free_netdev;
 
-	if (lancer_chip(adapter)) {
-		status = lancer_wait_ready(adapter);
-		if (!status) {
-			iowrite32(SLI_PORT_CONTROL_IP_MASK,
-					adapter->db + SLIPORT_CONTROL_OFFSET);
-			status = lancer_test_and_set_rdy_state(adapter);
-		}
-		if (status) {
-			dev_err(&pdev->dev, "Adapter in non recoverable error\n");
-			goto ctrl_clean;
-		}
-	}
-
 	/* sync up with fw's ready state */
 	if (be_physfn(adapter)) {
-		status = be_cmd_POST(adapter);
+		status = be_fw_wait_ready(adapter);
 		if (status)
 			goto ctrl_clean;
 	}
@@ -4033,7 +3972,7 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
 	pci_restore_state(pdev);
 
 	/* Check if card is ok and fw is ready */
-	status = be_cmd_POST(adapter);
+	status = be_fw_wait_ready(adapter);
 	if (status)
 		return PCI_ERS_RESULT_DISCONNECT;
 
@@ -4055,6 +3994,10 @@ static void be_eeh_resume(struct pci_dev *pdev)
 	if (status)
 		goto err;
 
+	status = be_cmd_reset_function(adapter);
+	if (status)
+		goto err;
+
 	status = be_setup(adapter);
 	if (status)
 		goto err;
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH net-next 2/7] be2net : Fix die temperature stat for Lancer
From: Padmanabh Ratnakar @ 2012-07-12 13:56 UTC (permalink / raw)
  To: netdev; +Cc: Padmanabh Ratnakar

Query die temperature stat for Lancer to report it correctly
in ethtool.

Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be.h      |    1 +
 drivers/net/ethernet/emulex/benet/be_cmds.c |    8 +-------
 drivers/net/ethernet/emulex/benet/be_main.c |    6 ++++++
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 0490a04..748e870 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -435,6 +435,7 @@ struct be_adapter {
 	u32 max_pmac_cnt;	/* Max secondary UC MACs programmable */
 	u32 uc_macs;		/* Count of secondary UC MAC programmed */
 	u32 msg_enable;
+	int be_get_temp_freq;
 };
 
 #define be_physfn(adapter)		(!adapter->virtfn)
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 5eab791..d75c9fb 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -19,9 +19,6 @@
 #include "be.h"
 #include "be_cmds.h"
 
-/* Must be a power of 2 or else MODULO will BUG_ON */
-static int be_get_temp_freq = 64;
-
 static inline void *embedded_payload(struct be_mcc_wrb *wrb)
 {
 	return wrb->payload.embedded_payload;
@@ -115,7 +112,7 @@ static int be_mcc_compl_process(struct be_adapter *adapter,
 		}
 	} else {
 		if (opcode == OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES)
-			be_get_temp_freq = 0;
+			adapter->be_get_temp_freq = 0;
 
 		if (compl_status == MCC_STATUS_NOT_SUPPORTED ||
 			compl_status == MCC_STATUS_ILLEGAL_REQUEST)
@@ -1206,9 +1203,6 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd)
 	struct be_cmd_req_hdr *hdr;
 	int status = 0;
 
-	if (MODULO(adapter->work_counter, be_get_temp_freq) == 0)
-		be_cmd_get_die_temperature(adapter);
-
 	spin_lock_bh(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 2141bd7..2436c3a 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3567,6 +3567,9 @@ static int be_get_initial_config(struct be_adapter *adapter)
 	if (be_is_wol_supported(adapter))
 		adapter->wol = true;
 
+	/* Must be a power of 2 or else MODULO will BUG_ON */
+	adapter->be_get_temp_freq = 64;
+
 	level = be_get_fw_log_level(adapter);
 	adapter->msg_enable = level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
 
@@ -3747,6 +3750,9 @@ static void be_worker(struct work_struct *work)
 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
 	}
 
+	if (MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
+		be_cmd_get_die_temperature(adapter);
+
 	for_all_rx_queues(adapter, rxo, i) {
 		if (rxo->rx_post_starved) {
 			rxo->rx_post_starved = false;
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH] ixgbevf - Prevent RX/TX statistics getting reset to zero
From: Narendra_K @ 2012-07-12 13:55 UTC (permalink / raw)
  To: netdev

[-- Attachment #1: Type: text/plain, Size: 1959 bytes --]

Hello,

[Apologies if you are receiving this message twice. I am resending the message, as I got a message delivery failure note].

While exploring SR-IOV on Intel 82599EB 10-Gigabit SFP+ adapter, I had the following observation.  I enabled two VFs by passing 'max_vfs=2' to ixgbe driver. One of the VFs was assigned to a guest.
In the guest, the ifconfig and ip tools reported 'RX packets' and 'TX packets' as zero, even after pinging a remote host. Looking into it further, the commit 4197aa7bb81877ebb06e4f2cc1b5fea2da23a7bd implements 64 bit per ring statistics. It seemed like the 'total_bytes' and 'total_packets' of the RX and TX rings were being reset to zero by the RX and TX interrupt handlers, resulting in the user space tools reporting zero RX and TX bytes. 

The attached patch addresses the issue by preventing the resetting of RX and TX ring statistics to zero. The patch was taken against latest mainline 3.5-rc6 kernel.

I tested the patch by pinging  from the guest OS to a remote host.

ping -f <remote host> -c 10000

The ip and ifconfig tools showed that the statistics increased by 10000 packets.

# lspci | grep 82599
04:00.0 Ethernet controller: Intel Corporation 82599EB 10-Gigabit SFP+ Network Connection (rev 01)
04:00.1 Ethernet controller: Intel Corporation 82599EB 10-Gigabit SFP+ Network Connection (rev 01)
04:10.0 Ethernet controller: Intel Corporation 82599 Ethernet Controller Virtual Function (rev 01)
04:10.1 Ethernet controller: Intel Corporation 82599 Ethernet Controller Virtual Function (rev 01)
04:10.2 Ethernet controller: Intel Corporation 82599 Ethernet Controller Virtual Function (rev 01)
04:10.3 Ethernet controller: Intel Corporation 82599 Ethernet Controller Virtual Function (rev 01)

# lspci -s 04:00.0 -n
04:00.0 0200: 8086:154d (rev 01)
# lspci -s 04:10.0 -n
04:10.0 0200: 8086:10ed (rev 01) 

Please let me know if additional details and logs are required. 

With regards,
Narendra K




[-- Attachment #2: 0001-ixgbevf-Prevent-RX-TX-statistics-getting-reset-to-ze.patch --]
[-- Type: application/octet-stream, Size: 2033 bytes --]

From 37b3c2d1b7a1f5d45566b22fbaa038b3e5ed3218 Mon Sep 17 00:00:00 2001
From: Narendra K <narendra_k@dell.com>
Date: Thu, 12 Jul 2012 18:20:55 +0530
Subject: [PATCH] ixgbevf - Prevent RX/TX statistics getting reset to zero

The commit 4197aa7bb81877ebb06e4f2cc1b5fea2da23a7bd implements 64 bit
per ring statistics. But the driver resets the 'total_bytes' and
'total_packets' from RX and TX rings in the RX and TX interrupt
handlers to zero. This results in statistics being lost and user space
reporting RX and TX statistics as zero. This patch addresses the
issue by preventing the resetting of RX and TX ring statistics to
zero.

Signed-off-by: Narendra K <narendra_k@dell.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |   12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index f69ec42..8b304a4 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -969,8 +969,6 @@ static irqreturn_t ixgbevf_msix_clean_tx(int irq, void *data)
 	r_idx = find_first_bit(q_vector->txr_idx, adapter->num_tx_queues);
 	for (i = 0; i < q_vector->txr_count; i++) {
 		tx_ring = &(adapter->tx_ring[r_idx]);
-		tx_ring->total_bytes = 0;
-		tx_ring->total_packets = 0;
 		ixgbevf_clean_tx_irq(adapter, tx_ring);
 		r_idx = find_next_bit(q_vector->txr_idx, adapter->num_tx_queues,
 				      r_idx + 1);
@@ -994,16 +992,6 @@ static irqreturn_t ixgbevf_msix_clean_rx(int irq, void *data)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ixgbevf_ring  *rx_ring;
 	int r_idx;
-	int i;
-
-	r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues);
-	for (i = 0; i < q_vector->rxr_count; i++) {
-		rx_ring = &(adapter->rx_ring[r_idx]);
-		rx_ring->total_bytes = 0;
-		rx_ring->total_packets = 0;
-		r_idx = find_next_bit(q_vector->rxr_idx, adapter->num_rx_queues,
-				      r_idx + 1);
-	}
 
 	if (!q_vector->rxr_count)
 		return IRQ_HANDLED;
-- 
1.7.10.1


^ permalink raw reply related

* Re: [PATCH] sch_sfb: Fix missing NULL check
From: Eric Dumazet @ 2012-07-12 13:50 UTC (permalink / raw)
  To: David Miller; +Cc: alan, netdev
In-Reply-To: <20120712.062553.947925376447776276.davem@davemloft.net>

On Thu, 2012-07-12 at 06:25 -0700, David Miller wrote:
> From: Alan Cox <alan@lxorguk.ukuu.org.uk>
> Date: Thu, 12 Jul 2012 14:39:11 +0100
> 
> > Signed-off-by: Alan Cox <alna@linux.intel.com>
>                            ^^^^
> 
> I'm truly astonished that you type in signoffs by hand Alan.

Well, I do the same ;)

Feel free to add my

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* Re: [RFC PATCH v2] tcp: TCP Small Queues
From: Eric Dumazet @ 2012-07-12 13:46 UTC (permalink / raw)
  To: John Heffner
  Cc: David Miller, ycheng, dave.taht, netdev, codel, therbert,
	mattmathis, nanditad, ncardwell, andrewmcgr
In-Reply-To: <CABrhC0=Ls7G-noW1cjsyiF+G5v9f9R=bi6JvrOoT5ZDQB=gSXg@mail.gmail.com>

On Thu, 2012-07-12 at 09:33 -0400, John Heffner wrote:
> One general question: why a per-connection limit?  I haven't been
> following the bufferbloat conversation closely so I may have missed
> some of the conversation.  But it seems that multiple connections will
> still cause longer queue times.

We already have a per-device limit, in qdisc.

If you want to monitor several tcp sessions, I urge you to use a controller
for that, like codel or fq_codel.

Experiments show that limiting to two TSO packets in qdisc per tcp flow
is enough to stop insane qdisc queueing, without impact on throughput
for people wanting fast tcp sessions.

That's not solving the more general problem of having 1000 competing
flows.

^ permalink raw reply

* Re: [RFC PATCH v2] tcp: TCP Small Queues
From: John Heffner @ 2012-07-12 13:33 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, ycheng, dave.taht, netdev, codel, therbert,
	mattmathis, nanditad, ncardwell, andrewmcgr
In-Reply-To: <1341933215.3265.5476.camel@edumazet-glaptop>

One general question: why a per-connection limit?  I haven't been
following the bufferbloat conversation closely so I may have missed
some of the conversation.  But it seems that multiple connections will
still cause longer queue times.

Thanks,
  -John


On Tue, Jul 10, 2012 at 11:13 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> This introduces TSQ (TCP Small Queues)
>
> TSQ goal is to reduce number of TCP packets in xmit queues (qdisc &
> device queues), to reduce RTT and cwnd bias, part of the bufferbloat
> problem.
>
> sk->sk_wmem_alloc is not allowed to grow above a given limit,
> allowing no more than ~128KB [1] per tcp socket in qdisc/dev layers at a
> given time.
>
> TSO packets are sized/capped to half the limit, so that we have two
> TSO packets in flight, allowing better bandwidth use.
>
> As a side effect, setting the limit to 40000 automatically reduces the
> standard gso max limit (65536) to 40000/2 : It can help to reduce
> latencies of high prio packets, having smaller TSO packets.
>
> This means we divert sock_wfree() to a tcp_wfree() handler, to
> queue/send following frames when skb_orphan() [2] is called for the
> already queued skbs.
>
> Results on my dev machine (tg3 nic) are really impressive, using
> standard pfifo_fast, and with or without TSO/GSO. Without reduction of
> nominal bandwidth.
>
> I no longer have 3MBytes backlogged in qdisc by a single netperf
> session, and both side socket autotuning no longer use 4 Mbytes.
>
> As skb destructor cannot restart xmit itself ( as qdisc lock might be
> taken at this point ), we delegate the work to a tasklet. We use one
> tasklet per cpu for performance reasons.
>
>
>
> [1] New /proc/sys/net/ipv4/tcp_limit_output_bytes tunable
> [2] skb_orphan() is usually called at TX completion time,
>   but some drivers call it in their start_xmit() handler.
>   These drivers should at least use BQL, or else a single TCP
>   session can still fill the whole NIC TX ring, since TSQ will
>   have no effect.
>
> Not-Yet-Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/linux/tcp.h        |    9 ++
>  include/net/tcp.h          |    3
>  net/ipv4/sysctl_net_ipv4.c |    7 +
>  net/ipv4/tcp.c             |   14 ++-
>  net/ipv4/tcp_minisocks.c   |    1
>  net/ipv4/tcp_output.c      |  132 ++++++++++++++++++++++++++++++++++-
>  6 files changed, 160 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 7d3bced..55b8cf9 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -339,6 +339,9 @@ struct tcp_sock {
>         u32     rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
>         u32     lsndtime;       /* timestamp of last sent data packet (for restart window) */
>
> +       struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
> +       unsigned long   tsq_flags;
> +
>         /* Data for direct copy to user */
>         struct {
>                 struct sk_buff_head     prequeue;
> @@ -494,6 +497,12 @@ struct tcp_sock {
>         struct tcp_cookie_values  *cookie_values;
>  };
>
> +enum tsq_flags {
> +       TSQ_THROTTLED,
> +       TSQ_QUEUED,
> +       TSQ_OWNED, /* tcp_tasklet_func() found socket was locked */
> +};
> +
>  static inline struct tcp_sock *tcp_sk(const struct sock *sk)
>  {
>         return (struct tcp_sock *)sk;
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 53fb7d8..3a6ed09 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -253,6 +253,7 @@ extern int sysctl_tcp_cookie_size;
>  extern int sysctl_tcp_thin_linear_timeouts;
>  extern int sysctl_tcp_thin_dupack;
>  extern int sysctl_tcp_early_retrans;
> +extern int sysctl_tcp_limit_output_bytes;
>
>  extern atomic_long_t tcp_memory_allocated;
>  extern struct percpu_counter tcp_sockets_allocated;
> @@ -321,6 +322,8 @@ extern struct proto tcp_prot;
>
>  extern void tcp_init_mem(struct net *net);
>
> +extern void tcp_tasklet_init(void);
> +
>  extern void tcp_v4_err(struct sk_buff *skb, u32);
>
>  extern void tcp_shutdown (struct sock *sk, int how);
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 12aa0c5..70730f7 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -598,6 +598,13 @@ static struct ctl_table ipv4_table[] = {
>                 .mode           = 0644,
>                 .proc_handler   = proc_dointvec
>         },
> +       {
> +               .procname       = "tcp_limit_output_bytes",
> +               .data           = &sysctl_tcp_limit_output_bytes,
> +               .maxlen         = sizeof(int),
> +               .mode           = 0644,
> +               .proc_handler   = proc_dointvec
> +       },
>  #ifdef CONFIG_NET_DMA
>         {
>                 .procname       = "tcp_dma_copybreak",
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 3ba605f..8838bd2 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -376,6 +376,7 @@ void tcp_init_sock(struct sock *sk)
>         skb_queue_head_init(&tp->out_of_order_queue);
>         tcp_init_xmit_timers(sk);
>         tcp_prequeue_init(tp);
> +       INIT_LIST_HEAD(&tp->tsq_node);
>
>         icsk->icsk_rto = TCP_TIMEOUT_INIT;
>         tp->mdev = TCP_TIMEOUT_INIT;
> @@ -786,15 +787,17 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
>                                        int large_allowed)
>  {
>         struct tcp_sock *tp = tcp_sk(sk);
> -       u32 xmit_size_goal, old_size_goal;
> +       u32 xmit_size_goal, old_size_goal, gso_max_size;
>
>         xmit_size_goal = mss_now;
>
>         if (large_allowed && sk_can_gso(sk)) {
> -               xmit_size_goal = ((sk->sk_gso_max_size - 1) -
> -                                 inet_csk(sk)->icsk_af_ops->net_header_len -
> -                                 inet_csk(sk)->icsk_ext_hdr_len -
> -                                 tp->tcp_header_len);
> +               gso_max_size = min_t(u32, sk->sk_gso_max_size,
> +                                    sysctl_tcp_limit_output_bytes >> 1);
> +               xmit_size_goal = (gso_max_size - 1) -
> +                                inet_csk(sk)->icsk_af_ops->net_header_len -
> +                                inet_csk(sk)->icsk_ext_hdr_len -
> +                                tp->tcp_header_len;
>
>                 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
>
> @@ -3573,4 +3576,5 @@ void __init tcp_init(void)
>         tcp_secret_primary = &tcp_secret_one;
>         tcp_secret_retiring = &tcp_secret_two;
>         tcp_secret_secondary = &tcp_secret_two;
> +       tcp_tasklet_init();
>  }
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index 72b7c63..83b358f 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -482,6 +482,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
>                         treq->snt_isn + 1 + tcp_s_data_size(oldtp);
>
>                 tcp_prequeue_init(newtp);
> +               INIT_LIST_HEAD(&newtp->tsq_node);
>
>                 tcp_init_wl(newtp, treq->rcv_isn);
>
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index c465d3e..991ae45 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -50,6 +50,9 @@ int sysctl_tcp_retrans_collapse __read_mostly = 1;
>   */
>  int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
>
> +/* Default TSQ limit of two TSO segments */
> +int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
> +
>  /* This limits the percentage of the congestion window which we
>   * will allow a single TSO frame to consume.  Building TSO frames
>   * which are too large can cause TCP streams to be bursty.
> @@ -65,6 +68,8 @@ int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
>  int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
>  EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
>
> +static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
> +                          int push_one, gfp_t gfp);
>
>  /* Account for new data that has been sent to the network. */
>  static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
> @@ -783,6 +788,118 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
>         return size;
>  }
>
> +
> +/* TCP SMALL QUEUES (TSQ)
> + *
> + * TSQ goal is to keep small amount of skbs per tcp flow in tx queues (qdisc+dev)
> + * to reduce RTT and bufferbloat.
> + * We do this using a special skb destructor (tcp_wfree).
> + *
> + * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb
> + * needs to be reallocated in a driver.
> + * The invariant being skb->truesize substracted from sk->sk_wmem_alloc
> + *
> + * Since transmit from skb destructor is forbidden, we use a tasklet
> + * to process all sockets that eventually need to send more skbs.
> + * We use one tasklet per cpu, with its own queue of sockets.
> + */
> +struct tsq_tasklet {
> +       struct tasklet_struct   tasklet;
> +       struct list_head        head; /* queue of tcp sockets */
> +};
> +static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
> +
> +/*
> + * One tasklest per cpu tries to send more skbs.
> + * We run in tasklet context but need to disable irqs when
> + * transfering tsq->head because tcp_wfree() might
> + * interrupt us (non NAPI drivers)
> + */
> +static void tcp_tasklet_func(unsigned long data)
> +{
> +       struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
> +       LIST_HEAD(list);
> +       unsigned long flags;
> +       struct list_head *q, *n;
> +       struct tcp_sock *tp;
> +       struct sock *sk;
> +
> +       local_irq_save(flags);
> +       list_splice_init(&tsq->head, &list);
> +       local_irq_restore(flags);
> +
> +       list_for_each_safe(q, n, &list) {
> +               tp = list_entry(q, struct tcp_sock, tsq_node);
> +               list_del(&tp->tsq_node);
> +
> +               sk = (struct sock *)tp;
> +               bh_lock_sock(sk);
> +
> +               if (!sock_owned_by_user(sk)) {
> +                       if ((1 << sk->sk_state) &
> +                           (TCPF_CLOSE_WAIT | TCPF_ESTABLISHED))
> +                               tcp_write_xmit(sk,
> +                                              tcp_current_mss(sk),
> +                                              0, 0,
> +                                              GFP_ATOMIC);
> +               } else {
> +                       /* TODO:
> +                        * setup a timer, or check TSQ_OWNED in release_sock()
> +                        */
> +                       set_bit(TSQ_OWNED, &tp->tsq_flags);
> +               }
> +               bh_unlock_sock(sk);
> +
> +               clear_bit(TSQ_QUEUED, &tp->tsq_flags);
> +               sk_free(sk);
> +       }
> +}
> +
> +void __init tcp_tasklet_init(void)
> +{
> +       int i;
> +
> +       for_each_possible_cpu(i) {
> +               struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
> +
> +               INIT_LIST_HEAD(&tsq->head);
> +               tasklet_init(&tsq->tasklet,
> +                            tcp_tasklet_func,
> +                            (unsigned long)tsq);
> +       }
> +}
> +
> +/*
> + * Write buffer destructor automatically called from kfree_skb.
> + * We cant xmit new skbs from this context, as we might already
> + * hold qdisc lock.
> + */
> +void tcp_wfree(struct sk_buff *skb)
> +{
> +       struct sock *sk = skb->sk;
> +       struct tcp_sock *tp = tcp_sk(sk);
> +
> +       if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
> +           !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
> +               unsigned long flags;
> +               struct tsq_tasklet *tsq;
> +
> +               /* Keep a ref on socket.
> +                * This last ref will be released in tcp_tasklet_func()
> +                */
> +               atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
> +
> +               /* queue this socket to tasklet queue */
> +               local_irq_save(flags);
> +               tsq = &__get_cpu_var(tsq_tasklet);
> +               list_add(&tp->tsq_node, &tsq->head);
> +               tasklet_schedule(&tsq->tasklet);
> +               local_irq_restore(flags);
> +       } else {
> +               sock_wfree(skb);
> +       }
> +}
> +
>  /* This routine actually transmits TCP packets queued in by
>   * tcp_do_sendmsg().  This is used by both the initial
>   * transmission and possible later retransmissions.
> @@ -844,7 +961,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
>
>         skb_push(skb, tcp_header_size);
>         skb_reset_transport_header(skb);
> -       skb_set_owner_w(skb, sk);
> +
> +       skb_orphan(skb);
> +       skb->sk = sk;
> +       skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
> +                         tcp_wfree : sock_wfree;
> +       atomic_add(skb->truesize, &sk->sk_wmem_alloc);
>
>         /* Build TCP header and checksum it. */
>         th = tcp_hdr(skb);
> @@ -1780,6 +1902,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
>         while ((skb = tcp_send_head(sk))) {
>                 unsigned int limit;
>
> +
>                 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
>                 BUG_ON(!tso_segs);
>
> @@ -1800,6 +1923,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
>                                 break;
>                 }
>
> +               /* TSQ : sk_wmem_alloc accounts skb truesize,
> +                * including skb overhead. But thats OK.
> +                */
> +               if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
> +                       set_bit(TSQ_THROTTLED, &tp->tsq_flags);
> +                       break;
> +               }
>                 limit = mss_now;
>                 if (tso_segs > 1 && !tcp_urg_mode(tp))
>                         limit = tcp_mss_split_point(sk, skb, mss_now,
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] sch_sfb: Fix missing NULL check
From: David Miller @ 2012-07-12 13:25 UTC (permalink / raw)
  To: alan; +Cc: netdev
In-Reply-To: <20120712133847.18719.77998.stgit@localhost.localdomain>

From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 12 Jul 2012 14:39:11 +0100

> Signed-off-by: Alan Cox <alna@linux.intel.com>
                           ^^^^

I'm truly astonished that you type in signoffs by hand Alan.

^ permalink raw reply

* [PATCH] sch_sfb: Fix missing NULL check
From: Alan Cox @ 2012-07-12 13:39 UTC (permalink / raw)
  To: netdev

From: Alan Cox <alan@linux.intel.com>

Resolves-bug: https://bugzilla.kernel.org/show_bug.cgi?id=44461
Signed-off-by: Alan Cox <alna@linux.intel.com>
---

 net/sched/sch_sfb.c |    2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 74305c8..30ea467 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -570,6 +570,8 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	sch->qstats.backlog = q->qdisc->qstats.backlog;
 	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
 	if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 	return nla_nest_end(skb, opts);

^ permalink raw reply related

* Re: linux-next: manual merge of the net-next tree with the infiniband tree
From: Hadar Hen Zion @ 2012-07-12 12:59 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: David Miller, netdev, linux-next, linux-kernel, Jack Morgenstein,
	Roland Dreier, linux-rdma, Hadar Hen Zion, Or Gerlitz
In-Reply-To: <20120712120950.223be053857381046b7d5db6@canb.auug.org.au>

On 7/12/2012 5:09 AM, Stephen Rothwell wrote:
> Hi all,
>
> Today's linux-next merge of the net-next tree got a conflict in
> drivers/net/ethernet/mellanox/mlx4/main.c between commit 6634961c14d3
> ("mlx4: Put physical GID and P_Key table sizes in mlx4_phys_caps struct
> and paravirtualize them") from the infiniband tree and commit
> 0ff1fb654bec ("{NET, IB}/mlx4: Add device managed flow steering firmware
> API") from the net-next tree.
>
> Just context changes (I think).  I have fixed it up (see below) and can
> carry the fix as necessary.
>

Thanks Stephen.

Please add:
Acked-by: Hadar Hen Zion <hadarh@mellanox.co.il>

Hadar

^ permalink raw reply

* Re: linux-next: manual merge of the net-next tree with the infiniband tree
From: Hadar Hen Zion @ 2012-07-12 12:57 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: David Miller, netdev, linux-next, linux-kernel, Hadar Hen Zion,
	Or Gerlitz, Jack Morgenstein, Roland Dreier, linux-rdma
In-Reply-To: <20120712121307.a482863a98848e3813690ef2@canb.auug.org.au>

On 7/12/2012 5:13 AM, Stephen Rothwell wrote:
> Hi all,
>
> Today's linux-next merge of the net-next tree got a conflict in
> include/linux/mlx4/device.h between commit 396f2feb05d7 ("mlx4_core:
> Implement mechanism for reserved Q_Keys") from the infiniband tree and
> commit 0ff1fb654bec ("{NET, IB}/mlx4: Add device managed flow steering
> firmware API") from the net-next tree.
>
> Just context changes.  I fixed it up (see below) and can carry the fix
> as necessary.
>

Thanks Stephen.

Please add:
Acked-by: Hadar Hen Zion <hadarh@mellanox.co.il>

Hadar

^ permalink raw reply

* [PATCH iproute2] Ability to compile iproute2 as shared library
From: hamid jafarian @ 2012-07-12 11:44 UTC (permalink / raw)
  To: netdev; +Cc: shemminger

[-- Attachment #1: Type: text/plain, Size: 5198 bytes --]

Hi,

This is an attempt, with minimal changes, to compile iproute2 as a shared
library.
Some functions that are needed when iproute2 is compiled as a
shared library have been defined in "ip.c".
The NIC caching strategy has also been changed: when "libiproute2.so"
is used, the system NIC list may change, so all NICs must be
re-cached on each call to the NIC manipulation functions.
Some calls to "exit(*)" have also been changed to "return *".
HOWTO Make: # export LIBIPROUTE2_SO=y; make

in attached files there is a simple wrapper to work with 
libiproute2.so ...

---
 ip/Makefile  |   13 ++++++++++++-
 ip/ip.c      |   40 ++++++++++++++++++++++++++++++++++++++++
 ip/iproute.c |    2 +-
 lib/Makefile |    3 +++
 lib/ll_map.c |   25 ++++++++++++++++++++++++-
 lib/utils.c  |    7 ++++---
 6 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/ip/Makefile b/ip/Makefile
index e029ea1..23fc22f 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -13,14 +13,25 @@ ifeq ($(IP_CONFIG_SETNS),y)
 	CFLAGS += -DHAVE_SETNS
 endif
 
+ifeq ($(LIBIPROUTE2_SO), y))
+	CFLAGS += -fPIC -DLIBIPROUTE2_SO
+endif
+
 ALLOBJ=$(IPOBJ) $(RTMONOBJ)
 SCRIPTS=ifcfg rtpr routel routef
-TARGETS=ip rtmon
+
+ifeq ($(LIBIPROUTE2_SO), y)
+	TARGETS=libiproute2.so rtmon
+else
+	TARGETS=ip rtmon	
+endif
 
 all: $(TARGETS) $(SCRIPTS)
 
 ip: $(IPOBJ) $(LIBNETLINK)
 
+libiproute2.so: $(IPOBJ) $(LIBNETLINK)
+	$(CC) $(CFLAGS) -shared $(IPOBJ) $(LIBNETLINK) -o $(@)
 
 rtmon: $(RTMONOBJ)
 
diff --git a/ip/ip.c b/ip/ip.c
index 20dc3b5..d5c7b2f 100644
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -87,6 +87,44 @@ static const struct cmd {
 	{ 0 }
 };
 
+#ifdef LIBIPROUTE2_SO
+int do_ipinit()
+{
+
+	if (rtnl_open(&rth, 0) < 0)
+		return 1;
+	_SL_="\n";
+	return 0;
+}
+
+int do_ipfini()
+{
+	rtnl_close(&rth);
+	return 0;
+}
+
+void do_ipflushloop(int loop)
+{
+	max_flush_loops = loop;
+}
+
+int do_ipfamily(char *family)
+{
+	if (strcmp(family, "inet") == 0)
+		preferred_family = AF_INET;
+	else if (strcmp(family, "inet6") == 0)
+		preferred_family = AF_INET6;
+	else if (strcmp(family, "dnet") == 0)
+		preferred_family = AF_DECnet;
+	else if (strcmp(family, "link") == 0)
+		preferred_family = AF_PACKET;
+	else if (strcmp(family, "ipx") == 0)
+		preferred_family = AF_IPX;
+	else return -1;
+	return 0;
+}
+#endif //#ifdef LIBIPROUTE2_SO
+
 static int do_cmd(const char *argv0, int argc, char **argv)
 {
 	const struct cmd *c;
@@ -101,6 +139,7 @@ static int do_cmd(const char *argv0, int argc, char
**argv)
 	return EXIT_FAILURE;
 }
 
+#ifndef LIBIPROUTE2_SO
 static int batch(const char *name)
 {
 	char *line = NULL;
@@ -264,3 +303,4 @@ int main(int argc, char **argv)
 	rtnl_close(&rth);
 	usage();
 }
+#endif //#ifndef LIBIPROUTE2_SO
diff --git a/ip/iproute.c b/ip/iproute.c
index 5cd313e..8ae253b 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -1012,7 +1012,7 @@ int iproute_modify(int cmd, unsigned flags, int
argc, char **argv)
 		req.r.rtm_family = AF_INET;
 
 	if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
-		exit(2);
+		return 2;
 
 	return 0;
 }
diff --git a/lib/Makefile b/lib/Makefile
index da2f0fc..aa9a10f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,4 +1,7 @@
 CFLAGS += -fPIC
+ifeq ($(LIBIPROUTE2_SO), y))
+	CFLAGS += -DLIBIPROUTE2_SO
+endif
 
 UTILOBJ=utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o inet_proto.o
 
diff --git a/lib/ll_map.c b/lib/ll_map.c
index 1ca781e..6923511 100644
--- a/lib/ll_map.c
+++ b/lib/ll_map.c
@@ -172,15 +172,18 @@ unsigned ll_name_to_index(const char *name)
 
 	if (name == NULL)
 		return 0;
-
+#ifndef LIBIPROUTE2_SO
 	if (icache && strcmp(name, ncache) == 0)
 		return icache;
+#endif
 
 	for (i=0; i<IDXMAP_SIZE; i++) {
 		for (im = idx_head[i]; im; im = im->idx_next) {
 			if (strcmp(im->name, name) == 0) {
+#ifndef LIBIPROUTE2_SO
 				icache = im->index;
 				strcpy(ncache, name);
+#endif
 				return im->index;
 			}
 		}
@@ -192,12 +195,32 @@ unsigned ll_name_to_index(const char *name)
 	return idx;
 }
 
+#ifdef LIBIPROUTE2_SO
+int ll_free_map()
+{
+	int i;
+	struct ll_cache *im, *imt;
+	for (i=0; i<IDXMAP_SIZE; i++) {
+		for (im = idx_head[i]; im; im = imt) {
+			imt = im->idx_next;
+			free(im);
+		}
+		idx_head[i] = NULL;
+	}
+	return 0;
+}
+#endif
+
 int ll_init_map(struct rtnl_handle *rth)
 {
 	static int initialized;
 
+#ifdef LIBIPROUTE2_SO
+	ll_free_map();
+#else
 	if (initialized)
 		return 0;
+#endif
 
 	if (rtnl_wilddump_request(rth, AF_UNSPEC, RTM_GETLINK) < 0) {
 		perror("Cannot send dump request");
diff --git a/lib/utils.c b/lib/utils.c
index d80f79b..6b7cdee 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -330,6 +330,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int
family)
 	int err;
 	unsigned plen;
 	char *slash;
+	char *addr = arg;
 
 	memset(dst, 0, sizeof(*dst));
 
@@ -346,9 +347,9 @@ int get_prefix_1(inet_prefix *dst, char *arg, int
family)
 
 	slash = strchr(arg, '/');
 	if (slash)
-		*slash = 0;
+		addr = strndup(arg, slash - arg);
 
-	err = get_addr_1(dst, arg, family);
+	err = get_addr_1(dst, addr, family);
 	if (err == 0) {
 		switch(dst->family) {
 			case AF_INET6:
@@ -373,7 +374,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int
family)
 	}
 done:
 	if (slash)
-		*slash = '/';
+		free(addr);
 	return err;
 }
 
-- 
1.7.6.4


[-- Attachment #2: 0001-Ability-to-compile-iproute2-as-shared-library.patch --]
[-- Type: text/x-patch, Size: 5339 bytes --]

>From 623097d32e26f51aca72bcd75979c187f656b4ff Mon Sep 17 00:00:00 2001
From: "Hamid Jafarian (hm.t.)" <hamid@pdnsoft.com>
Date: Thu, 12 Jul 2012 15:26:20 +0430
Subject: [PATCH] Ability to compile iproute2 as shared library

This is a try with minimum changes to compile iproute2 as shared
library.
Some functions would be used when we compile iproute2 as
shared library has been defined in "ip.c".
Also NICs caching strategy has been changed because, when we use
"libiproute2.so", system NIC list may change, so we should
re-cache all NICs at each call to NIC manipulation functions.
Also some call of "exit(*)" changed to "return *".
HOWTO Make: # export LIBIPROUTE2_SO=y; make
---
 ip/Makefile  |   13 ++++++++++++-
 ip/ip.c      |   40 ++++++++++++++++++++++++++++++++++++++++
 ip/iproute.c |    2 +-
 lib/Makefile |    3 +++
 lib/ll_map.c |   25 ++++++++++++++++++++++++-
 lib/utils.c  |    7 ++++---
 6 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/ip/Makefile b/ip/Makefile
index e029ea1..23fc22f 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -13,14 +13,25 @@ ifeq ($(IP_CONFIG_SETNS),y)
 	CFLAGS += -DHAVE_SETNS
 endif
 
+ifeq ($(LIBIPROUTE2_SO), y))
+	CFLAGS += -fPIC -DLIBIPROUTE2_SO
+endif
+
 ALLOBJ=$(IPOBJ) $(RTMONOBJ)
 SCRIPTS=ifcfg rtpr routel routef
-TARGETS=ip rtmon
+
+ifeq ($(LIBIPROUTE2_SO), y)
+	TARGETS=libiproute2.so rtmon
+else
+	TARGETS=ip rtmon	
+endif
 
 all: $(TARGETS) $(SCRIPTS)
 
 ip: $(IPOBJ) $(LIBNETLINK)
 
+libiproute2.so: $(IPOBJ) $(LIBNETLINK)
+	$(CC) $(CFLAGS) -shared $(IPOBJ) $(LIBNETLINK) -o $(@)
 
 rtmon: $(RTMONOBJ)
 
diff --git a/ip/ip.c b/ip/ip.c
index 20dc3b5..d5c7b2f 100644
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -87,6 +87,44 @@ static const struct cmd {
 	{ 0 }
 };
 
+#ifdef LIBIPROUTE2_SO
+int do_ipinit()
+{
+
+	if (rtnl_open(&rth, 0) < 0)
+		return 1;
+	_SL_="\n";
+	return 0;
+}
+
+int do_ipfini()
+{
+	rtnl_close(&rth);
+	return 0;
+}
+
+void do_ipflushloop(int loop)
+{
+	max_flush_loops = loop;
+}
+
+int do_ipfamily(char *family)
+{
+	if (strcmp(family, "inet") == 0)
+		preferred_family = AF_INET;
+	else if (strcmp(family, "inet6") == 0)
+		preferred_family = AF_INET6;
+	else if (strcmp(family, "dnet") == 0)
+		preferred_family = AF_DECnet;
+	else if (strcmp(family, "link") == 0)
+		preferred_family = AF_PACKET;
+	else if (strcmp(family, "ipx") == 0)
+		preferred_family = AF_IPX;
+	else return -1;
+	return 0;
+}
+#endif //#ifdef LIBIPROUTE2_SO
+
 static int do_cmd(const char *argv0, int argc, char **argv)
 {
 	const struct cmd *c;
@@ -101,6 +139,7 @@ static int do_cmd(const char *argv0, int argc, char **argv)
 	return EXIT_FAILURE;
 }
 
+#ifndef LIBIPROUTE2_SO
 static int batch(const char *name)
 {
 	char *line = NULL;
@@ -264,3 +303,4 @@ int main(int argc, char **argv)
 	rtnl_close(&rth);
 	usage();
 }
+#endif //#ifndef LIBIPROUTE2_SO
diff --git a/ip/iproute.c b/ip/iproute.c
index 5cd313e..8ae253b 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -1012,7 +1012,7 @@ int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
 		req.r.rtm_family = AF_INET;
 
 	if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
-		exit(2);
+		return 2;
 
 	return 0;
 }
diff --git a/lib/Makefile b/lib/Makefile
index da2f0fc..aa9a10f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,4 +1,7 @@
 CFLAGS += -fPIC
+ifeq ($(LIBIPROUTE2_SO), y))
+	CFLAGS += -DLIBIPROUTE2_SO
+endif
 
 UTILOBJ=utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o inet_proto.o
 
diff --git a/lib/ll_map.c b/lib/ll_map.c
index 1ca781e..6923511 100644
--- a/lib/ll_map.c
+++ b/lib/ll_map.c
@@ -172,15 +172,18 @@ unsigned ll_name_to_index(const char *name)
 
 	if (name == NULL)
 		return 0;
-
+#ifndef LIBIPROUTE2_SO
 	if (icache && strcmp(name, ncache) == 0)
 		return icache;
+#endif
 
 	for (i=0; i<IDXMAP_SIZE; i++) {
 		for (im = idx_head[i]; im; im = im->idx_next) {
 			if (strcmp(im->name, name) == 0) {
+#ifndef LIBIPROUTE2_SO
 				icache = im->index;
 				strcpy(ncache, name);
+#endif
 				return im->index;
 			}
 		}
@@ -192,12 +195,32 @@ unsigned ll_name_to_index(const char *name)
 	return idx;
 }
 
+#ifdef LIBIPROUTE2_SO
+int ll_free_map()
+{
+	int i;
+	struct ll_cache *im, *imt;
+	for (i=0; i<IDXMAP_SIZE; i++) {
+		for (im = idx_head[i]; im; im = imt) {
+			imt = im->idx_next;
+			free(im);
+		}
+		idx_head[i] = NULL;
+	}
+	return 0;
+}
+#endif
+
 int ll_init_map(struct rtnl_handle *rth)
 {
 	static int initialized;
 
+#ifdef LIBIPROUTE2_SO
+	ll_free_map();
+#else
 	if (initialized)
 		return 0;
+#endif
 
 	if (rtnl_wilddump_request(rth, AF_UNSPEC, RTM_GETLINK) < 0) {
 		perror("Cannot send dump request");
diff --git a/lib/utils.c b/lib/utils.c
index d80f79b..6b7cdee 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -330,6 +330,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family)
 	int err;
 	unsigned plen;
 	char *slash;
+	char *addr = arg;
 
 	memset(dst, 0, sizeof(*dst));
 
@@ -346,9 +347,9 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family)
 
 	slash = strchr(arg, '/');
 	if (slash)
-		*slash = 0;
+		addr = strndup(arg, slash - arg);
 
-	err = get_addr_1(dst, arg, family);
+	err = get_addr_1(dst, addr, family);
 	if (err == 0) {
 		switch(dst->family) {
 			case AF_INET6:
@@ -373,7 +374,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family)
 	}
 done:
 	if (slash)
-		*slash = '/';
+		free(addr);
 	return err;
 }
 
-- 
1.7.6.4


[-- Attachment #3: ipw.c --]
[-- Type: text/x-csrc, Size: 4547 bytes --]

#include "ipw.h"

/*
 * Initialize the "ip" command environment.
 *
 * Thin wrapper: hands the work to do_ipinit() and passes its status
 * straight back to the caller.
 */
int ip_initialize()
{
	int status = do_ipinit();

	return status;
}

/*
 * Finalize the "ip" command environment.
 *
 * Thin wrapper: hands the work to do_ipfini() and passes its status
 * straight back to the caller.
 */
int ip_finalize()
{
	int status = do_ipfini();

	return status;
}

/*
 * Set the maximum number of loops of the address flush process by
 * forwarding 'loop' to do_ipflushloop().
 *
 * Returns 0. The function is declared to return int, so it must not fall
 * off the end without a value: a caller reading the result would otherwise
 * invoke undefined behavior.
 */
int ip_setflushloop(int loop)
{
	do_ipflushloop(loop);
	return 0;
}

/*
 * Add a VLAN device on top of 'dev'.
 *
 * Builds the argument vector
 *   add link <dev> name <dev>.<vlanID> type vlan id <vlanID>
 * and passes it to do_iplink().
 *
 * Returns the result of do_iplink(), or -1 without calling it when 'dev'
 * is NULL or when the VLAN id / derived device name does not fit in the
 * local buffers (the original unbounded sprintf() calls would overflow the
 * stack buffers in those cases).
 */
int ip_nicAddVLAN(const char *dev, int vlanID)
{
	char vlan[12];		/* sign + 10 digits + NUL for a 32-bit int */
	char vlanDevice[16];	/* 15 name characters + NUL */
	const char *argv[10];
	int len;

	if (dev == NULL)
		return -1;

	/* snprintf() reports the length it wanted; >= size means truncation. */
	len = snprintf(vlan, sizeof(vlan), "%d", vlanID);
	if (len < 0 || (size_t)len >= sizeof(vlan))
		return -1;

	len = snprintf(vlanDevice, sizeof(vlanDevice), "%s.%d", dev, vlanID);
	if (len < 0 || (size_t)len >= sizeof(vlanDevice))
		return -1;

	argv[0] = "add"; //command
	argv[1] = "link";
	argv[2] = dev;
	argv[3] = "name";
	argv[4] = vlanDevice;
	argv[5] = "type";
	argv[6] = "vlan";
	argv[7] = "id";
	argv[8] = vlan;
	argv[9] = NULL;

	/* argc excludes the terminating NULL entry. */
	return do_iplink(9, (char **)argv);
}

/*
 * Delete the VLAN device named 'dev'.
 *
 * Passes "delete <dev> type vlan" to do_iplink() and returns its result.
 */
int ip_nicDelVLAN(const char *dev)
{
	const char *args[5] = { "delete", dev, "type", "vlan", NULL };

	/* Four real arguments; the trailing NULL is not counted. */
	return do_iplink(4, (char **)args);
}

/*
 * Add address 'ifaddr' to device 'dev'.
 *
 * Selects the address family through do_ipfamily() and then passes
 *   add <ifaddr> [broadcast <broadcast>] [anycast <anycast>] dev <dev>
 * to do_ipaddr(). 'broadcast' and 'anycast' are skipped when NULL.
 *
 * Returns -1 when do_ipfamily() rejects 'family', otherwise the result
 * of do_ipaddr().
 */
int ip_nicAddAddress(const char *dev, const char *family, const char *ifaddr,
				const char *broadcast, const char *anycast)
{
	const char *args[9];
	int argc = 0;

	args[argc++] = "add";
	args[argc++] = ifaddr;

	if (broadcast != NULL) {
		args[argc++] = "broadcast";
		args[argc++] = broadcast;
	}

	if (anycast != NULL) {
		args[argc++] = "anycast";
		args[argc++] = anycast;
	}

	args[argc++] = "dev";
	args[argc++] = dev;
	args[argc] = NULL;	/* terminator, not counted in argc */

	if (do_ipfamily(family) != 0)
		return -1;

	return do_ipaddr(argc, (char **)args);
}

/*
 * Delete address 'ifaddr' from device 'dev'.
 *
 * Selects the address family through do_ipfamily() and then passes
 *   del <ifaddr> [broadcast <broadcast>] [anycast <anycast>] dev <dev>
 * to do_ipaddr(). 'broadcast' and 'anycast' are skipped when NULL.
 *
 * Returns -1 when do_ipfamily() rejects 'family', otherwise the result
 * of do_ipaddr().
 */
int ip_nicDelAddress(const char *dev, const char *family, const char *ifaddr,
				const char *broadcast, const char *anycast)
{
	const char *args[9];
	int argc = 0;

	args[argc++] = "del";
	args[argc++] = ifaddr;

	if (broadcast != NULL) {
		args[argc++] = "broadcast";
		args[argc++] = broadcast;
	}

	if (anycast != NULL) {
		args[argc++] = "anycast";
		args[argc++] = anycast;
	}

	args[argc++] = "dev";
	args[argc++] = dev;
	args[argc] = NULL;	/* terminator, not counted in argc */

	if (do_ipfamily(family) != 0)
		return -1;

	return do_ipaddr(argc, (char **)args);
}

/*
 * Delete all addresses of device 'dev'.
 *
 * Passes "flush dev <dev>" to do_ipaddr() and returns its result.
 */
int ip_nicFlushAddresses(const char *dev)
{
	const char *args[4] = { "flush", "dev", dev, NULL };

	/* Three real arguments; the trailing NULL is not counted. */
	return do_ipaddr(3, (char **)args);
}

/*
 * Set the MTU of device 'dev'.
 *
 * 'mtu' is passed through as a string: the call hands
 * "set <dev> mtu <mtu>" to do_iplink() and returns its result.
 */
int ip_nicSetMTU(const char *dev, const char *mtu)
{
	const char *args[5] = { "set", dev, "mtu", mtu, NULL };

	/* Four real arguments; the trailing NULL is not counted. */
	return do_iplink(4, (char **)args);
}

/*
 * Switch the "multicast" option of device 'dev'.
 *
 * A non-zero 'do_enable' selects "on", zero selects "off". The call hands
 * "set <dev> multicast on|off" to do_iplink() and returns its result.
 */
int ip_nicSetMulticast(const char *dev, unsigned short do_enable)
{
	const char *state = do_enable ? "on" : "off";
	const char *args[5] = { "set", dev, "multicast", state, NULL };

	/* Four real arguments; the trailing NULL is not counted. */
	return do_iplink(4, (char **)args);
}

/*
 * Switch the "allmulticast" option of device 'dev'.
 *
 * A non-zero 'do_enable' selects "on", zero selects "off". The call hands
 * "set <dev> allmulticast on|off" to do_iplink() and returns its result.
 */
int ip_nicSetAllMulticast(const char *dev, unsigned short do_enable)
{
	const char *state = do_enable ? "on" : "off";
	const char *args[5] = { "set", dev, "allmulticast", state, NULL };

	/* Four real arguments; the trailing NULL is not counted. */
	return do_iplink(4, (char **)args);
}

/*
 * Switch the "arp" option of device 'dev'.
 *
 * A non-zero 'do_enable' selects "on", zero selects "off". The call hands
 * "set <dev> arp on|off" to do_iplink() and returns its result.
 */
int ip_nicSetARP(const char *dev, unsigned short do_enable)
{
	const char *state = do_enable ? "on" : "off";
	const char *args[5] = { "set", dev, "arp", state, NULL };

	/* Four real arguments; the trailing NULL is not counted. */
	return do_iplink(4, (char **)args);
}

/*
 * Change the state of device 'dev'.
 *
 * A non-zero 'do_up' selects "up", zero selects "down". The call hands
 * "set <dev> up|down" to do_iplink() and returns its result.
 */
int ip_nicChangeState(const char *dev, unsigned short do_up)
{
	const char *state = do_up ? "up" : "down";
	const char *args[4] = { "set", dev, state, NULL };

	/* Three real arguments; the trailing NULL is not counted. */
	return do_iplink(3, (char **)args);
}

/*
 * Add the route described by 'ri'.
 *
 * Slot 0 holds the "add" command word; _ip_rtArgv() fills the remaining
 * slots from 'ri' and reports how many arguments it produced. The combined
 * vector is passed to do_iproute(), whose result is returned.
 */
int ip_rtAdd(const struct RouteInfo *ri)
{
	const char *args[26];
	int count;

	args[0] = "add";
	count = 1 + _ip_rtArgv(ri, &args[1]);

	return do_iproute(count, (char **)args);
}

/*
 * Delete the route described by 'ri'.
 *
 * Slot 0 holds the "del" command word; _ip_rtArgv() fills the remaining
 * slots from 'ri' and reports how many arguments it produced. The combined
 * vector is passed to do_iproute(), whose result is returned.
 */
int ip_rtDel(const struct RouteInfo *ri)
{
	const char *args[26];
	int count;

	args[0] = "del";
	count = 1 + _ip_rtArgv(ri, &args[1]);

	return do_iproute(count, (char **)args);
}

/*
 * Replace (or add) the route described by 'ri'.
 *
 * Slot 0 holds the "replace" command word; _ip_rtArgv() fills the remaining
 * slots from 'ri' and reports how many arguments it produced. The combined
 * vector is passed to do_iproute(), whose result is returned.
 */
int ip_rtRep(const struct RouteInfo *ri)
{
	const char *args[26];
	int count;

	args[0] = "replace";
	count = 1 + _ip_rtArgv(ri, &args[1]);

	return do_iproute(count, (char **)args);
}

/*
 * Fill 'argv' with the ip-route arguments described by 'ri'.
 *
 * Entries are written in this order: route type (bare word), prefix,
 * then "tos", "table", "scope", "metric", "preference", "via", "dev",
 * "weight", "mtu" (optionally followed by "lock") and "window", each
 * followed by its value. The prefix is always written; every other field
 * is written only when its RouteInfo member is non-NULL. A terminating
 * NULL entry is appended.
 *
 * "via" and "dev" are guarded like the other optional fields so that a
 * route without a gateway or device does not leave a NULL pointer in the
 * middle of the vector that is still counted as an argument.
 *
 * 'argv' must have room for 24 entries (23 arguments plus the terminator)
 * in the worst case.
 *
 * Returns the number of arguments written, not counting the terminator.
 */
int _ip_rtArgv(const struct RouteInfo *ri, const char **argv)
{
	int i = 0;

	if (ri->rt_type)
		argv[i++] = ri->rt_type;
	argv[i++] = ri->rt_prefix;
	if (ri->rt_tos) {
		argv[i++] = "tos";
		argv[i++] = ri->rt_tos;
	}
	if (ri->rt_table) {
		argv[i++] = "table";
		argv[i++] = ri->rt_table;
	}
	if (ri->rt_scope) {
		argv[i++] = "scope";
		argv[i++] = ri->rt_scope;
	}
	if (ri->rt_metric) {
		argv[i++] = "metric";
		argv[i++] = ri->rt_metric;
	}
	if (ri->rt_preference) {
		argv[i++] = "preference";
		argv[i++] = ri->rt_preference;
	}
	if (ri->rt_via) {
		argv[i++] = "via";
		argv[i++] = ri->rt_via;
	}
	if (ri->rt_dev) {
		argv[i++] = "dev";
		argv[i++] = ri->rt_dev;
	}
	if (ri->rt_weight) {
		argv[i++] = "weight";
		argv[i++] = ri->rt_weight;
	}
	if (ri->rt_mtu) {
		argv[i++] = "mtu";
		/* "lock" goes between the keyword and the value. */
		if (ri->rt_mtu_lock)
			argv[i++] = "lock";
		argv[i++] = ri->rt_mtu;
	}
	if (ri->rt_tcp_window) {
		argv[i++] = "window";
		argv[i++] = ri->rt_tcp_window;
	}
	argv[i++] = NULL;

	/* The terminator was counted by the last increment; exclude it. */
	return i - 1;
}

[-- Attachment #4: ipw.h --]
[-- Type: text/x-chdr, Size: 3032 bytes --]

/**
 * \file ipw.h
 * This is a wrapper definition for iproute2 package.
 *
 * \author Hamid Jafarian (hamid.jafarian@pdnsoft.com)
 *
 */
#ifndef _PTOOLS_IPW_H_
#define _PTOOLS_IPW_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdlib.h>
#include "utils.h"
#include "ip_common.h"

/**
 * Initialize "ip" command environment.
 */
int ip_initialize();
/**
 * Finalize "ip" command environment.
 */
int ip_finalize();
/**
 * Set maximum loops of address flush process.
 */
int ip_setflushloop(int loop);
/**
 * Add a vlan device on dev.
 *
 * Name of vlan device would be dev.vlan.
 */
int ip_nicAddVLAN(const char *dev, int vlanID);
/** 
 * Delete vlan device.
 *
 * \param dev is the name of vlan device.
 */
int ip_nicDelVLAN(const char *dev);
/**
 * Add defined address to the interface.
 *
 * \param dev device name.
 * \param family address family, may be inet or inet6.
 * \param ifaddr target address.
 * \param broadcast broadcast address.
 * \param anycast any cast address.
 */
int ip_nicAddAddress(const char *dev, const char *family, const char *ifaddr,
				const char *broadcast, const char *anycast);
/**
 * Delete specified address from device.
 * \see ip_nicAddAddress
 */
int ip_nicDelAddress(const char *dev, const char *family, const char *ifaddr, 
				const char *broadcast, const char *anycast);
/**
 * Delete all of the device addresses.
 */
int ip_nicFlushAddresses(const char *dev);
/**
 * Set mtu of device.
 */
int ip_nicSetMTU(const char *dev, const char *mtu);
/**
 * Set Multicast option of device.
 *
 * \param do_enable "1" means enable and "0" means disable.
 */
int ip_nicSetMulticast(const char *dev, unsigned short do_enable);
/**
 * Set AllMulticast option of device.
 *
 * \param do_enable "1" means enable and "0" means disable.
 */
int ip_nicSetAllMulticast(const char *dev, unsigned short do_enable);
/**
 * Set Arp option of device.
 *
 * \param do_enable "1" means enable and "0" means disable.
 */
int ip_nicSetARP(const char *dev, unsigned short do_enable);
/**
 * Change device state.
 *
 * \param do_up "1" means up and "0" means down.
 */
int ip_nicChangeState(const char *dev, unsigned short do_up);

/**
 * \struct RouteInfo
 *
 * Defines routing information to manage system routes.
 */
struct RouteInfo
{
	const char *rt_type; 
	const char *rt_prefix; 
	const char *rt_tos; 
	const char *rt_table;
	const char *rt_scope;
	const char *rt_metric;
	const char *rt_preference;
	const char *rt_via;
	const char *rt_dev; 
	const char *rt_weight;
	const char *rt_mtu; 
	unsigned short rt_mtu_lock; /* bool: 0:false, 1:true */
	const char *rt_tcp_window;
};
/**
 * Add defined route to system.
 */
int ip_rtAdd(const struct RouteInfo *ri);
/**
 * Delete defined route from system.
 */
int ip_rtDel(const struct RouteInfo *ri);
/**
 * Replace/Add defined route to system.
 */
int ip_rtRep(const struct RouteInfo *ri);
/**
 * Fill ip-route command args.
 */
int _ip_rtArgv(const struct RouteInfo *ri, const char **argv);

#ifdef __cplusplus
} // extern "C"
#endif
#endif // _PTOOLS_IPW_H_

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox