Netdev List
 help / color / mirror / Atom feed
* [PATCH V3 net-next 01/17] net: hns3: add support to query tqps number
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

This patch adds the support to query tqps number for PF driver
by using ehtool -l command.

Signed-off-by: qumingguang <qumingguang@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h         |  2 ++
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c  | 10 ++++++++++
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 21 +++++++++++++++++++++
 3 files changed, 33 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a9e2b32..d887721 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -386,6 +386,8 @@ struct hnae3_ae_ops {
 				  u16 vlan, u8 qos, __be16 proto);
 	void (*reset_event)(struct hnae3_handle *handle,
 			    enum hnae3_reset_type reset);
+	void (*get_channels)(struct hnae3_handle *handle,
+			     struct ethtool_channels *ch);
 };
 
 struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 65a69b4..23af36c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -849,6 +849,15 @@ static int hns3_nway_reset(struct net_device *netdev)
 	return genphy_restart_aneg(phy);
 }
 
+void hns3_get_channels(struct net_device *netdev,
+		       struct ethtool_channels *ch)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	if (h->ae_algo->ops->get_channels)
+		h->ae_algo->ops->get_channels(h, ch);
+}
+
 static const struct ethtool_ops hns3vf_ethtool_ops = {
 	.get_drvinfo = hns3_get_drvinfo,
 	.get_ringparam = hns3_get_ringparam,
@@ -883,6 +892,7 @@ static int hns3_nway_reset(struct net_device *netdev)
 	.get_link_ksettings = hns3_get_link_ksettings,
 	.set_link_ksettings = hns3_set_link_ksettings,
 	.nway_reset = hns3_nway_reset,
+	.get_channels = hns3_get_channels,
 };
 
 void hns3_ethtool_set_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e97fd66..a3101bc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5002,6 +5002,26 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	ae_dev->priv = NULL;
 }
 
+static u32 hclge_get_max_channels(struct hnae3_handle *handle)
+{
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	return min_t(u32, hdev->rss_size_max * kinfo->num_tc, hdev->num_tqps);
+}
+
+static void hclge_get_channels(struct hnae3_handle *handle,
+			       struct ethtool_channels *ch)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+
+	ch->max_combined = hclge_get_max_channels(handle);
+	ch->other_count = 1;
+	ch->max_other = 1;
+	ch->combined_count = vport->alloc_tqps;
+}
+
 static const struct hnae3_ae_ops hclge_ops = {
 	.init_ae_dev = hclge_init_ae_dev,
 	.uninit_ae_dev = hclge_uninit_ae_dev,
@@ -5046,6 +5066,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	.set_vlan_filter = hclge_set_port_vlan_filter,
 	.set_vf_vlan_filter = hclge_set_vf_vlan_filter,
 	.reset_event = hclge_reset_event,
+	.get_channels = hclge_get_channels,
 };
 
 static struct hnae3_ae_algo ae_algo = {
-- 
1.9.1

^ permalink raw reply related

* RE: [PATCH v3 net-next 0/6] tls: Add generic NIC offload infrastructure
From: Boris Pismenny @ 2017-12-20  8:28 UTC (permalink / raw)
  To: Jiri Pirko, Ilya Lesokhin
  Cc: netdev@vger.kernel.org, davem@davemloft.net, davejwatson@fb.com,
	tom@herbertland.com, hannes@stressinduktion.org, Aviad Yehezkel,
	Liran Liss
In-Reply-To: <20171219103010.GC1928@nanopsycho>


> Tue, Dec 19, 2017 at 01:10:10AM CET, jiri@resnulli.us wrote:
> 
> Mon, Dec 18, 2017 at 06:10:10PM CET, jiri@resnulli.us wrote:
> >Mon, Dec 18, 2017 at 12:10:27PM CET, ilyal@mellanox.com wrote:
> >>Changes from v2:
> >>- Fix sk use after free and possible netdev use after free
> >>- tls device now keeps a refernce on the offloading netdev
> >>- tls device registers to the netdev notifer.
> >>  Upon a NETDEV_DOWN event, offload is stopped and
> >>  the reference on the netdev is dropped.
> >>- SW fallback support for skb->ip_summed != CHECKSUM_PARTIAL
> >>- Merged TLS patches are no longer part of this series.
> >>
> >>Changes from v1:
> >>- Remove the binding of the socket to a specific netdev
> >>  through sk->sk_bound_dev_if.
> >>  Add a check in validate_xmit_skb to detect route changes
> >>  and call SW fallback code to do the crypto in software.
> >>- tls_get_record now returns the tls record sequence number.
> >>  This is required to support connections with rcd_sn != iv.
> >>- Bug fixes to the TLS code.
> >>
> >>This patchset adds a generic infrastructure to offload TLS crypto to a
> >>network devices.
> >>
> >>patches 1-2 Export functions that we need patch 3 adds infrastructue
> >>for offloaded socket fallback patches 4-5 add new NDOs and
> >>capabilities.
> >>patch 6 adds the TLS NIC offload infrastructure.
> >>
> >>Github with mlx5e TLS offload support:
> >>https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2F
> git
> >>hub.com%2FMellanox%2Ftls-
> offload%2Ftree%2Ftls_device_v3&data=02%7C01%7
> >>Cborisp%40mellanox.com%7C5aebe81262554f40221908d546cb7c37%7Ca6
> 52971c7d
> >>2e4d9ba6a4d149256f461b%7C0%7C0%7C636492762141202894&sdata=gYY
> DEmspNfBs
> >>aQhefcEojl456L9eWqZnEEI7iPCT0NA%3D&reserved=0
> >
> >I don't get it. You are pushing infra but not the actual driver part
> >who is consuming the infra? Why?
> 
> Okay. Since the driver that uses the API introduced by this patchset is
> missing, this patchset should be marked as RFC.
> 
> Dave, I see that you were about to apply v2. I'm sure you missed this.
> Thanks.

Isn't this a chicken and egg problem, where something must come first,
driver or infra. Unless we combine the infra patches with mlx5 driver
code and submit both in a single pull request.
Here, we assumed that the infra goes first, and we will submit the
driver soon after. We could submit the driver first instead.

Dave, would you prefer to get the driver patches that use this infra before
the infra?

^ permalink raw reply

* r8169 take too long to complete driver initialization
From: Chris Chiu @ 2017-12-20  8:41 UTC (permalink / raw)
  To: nic_swsd, netdev, Linux Kernel, Linux Upstreaming Team

Hi,
    We've hit a suspend/resume issue on a Acer desktop caused by r8169
driver. The dmseg
https://gist.github.com/mschiu77/b741849b5070281daaead8dfee312d1a
shows it's still in msleep() within a mutex lock.
    After looking into the code, it's caused by the
rtl8168ep_stop_cmac() which is waiting 100 seconds for
rtl_ocp_tx_cond. The following dmesg states that the r8169 driver is
loaded.

[   20.270526] r8169 Gigabit Ethernet driver 2.3LK-NAPI loaded

But it takes > 100 seconds to get the following messages

[  140.400223] r8169 0000:02:00.0 (unnamed net_device)
(uninitialized): rtl_ocp_tx_cond == 1 (loop: 2000, delay: 50).
[  140.413294] r8169 0000:02:00.0 eth0: RTL8168ep/8111ep at
0xffffb16c80db1000, f8:0f:41:ea:74:0d, XID 10200800 IRQ 46
[  140.413297] r8169 0000:02:00.0 eth0: jumbo features [frames: 9200
bytes, tx checksumming: ko]

So any trial to suspend the machine during this period would always
get device/resource busy message then abort. Is this  rtl_ocp_tx_cond
necessary? Because the ethernet is still working and I don't see any
problem. I don't know it should be considered normal or not. Please
let me know if any more information required. Thanks

Chris

^ permalink raw reply

* [PATCH V3 net-next 00/17] add some features and fix some bugs for HNS3 driver
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321

This patchset adds some new feature support and fixes some bugs:
[Patch 1/17 - 5/17] add the support to modify/query the tqp number
through ethtool -L/l command, and also fix some related bugs for
change tqp number.
[Patch 6/17 - 9-17] add support vlan tag offload on tx&&rx direction
for pf, and fix some related bugs.
[patch 10/17 - 11/17] fix bugs for auto negotiation.
[patch 12/17] adds support for ethtool command set_pauseparam.
[patch 13/17 - 14/17] add support to update flow control settings after
autoneg.
[patch 15/17 - 17/17] fix some other bugs in net-next.

---
Change Log:
V2 -> V3:
1, order local variables requested by David Miller.
2, use "int" for index iteration loops requested by David Miller.

V1 -> V2:
1, fix the comments from Sergei Shtylyov.
---

Fuyun Liang (3):
  net: hns3: cleanup mac auto-negotiation state query
  net: hns3: fix for getting auto-negotiation state in hclge_get_autoneg
  net: hns3: add Asym Pause support to phy default features

Lipeng (13):
  net: hns3: add support to query tqps number
  net: hns3: add support to modify tqps number
  net: hns3: change the returned tqp number by ethtool -x
  net: hns3: Free the ring_data structrue when change tqps
  net: hns3: Add a mask initialization for mac_vlan table
  net: hns3: Add vlan offload config command
  net: hns3: Add ethtool related offload command
  net: hns3: Add handling vlan tag offload in bd
  net: hns3: add support for set_pauseparam
  net: hns3: add support to update flow control settings after autoneg
  net: hns3: add support for querying advertised pause frame by ethtool
    ethx
  net: hns3: Increase the default depth of bucket for TM shaper
  net: hns3: change TM sched mode to TC-based mode when SRIOV enabled

qumingguang (1):
  net: hns3: Get rss_size_max from configuration but not hardcode

 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  10 +
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 225 ++++++++-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |   2 +
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  41 +-
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |  57 +++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 514 +++++++++++++++++++--
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  38 ++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c    |   5 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c  |   6 +-
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h  |   1 +
 10 files changed, 855 insertions(+), 44 deletions(-)

-- 
1.9.1

^ permalink raw reply

* [PATCH V3 net-next 02/17] net: hns3: add support to modify tqps number
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

This patch adds the support to change tqps number for PF driver
by using ehtool -L command.

Signed-off-by: qumingguang <qumingguang@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |   3 +
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 122 +++++++++++++++++++++
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |   2 +
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |   1 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 112 +++++++++++++++++++
 5 files changed, 240 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index d887721..a5d3d22 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -388,6 +388,9 @@ struct hnae3_ae_ops {
 			    enum hnae3_reset_type reset);
 	void (*get_channels)(struct hnae3_handle *handle,
 			     struct ethtool_channels *ch);
+	void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
+				      u16 *free_tqps, u16 *max_rss_size);
+	int (*set_channels)(struct hnae3_handle *handle, u32 new_tqps_num);
 };
 
 struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index c2c1323..7e92068 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2651,6 +2651,19 @@ static int hns3_get_ring_config(struct hns3_nic_priv *priv)
 	return ret;
 }
 
+static void hns3_put_ring_config(struct hns3_nic_priv *priv)
+{
+	struct hnae3_handle *h = priv->ae_handle;
+	int i;
+
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
+		devm_kfree(priv->dev, priv->ring_data[i].ring);
+		devm_kfree(priv->dev,
+			   priv->ring_data[i + h->kinfo.num_tqps].ring);
+	}
+	devm_kfree(priv->dev, priv->ring_data);
+}
+
 static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
 {
 	int ret;
@@ -3162,6 +3175,115 @@ static int hns3_reset_notify(struct hnae3_handle *handle,
 	return ret;
 }
 
+static u16 hns3_get_max_available_channels(struct net_device *netdev)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+	u16 free_tqps, max_rss_size, max_tqps;
+
+	h->ae_algo->ops->get_tqps_and_rss_info(h, &free_tqps, &max_rss_size);
+	max_tqps = h->kinfo.num_tc * max_rss_size;
+
+	return min_t(u16, max_tqps, (free_tqps + h->kinfo.num_tqps));
+}
+
+static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+	int ret;
+
+	ret = h->ae_algo->ops->set_channels(h, new_tqp_num);
+	if (ret)
+		return ret;
+
+	ret = hns3_get_ring_config(priv);
+	if (ret)
+		return ret;
+
+	ret = hns3_nic_init_vector_data(priv);
+	if (ret)
+		goto err_uninit_vector;
+
+	ret = hns3_init_all_ring(priv);
+	if (ret)
+		goto err_put_ring;
+
+	return 0;
+
+err_put_ring:
+	hns3_put_ring_config(priv);
+err_uninit_vector:
+	hns3_nic_uninit_vector_data(priv);
+	return ret;
+}
+
+static int hns3_adjust_tqps_num(u8 num_tc, u32 new_tqp_num)
+{
+	return (new_tqp_num / num_tc) * num_tc;
+}
+
+int hns3_set_channels(struct net_device *netdev,
+		      struct ethtool_channels *ch)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hnae3_knic_private_info *kinfo = &h->kinfo;
+	bool if_running = netif_running(netdev);
+	u32 new_tqp_num = ch->combined_count;
+	u16 org_tqp_num;
+	int ret;
+
+	if (ch->rx_count || ch->tx_count)
+		return -EINVAL;
+
+	if (new_tqp_num > hns3_get_max_available_channels(netdev) ||
+	    new_tqp_num < kinfo->num_tc) {
+		dev_err(&netdev->dev,
+			"Change tqps fail, the tqp range is from %d to %d",
+			kinfo->num_tc,
+			hns3_get_max_available_channels(netdev));
+		return -EINVAL;
+	}
+
+	new_tqp_num = hns3_adjust_tqps_num(kinfo->num_tc, new_tqp_num);
+	if (kinfo->num_tqps == new_tqp_num)
+		return 0;
+
+	if (if_running)
+		dev_close(netdev);
+
+	hns3_clear_all_ring(h);
+
+	ret = hns3_nic_uninit_vector_data(priv);
+	if (ret) {
+		dev_err(&netdev->dev,
+			"Unbind vector with tqp fail, nothing is changed");
+		goto open_netdev;
+	}
+
+	hns3_uninit_all_ring(priv);
+
+	org_tqp_num = h->kinfo.num_tqps;
+	ret = hns3_modify_tqp_num(netdev, new_tqp_num);
+	if (ret) {
+		ret = hns3_modify_tqp_num(netdev, org_tqp_num);
+		if (ret) {
+			/* If revert to old tqp failed, fatal error occurred */
+			dev_err(&netdev->dev,
+				"Revert to old tqp num fail, ret=%d", ret);
+			return ret;
+		}
+		dev_info(&netdev->dev,
+			 "Change tqp num fail, Revert to old tqp num");
+	}
+
+open_netdev:
+	if (if_running)
+		dev_open(netdev);
+
+	return ret;
+}
+
 static const struct hnae3_client_ops client_ops = {
 	.init_instance = hns3_client_init,
 	.uninit_instance = hns3_client_uninit,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 8a9de75..a2a7ea3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -595,6 +595,8 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
 	(((struct hns3_nic_priv *)netdev_priv(ndev))->ae_handle)
 
 void hns3_ethtool_set_ops(struct net_device *netdev);
+int hns3_set_channels(struct net_device *netdev,
+		      struct ethtool_channels *ch);
 
 bool hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
 int hns3_init_all_ring(struct hns3_nic_priv *priv);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 23af36c..1b2d79b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -893,6 +893,7 @@ void hns3_get_channels(struct net_device *netdev,
 	.set_link_ksettings = hns3_set_link_ksettings,
 	.nway_reset = hns3_nway_reset,
 	.get_channels = hns3_get_channels,
+	.set_channels = hns3_set_channels,
 };
 
 void hns3_ethtool_set_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a3101bc..7fab102 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5022,6 +5022,116 @@ static void hclge_get_channels(struct hnae3_handle *handle,
 	ch->combined_count = vport->alloc_tqps;
 }
 
+static void hclge_get_tqps_and_rss_info(struct hnae3_handle *handle,
+					u16 *free_tqps, u16 *max_rss_size)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	u16 temp_tqps = 0;
+	int i;
+
+	for (i = 0; i < hdev->num_tqps; i++) {
+		if (!hdev->htqp[i].alloced)
+			temp_tqps++;
+	}
+	*free_tqps = temp_tqps;
+	*max_rss_size = hdev->rss_size_max;
+}
+
+static void hclge_release_tqp(struct hclge_vport *vport)
+{
+	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	struct hclge_dev *hdev = vport->back;
+	int i;
+
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		struct hclge_tqp *tqp =
+			container_of(kinfo->tqp[i], struct hclge_tqp, q);
+
+		tqp->q.handle = NULL;
+		tqp->q.tqp_index = 0;
+		tqp->alloced = false;
+	}
+
+	devm_kfree(&hdev->pdev->dev, kinfo->tqp);
+	kinfo->tqp = NULL;
+}
+
+static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	struct hclge_dev *hdev = vport->back;
+	int cur_rss_size = kinfo->rss_size;
+	int cur_tqps = kinfo->num_tqps;
+	u16 tc_offset[HCLGE_MAX_TC_NUM];
+	u16 tc_valid[HCLGE_MAX_TC_NUM];
+	u16 tc_size[HCLGE_MAX_TC_NUM];
+	u16 roundup_size;
+	u32 *rss_indir;
+	int ret, i;
+
+	hclge_release_tqp(vport);
+
+	ret = hclge_knic_setup(vport, new_tqps_num);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "setup nic fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_map_tqp_to_vport(hdev, vport);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "map vport tqp fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_tm_schd_init(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "tm schd init fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	roundup_size = roundup_pow_of_two(kinfo->rss_size);
+	roundup_size = ilog2(roundup_size);
+	/* Set the RSS TC mode according to the new RSS size */
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		tc_valid[i] = 0;
+
+		if (!(hdev->hw_tc_map & BIT(i)))
+			continue;
+
+		tc_valid[i] = 1;
+		tc_size[i] = roundup_size;
+		tc_offset[i] = kinfo->rss_size * i;
+	}
+	ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+	if (ret)
+		return ret;
+
+	/* Reinitializes the rss indirect table according to the new RSS size */
+	rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
+	if (!rss_indir)
+		return -ENOMEM;
+
+	for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+		rss_indir[i] = i % kinfo->rss_size;
+
+	ret = hclge_set_rss(handle, rss_indir, NULL, 0);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "set rss indir table fail, ret=%d\n",
+			ret);
+
+	kfree(rss_indir);
+
+	if (!ret)
+		dev_info(&hdev->pdev->dev,
+			 "Channels changed, rss_size from %d to %d, tqps from %d to %d",
+			 cur_rss_size, kinfo->rss_size,
+			 cur_tqps, kinfo->rss_size * kinfo->num_tc);
+
+	return ret;
+}
+
 static const struct hnae3_ae_ops hclge_ops = {
 	.init_ae_dev = hclge_init_ae_dev,
 	.uninit_ae_dev = hclge_uninit_ae_dev,
@@ -5066,6 +5176,8 @@ static void hclge_get_channels(struct hnae3_handle *handle,
 	.set_vlan_filter = hclge_set_port_vlan_filter,
 	.set_vf_vlan_filter = hclge_set_vf_vlan_filter,
 	.reset_event = hclge_reset_event,
+	.get_tqps_and_rss_info = hclge_get_tqps_and_rss_info,
+	.set_channels = hclge_set_channels,
 	.get_channels = hclge_get_channels,
 };
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 03/17] net: hns3: change the returned tqp number by ethtool -x
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

This patch modifies the return data of get_rxnfc, it will return
the current handle's rss_size but not the total tqp number.
because the tc_size has been change to the log2 of roundup
power of two of rss_size.

Signed-off-by: qumingguang <qumingguang@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 1b2d79b..2fd2656 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -730,7 +730,7 @@ static int hns3_get_rxnfc(struct net_device *netdev,
 
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXRINGS:
-		cmd->data = h->kinfo.num_tc * h->kinfo.rss_size;
+		cmd->data = h->kinfo.rss_size;
 		break;
 	case ETHTOOL_GRXFH:
 		return h->ae_algo->ops->get_rss_tuple(h, cmd);
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 04/17] net: hns3: Free the ring_data structrue when change tqps
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

This patch fixes a memory leak problems in change tqps process,
the function hns3_uninit_all_ring and hns3_init_all_ring
may be called many times.

Signed-off-by: qumingguang <qumingguang@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 7e92068..1c93038 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2800,8 +2800,12 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
 			h->ae_algo->ops->reset_queue(h, i);
 
 		hns3_fini_ring(priv->ring_data[i].ring);
+		devm_kfree(priv->dev, priv->ring_data[i].ring);
 		hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
+		devm_kfree(priv->dev,
+			   priv->ring_data[i + h->kinfo.num_tqps].ring);
 	}
+	devm_kfree(priv->dev, priv->ring_data);
 
 	return 0;
 }
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 05/17] net: hns3: Get rss_size_max from configuration but not hardcode
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

From: qumingguang <qumingguang@huawei.com>

Add configuration for rss_size_max in hdev but not hardcode it.

Signed-off-by: qumingguang <qumingguang@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  | 2 ++
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++++-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index ce5ed88..1eb9ff0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -399,6 +399,8 @@ struct hclge_pf_res_cmd {
 #define HCLGE_CFG_MAC_ADDR_H_M	GENMASK(15, 0)
 #define HCLGE_CFG_DEFAULT_SPEED_S	16
 #define HCLGE_CFG_DEFAULT_SPEED_M	GENMASK(23, 16)
+#define HCLGE_CFG_RSS_SIZE_S	24
+#define HCLGE_CFG_RSS_SIZE_M	GENMASK(31, 24)
 
 struct hclge_cfg_param_cmd {
 	__le32 offset;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 7fab102..691f85e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -982,6 +982,10 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 	cfg->default_speed = hnae_get_field(__le32_to_cpu(req->param[3]),
 					    HCLGE_CFG_DEFAULT_SPEED_M,
 					    HCLGE_CFG_DEFAULT_SPEED_S);
+	cfg->rss_size_max = hnae_get_field(__le32_to_cpu(req->param[3]),
+					   HCLGE_CFG_RSS_SIZE_M,
+					   HCLGE_CFG_RSS_SIZE_S);
+
 	for (i = 0; i < ETH_ALEN; i++)
 		cfg->mac_addr[i] = (mac_addr_tmp >> (8 * i)) & 0xff;
 
@@ -1059,7 +1063,7 @@ static int hclge_configure(struct hclge_dev *hdev)
 
 	hdev->num_vmdq_vport = cfg.vmdq_vport_num;
 	hdev->base_tqp_pid = 0;
-	hdev->rss_size_max = 1;
+	hdev->rss_size_max = cfg.rss_size_max;
 	hdev->rx_buf_len = cfg.rx_buf_len;
 	ether_addr_copy(hdev->hw.mac.mac_addr, cfg.mac_addr);
 	hdev->hw.mac.media_type = cfg.media_type;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index fb043b5..4858909 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -220,6 +220,7 @@ struct hclge_cfg {
 	u8 tc_num;
 	u16 tqp_desc_num;
 	u16 rx_buf_len;
+	u16 rss_size_max;
 	u8 phy_addr;
 	u8 media_type;
 	u8 mac_addr[ETH_ALEN];
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 07/17] net: hns3: Add vlan offload config command
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

This patch adds vlan offload config commands, initializes
the rules of tx/rx vlan tag handle for hw.

Signed-off-by: Shenjian <shenjian15@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |  45 ++++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 158 ++++++++++++++++++++-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  36 +++++
 3 files changed, 233 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 10adf86..f5baba21 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -180,6 +180,10 @@ enum hclge_opcode_type {
 	/* Promisuous mode command */
 	HCLGE_OPC_CFG_PROMISC_MODE	= 0x0E01,
 
+	/* Vlan offload command */
+	HCLGE_OPC_VLAN_PORT_TX_CFG	= 0x0F01,
+	HCLGE_OPC_VLAN_PORT_RX_CFG	= 0x0F02,
+
 	/* Interrupts cmd */
 	HCLGE_OPC_ADD_RING_TO_VECTOR	= 0x1503,
 	HCLGE_OPC_DEL_RING_TO_VECTOR	= 0x1504,
@@ -670,6 +674,47 @@ struct hclge_vlan_filter_vf_cfg_cmd {
 	u8  vf_bitmap[16];
 };
 
+#define HCLGE_ACCEPT_TAG_B		0
+#define HCLGE_ACCEPT_UNTAG_B		1
+#define HCLGE_PORT_INS_TAG1_EN_B	2
+#define HCLGE_PORT_INS_TAG2_EN_B	3
+#define HCLGE_CFG_NIC_ROCE_SEL_B	4
+struct hclge_vport_vtag_tx_cfg_cmd {
+	u8 vport_vlan_cfg;
+	u8 vf_offset;
+	u8 rsv1[2];
+	__le16 def_vlan_tag1;
+	__le16 def_vlan_tag2;
+	u8 vf_bitmap[8];
+	u8 rsv2[8];
+};
+
+#define HCLGE_REM_TAG1_EN_B		0
+#define HCLGE_REM_TAG2_EN_B		1
+#define HCLGE_SHOW_TAG1_EN_B		2
+#define HCLGE_SHOW_TAG2_EN_B		3
+struct hclge_vport_vtag_rx_cfg_cmd {
+	u8 vport_vlan_cfg;
+	u8 vf_offset;
+	u8 rsv1[6];
+	u8 vf_bitmap[8];
+	u8 rsv2[8];
+};
+
+struct hclge_tx_vlan_type_cfg_cmd {
+	__le16 ot_vlan_type;
+	__le16 in_vlan_type;
+	u8 rsv[20];
+};
+
+struct hclge_rx_vlan_type_cfg_cmd {
+	__le16 ot_fst_vlan_type;
+	__le16 ot_sec_vlan_type;
+	__le16 in_fst_vlan_type;
+	__le16 in_sec_vlan_type;
+	u8 rsv[16];
+};
+
 struct hclge_cfg_com_tqp_queue_cmd {
 	__le16 tqp_id;
 	__le16 stream_id;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a1d9398..113b859 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4380,23 +4380,169 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
 	return hclge_set_vf_vlan_common(hdev, vfid, false, vlan, qos, proto);
 }
 
+static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport)
+{
+	struct hclge_tx_vtag_cfg *vcfg = &vport->txvlan_cfg;
+	struct hclge_vport_vtag_tx_cfg_cmd *req;
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_desc desc;
+	int status;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_TX_CFG, false);
+
+	req = (struct hclge_vport_vtag_tx_cfg_cmd *)desc.data;
+	req->def_vlan_tag1 = cpu_to_le16(vcfg->default_tag1);
+	req->def_vlan_tag2 = cpu_to_le16(vcfg->default_tag2);
+	hnae_set_bit(req->vport_vlan_cfg, HCLGE_ACCEPT_TAG_B,
+		     vcfg->accept_tag ? 1 : 0);
+	hnae_set_bit(req->vport_vlan_cfg, HCLGE_ACCEPT_UNTAG_B,
+		     vcfg->accept_untag ? 1 : 0);
+	hnae_set_bit(req->vport_vlan_cfg, HCLGE_PORT_INS_TAG1_EN_B,
+		     vcfg->insert_tag1_en ? 1 : 0);
+	hnae_set_bit(req->vport_vlan_cfg, HCLGE_PORT_INS_TAG2_EN_B,
+		     vcfg->insert_tag2_en ? 1 : 0);
+	hnae_set_bit(req->vport_vlan_cfg, HCLGE_CFG_NIC_ROCE_SEL_B, 0);
+
+	req->vf_offset = vport->vport_id / HCLGE_VF_NUM_PER_CMD;
+	req->vf_bitmap[req->vf_offset] =
+		1 << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE);
+
+	status = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (status)
+		dev_err(&hdev->pdev->dev,
+			"Send port txvlan cfg command fail, ret =%d\n",
+			status);
+
+	return status;
+}
+
+static int hclge_set_vlan_rx_offload_cfg(struct hclge_vport *vport)
+{
+	struct hclge_rx_vtag_cfg *vcfg = &vport->rxvlan_cfg;
+	struct hclge_vport_vtag_rx_cfg_cmd *req;
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_desc desc;
+	int status;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_RX_CFG, false);
+
+	req = (struct hclge_vport_vtag_rx_cfg_cmd *)desc.data;
+	hnae_set_bit(req->vport_vlan_cfg, HCLGE_REM_TAG1_EN_B,
+		     vcfg->strip_tag1_en ? 1 : 0);
+	hnae_set_bit(req->vport_vlan_cfg, HCLGE_REM_TAG2_EN_B,
+		     vcfg->strip_tag2_en ? 1 : 0);
+	hnae_set_bit(req->vport_vlan_cfg, HCLGE_SHOW_TAG1_EN_B,
+		     vcfg->vlan1_vlan_prionly ? 1 : 0);
+	hnae_set_bit(req->vport_vlan_cfg, HCLGE_SHOW_TAG2_EN_B,
+		     vcfg->vlan2_vlan_prionly ? 1 : 0);
+
+	req->vf_offset = vport->vport_id / HCLGE_VF_NUM_PER_CMD;
+	req->vf_bitmap[req->vf_offset] =
+		1 << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE);
+
+	status = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (status)
+		dev_err(&hdev->pdev->dev,
+			"Send port rxvlan cfg command fail, ret =%d\n",
+			status);
+
+	return status;
+}
+
+static int hclge_set_vlan_protocol_type(struct hclge_dev *hdev)
+{
+	struct hclge_rx_vlan_type_cfg_cmd *rx_req;
+	struct hclge_tx_vlan_type_cfg_cmd *tx_req;
+	struct hclge_desc desc;
+	int status;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_TYPE_ID, false);
+	rx_req = (struct hclge_rx_vlan_type_cfg_cmd *)desc.data;
+	rx_req->ot_fst_vlan_type =
+		cpu_to_le16(hdev->vlan_type_cfg.rx_ot_fst_vlan_type);
+	rx_req->ot_sec_vlan_type =
+		cpu_to_le16(hdev->vlan_type_cfg.rx_ot_sec_vlan_type);
+	rx_req->in_fst_vlan_type =
+		cpu_to_le16(hdev->vlan_type_cfg.rx_in_fst_vlan_type);
+	rx_req->in_sec_vlan_type =
+		cpu_to_le16(hdev->vlan_type_cfg.rx_in_sec_vlan_type);
+
+	status = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (status) {
+		dev_err(&hdev->pdev->dev,
+			"Send rxvlan protocol type command fail, ret =%d\n",
+			status);
+		return status;
+	}
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_INSERT, false);
+
+	tx_req = (struct hclge_tx_vlan_type_cfg_cmd *)&desc.data;
+	tx_req->ot_vlan_type = cpu_to_le16(hdev->vlan_type_cfg.tx_ot_vlan_type);
+	tx_req->in_vlan_type = cpu_to_le16(hdev->vlan_type_cfg.tx_in_vlan_type);
+
+	status = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (status)
+		dev_err(&hdev->pdev->dev,
+			"Send txvlan protocol type command fail, ret =%d\n",
+			status);
+
+	return status;
+}
+
 static int hclge_init_vlan_config(struct hclge_dev *hdev)
 {
-#define HCLGE_VLAN_TYPE_VF_TABLE   0
-#define HCLGE_VLAN_TYPE_PORT_TABLE 1
+#define HCLGE_FILTER_TYPE_VF		0
+#define HCLGE_FILTER_TYPE_PORT		1
+#define HCLGE_DEF_VLAN_TYPE		0x8100
+
 	struct hnae3_handle *handle;
+	struct hclge_vport *vport;
 	int ret;
+	int i;
+
+	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF, true);
+	if (ret)
+		return ret;
 
-	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_VF_TABLE,
-					 true);
+	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, true);
 	if (ret)
 		return ret;
 
-	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_PORT_TABLE,
-					 true);
+	hdev->vlan_type_cfg.rx_in_fst_vlan_type = HCLGE_DEF_VLAN_TYPE;
+	hdev->vlan_type_cfg.rx_in_sec_vlan_type = HCLGE_DEF_VLAN_TYPE;
+	hdev->vlan_type_cfg.rx_ot_fst_vlan_type = HCLGE_DEF_VLAN_TYPE;
+	hdev->vlan_type_cfg.rx_ot_sec_vlan_type = HCLGE_DEF_VLAN_TYPE;
+	hdev->vlan_type_cfg.tx_ot_vlan_type = HCLGE_DEF_VLAN_TYPE;
+	hdev->vlan_type_cfg.tx_in_vlan_type = HCLGE_DEF_VLAN_TYPE;
+
+	ret = hclge_set_vlan_protocol_type(hdev);
 	if (ret)
 		return ret;
 
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		vport = &hdev->vport[i];
+		vport->txvlan_cfg.accept_tag = true;
+		vport->txvlan_cfg.accept_untag = true;
+		vport->txvlan_cfg.insert_tag1_en = false;
+		vport->txvlan_cfg.insert_tag2_en = false;
+		vport->txvlan_cfg.default_tag1 = 0;
+		vport->txvlan_cfg.default_tag2 = 0;
+
+		ret = hclge_set_vlan_tx_offload_cfg(vport);
+		if (ret)
+			return ret;
+
+		vport->rxvlan_cfg.strip_tag1_en = false;
+		vport->rxvlan_cfg.strip_tag2_en = true;
+		vport->rxvlan_cfg.vlan1_vlan_prionly = false;
+		vport->rxvlan_cfg.vlan2_vlan_prionly = false;
+
+		ret = hclge_set_vlan_rx_offload_cfg(vport);
+		if (ret)
+			return ret;
+	}
+
 	handle = &hdev->vport[0].nic;
 	return hclge_set_port_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 4858909..cda520c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -79,6 +79,10 @@
 #define HCLGE_PHY_MDIX_STATUS_B	(6)
 #define HCLGE_PHY_SPEED_DUP_RESOLVE_B	(11)
 
+/* Factor used to calculate offset and bitmap of VF num */
+#define HCLGE_VF_NUM_PER_CMD           64
+#define HCLGE_VF_NUM_PER_BYTE          8
+
 /* Reset related Registers */
 #define HCLGE_MISC_RESET_STS_REG	0x20700
 #define HCLGE_GLOBAL_RESET_REG		0x20A00
@@ -424,6 +428,15 @@ struct hclge_hw_stats {
 	struct hclge_32_bit_stats   all_32_bit_stats;
 };
 
+struct hclge_vlan_type_cfg {
+	u16 rx_ot_fst_vlan_type;
+	u16 rx_ot_sec_vlan_type;
+	u16 rx_in_fst_vlan_type;
+	u16 rx_in_sec_vlan_type;
+	u16 tx_ot_vlan_type;
+	u16 tx_in_vlan_type;
+};
+
 struct hclge_dev {
 	struct pci_dev *pdev;
 	struct hnae3_ae_dev *ae_dev;
@@ -510,6 +523,26 @@ struct hclge_dev {
 	enum hclge_mta_dmac_sel_type mta_mac_sel_type;
 	bool enable_mta; /* Mutilcast filter enable */
 	bool accept_mta_mc; /* Whether accept mta filter multicast */
+
+	struct hclge_vlan_type_cfg vlan_type_cfg;
+};
+
+/* VPort level vlan tag configuration for TX direction */
+struct hclge_tx_vtag_cfg {
+	bool accept_tag;	/* Whether accept tagged packet from host */
+	bool accept_untag;	/* Whether accept untagged packet from host */
+	bool insert_tag1_en;	/* Whether insert inner vlan tag */
+	bool insert_tag2_en;	/* Whether insert outer vlan tag */
+	u16  default_tag1;	/* The default inner vlan tag to insert */
+	u16  default_tag2;	/* The default outer vlan tag to insert */
+};
+
+/* VPort level vlan tag configuration for RX direction */
+struct hclge_rx_vtag_cfg {
+	bool strip_tag1_en;	/* Whether strip inner vlan tag */
+	bool strip_tag2_en;	/* Whether strip outer vlan tag */
+	bool vlan1_vlan_prionly;/* Inner VLAN Tag up to descriptor Enable */
+	bool vlan2_vlan_prionly;/* Outer VLAN Tag up to descriptor Enable */
 };
 
 struct hclge_vport {
@@ -524,6 +557,9 @@ struct hclge_vport {
 	u16 bw_limit;		/* VSI BW Limit (0 = disabled) */
 	u8  dwrr;
 
+	struct hclge_tx_vtag_cfg  txvlan_cfg;
+	struct hclge_rx_vtag_cfg  rxvlan_cfg;
+
 	int vport_id;
 	struct hclge_dev *back;  /* Back reference to associated dev */
 	struct hnae3_handle nic;
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 08/17] net: hns3: Add ethtool related offload command
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

This patch adds offload command related to "ethtool -K".

Signed-off-by: Shenjian <shenjian15@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h             |  3 +++
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c         | 16 ++++++++++++++++
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 13 +++++++++++++
 3 files changed, 32 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a5d3d22..a67d02a9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -278,6 +278,8 @@ struct hnae3_ae_dev {
  *   Set vlan filter config of Ports
  * set_vf_vlan_filter()
  *   Set vlan filter config of vf
+ * enable_hw_strip_rxvtag()
+ *   Enable/disable hardware strip vlan tag of packets received
  */
 struct hnae3_ae_ops {
 	int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
@@ -384,6 +386,7 @@ struct hnae3_ae_ops {
 			       u16 vlan_id, bool is_kill);
 	int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
 				  u16 vlan, u8 qos, __be16 proto);
+	int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
 	void (*reset_event)(struct hnae3_handle *handle,
 			    enum hnae3_reset_type reset);
 	void (*get_channels)(struct hnae3_handle *handle,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 1c93038..301b329 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1032,6 +1032,9 @@ static int hns3_nic_set_features(struct net_device *netdev,
 				 netdev_features_t features)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	netdev_features_t changed;
+	int ret;
 
 	if (features & (NETIF_F_TSO | NETIF_F_TSO6)) {
 		priv->ops.fill_desc = hns3_fill_desc_tso;
@@ -1041,6 +1044,17 @@ static int hns3_nic_set_features(struct net_device *netdev,
 		priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx;
 	}
 
+	changed = netdev->features ^ features;
+	if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
+		if (features & NETIF_F_HW_VLAN_CTAG_RX)
+			ret = h->ae_algo->ops->enable_hw_strip_rxvtag(h, true);
+		else
+			ret = h->ae_algo->ops->enable_hw_strip_rxvtag(h, false);
+
+		if (ret)
+			return ret;
+	}
+
 	netdev->features = features;
 	return 0;
 }
@@ -1492,6 +1506,7 @@ static void hns3_set_default_feature(struct net_device *netdev)
 
 	netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_HW_VLAN_CTAG_FILTER |
+		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
 		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
 		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
@@ -1506,6 +1521,7 @@ static void hns3_set_default_feature(struct net_device *netdev)
 
 	netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_HW_VLAN_CTAG_FILTER |
+		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
 		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 		NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
 		NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 113b859..d77a6de 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4547,6 +4547,18 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
 	return hclge_set_port_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
 }
 
+static int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+
+	vport->rxvlan_cfg.strip_tag1_en = false;
+	vport->rxvlan_cfg.strip_tag2_en = enable;
+	vport->rxvlan_cfg.vlan1_vlan_prionly = false;
+	vport->rxvlan_cfg.vlan2_vlan_prionly = false;
+
+	return hclge_set_vlan_rx_offload_cfg(vport);
+}
+
 static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -5362,6 +5374,7 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num)
 	.get_mdix_mode = hclge_get_mdix_mode,
 	.set_vlan_filter = hclge_set_port_vlan_filter,
 	.set_vf_vlan_filter = hclge_set_vf_vlan_filter,
+	.enable_hw_strip_rxvtag = hclge_en_hw_strip_rxvtag,
 	.reset_event = hclge_reset_event,
 	.get_tqps_and_rss_info = hclge_get_tqps_and_rss_info,
 	.set_channels = hclge_set_channels,
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 09/17] net: hns3: Add handling vlan tag offload in bd
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

This patch deals with the vlan tag information between
sk_buff and rx/tx bd.

Signed-off-by: Shenjian <shenjian15@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 83 +++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 301b329..320ae88 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -723,6 +723,58 @@ static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
 	hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 0);
 }
 
+static int hns3_fill_desc_vtags(struct sk_buff *skb,
+				struct hns3_enet_ring *tx_ring,
+				u32 *inner_vlan_flag,
+				u32 *out_vlan_flag,
+				u16 *inner_vtag,
+				u16 *out_vtag)
+{
+#define HNS3_TX_VLAN_PRIO_SHIFT 13
+
+	if (skb->protocol == htons(ETH_P_8021Q) &&
+	    !(tx_ring->tqp->handle->kinfo.netdev->features &
+	    NETIF_F_HW_VLAN_CTAG_TX)) {
+		/* When HW VLAN acceleration is turned off, and the stack
+		 * sets the protocol to 802.1q, the driver just need to
+		 * set the protocol to the encapsulated ethertype.
+		 */
+		skb->protocol = vlan_get_protocol(skb);
+		return 0;
+	}
+
+	if (skb_vlan_tag_present(skb)) {
+		u16 vlan_tag;
+
+		vlan_tag = skb_vlan_tag_get(skb);
+		vlan_tag |= (skb->priority & 0x7) << HNS3_TX_VLAN_PRIO_SHIFT;
+
+		/* Based on hw strategy, use out_vtag in two layer tag case,
+		 * and use inner_vtag in one tag case.
+		 */
+		if (skb->protocol == htons(ETH_P_8021Q)) {
+			hnae_set_bit(*out_vlan_flag, HNS3_TXD_OVLAN_B, 1);
+			*out_vtag = vlan_tag;
+		} else {
+			hnae_set_bit(*inner_vlan_flag, HNS3_TXD_VLAN_B, 1);
+			*inner_vtag = vlan_tag;
+		}
+	} else if (skb->protocol == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *vhdr;
+		int rc;
+
+		rc = skb_cow_head(skb, 0);
+		if (rc < 0)
+			return rc;
+		vhdr = (struct vlan_ethhdr *)skb->data;
+		vhdr->h_vlan_TCI |= cpu_to_be16((skb->priority & 0x7)
+					<< HNS3_TX_VLAN_PRIO_SHIFT);
+	}
+
+	skb->protocol = vlan_get_protocol(skb);
+	return 0;
+}
+
 static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 			  int size, dma_addr_t dma, int frag_end,
 			  enum hns_desc_type type)
@@ -733,6 +785,8 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 	u16 bdtp_fe_sc_vld_ra_ri = 0;
 	u32 type_cs_vlan_tso = 0;
 	struct sk_buff *skb;
+	u16 inner_vtag = 0;
+	u16 out_vtag = 0;
 	u32 paylen = 0;
 	u16 mss = 0;
 	__be16 protocol;
@@ -756,15 +810,16 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 		skb = (struct sk_buff *)priv;
 		paylen = skb->len;
 
+		ret = hns3_fill_desc_vtags(skb, ring, &type_cs_vlan_tso,
+					   &ol_type_vlan_len_msec,
+					   &inner_vtag, &out_vtag);
+		if (unlikely(ret))
+			return ret;
+
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			skb_reset_mac_len(skb);
 			protocol = skb->protocol;
 
-			/* vlan packet*/
-			if (protocol == htons(ETH_P_8021Q)) {
-				protocol = vlan_get_protocol(skb);
-				skb->protocol = protocol;
-			}
 			ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto);
 			if (ret)
 				return ret;
@@ -790,6 +845,8 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 			cpu_to_le32(type_cs_vlan_tso);
 		desc->tx.paylen = cpu_to_le32(paylen);
 		desc->tx.mss = cpu_to_le16(mss);
+		desc->tx.vlan_tag = cpu_to_le16(inner_vtag);
+		desc->tx.outer_vlan_tag = cpu_to_le16(out_vtag);
 	}
 
 	/* move ring pointer to next.*/
@@ -2101,6 +2158,22 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
 
 	prefetchw(skb->data);
 
+	/* Based on hw strategy, the tag offloaded will be stored at
+	 * ot_vlan_tag in two layer tag case, and stored at vlan_tag
+	 * in one layer tag case.
+	 */
+	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+		u16 vlan_tag;
+
+		vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
+		if (!(vlan_tag & VLAN_VID_MASK))
+			vlan_tag = le16_to_cpu(desc->rx.vlan_tag);
+		if (vlan_tag & VLAN_VID_MASK)
+			__vlan_hwaccel_put_tag(skb,
+					       htons(ETH_P_8021Q),
+					       vlan_tag);
+	}
+
 	bnum = 1;
 	if (length <= HNS3_RX_HEAD_SIZE) {
 		memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 10/17] net: hns3: cleanup mac auto-negotiation state query
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

From: Fuyun Liang <liangfuyun1@huawei.com>

When checking whether auto-negotiation is on, driver only needs to
check the value of mac.autoneg(SW) directly, and does not need to
query it from hardware. Because this value is always synchronized
with the auto-negotiation state of hardware.

This patch removes the mac auto-negotiation state query.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 24 ----------------------
 1 file changed, 24 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d77a6de..046f4bb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2137,28 +2137,6 @@ static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed,
 	return 0;
 }
 
-static int hclge_query_autoneg_result(struct hclge_dev *hdev)
-{
-	struct hclge_mac *mac = &hdev->hw.mac;
-	struct hclge_query_an_speed_dup_cmd *req;
-	struct hclge_desc desc;
-	int ret;
-
-	req = (struct hclge_query_an_speed_dup_cmd *)desc.data;
-
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"autoneg result query cmd failed %d.\n", ret);
-		return ret;
-	}
-
-	mac->autoneg = hnae_get_bit(req->an_syn_dup_speed, HCLGE_QUERY_AN_B);
-
-	return 0;
-}
-
 static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
 {
 	struct hclge_config_auto_neg_cmd *req;
@@ -2195,8 +2173,6 @@ static int hclge_get_autoneg(struct hnae3_handle *handle)
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	hclge_query_autoneg_result(hdev);
-
 	return hdev->hw.mac.autoneg;
 }
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 12/17] net: hns3: add support for set_pauseparam
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

This patch adds set_pauseparam support for ethtool cmd.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 13 ++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 83 ++++++++++++++++++++++
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c  |  2 +-
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h  |  1 +
 4 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 2fd2656..b829ec7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -559,6 +559,18 @@ static void hns3_get_pauseparam(struct net_device *netdev,
 			&param->rx_pause, &param->tx_pause);
 }
 
+static int hns3_set_pauseparam(struct net_device *netdev,
+			       struct ethtool_pauseparam *param)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	if (h->ae_algo->ops->set_pauseparam)
+		return h->ae_algo->ops->set_pauseparam(h, param->autoneg,
+						       param->rx_pause,
+						       param->tx_pause);
+	return -EOPNOTSUPP;
+}
+
 static int hns3_get_link_ksettings(struct net_device *netdev,
 				   struct ethtool_link_ksettings *cmd)
 {
@@ -880,6 +892,7 @@ void hns3_get_channels(struct net_device *netdev,
 	.get_ringparam = hns3_get_ringparam,
 	.set_ringparam = hns3_set_ringparam,
 	.get_pauseparam = hns3_get_pauseparam,
+	.set_pauseparam = hns3_set_pauseparam,
 	.get_strings = hns3_get_strings,
 	.get_ethtool_stats = hns3_get_stats,
 	.get_sset_count = hns3_get_sset_count,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 14e1054..0f55ee6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4660,6 +4660,53 @@ static u32 hclge_get_fw_version(struct hnae3_handle *handle)
 	return hdev->fw_version;
 }
 
+static void hclge_set_flowctrl_adv(struct hclge_dev *hdev, u32 rx_en, u32 tx_en)
+{
+	struct phy_device *phydev = hdev->hw.mac.phydev;
+
+	if (!phydev)
+		return;
+
+	phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+
+	if (rx_en)
+		phydev->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+
+	if (tx_en)
+		phydev->advertising ^= ADVERTISED_Asym_Pause;
+}
+
+static int hclge_cfg_pauseparam(struct hclge_dev *hdev, u32 rx_en, u32 tx_en)
+{
+	enum hclge_fc_mode fc_mode;
+	int ret;
+
+	if (rx_en && tx_en)
+		fc_mode = HCLGE_FC_FULL;
+	else if (rx_en && !tx_en)
+		fc_mode = HCLGE_FC_RX_PAUSE;
+	else if (!rx_en && tx_en)
+		fc_mode = HCLGE_FC_TX_PAUSE;
+	else
+		fc_mode = HCLGE_FC_NONE;
+
+	if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
+		hdev->fc_mode_last_time = fc_mode;
+		return 0;
+	}
+
+	ret = hclge_mac_pause_en_cfg(hdev, tx_en, rx_en);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "configure pauseparam error, ret = %d.\n",
+			ret);
+		return ret;
+	}
+
+	hdev->tm_info.fc_mode = fc_mode;
+
+	return 0;
+}
+
 static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg,
 				 u32 *rx_en, u32 *tx_en)
 {
@@ -4689,6 +4736,41 @@ static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg,
 	}
 }
 
+static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
+				u32 rx_en, u32 tx_en)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct phy_device *phydev = hdev->hw.mac.phydev;
+	u32 fc_autoneg;
+
+	/* Only support flow control negotiation for netdev with
+	 * phy attached for now.
+	 */
+	if (!phydev)
+		return -EOPNOTSUPP;
+
+	fc_autoneg = hclge_get_autoneg(handle);
+	if (auto_neg != fc_autoneg) {
+		dev_info(&hdev->pdev->dev,
+			 "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
+		dev_info(&hdev->pdev->dev,
+			 "Priority flow control enabled. Cannot set link flow control.\n");
+		return -EOPNOTSUPP;
+	}
+
+	hclge_set_flowctrl_adv(hdev, rx_en, tx_en);
+
+	if (!fc_autoneg)
+		return hclge_cfg_pauseparam(hdev, rx_en, tx_en);
+
+	return phy_start_aneg(phydev);
+}
+
 static void hclge_get_ksettings_an_result(struct hnae3_handle *handle,
 					  u8 *auto_neg, u32 *speed, u8 *duplex)
 {
@@ -5344,6 +5426,7 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num)
 	.set_autoneg = hclge_set_autoneg,
 	.get_autoneg = hclge_get_autoneg,
 	.get_pauseparam = hclge_get_pauseparam,
+	.set_pauseparam = hclge_set_pauseparam,
 	.set_mtu = hclge_set_mtu,
 	.reset_queue = hclge_reset_tqp,
 	.get_stats = hclge_get_stats,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 7bfa2e5..7cfe1eb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -112,7 +112,7 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
 	return 0;
 }
 
-static int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
+int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
 {
 	struct hclge_desc desc;
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index bf59961..16f4139 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -118,4 +118,5 @@ struct hclge_port_shapping_cmd {
 int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
 int hclge_tm_map_cfg(struct hclge_dev *hdev);
 int hclge_tm_init_hw(struct hclge_dev *hdev);
+int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
 #endif
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 13/17] net: hns3: add support to update flow control settings after autoneg
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

When auto-negotiation is enabled, the MAC flow control settings is
based on the flow control negotiation result. And it should be configured
after a valid link has been established. This patch adds support to update
flow control settings after auto-negotiation has completed.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 36 ++++++++++++++++++++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  1 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c    |  4 +++
 3 files changed, 41 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 0f55ee6..bb31212 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4707,6 +4707,42 @@ static int hclge_cfg_pauseparam(struct hclge_dev *hdev, u32 rx_en, u32 tx_en)
 	return 0;
 }
 
+int hclge_cfg_flowctrl(struct hclge_dev *hdev)
+{
+	struct phy_device *phydev = hdev->hw.mac.phydev;
+	u16 remote_advertising = 0;
+	u16 local_advertising = 0;
+	u32 rx_pause, tx_pause;
+	u8 flowctl;
+
+	if (!phydev->link || !phydev->autoneg)
+		return 0;
+
+	if (phydev->advertising & ADVERTISED_Pause)
+		local_advertising = ADVERTISE_PAUSE_CAP;
+
+	if (phydev->advertising & ADVERTISED_Asym_Pause)
+		local_advertising |= ADVERTISE_PAUSE_ASYM;
+
+	if (phydev->pause)
+		remote_advertising = LPA_PAUSE_CAP;
+
+	if (phydev->asym_pause)
+		remote_advertising |= LPA_PAUSE_ASYM;
+
+	flowctl = mii_resolve_flowctrl_fdx(local_advertising,
+					   remote_advertising);
+	tx_pause = flowctl & FLOW_CTRL_TX;
+	rx_pause = flowctl & FLOW_CTRL_RX;
+
+	if (phydev->duplex == HCLGE_MAC_HALF) {
+		tx_pause = 0;
+		rx_pause = 0;
+	}
+
+	return hclge_cfg_pauseparam(hdev, rx_pause, tx_pause);
+}
+
 static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg,
 				 u32 *rx_en, u32 *tx_en)
 {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index cda520c..28cc063 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -602,4 +602,5 @@ int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
 
 void hclge_mbx_handler(struct hclge_dev *hdev);
 void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
+int hclge_cfg_flowctrl(struct hclge_dev *hdev);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 7069e94..3745153 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -183,6 +183,10 @@ static void hclge_mac_adjust_link(struct net_device *netdev)
 	ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex);
 	if (ret)
 		netdev_err(netdev, "failed to adjust link.\n");
+
+	ret = hclge_cfg_flowctrl(hdev);
+	if (ret)
+		netdev_err(netdev, "failed to configure flow control.\n");
 }
 
 int hclge_mac_start_phy(struct hclge_dev *hdev)
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 14/17] net: hns3: add Asym Pause support to phy default features
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

From: Fuyun Liang <liangfuyun1@huawei.com>

commit c4fb2cdf575d ("net: hns3: fix a bug for phy supported feature
initialization") adds default supported features for phy, but our hardware
also supports Asym Pause. This patch adds Asym Pause support to phy
default features to prevent Asym Pause can not be advertised when the phy
negotiates flow control.

Fixes: c4fb2cdf575d ("net: hns3: fix a bug for phy supported feature initialization")
Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 3745153..c1dea3a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -17,6 +17,7 @@
 #define HCLGE_PHY_SUPPORTED_FEATURES	(SUPPORTED_Autoneg | \
 					 SUPPORTED_TP | \
 					 SUPPORTED_Pause | \
+					 SUPPORTED_Asym_Pause | \
 					 PHY_10BT_FEATURES | \
 					 PHY_100BT_FEATURES | \
 					 PHY_1000BT_FEATURES)
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 15/17] net: hns3: add support for querying advertised pause frame by ethtool ethx
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

This patch adds support for querying advertised pause frame by using
ethtool command(ethtool ethx).

Fixes: 496d03e960ae ("net: hns3: Add Ethtool support to HNS3 driver")
Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h             |  2 ++
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c      | 15 +++++++++++++++
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 15 +++++++++++++++
 3 files changed, 32 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a67d02a9..82e9a80 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -394,6 +394,8 @@ struct hnae3_ae_ops {
 	void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
 				      u16 *free_tqps, u16 *max_rss_size);
 	int (*set_channels)(struct hnae3_handle *handle, u32 new_tqps_num);
+	void (*get_flowctrl_adv)(struct hnae3_handle *handle,
+				 u32 *flowctrl_adv);
 };
 
 struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index b829ec7..2ae4d39 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -575,6 +575,7 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 				   struct ethtool_link_ksettings *cmd)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	u32 flowctrl_adv = 0;
 	u32 supported_caps;
 	u32 advertised_caps;
 	u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN;
@@ -650,6 +651,8 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 		if (!cmd->base.autoneg)
 			advertised_caps &= ~HNS3_LM_AUTONEG_BIT;
 
+		advertised_caps &= ~HNS3_LM_PAUSE_BIT;
+
 		/* now, map driver link modes to ethtool link modes */
 		hns3_driv_to_eth_caps(supported_caps, cmd, false);
 		hns3_driv_to_eth_caps(advertised_caps, cmd, true);
@@ -662,6 +665,18 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 	/* 4.mdio_support */
 	cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C22;
 
+	/* 5.get flow control setttings */
+	if (h->ae_algo->ops->get_flowctrl_adv)
+		h->ae_algo->ops->get_flowctrl_adv(h, &flowctrl_adv);
+
+	if (flowctrl_adv & ADVERTISED_Pause)
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Pause);
+
+	if (flowctrl_adv & ADVERTISED_Asym_Pause)
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index bb31212..4d69688 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4660,6 +4660,20 @@ static u32 hclge_get_fw_version(struct hnae3_handle *handle)
 	return hdev->fw_version;
 }
 
+static void hclge_get_flowctrl_adv(struct hnae3_handle *handle,
+				   u32 *flowctrl_adv)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct phy_device *phydev = hdev->hw.mac.phydev;
+
+	if (!phydev)
+		return;
+
+	*flowctrl_adv |= (phydev->advertising & ADVERTISED_Pause) |
+			 (phydev->advertising & ADVERTISED_Asym_Pause);
+}
+
 static void hclge_set_flowctrl_adv(struct hclge_dev *hdev, u32 rx_en, u32 tx_en)
 {
 	struct phy_device *phydev = hdev->hw.mac.phydev;
@@ -5478,6 +5492,7 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num)
 	.get_tqps_and_rss_info = hclge_get_tqps_and_rss_info,
 	.set_channels = hclge_set_channels,
 	.get_channels = hclge_get_channels,
+	.get_flowctrl_adv = hclge_get_flowctrl_adv,
 };
 
 static struct hnae3_ae_algo ae_algo = {
-- 
1.9.1

^ permalink raw reply related

* [PATCH V3 net-next 16/17] net: hns3: Increase the default depth of bucket for TM shaper
From: Lipeng @ 2017-12-20  8:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

Burstiness of a flow is determined by the depth of a bucket, When the
upper rate of shaper is large, the current depth of a bucket is not
enough.

The default upper rate of shaper is 100G, so increase the depth of
a bucket according to UM.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 7cfe1eb..ea9355d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -23,8 +23,8 @@ enum hclge_shaper_level {
 	HCLGE_SHAPER_LVL_PF	= 1,
 };
 
-#define HCLGE_SHAPER_BS_U_DEF	1
-#define HCLGE_SHAPER_BS_S_DEF	4
+#define HCLGE_SHAPER_BS_U_DEF	5
+#define HCLGE_SHAPER_BS_S_DEF	20
 
 #define HCLGE_ETHER_MAX_RATE	100000
 
-- 
1.9.1

^ permalink raw reply related

* Re: [patch iproute2] tc: add -bs option for batch mode
From: Or Gerlitz @ 2017-12-20  8:47 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Chris Mi, Linux Netdev List
In-Reply-To: <20171219071547.35b002c8@xeon-e3>

On Tue, Dec 19, 2017 at 5:15 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> On Tue, 19 Dec 2017 15:33:46 +0900
> Chris Mi <chrism@mellanox.com> wrote:
>
>> Currently in tc batch mode, only one command is read from the batch
>> file and sent to kernel to process. With this patch, we can accumulate
>> several commands before sending to kernel. The batch size is specified
>> using option -bs or -batchsize.
>>
>> To accumulate the commands in tc, we allocate an array of struct iovec.
>> If batchsize is bigger than 1 and we haven't accumulated enough
>> commands, rtnl_talk() will return without actually sending the message.
>> One exception is that there is no more command in the batch file.
>>
>> But please note that kernel still processes the requests one by one.
>> To process the requests in parallel in kernel is another effort.
>> The time we're saving in this patch is the user mode and kernel mode
>> context switch. So this patch works on top of the current kernel.
>>
>> Using the following script in kernel, we can generate 1,000,000 rules.
>>       tools/testing/selftests/tc-testing/tdc_batch.py
>>
>> Without this patch, 'tc -b $file' exection time is:
>>
>> real    0m14.916s
>> user    0m6.808s
>> sys     0m8.046s
>>
>> With this patch, 'tc -b $file -bs 10' exection time is:
>>
>> real    0m12.286s
>> user    0m5.903s
>> sys     0m6.312s
>>
>> The insertion rate is improved more than 10%.
>>
>> Signed-off-by: Chris Mi <chrism@mellanox.com>
>
> I think this is probably optimizing for an unrealistic use case.
> If there are 1M rules then it should be managed by a dynamic process
> like a routing daemon with a real database and API. Such a daemon
> would talk to kernel directly. Plus at scale netlink gets overwhelmed
> and the daemon has to handle resync.

Standalone tools such as ip and tc are indeed used many ties mostly at staging,
development and performance benchmarking environments (where applications on
production systems (e.g FRR or OVS) would embed the NL code in house
and do direct
calls into the kernel).  As such, I think there's a value to max out
the tc tool ability to add flows.

> Not convinced that the added complexity and error handling issues
> warrant the change. The batch mode already sucks at handling errors.

Lets have Chris fix the patch and see how the revised bits look.

Or.

^ permalink raw reply

* [PATCH net v2] ipv4: Fix use-after-free when flushing FIB tables
From: Ido Schimmel @ 2017-12-20  8:51 UTC (permalink / raw)
  To: netdev; +Cc: davem, alexander.h.duyck, dsahern, fengguang.wu, mlxsw,
	Ido Schimmel

Since commit 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") the
local table uses the same trie allocated for the main table when custom
rules are not in use.

When a net namespace is dismantled, the main table is flushed and freed
(via an RCU callback) before the local table. In case the callback is
invoked before the local table is iterated, a use-after-free can occur.

Fix this by iterating over the FIB tables in reverse order, so that the
main table is always freed after the local table.

v2: Add a comment to make the fix more explicit per Dave's and Alex's
feedback.

Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
---
 net/ipv4/fib_frontend.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f52d27a422c3..08ce7f14ede1 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1298,14 +1298,17 @@ static int __net_init ip_fib_net_init(struct net *net)
 
 static void ip_fib_net_exit(struct net *net)
 {
-	unsigned int i;
+	int i;
 
 	rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
 	RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
 #endif
-	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+	/* The local table must be destroyed before the main table,
+	 * as it might be using main's trie.
+	 */
+	for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
 		struct hlist_head *head = &net->ipv4.fib_table_hash[i];
 		struct hlist_node *tmp;
 		struct fib_table *tb;
-- 
2.14.3

^ permalink raw reply related

* [QUESTION] Doubt about NAPI_GRO_CB(skb)->is_atomic in tcpv4 gro process
From: Yunsheng Lin @ 2017-12-20  9:09 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: alexander.duyck, davem@davemloft.net, linuxarm@huawei.com,
	yuxiaowu, wzhen.wang, Xuehuahu

Hi, all
	I have some doubt about NAPI_GRO_CB(skb)->is_atomic when
analyzing the tcpv4 gro process:

Firstly we set NAPI_GRO_CB(skb)->is_atomic to 1 in dev_gro_receive:
https://elixir.free-electrons.com/linux/v4.15-rc4/source/net/core/dev.c#L4838

And then in inet_gro_receive, we check the NAPI_GRO_CB(skb)->is_atomic
before setting NAPI_GRO_CB(skb)->is_atomic according to IP_DF bit in the ip header:
https://elixir.free-electrons.com/linux/v4.15-rc4/source/net/ipv4/af_inet.c#L1319

struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
.....................
	for (p = *head; p; p = p->next) {
........................

		/* If the previous IP ID value was based on an atomic
		 * datagram we can overwrite the value and ignore it.
		 */
		if (NAPI_GRO_CB(skb)->is_atomic)                      //we check it here
			NAPI_GRO_CB(p)->flush_id = flush_id;
		else
			NAPI_GRO_CB(p)->flush_id |= flush_id;
	}

	NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF));  //we set it here
	NAPI_GRO_CB(skb)->flush |= flush;
	skb_set_network_header(skb, off);
................................
}

My question is whether we should check the NAPI_GRO_CB(skb)->is_atomic or NAPI_GRO_CB(p)->is_atomic?
If we should check NAPI_GRO_CB(skb)->is_atomic, then maybe it is unnecessary because it is alway true.
If we should check NAPI_GRO_CB(p)->is_atomic, maybe there is a bug here.

So what is the logic here? I am just start analyzing the gro, maybe I miss something obvious here.

^ permalink raw reply

* RE: [patch iproute2] tc: add -bs option for batch mode
From: Chris Mi @ 2017-12-20  9:23 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev@vger.kernel.org, gerlitz.or@gmail.com
In-Reply-To: <20171219072231.055bcc9d@xeon-e3>

> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, December 19, 2017 11:23 PM
> To: Chris Mi <chrism@mellanox.com>
> Cc: netdev@vger.kernel.org; gerlitz.or@gmail.com
> Subject: Re: [patch iproute2] tc: add -bs option for batch mode
> 
> On Tue, 19 Dec 2017 15:33:46 +0900
> Chris Mi <chrism@mellanox.com> wrote:
> 
> > Currently in tc batch mode, only one command is read from the batch
> > file and sent to kernel to process. With this patch, we can accumulate
> > several commands before sending to kernel. The batch size is specified
> > using option -bs or -batchsize.
> >
> > To accumulate the commands in tc, we allocate an array of struct iovec.
> > If batchsize is bigger than 1 and we haven't accumulated enough
> > commands, rtnl_talk() will return without actually sending the message.
> > One exception is that there is no more command in the batch file.
> >
> > But please note that kernel still processes the requests one by one.
> > To process the requests in parallel in kernel is another effort.
> > The time we're saving in this patch is the user mode and kernel mode
> > context switch. So this patch works on top of the current kernel.
> >
> > Using the following script in kernel, we can generate 1,000,000 rules.
> > 	tools/testing/selftests/tc-testing/tdc_batch.py
> >
> > Without this patch, 'tc -b $file' exection time is:
> >
> > real    0m14.916s
> > user    0m6.808s
> > sys     0m8.046s
> >
> > With this patch, 'tc -b $file -bs 10' exection time is:
> >
> > real    0m12.286s
> > user    0m5.903s
> > sys     0m6.312s
> >
> > The insertion rate is improved more than 10%.
> >
> > Signed-off-by: Chris Mi <chrism@mellanox.com>
> > ---
> >  include/libnetlink.h | 27 ++++++++++++++++
> >  include/utils.h      |  8 +++++
> >  lib/libnetlink.c     | 30 +++++++++++++-----
> >  lib/utils.c          | 20 ++++++++++++
> >  man/man8/tc.8        |  5 +++
> >  tc/m_action.c        | 63 ++++++++++++++++++++++++++++---------
> >  tc/tc.c              | 27 ++++++++++++++--
> >  tc/tc_common.h       |  3 ++
> >  tc/tc_filter.c       | 89 ++++++++++++++++++++++++++++++++++++---------
> -------
> >  9 files changed, 221 insertions(+), 51 deletions(-)
> 
> In addition to my earlier comments, these are the implementation issues.
> 
> >
> > diff --git a/include/libnetlink.h b/include/libnetlink.h index
> > a4d83b9e..07e88c94 100644
> > --- a/include/libnetlink.h
> > +++ b/include/libnetlink.h
> > @@ -13,6 +13,8 @@
> >  #include <linux/netconf.h>
> >  #include <arpa/inet.h>
> >
> > +#define MSG_IOV_MAX 256
> > +
> >  struct rtnl_handle {
> >  	int			fd;
> >  	struct sockaddr_nl	local;
> > @@ -93,6 +95,31 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
> >  			void *arg, __u16 nc_flags);
> >  #define rtnl_dump_filter(rth, filter, arg) \
> >  	rtnl_dump_filter_nc(rth, filter, arg, 0)
> > +
> > +extern int msg_iov_index;
> > +static inline int get_msg_iov_index(void) {
> > +	return msg_iov_index;
> > +}
> > +static inline void set_msg_iov_index(int index) {
> > +	msg_iov_index = index;
> > +}
> > +static inline void incr_msg_iov_index(void) {
> > +	++msg_iov_index;
> > +}
> > +
> > +extern int batch_size;
> > +static inline int get_batch_size(void) {
> > +	return batch_size;
> > +}
> > +static inline void set_batch_size(int size) {
> > +	batch_size = size;
> > +}
> 
> Iproute2 is C not C++; no accessors for every variable.
I have changed them to C style.
> 
> 
> >  int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
> >  	      struct nlmsghdr **answer)
> >  	__attribute__((warn_unused_result));
> > diff --git a/include/utils.h b/include/utils.h index
> > d3895d56..66cb4747 100644
> > --- a/include/utils.h
> > +++ b/include/utils.h
> > @@ -235,6 +235,14 @@ void print_nlmsg_timestamp(FILE *fp, const struct
> > nlmsghdr *n);
> >
> >  extern int cmdlineno;
> >  ssize_t getcmdline(char **line, size_t *len, FILE *in);
> > +
> > +extern int cmdlinetotal;
> > +static inline int getcmdlinetotal(void) {
> > +	return cmdlinetotal;
> > +}
> > +void setcmdlinetotal(const char *name);
> > +
> >  int makeargs(char *line, char *argv[], int maxargs);  int
> > inet_get_addr(const char *src, __u32 *dst, struct in6_addr *dst6);
> >
> > diff --git a/lib/libnetlink.c b/lib/libnetlink.c index
> > 00e6ce0c..f9be1c6d 100644
> > --- a/lib/libnetlink.c
> > +++ b/lib/libnetlink.c
> > @@ -24,6 +24,7 @@
> >  #include <sys/uio.h>
> >
> >  #include "libnetlink.h"
> > +#include "utils.h"
> >
> >  #ifndef SOL_NETLINK
> >  #define SOL_NETLINK 270
> > @@ -581,6 +582,10 @@ static void rtnl_talk_error(struct nlmsghdr *h,
> struct nlmsgerr *err,
> >  		strerror(-err->error));
> >  }
> >
> > +static struct iovec msg_iov[MSG_IOV_MAX]; int msg_iov_index; int
> > +batch_size = 1;
> > +
> >  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
> >  		       struct nlmsghdr **answer,
> >  		       bool show_rtnl_err, nl_ext_ack_fn_t errfn) @@ -589,23
> > +594,34 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr
> *n,
> >  	unsigned int seq;
> >  	struct nlmsghdr *h;
> >  	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
> > -	struct iovec iov = {
> > -		.iov_base = n,
> > -		.iov_len = n->nlmsg_len
> > -	};
> > +	struct iovec *iov = &msg_iov[get_msg_iov_index()];
> > +	int index;
> > +	char *buf;
> > +
> > +	iov->iov_base = n;
> > +	iov->iov_len = n->nlmsg_len;
> > +
> > +	incr_msg_iov_index();
> >  	struct msghdr msg = {
> >  		.msg_name = &nladdr,
> >  		.msg_namelen = sizeof(nladdr),
> > -		.msg_iov = &iov,
> > -		.msg_iovlen = 1,
> > +		.msg_iov = msg_iov,
> > +		.msg_iovlen = get_msg_iov_index(),
> >  	};
> > -	char *buf;
> >
> >  	n->nlmsg_seq = seq = ++rtnl->seq;
> >
> >  	if (answer == NULL)
> >  		n->nlmsg_flags |= NLM_F_ACK;
> 
> Your real performance win is just not asking for ACK for every rule.
No. Even if batch_size > 1, we ack every rule. The real performance win is
to send multiple rules in one system call. If we are not asking for ACK for every rule,
the performance will be improved further.
> If you switched to pure streaming mode, then the batching wouldn't be
> necessary.
> 
> >
> > +	index = get_msg_iov_index() % get_batch_size();
> > +	if (index != 0 && get_batch_size() > 1 &&
> > +	    cmdlineno != getcmdlinetotal() &&
> > +	    (n->nlmsg_type == RTM_NEWTFILTER ||
> > +	    n->nlmsg_type == RTM_NEWACTION))
> > +		return 3;
> > +	set_msg_iov_index(index);
> > +
> >  	status = sendmsg(rtnl->fd, &msg, 0);
> >  	if (status < 0) {
> >  		perror("Cannot talk to rtnetlink"); diff --git a/lib/utils.c
> > b/lib/utils.c index 7ced8c06..53ca389f 100644
> > --- a/lib/utils.c
> > +++ b/lib/utils.c
> > @@ -1202,6 +1202,26 @@ ssize_t getcmdline(char **linep, size_t *lenp,
> FILE *in)
> >  	return cc;
> >  }
> >
> > +int cmdlinetotal;
> > +
> > +void setcmdlinetotal(const char *name) {
> > +	char *line = NULL;
> > +	size_t len = 0;
> > +
> > +	if (name && strcmp(name, "-") != 0) {
> > +		if (freopen(name, "r", stdin) == NULL) {
> > +			fprintf(stderr, "Cannot open file \"%s\" for
> reading: %s\n",
> > +				name, strerror(errno));
> > +			return;
> > +		}
> > +	}
> > +
> > +	cmdlinetotal = 0;
> > +	while (getcmdline(&line, &len, stdin) != -1)
> > +		cmdlinetotal++;
> > +}
> > +
> >  /* split command line into argument vector */  int makeargs(char
> > *line, char *argv[], int maxargs)  { diff --git a/man/man8/tc.8
> > b/man/man8/tc.8 index ff071b33..de137e16 100644
> > --- a/man/man8/tc.8
> > +++ b/man/man8/tc.8
> > @@ -601,6 +601,11 @@ must exist already.
> >  read commands from provided file or standard input and invoke them.
> >  First failure will cause termination of tc.
> >
> > +.TP
> > +.BR "\-bs", " \-bs size", " \-batchsize", " \-batchsize size"
> > +How many commands are accumulated before sending to kernel.
> > +By default, it is 1. It only takes effect in batch mode.
> > +
> >  .TP
> >  .BR "\-force"
> >  don't terminate tc on errors in batch mode.
> > diff --git a/tc/m_action.c b/tc/m_action.c index 13f942bf..574b78ca
> > 100644
> > --- a/tc/m_action.c
> > +++ b/tc/m_action.c
> > @@ -23,6 +23,7 @@
> >  #include <arpa/inet.h>
> >  #include <string.h>
> >  #include <dlfcn.h>
> > +#include <errno.h>
> >
> >  #include "utils.h"
> >  #include "tc_common.h"
> > @@ -546,33 +547,65 @@ bad_val:
> >  	return ret;
> >  }
> >
> > +typedef struct {
> > +	struct nlmsghdr		n;
> > +	struct tcamsg		t;
> > +	char			buf[MAX_MSG];
> > +} tc_action_req;
> > +
> > +static tc_action_req *action_reqs;
> > +
> > +void free_action_reqs(void)
> > +{
> > +	if (action_reqs)
> > +		free(action_reqs);
> > +}
> > +
> > +static tc_action_req *get_action_req(void) {
> > +	tc_action_req *req;
> > +
> > +	if (action_reqs == NULL) {
> > +		action_reqs = malloc(get_batch_size() * sizeof
> (tc_action_req));
> > +		if (action_reqs == NULL)
> > +			return NULL;
> > +	}
> > +	req = &action_reqs[get_msg_iov_index()];
> > +	memset(req, 0, sizeof (*req));
> > +
> > +	return req;
> > +}
> > +
> >  static int tc_action_modify(int cmd, unsigned int flags,
> >  			    int *argc_p, char ***argv_p)
> >  {
> >  	int argc = *argc_p;
> >  	char **argv = *argv_p;
> >  	int ret = 0;
> > -	struct {
> > -		struct nlmsghdr         n;
> > -		struct tcamsg           t;
> > -		char                    buf[MAX_MSG];
> > -	} req = {
> > -		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg)),
> > -		.n.nlmsg_flags = NLM_F_REQUEST | flags,
> > -		.n.nlmsg_type = cmd,
> > -		.t.tca_family = AF_UNSPEC,
> > -	};
> > -	struct rtattr *tail = NLMSG_TAIL(&req.n);
> > +	tc_action_req *req;
> > +
> > +	req = get_action_req();
> > +	if (req == NULL) {
> > +		fprintf(stderr, "get_action_req error: not enough buffer\n");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg));
> > +	req->n.nlmsg_flags = NLM_F_REQUEST | flags;
> > +	req->n.nlmsg_type = cmd;
> > +	req->t.tca_family = AF_UNSPEC;
> > +	struct rtattr *tail = NLMSG_TAIL(&req->n);
> >
> >  	argc -= 1;
> >  	argv += 1;
> > -	if (parse_action(&argc, &argv, TCA_ACT_TAB, &req.n)) {
> > +	if (parse_action(&argc, &argv, TCA_ACT_TAB, &req->n)) {
> >  		fprintf(stderr, "Illegal \"action\"\n");
> >  		return -1;
> >  	}
> > -	tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail;
> > +	tail->rta_len = (void *) NLMSG_TAIL(&req->n) - (void *) tail;
> >
> > -	if (rtnl_talk(&rth, &req.n, NULL) < 0) {
> > +	ret = rtnl_talk(&rth, &req->n, NULL);
> > +	if (ret < 0) {
> >  		fprintf(stderr, "We have an error talking to the kernel\n");
> >  		ret = -1;
> >  	}
> > @@ -739,5 +772,5 @@ int do_action(int argc, char **argv)
> >  			return -1;
> >  	}
> >
> > -	return 0;
> > +	return ret;
> >  }
> > diff --git a/tc/tc.c b/tc/tc.c
> > index ad9f07e9..9c8e828b 100644
> > --- a/tc/tc.c
> > +++ b/tc/tc.c
> > @@ -189,7 +189,7 @@ static void usage(void)
> >  	fprintf(stderr, "Usage: tc [ OPTIONS ] OBJECT { COMMAND |
> help }\n"
> >  			"       tc [-force] -batch filename\n"
> >  			"where  OBJECT := { qdisc | class | filter | action |
> monitor | exec }\n"
> > -	                "       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -p[retty] |
> -b[atch] [filename] | -n[etns] name |\n"
> > +	                "       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -p[retty] |
> -b[atch] [filename] | -bs | -batchsize [size] | -n[etns] name |\n"
> >  			"                    -nm | -nam[es] | { -cf | -conf } path } | -
> j[son]\n");
> >  }
> >
> > @@ -223,6 +223,9 @@ static int batch(const char *name)
> >  	size_t len = 0;
> >  	int ret = 0;
> >
> > +	if (get_batch_size() > 1)
> > +		setcmdlinetotal(name);
> > +
> >  	batch_mode = 1;
> >  	if (name && strcmp(name, "-") != 0) {
> >  		if (freopen(name, "r", stdin) == NULL) { @@ -248,15 +251,22
> @@
> > static int batch(const char *name)
> >  		if (largc == 0)
> >  			continue;	/* blank line */
> >
> > -		if (do_cmd(largc, largv)) {
> > +		ret = do_cmd(largc, largv);
> > +		if (ret != 0 && ret != 3) {
> >  			fprintf(stderr, "Command failed %s:%d\n", name,
> cmdlineno);
> >  			ret = 1;
> >  			if (!force)
> >  				break;
> >  		}
> >  	}
> > +	if (ret == 3)
> > +		fprintf(stderr,
> > +			"Command is not sent to kernel due to internal
> error %s:%d\n",
> > +			name, cmdlineno);
> >  	if (line)
> >  		free(line);
> > +	free_filter_reqs();
> > +	free_action_reqs();
> >
> >  	rtnl_close(&rth);
> >  	return ret;
> > @@ -267,6 +277,7 @@ int main(int argc, char **argv)  {
> >  	int ret;
> >  	char *batch_file = NULL;
> > +	int size;
> >
> >  	while (argc > 1) {
> >  		if (argv[1][0] != '-')
> > @@ -297,6 +308,16 @@ int main(int argc, char **argv)
> >  			if (argc <= 1)
> >  				usage();
> >  			batch_file = argv[1];
> > +		} else if (matches(argv[1], "-batchsize") == 0 ||
> > +				matches(argv[1], "-bs") == 0) {
> > +			argc--;	argv++;
> > +			if (argc <= 1)
> > +				usage();
> > +			size = atoi(argv[1]);
> > +			if (size > MSG_IOV_MAX)
> > +				set_batch_size(MSG_IOV_MAX);
> > +			else
> > +				set_batch_size(size);
> >  		} else if (matches(argv[1], "-netns") == 0) {
> >  			NEXT_ARG();
> >  			if (netns_switch(argv[1]))
> > @@ -342,6 +363,8 @@ int main(int argc, char **argv)
> >  	}
> >
> >  	ret = do_cmd(argc-1, argv+1);
> > +	free_filter_reqs();
> > +	free_action_reqs();
> >  Exit:
> >  	rtnl_close(&rth);
> >
> > diff --git a/tc/tc_common.h b/tc/tc_common.h index 264fbdac..fde8db1b
> > 100644
> > --- a/tc/tc_common.h
> > +++ b/tc/tc_common.h
> > @@ -24,5 +24,8 @@ struct tc_sizespec;
> >  extern int parse_size_table(int *p_argc, char ***p_argv, struct
> > tc_sizespec *s);  extern int check_size_table_opts(struct tc_sizespec
> > *s);
> >
> > +extern void free_filter_reqs(void);
> > +extern void free_action_reqs(void);
> > +
> >  extern int show_graph;
> >  extern bool use_names;
> > diff --git a/tc/tc_filter.c b/tc/tc_filter.c index 545cc3a1..dc53c128
> > 100644
> > --- a/tc/tc_filter.c
> > +++ b/tc/tc_filter.c
> > @@ -19,6 +19,7 @@
> >  #include <arpa/inet.h>
> >  #include <string.h>
> >  #include <linux/if_ether.h>
> > +#include <errno.h>
> >
> >  #include "rt_names.h"
> >  #include "utils.h"
> > @@ -42,18 +43,51 @@ static void usage(void)
> >  		"OPTIONS := ... try tc filter add <desired FILTER_KIND>
> help\n");
> > }
> >
> > +typedef struct {
> > +	struct nlmsghdr		n;
> > +	struct tcmsg		t;
> > +	char			buf[MAX_MSG];
> > +} tc_filter_req;
> > +
> > +static tc_filter_req *filter_reqs;
> > +
> > +void free_filter_reqs(void)
> > +{
> > +	if (filter_reqs)
> > +		free(filter_reqs);
> > +}
> 
> You don't need to check for NULL when calling free.
> free(NULL) is a nop.
Done.

^ permalink raw reply

* [patch iproute2 v2] tc: add -bs option for batch mode
From: Chris Mi @ 2017-12-20  9:26 UTC (permalink / raw)
  To: netdev; +Cc: gerlitz.or

Currently in tc batch mode, only one command is read from the batch
file and sent to kernel to process. With this patch, we can accumulate
several commands before sending to kernel. The batch size is specified
using option -bs or -batchsize.

To accumulate the commands in tc, we allocate an array of struct iovec.
If batchsize is bigger than 1 and we haven't accumulated enough
commands, rtnl_talk() will return without actually sending the message.
One exception is that there is no more command in the batch file.

But please note that kernel still processes the requests one by one.
To process the requests in parallel in kernel is another effort.
The time we're saving in this patch is the user mode and kernel mode
context switch. So this patch works on top of the current kernel.

Using the following script in kernel, we can generate 1,000,000 rules.
        tools/testing/selftests/tc-testing/tdc_batch.py

Without this patch, 'tc -b $file' exection time is:

real    0m14.916s
user    0m6.808s
sys     0m8.046s

With this patch, 'tc -b $file -bs 10' exection time is:

real    0m12.286s
user    0m5.903s
sys     0m6.312s

The insertion rate is improved more than 10%.

Signed-off-by: Chris Mi <chrism@mellanox.com>
---
 include/libnetlink.h |  6 ++++
 include/utils.h      |  4 +++
 lib/libnetlink.c     | 25 ++++++++++-----
 lib/utils.c          | 20 ++++++++++++
 man/man8/tc.8        |  5 +++
 tc/m_action.c        | 62 +++++++++++++++++++++++++++---------
 tc/tc.c              | 24 ++++++++++++--
 tc/tc_common.h       |  3 ++
 tc/tc_filter.c       | 88 ++++++++++++++++++++++++++++++++++++----------------
 9 files changed, 186 insertions(+), 51 deletions(-)

diff --git a/include/libnetlink.h b/include/libnetlink.h
index a4d83b9e..775f830b 100644
--- a/include/libnetlink.h
+++ b/include/libnetlink.h
@@ -13,6 +13,8 @@
 #include <linux/netconf.h>
 #include <arpa/inet.h>
 
+#define MSG_IOV_MAX 256
+
 struct rtnl_handle {
 	int			fd;
 	struct sockaddr_nl	local;
@@ -93,6 +95,10 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
 			void *arg, __u16 nc_flags);
 #define rtnl_dump_filter(rth, filter, arg) \
 	rtnl_dump_filter_nc(rth, filter, arg, 0)
+
+extern int msg_iov_index;
+extern int batch_size;
+
 int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 	      struct nlmsghdr **answer)
 	__attribute__((warn_unused_result));
diff --git a/include/utils.h b/include/utils.h
index d3895d56..113a8c31 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -235,6 +235,10 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n);
 
 extern int cmdlineno;
 ssize_t getcmdline(char **line, size_t *len, FILE *in);
+
+extern int cmdlinetotal;
+void setcmdlinetotal(const char *name);
+
 int makeargs(char *line, char *argv[], int maxargs);
 int inet_get_addr(const char *src, __u32 *dst, struct in6_addr *dst6);
 
diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index 00e6ce0c..7ff32d64 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -24,6 +24,7 @@
 #include <sys/uio.h>
 
 #include "libnetlink.h"
+#include "utils.h"
 
 #ifndef SOL_NETLINK
 #define SOL_NETLINK 270
@@ -581,6 +582,10 @@ static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err,
 		strerror(-err->error));
 }
 
+static struct iovec msg_iov[MSG_IOV_MAX];
+int msg_iov_index;
+int batch_size = 1;
+
 static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 		       struct nlmsghdr **answer,
 		       bool show_rtnl_err, nl_ext_ack_fn_t errfn)
@@ -589,23 +594,29 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 	unsigned int seq;
 	struct nlmsghdr *h;
 	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
-	struct iovec iov = {
-		.iov_base = n,
-		.iov_len = n->nlmsg_len
-	};
+	struct iovec *iov = &msg_iov[msg_iov_index];
+	char *buf;
+
+	iov->iov_base = n;
+	iov->iov_len = n->nlmsg_len;
+
 	struct msghdr msg = {
 		.msg_name = &nladdr,
 		.msg_namelen = sizeof(nladdr),
-		.msg_iov = &iov,
-		.msg_iovlen = 1,
+		.msg_iov = msg_iov,
+		.msg_iovlen = ++msg_iov_index,
 	};
-	char *buf;
 
 	n->nlmsg_seq = seq = ++rtnl->seq;
 
 	if (answer == NULL)
 		n->nlmsg_flags |= NLM_F_ACK;
 
+	msg_iov_index %= batch_size;
+	if (msg_iov_index != 0 && batch_size > 1 && cmdlineno != cmdlinetotal &&
+	    (n->nlmsg_type == RTM_NEWTFILTER || n->nlmsg_type == RTM_NEWACTION))
+		return 3;
+
 	status = sendmsg(rtnl->fd, &msg, 0);
 	if (status < 0) {
 		perror("Cannot talk to rtnetlink");
diff --git a/lib/utils.c b/lib/utils.c
index 7ced8c06..53ca389f 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -1202,6 +1202,26 @@ ssize_t getcmdline(char **linep, size_t *lenp, FILE *in)
 	return cc;
 }
 
+int cmdlinetotal;
+
+void setcmdlinetotal(const char *name)
+{
+	char *line = NULL;
+	size_t len = 0;
+
+	if (name && strcmp(name, "-") != 0) {
+		if (freopen(name, "r", stdin) == NULL) {
+			fprintf(stderr, "Cannot open file \"%s\" for reading: %s\n",
+				name, strerror(errno));
+			return;
+		}
+	}
+
+	cmdlinetotal = 0;
+	while (getcmdline(&line, &len, stdin) != -1)
+		cmdlinetotal++;
+}
+
 /* split command line into argument vector */
 int makeargs(char *line, char *argv[], int maxargs)
 {
diff --git a/man/man8/tc.8 b/man/man8/tc.8
index ff071b33..de137e16 100644
--- a/man/man8/tc.8
+++ b/man/man8/tc.8
@@ -601,6 +601,11 @@ must exist already.
 read commands from provided file or standard input and invoke them.
 First failure will cause termination of tc.
 
+.TP
+.BR "\-bs", " \-bs size", " \-batchsize", " \-batchsize size"
+How many commands are accumulated before sending to kernel.
+By default, it is 1. It only takes effect in batch mode.
+
 .TP
 .BR "\-force"
 don't terminate tc on errors in batch mode.
diff --git a/tc/m_action.c b/tc/m_action.c
index 13f942bf..99c75da6 100644
--- a/tc/m_action.c
+++ b/tc/m_action.c
@@ -23,6 +23,7 @@
 #include <arpa/inet.h>
 #include <string.h>
 #include <dlfcn.h>
+#include <errno.h>
 
 #include "utils.h"
 #include "tc_common.h"
@@ -546,33 +547,64 @@ bad_val:
 	return ret;
 }
 
+typedef struct {
+	struct nlmsghdr		n;
+	struct tcamsg		t;
+	char			buf[MAX_MSG];
+} tc_action_req;
+
+static tc_action_req *action_reqs;
+
+void free_action_reqs(void)
+{
+	free(action_reqs);
+}
+
+static tc_action_req *get_action_req(void)
+{
+	tc_action_req *req;
+
+	if (action_reqs == NULL) {
+		action_reqs = malloc(batch_size * sizeof (tc_action_req));
+		if (action_reqs == NULL)
+			return NULL;
+	}
+	req = &action_reqs[msg_iov_index];
+	memset(req, 0, sizeof (*req));
+
+	return req;
+}
+
 static int tc_action_modify(int cmd, unsigned int flags,
 			    int *argc_p, char ***argv_p)
 {
 	int argc = *argc_p;
 	char **argv = *argv_p;
 	int ret = 0;
-	struct {
-		struct nlmsghdr         n;
-		struct tcamsg           t;
-		char                    buf[MAX_MSG];
-	} req = {
-		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg)),
-		.n.nlmsg_flags = NLM_F_REQUEST | flags,
-		.n.nlmsg_type = cmd,
-		.t.tca_family = AF_UNSPEC,
-	};
-	struct rtattr *tail = NLMSG_TAIL(&req.n);
+	tc_action_req *req;
+
+	req = get_action_req();
+	if (req == NULL) {
+		fprintf(stderr, "get_action_req error: not enough buffer\n");
+		return -ENOMEM;
+	}
+
+	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg));
+	req->n.nlmsg_flags = NLM_F_REQUEST | flags;
+	req->n.nlmsg_type = cmd;
+	req->t.tca_family = AF_UNSPEC;
+	struct rtattr *tail = NLMSG_TAIL(&req->n);
 
 	argc -= 1;
 	argv += 1;
-	if (parse_action(&argc, &argv, TCA_ACT_TAB, &req.n)) {
+	if (parse_action(&argc, &argv, TCA_ACT_TAB, &req->n)) {
 		fprintf(stderr, "Illegal \"action\"\n");
 		return -1;
 	}
-	tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail;
+	tail->rta_len = (void *) NLMSG_TAIL(&req->n) - (void *) tail;
 
-	if (rtnl_talk(&rth, &req.n, NULL) < 0) {
+	ret = rtnl_talk(&rth, &req->n, NULL);
+	if (ret < 0) {
 		fprintf(stderr, "We have an error talking to the kernel\n");
 		ret = -1;
 	}
@@ -739,5 +771,5 @@ int do_action(int argc, char **argv)
 			return -1;
 	}
 
-	return 0;
+	return ret;
 }
diff --git a/tc/tc.c b/tc/tc.c
index ad9f07e9..d63e7c03 100644
--- a/tc/tc.c
+++ b/tc/tc.c
@@ -189,7 +189,7 @@ static void usage(void)
 	fprintf(stderr, "Usage: tc [ OPTIONS ] OBJECT { COMMAND | help }\n"
 			"       tc [-force] -batch filename\n"
 			"where  OBJECT := { qdisc | class | filter | action | monitor | exec }\n"
-	                "       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -p[retty] | -b[atch] [filename] | -n[etns] name |\n"
+	                "       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -p[retty] | -b[atch] [filename] | -bs | -batchsize [size] | -n[etns] name |\n"
 			"                    -nm | -nam[es] | { -cf | -conf } path } | -j[son]\n");
 }
 
@@ -223,6 +223,9 @@ static int batch(const char *name)
 	size_t len = 0;
 	int ret = 0;
 
+	if (batch_size > 1)
+		setcmdlinetotal(name);
+
 	batch_mode = 1;
 	if (name && strcmp(name, "-") != 0) {
 		if (freopen(name, "r", stdin) == NULL) {
@@ -248,15 +251,22 @@ static int batch(const char *name)
 		if (largc == 0)
 			continue;	/* blank line */
 
-		if (do_cmd(largc, largv)) {
+		ret = do_cmd(largc, largv);
+		if (ret < 0) {
 			fprintf(stderr, "Command failed %s:%d\n", name, cmdlineno);
 			ret = 1;
 			if (!force)
 				break;
 		}
 	}
+	if (batch_size > 1 && ret == 3)
+		fprintf(stderr,
+			"Command is not sent to kernel due to internal error %s:%d\n",
+			name, cmdlineno);
 	if (line)
 		free(line);
+	free_filter_reqs();
+	free_action_reqs();
 
 	rtnl_close(&rth);
 	return ret;
@@ -297,6 +307,14 @@ int main(int argc, char **argv)
 			if (argc <= 1)
 				usage();
 			batch_file = argv[1];
+		} else if (matches(argv[1], "-batchsize") == 0 ||
+				matches(argv[1], "-bs") == 0) {
+			argc--;	argv++;
+			if (argc <= 1)
+				usage();
+			batch_size = atoi(argv[1]);
+			if (batch_size > MSG_IOV_MAX)
+				batch_size = MSG_IOV_MAX;
 		} else if (matches(argv[1], "-netns") == 0) {
 			NEXT_ARG();
 			if (netns_switch(argv[1]))
@@ -342,6 +360,8 @@ int main(int argc, char **argv)
 	}
 
 	ret = do_cmd(argc-1, argv+1);
+	free_filter_reqs();
+	free_action_reqs();
 Exit:
 	rtnl_close(&rth);
 
diff --git a/tc/tc_common.h b/tc/tc_common.h
index 264fbdac..fde8db1b 100644
--- a/tc/tc_common.h
+++ b/tc/tc_common.h
@@ -24,5 +24,8 @@ struct tc_sizespec;
 extern int parse_size_table(int *p_argc, char ***p_argv, struct tc_sizespec *s);
 extern int check_size_table_opts(struct tc_sizespec *s);
 
+extern void free_filter_reqs(void);
+extern void free_action_reqs(void);
+
 extern int show_graph;
 extern bool use_names;
diff --git a/tc/tc_filter.c b/tc/tc_filter.c
index 545cc3a1..89ac4402 100644
--- a/tc/tc_filter.c
+++ b/tc/tc_filter.c
@@ -19,6 +19,7 @@
 #include <arpa/inet.h>
 #include <string.h>
 #include <linux/if_ether.h>
+#include <errno.h>
 
 #include "rt_names.h"
 #include "utils.h"
@@ -42,18 +43,50 @@ static void usage(void)
 		"OPTIONS := ... try tc filter add <desired FILTER_KIND> help\n");
 }
 
+typedef struct {
+	struct nlmsghdr		n;
+	struct tcmsg		t;
+	char			buf[MAX_MSG];
+} tc_filter_req;
+
+static tc_filter_req *filter_reqs;
+
+void free_filter_reqs(void)
+{
+	free(filter_reqs);
+}
+
+static tc_filter_req *get_filter_req(void)
+{
+	tc_filter_req *req;
+
+	if (filter_reqs == NULL) {
+		filter_reqs = malloc(batch_size * sizeof (tc_filter_req));
+		if (filter_reqs == NULL)
+			return NULL;
+	}
+	req = &filter_reqs[msg_iov_index];
+	memset(req, 0, sizeof (*req));
+
+	return req;
+}
+
 static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv)
 {
-	struct {
-		struct nlmsghdr	n;
-		struct tcmsg		t;
-		char			buf[MAX_MSG];
-	} req = {
-		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
-		.n.nlmsg_flags = NLM_F_REQUEST | flags,
-		.n.nlmsg_type = cmd,
-		.t.tcm_family = AF_UNSPEC,
-	};
+	tc_filter_req *req;
+	int ret;
+
+	req = get_filter_req();
+	if (req == NULL) {
+		fprintf(stderr, "get_filter_req error: not enough buffer\n");
+		return -ENOMEM;
+	}
+
+	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+	req->n.nlmsg_flags = NLM_F_REQUEST | flags;
+	req->n.nlmsg_type = cmd;
+	req->t.tcm_family = AF_UNSPEC;
+
 	struct filter_util *q = NULL;
 	__u32 prio = 0;
 	__u32 protocol = 0;
@@ -75,37 +108,37 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv)
 				duparg("dev", *argv);
 			strncpy(d, *argv, sizeof(d)-1);
 		} else if (strcmp(*argv, "root") == 0) {
-			if (req.t.tcm_parent) {
+			if (req->t.tcm_parent) {
 				fprintf(stderr,
 					"Error: \"root\" is duplicate parent ID\n");
 				return -1;
 			}
-			req.t.tcm_parent = TC_H_ROOT;
+			req->t.tcm_parent = TC_H_ROOT;
 		} else if (strcmp(*argv, "ingress") == 0) {
-			if (req.t.tcm_parent) {
+			if (req->t.tcm_parent) {
 				fprintf(stderr,
 					"Error: \"ingress\" is duplicate parent ID\n");
 				return -1;
 			}
-			req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
+			req->t.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
 						     TC_H_MIN_INGRESS);
 		} else if (strcmp(*argv, "egress") == 0) {
-			if (req.t.tcm_parent) {
+			if (req->t.tcm_parent) {
 				fprintf(stderr,
 					"Error: \"egress\" is duplicate parent ID\n");
 				return -1;
 			}
-			req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
+			req->t.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
 						     TC_H_MIN_EGRESS);
 		} else if (strcmp(*argv, "parent") == 0) {
 			__u32 handle;
 
 			NEXT_ARG();
-			if (req.t.tcm_parent)
+			if (req->t.tcm_parent)
 				duparg("parent", *argv);
 			if (get_tc_classid(&handle, *argv))
 				invarg("Invalid parent ID", *argv);
-			req.t.tcm_parent = handle;
+			req->t.tcm_parent = handle;
 		} else if (strcmp(*argv, "handle") == 0) {
 			NEXT_ARG();
 			if (fhandle)
@@ -152,26 +185,26 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv)
 		argc--; argv++;
 	}
 
-	req.t.tcm_info = TC_H_MAKE(prio<<16, protocol);
+	req->t.tcm_info = TC_H_MAKE(prio<<16, protocol);
 
 	if (chain_index_set)
-		addattr32(&req.n, sizeof(req), TCA_CHAIN, chain_index);
+		addattr32(&req->n, sizeof(*req), TCA_CHAIN, chain_index);
 
 	if (k[0])
-		addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1);
+		addattr_l(&req->n, sizeof(*req), TCA_KIND, k, strlen(k)+1);
 
 	if (d[0])  {
 		ll_init_map(&rth);
 
-		req.t.tcm_ifindex = ll_name_to_index(d);
-		if (req.t.tcm_ifindex == 0) {
+		req->t.tcm_ifindex = ll_name_to_index(d);
+		if (req->t.tcm_ifindex == 0) {
 			fprintf(stderr, "Cannot find device \"%s\"\n", d);
 			return 1;
 		}
 	}
 
 	if (q) {
-		if (q->parse_fopt(q, fhandle, argc, argv, &req.n))
+		if (q->parse_fopt(q, fhandle, argc, argv, &req->n))
 			return 1;
 	} else {
 		if (fhandle) {
@@ -190,14 +223,15 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv)
 	}
 
 	if (est.ewma_log)
-		addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est));
+		addattr_l(&req->n, sizeof(*req), TCA_RATE, &est, sizeof(est));
 
-	if (rtnl_talk(&rth, &req.n, NULL) < 0) {
+	ret = rtnl_talk(&rth, &req->n, NULL);
+	if (ret < 0) {
 		fprintf(stderr, "We have an error talking to the kernel\n");
 		return 2;
 	}
 
-	return 0;
+	return ret;
 }
 
 static __u32 filter_parent;
-- 
2.14.3

^ permalink raw reply related

* Re: [-next PATCH 0/4] sysfs and DEVICE_ATTR_<foo>
From: Felipe Balbi @ 2017-12-20  9:46 UTC (permalink / raw)
  To: Joe Perches, linux-arm-kernel, linux-acpi, openipmi-developer,
	intel-gfx, linuxppc-dev, netdev, linux-nvme, platform-driver-x86,
	linux-s390, esc.storagedev, linux-scsi, linux-pm, linux-serial,
	linux-usb, linux-kernel, alsa-devel, linux-omap
  Cc: devel, linux-fbdev, linux-sh, dri-devel, linux-input, linux-media
In-Reply-To: <cover.1513706701.git.joe@perches.com>


Hi,

Joe Perches <joe@perches.com> writes:
>  drivers/usb/phy/phy-tahvo.c                        |  2 +-

Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>

-- 
balbi
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply

* [PATCH net 1/2] dt-bindings: net: mediatek: add condition to property mediatek,pctl
From: sean.wang @ 2017-12-20  9:47 UTC (permalink / raw)
  To: robh+dt, davem, mark.rutland
  Cc: matthias.bgg, john, nbd, nelson.chang, devicetree,
	linux-arm-kernel, netdev, linux-kernel, linux-mediatek, Sean Wang

From: Sean Wang <sean.wang@mediatek.com>

The property "mediatek,pctl" is only required for SoCs such as MT2701 and
MT7623, so adding a few words for stating the condition.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
---
 Documentation/devicetree/bindings/net/mediatek-net.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
index 214eaa9..53c13ee 100644
--- a/Documentation/devicetree/bindings/net/mediatek-net.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -28,7 +28,7 @@ Required properties:
 - mediatek,sgmiisys: phandle to the syscon node that handles the SGMII setup
 	which is required for those SoCs equipped with SGMII such as MT7622 SoC.
 - mediatek,pctl: phandle to the syscon node that handles the ports slew rate
-	and driver current
+	and driver current: only for MT2701 and MT7623 SoC
 
 Optional properties:
 - interrupt-parent: Should be the phandle for the interrupt controller
-- 
2.7.4

^ permalink raw reply related

* [PATCH net 2/2] net: mediatek: remove superfluous pin setup for MT7622 SoC
From: sean.wang @ 2017-12-20  9:47 UTC (permalink / raw)
  To: robh+dt, davem, mark.rutland
  Cc: matthias.bgg, john, nbd, nelson.chang, devicetree,
	linux-arm-kernel, netdev, linux-kernel, linux-mediatek, Sean Wang
In-Reply-To: <e366efc29985d3292c8a1afb1389b5eac57c9037.1513762066.git.sean.wang@mediatek.com>

From: Sean Wang <sean.wang@mediatek.com>

Remove superfluous pin setup to get out of accessing invalid I/O pin
registers because the way for pin configuring tends to be different from
various SoCs and thus it should be better being managed and controlled by
the pinctrl driver which MT7622 already can support.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 35 +++++++++++++++++------------
 drivers/net/ethernet/mediatek/mtk_eth_soc.h |  3 +++
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index fc67e35..29826dd 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1952,14 +1952,16 @@ static int mtk_hw_init(struct mtk_eth *eth)
 	}
 	regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
 
-	/* Set GE2 driving and slew rate */
-	regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00);
+	if (eth->pctl) {
+		/* Set GE2 driving and slew rate */
+		regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00);
 
-	/* set GE2 TDSEL */
-	regmap_write(eth->pctl, GPIO_OD33_CTRL8, 0x5);
+		/* set GE2 TDSEL */
+		regmap_write(eth->pctl, GPIO_OD33_CTRL8, 0x5);
 
-	/* set GE2 TUNE */
-	regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
+		/* set GE2 TUNE */
+		regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
+	}
 
 	/* Set linkdown as the default for each GMAC. Its own MCR would be set
 	 * up with the more appropriate value when mtk_phy_link_adjust call is
@@ -2538,11 +2540,13 @@ static int mtk_probe(struct platform_device *pdev)
 		}
 	}
 
-	eth->pctl = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
-						    "mediatek,pctl");
-	if (IS_ERR(eth->pctl)) {
-		dev_err(&pdev->dev, "no pctl regmap found\n");
-		return PTR_ERR(eth->pctl);
+	if (eth->soc->required_pctl) {
+		eth->pctl = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+							    "mediatek,pctl");
+		if (IS_ERR(eth->pctl)) {
+			dev_err(&pdev->dev, "no pctl regmap found\n");
+			return PTR_ERR(eth->pctl);
+		}
 	}
 
 	for (i = 0; i < 3; i++) {
@@ -2668,17 +2672,20 @@ static int mtk_remove(struct platform_device *pdev)
 
 static const struct mtk_soc_data mt2701_data = {
 	.caps = MTK_GMAC1_TRGMII,
-	.required_clks = MT7623_CLKS_BITMAP
+	.required_clks = MT7623_CLKS_BITMAP,
+	.required_pctl = true,
 };
 
 static const struct mtk_soc_data mt7622_data = {
 	.caps = MTK_DUAL_GMAC_SHARED_SGMII | MTK_GMAC1_ESW,
-	.required_clks = MT7622_CLKS_BITMAP
+	.required_clks = MT7622_CLKS_BITMAP,
+	.required_pctl = false,
 };
 
 static const struct mtk_soc_data mt7623_data = {
 	.caps = MTK_GMAC1_TRGMII,
-	.required_clks = MT7623_CLKS_BITMAP
+	.required_clks = MT7623_CLKS_BITMAP,
+	.required_pctl = true,
 };
 
 const struct of_device_id of_mtk_match[] = {
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index a3af466..672b8c3 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -573,10 +573,13 @@ struct mtk_rx_ring {
  * @caps			Flags shown the extra capability for the SoC
  * @required_clks		Flags shown the bitmap for required clocks on
  *				the target SoC
+ * @required_pctl		A bool value to show whether the SoC requires
+ *				the extra setup for those pins used by GMAC.
  */
 struct mtk_soc_data {
 	u32		caps;
 	u32		required_clks;
+	bool		required_pctl;
 };
 
 /* currently no SoC has more than 2 macs */
-- 
2.7.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox