Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH rdma-next v2 07/13] IB/core: Support passing uhw for create_flow
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Matan Barak <matanb@mellanox.com>

This is required when user-space drivers need to pass extra information
regarding how to handle this flow steering specification.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 7 ++++++-
 drivers/infiniband/core/verbs.c      | 2 +-
 drivers/infiniband/hw/mlx4/main.c    | 6 +++++-
 drivers/infiniband/hw/mlx5/main.c    | 7 ++++++-
 include/rdma/ib_verbs.h              | 3 ++-
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index e74262ee104c..ddb9d79691be 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3542,11 +3542,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		err = -EINVAL;
 		goto err_free;
 	}
-	flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+
+	flow_id = qp->device->create_flow(qp, flow_attr,
+					  IB_FLOW_DOMAIN_USER, uhw);
+
 	if (IS_ERR(flow_id)) {
 		err = PTR_ERR(flow_id);
 		goto err_free;
 	}
+	atomic_inc(&qp->usecnt);
+	flow_id->qp = qp;
 	flow_id->uobject = uobj;
 	uobj->object = flow_id;
 	uflow = container_of(uobj, typeof(*uflow), uobject);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 6ddfb1fade79..0b56828c1319 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1983,7 +1983,7 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
 	if (!qp->device->create_flow)
 		return ERR_PTR(-EOPNOTSUPP);

-	flow_id = qp->device->create_flow(qp, flow_attr, domain);
+	flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
 	if (!IS_ERR(flow_id)) {
 		atomic_inc(&qp->usecnt);
 		flow_id->qp = qp;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bf12394c13c1..6fe5d5d1d1d9 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1848,7 +1848,7 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,

 static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 				    struct ib_flow_attr *flow_attr,
-				    int domain)
+				    int domain, struct ib_udata *udata)
 {
 	int err = 0, i = 0, j = 0;
 	struct mlx4_ib_flow *mflow;
@@ -1866,6 +1866,10 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
 		return ERR_PTR(-EOPNOTSUPP);

+	if (udata &&
+	    udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
+		return ERR_PTR(-EOPNOTSUPP);
+
 	memset(type, 0, sizeof(type));

 	mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 25a271ef8374..59f86198eb3b 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3363,7 +3363,8 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,

 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 					   struct ib_flow_attr *flow_attr,
-					   int domain)
+					   int domain,
+					   struct ib_udata *udata)
 {
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
 	struct mlx5_ib_qp *mqp = to_mqp(qp);
@@ -3375,6 +3376,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 	int err;
 	int underlay_qpn;

+	if (udata &&
+	    udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
+		return ERR_PTR(-EOPNOTSUPP);
+
 	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
 		return ERR_PTR(-ENOMEM);

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index f6bd3b97b971..80956b1c9f4d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2459,7 +2459,8 @@ struct ib_device {
 	struct ib_flow *	   (*create_flow)(struct ib_qp *qp,
 						  struct ib_flow_attr
 						  *flow_attr,
-						  int domain);
+						  int domain,
+						  struct ib_udata *udata);
 	int			   (*destroy_flow)(struct ib_flow *flow_id);
 	int			   (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
 						      struct ib_mr_status *mr_status);

^ permalink raw reply related

* [PATCH rdma-next v2 13/13] IB/mlx5: Add counters read support
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

This patch implements the uverbs counters read API. It uses the read
counters function specific to the given type to accomplish its task.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c | 43 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f4da59e39c9e..d775fac9a1ef 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5327,6 +5327,48 @@ static void depopulate_specs_root(struct mlx5_ib_dev *dev)
 	uverbs_free_spec_tree(dev->ib_dev.specs_root);
 }

+static int mlx5_ib_read_counters(struct ib_counters *counters,
+				 struct ib_counters_read_attr *read_attr,
+				 struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+	struct mlx5_read_counters_attr mread_attr = {};
+	u32 *desc;
+	int ret, i;
+
+	mutex_lock(&mcounters->mcntrs_mutex);
+	if (mcounters->cntrs_max_index > read_attr->ncounters) {
+		ret = -EINVAL;
+		goto err_bound;
+	}
+
+	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
+				 GFP_KERNEL);
+	if (!mread_attr.out) {
+		ret = -ENOMEM;
+		goto err_bound;
+	}
+
+	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
+	mread_attr.flags = read_attr->flags;
+	ret = mcounters->read_counters(counters->device, &mread_attr);
+	if (ret)
+		goto err_read;
+
+	/* do the pass over the counters data array to assign according to the
+	 * descriptions and indexing pairs
+	 */
+	desc = mcounters->counters_data;
+	for (i = 0; i < mcounters->ncounters * 2; i += 2)
+		read_attr->counters_buff[desc[i + 1]] += mread_attr.out[desc[i]];
+
+err_read:
+	kfree(mread_attr.out);
+err_bound:
+	mutex_unlock(&mcounters->mcntrs_mutex);
+	return ret;
+}
+
 static int mlx5_ib_destroy_counters(struct ib_counters *counters)
 {
 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
@@ -5600,6 +5642,7 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
 	dev->ib_dev.create_counters = mlx5_ib_create_counters;
 	dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
+	dev->ib_dev.read_counters = mlx5_ib_read_counters;

 	err = init_node_data(dev);
 	if (err)

^ permalink raw reply related

* [PATCH 15/19] net: qualcomm: MODULE_DEVICE_TABLE(serdev)
From: Ricardo Ribalda Delgado @ 2018-05-29 13:10 UTC (permalink / raw)
  To: linux-kernel, linux-serial
  Cc: Ricardo Ribalda Delgado, Lino Sanfilippo, David S . Miller,
	Stefan Wahren, Rob Herring, Johan Hovold, netdev
In-Reply-To: <20180529131014.18641-1-ricardo.ribalda@gmail.com>

Export serdev table to the module header, allowing module autoload via
udev/modprobe.

Cc: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Stefan Wahren <stefan.wahren@i2se.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Johan Hovold <johan@kernel.org>
Cc: netdev@vger.kernel.org
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
---
 drivers/net/ethernet/qualcomm/qca_uart.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
index db6068cd7a1f..bb7aed805083 100644
--- a/drivers/net/ethernet/qualcomm/qca_uart.c
+++ b/drivers/net/ethernet/qualcomm/qca_uart.c
@@ -405,6 +405,12 @@ static void qca_uart_remove(struct serdev_device *serdev)
 	free_netdev(qca->net_dev);
 }
 
+static struct serdev_device_id qca_uart_serdev_id[] = {
+	{ QCAUART_DRV_NAME, },
+	{},
+};
+MODULE_DEVICE_TABLE(serdev, qca_uart_serdev_id);
+
 static struct serdev_device_driver qca_uart_driver = {
 	.probe = qca_uart_probe,
 	.remove = qca_uart_remove,
@@ -412,6 +418,7 @@ static struct serdev_device_driver qca_uart_driver = {
 		.name = QCAUART_DRV_NAME,
 		.of_match_table = of_match_ptr(qca_uart_of_match),
 	},
+	.id_table = qca_uart_serdev_id,
 };
 
 module_serdev_device_driver(qca_uart_driver);
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH net-next v3 6/7] net: bridge: Notify about bridge VLANs
From: Petr Machata @ 2018-05-29 13:12 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: devel, andrew, f.fainelli, vivien.didelot, nikolay, netdev,
	bridge, idosch, jiri, razvan.stefanescu, gregkh, davem
In-Reply-To: <20180529105535.fhap2w4tvj3tesei@mwanda>

Dan Carpenter <dan.carpenter@oracle.com> writes:

> On Mon, May 28, 2018 at 05:11:04PM +0200, Petr Machata wrote:
>> @@ -580,6 +591,9 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
>>  			vg->num_vlans++;
>>  			*changed = true;
>>  		}
>> +		ret = br_switchdev_port_vlan_add(br->dev, vid, flags);
>> +		if (ret && ret != -EOPNOTSUPP)
>> +			return ret;
>
> We should probably do some error handling instead of returning directly?

I missed that, you are right. There's a bunch of mutations in the block
above.

Thanks,
Petr

^ permalink raw reply

* [PATCH] can: m_can: Fix runtime resume call
From: Faiz Abbas @ 2018-05-29 13:24 UTC (permalink / raw)
  To: linux-can, netdev, linux-kernel, linux-omap; +Cc: mkl, wg, faiz_abbas

pm_runtime_get_sync() returns 1 if the device is already in the 'active'
state. This is not a failure case, so m_can_clk_start() should return
success.

Therefore fix error handling for pm_runtime_get_sync() call such that
it returns success when the value is 1.

Also clean up the TODO about using runtime PM for sleep mode, as that is
already implemented.

Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
---
 drivers/net/can/m_can/m_can.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index a9fbf81ac3d4..04c48371ab2a 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -634,10 +634,12 @@ static int m_can_clk_start(struct m_can_priv *priv)
 	int err;
 
 	err = pm_runtime_get_sync(priv->device);
-	if (err)
+	if (err < 0) {
 		pm_runtime_put_noidle(priv->device);
+		return err;
+	}
 
-	return err;
+	return 0;
 }
 
 static void m_can_clk_stop(struct m_can_priv *priv)
@@ -1687,8 +1689,6 @@ static int m_can_plat_probe(struct platform_device *pdev)
 	return ret;
 }
 
-/* TODO: runtime PM with power down or sleep mode  */
-
 static __maybe_unused int m_can_suspend(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH] net: davinci: fix building davinci mdio code without CONFIG_OF
From: Sekhar Nori @ 2018-05-29 13:25 UTC (permalink / raw)
  To: Arnd Bergmann, David S. Miller
  Cc: Grygorii Strashko, Florian Fainelli, linux-omap, netdev,
	linux-kernel
In-Reply-To: <20180528155059.2736080-1-arnd@arndb.de>

Hi Arnd,

On Monday 28 May 2018 09:20 PM, Arnd Bergmann wrote:
> Test-building this driver on targets without CONFIG_OF revealed a build
> failure:
> 
> drivers/net/ethernet/ti/davinci_mdio.c: In function 'davinci_mdio_probe':
> drivers/net/ethernet/ti/davinci_mdio.c:380:9: error: implicit declaration of function 'davinci_mdio_probe_dt'; did you mean 'davinci_mdio_probe'? [-Werror=implicit-function-declaration]
> 
> This adjusts the #ifdef logic in the driver to make it build in
> all configurations.
> 
> Fixes: 2652113ff043 ("net: ethernet: ti: Allow most drivers with COMPILE_TEST")
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Your patch fixes the issue.

Acked-by: Sekhar Nori <nsekhar@ti.com>

One question below:

> ---
>  drivers/net/ethernet/ti/davinci_mdio.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
> index 8ac72831af05..a98aedae1b41 100644
> --- a/drivers/net/ethernet/ti/davinci_mdio.c
> +++ b/drivers/net/ethernet/ti/davinci_mdio.c
> @@ -321,7 +321,6 @@ static int davinci_mdio_write(struct mii_bus *bus, int phy_id,
>  	return ret;
>  }
>  
> -#if IS_ENABLED(CONFIG_OF)
>  static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
>  			 struct platform_device *pdev)
>  {
> @@ -339,7 +338,6 @@ static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
>  
>  	return 0;
>  }
> -#endif
>  
>  #if IS_ENABLED(CONFIG_OF)
>  static const struct davinci_mdio_of_param of_cpsw_mdio_data = {
> @@ -374,7 +372,7 @@ static int davinci_mdio_probe(struct platform_device *pdev)
>  		return -ENOMEM;
>  	}
>  
> -	if (dev->of_node) {
> +	if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
>  		const struct of_device_id	*of_id;
>  
>  		ret = davinci_mdio_probe_dt(&data->pdata, pdev);

I was expecting this one change to fix the issue since the if() block
should be compiled away removing references to davinci_mdio_probe_dt().

The code does get compiled out and there are no references to
davinci_mdio_probe_dt() in the final object when !CONFIG_OF.

But the compile error remains if the #ifdefs you removed above are
put back. Not sure why.

Thanks,
Sekhar

^ permalink raw reply

* Re: [PATCH net-next] net: davinci_mdio: fix building error without CONFIG_OF
From: YueHaibing @ 2018-05-29 13:34 UTC (permalink / raw)
  To: davem, grygorii.strashko, muvarov
  Cc: netdev, linux-kernel, andrew, fugang.duan, linux-omap
In-Reply-To: <20180529115651.3420-1-yuehaibing@huawei.com>

Please ignore this; there is already a patch from Arnd Bergmann <arnd@arndb.de>:

[PATCH] net: davinci: fix building davinci mdio code without CONFIG_OF


On 2018/5/29 19:56, YueHaibing wrote:
> gcc report a build error when compiling without CONFIG_OF
> drivers/net/ethernet/ti/davinci_mdio.c: In function ‘davinci_mdio_probe’:
> drivers/net/ethernet/ti/davinci_mdio.c:380:9: error: implicit declaration of function ‘davinci_mdio_probe_dt’ [-Werror=implicit-function-declaration]
>    ret = davinci_mdio_probe_dt(&data->pdata, pdev);
>          ^
> Fixes: 9eae9c7d0875 ("drivers: net: davinci_mdio: enable pm runtime auto for ti cpsw-mdio")
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
> ---
>  drivers/net/ethernet/ti/davinci_mdio.c | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
> index 8ac7283..6e544d9 100644
> --- a/drivers/net/ethernet/ti/davinci_mdio.c
> +++ b/drivers/net/ethernet/ti/davinci_mdio.c
> @@ -339,9 +339,7 @@ static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
>  
>  	return 0;
>  }
> -#endif
>  
> -#if IS_ENABLED(CONFIG_OF)
>  static const struct davinci_mdio_of_param of_cpsw_mdio_data = {
>  	.autosuspend_delay_ms = 100,
>  };
> @@ -352,6 +350,12 @@ static const struct of_device_id davinci_mdio_of_mtable[] = {
>  	{ /* sentinel */ },
>  };
>  MODULE_DEVICE_TABLE(of, davinci_mdio_of_mtable);
> +#else
> +static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
> +				 struct platform_device *pdev)
> +{
> +	return -EINVAL;
> +}
>  #endif
>  
>  static int davinci_mdio_probe(struct platform_device *pdev)
> 

^ permalink raw reply

* [PATCH v4 net-next 1/2] tcp: use data length instead of skb->len in tcp_probe
From: Yafang Shao @ 2018-05-29 13:35 UTC (permalink / raw)
  To: songliubraving, edumazet, davem; +Cc: netdev, linux-kernel, Yafang Shao

At this point skb->len includes the TCP header length, so it is not
meaningful to the user. The data length is more helpful, as it lets us
easily filter out packets without payload.

Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>

---
v4: no change on this patch
v3: tcp_hdr() is a little more expensive than skb->data, so replace it with
    skb->data.

---
 include/trace/events/tcp.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index c1a5284..7ff0446 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -236,7 +236,7 @@
 		__field(__u16, sport)
 		__field(__u16, dport)
 		__field(__u32, mark)
-		__field(__u16, length)
+		__field(__u16, data_len)
 		__field(__u32, snd_nxt)
 		__field(__u32, snd_una)
 		__field(__u32, snd_cwnd)
@@ -250,6 +250,7 @@
 	TP_fast_assign(
 		const struct tcp_sock *tp = tcp_sk(sk);
 		const struct inet_sock *inet = inet_sk(sk);
+		const struct tcphdr *th = (const struct tcphdr *)skb->data;
 
 		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
 		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
@@ -261,7 +262,7 @@
 		__entry->dport = ntohs(inet->inet_dport);
 		__entry->mark = skb->mark;
 
-		__entry->length = skb->len;
+		__entry->data_len = skb->len - __tcp_hdrlen(th);
 		__entry->snd_nxt = tp->snd_nxt;
 		__entry->snd_una = tp->snd_una;
 		__entry->snd_cwnd = tp->snd_cwnd;
@@ -272,9 +273,9 @@
 		__entry->sock_cookie = sock_gen_cookie(sk);
 	),
 
-	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx",
+	TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx",
 		  __entry->saddr, __entry->daddr, __entry->mark,
-		  __entry->length, __entry->snd_nxt, __entry->snd_una,
+		  __entry->data_len, __entry->snd_nxt, __entry->snd_una,
 		  __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
 		  __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
 );
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v4 net-next 2/2] tcp: minor optimization around tcp_hdr() usage in tcp receive path
From: Yafang Shao @ 2018-05-29 13:35 UTC (permalink / raw)
  To: songliubraving, edumazet, davem; +Cc: netdev, linux-kernel, Yafang Shao
In-Reply-To: <1527600915-10955-1-git-send-email-laoar.shao@gmail.com>

This is additional to the commit ea1627c20c34 ("tcp: minor optimizations around tcp_hdr() usage").
At this point, skb->data is same with tcp_hdr() as tcp header has not
been pulled yet.
Remove the third parameter of tcp_rcv_established() and put it into
the function body.

Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>

---
v4: remove the third parameter of tcp_rcv_established()
---
 include/net/tcp.h    | 3 +--
 net/ipv4/tcp_input.c | 4 ++--
 net/ipv4/tcp_ipv4.c  | 2 +-
 net/ipv6/tcp_ipv6.c  | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 952d842..029a51b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -334,8 +334,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 void tcp_delack_timer_handler(struct sock *sk);
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
-void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct tcphdr *th);
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
 void tcp_rcv_space_adjust(struct sock *sk);
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
 void tcp_twsk_destructor(struct sock *sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1191cac..1d70dab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5390,11 +5390,11 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
  *	the rest is checked inline. Fast processing is turned on in
  *	tcp_data_queue when everything is OK.
  */
-void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct tcphdr *th)
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 {
 	unsigned int len = skb->len;
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcphdr *th = (const struct tcphdr *)skb->data;
 
 	/* TCP congestion window tracking */
 	trace_tcp_probe(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index adbdb50..749b0ef 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1486,7 +1486,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 				sk->sk_rx_dst = NULL;
 			}
 		}
-		tcp_rcv_established(sk, skb, tcp_hdr(skb));
+		tcp_rcv_established(sk, skb);
 		return 0;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7d47c2b..8764a63 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1322,7 +1322,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 			}
 		}
 
-		tcp_rcv_established(sk, skb, tcp_hdr(skb));
+		tcp_rcv_established(sk, skb);
 		if (opt_skb)
 			goto ipv6_pktoptions;
 		return 0;
-- 
1.8.3.1

^ permalink raw reply related

* Re: [pull request][for-next 00/12] Mellanox, mlx5e updates 2018-05-25
From: David Miller @ 2018-05-29 13:47 UTC (permalink / raw)
  To: saeedm; +Cc: netdev
In-Reply-To: <20180526000207.19568-1-saeedm@mellanox.com>

From: Saeed Mahameed <saeedm@mellanox.com>
Date: Fri, 25 May 2018 17:01:55 -0700

> This is a mlx5e only pull request, for more information please see tag
> log below.
> 
> Please pull and let me know if there's any problem.

Pulled, thanks Saeed.

There was a minor conflict to resolve (simple overlapping changes).

^ permalink raw reply

* Re: [PATCH net-next 00/14] nfp: abm: RED/MQ qdisc offload
From: David Miller @ 2018-05-29 13:51 UTC (permalink / raw)
  To: jakub.kicinski
  Cc: jiri, xiyou.wangcong, john.fastabend, netdev, oss-drivers,
	alexei.starovoitov, nogahf, yuvalm, gerlitz.or
In-Reply-To: <20180526045338.10993-1-jakub.kicinski@netronome.com>

From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 25 May 2018 21:53:24 -0700

> This is second batch of advanced buffer management nfp driver
> changes.  This series adds the qdisc offload.  Support for
> a very simple subset of RED qdisc offload is added as needed
> for DCTCP ECN marking (min and max thresholds set to the same
> value).
> 
> The first two patches fix glitches introduced by the previous
> series.  We have to be careful about phys_port_name handling,
> because VFs share the same code path, and some user space may
> get confused by the names we chose.
> 
> Since unlike previous offloads we can report the queue backlog
> both in bytes and packets we need to adjust how statistics are
> added up in the core (patch 6).
> 
> There are some extra statistics we want to expose which don't
> fit into TC stats, namely counts of packets which have been fast-
> -forwarded without getting enqueued because there was no
> contention and number of packets that were ever queued (sum of
> all momentary backlogs).  We expose those through ethtool stats
> (patches 8 and 9).
> 
> Remaining 5 patches add MQ offload - to be able to set different
> configurations on different queues.  Representors are made multi-
> -queue and we add offload support to MQ.  MQ stats are added up
> before calling ->dump qdiscs on the children, and therefore don't
> include updated offload values.  To avoid clearly incorrect stats
> MQ is made to also request stats update from offloads.  This way
> we can correct the diff at the driver level.

Series applied, thanks Jakub.

^ permalink raw reply

* Re: [PATCH v4 net-next 2/2] tcp: minor optimization around tcp_hdr() usage in tcp receive path
From: Eric Dumazet @ 2018-05-29 13:54 UTC (permalink / raw)
  To: Yafang Shao, songliubraving, edumazet, davem; +Cc: netdev, linux-kernel
In-Reply-To: <1527600915-10955-2-git-send-email-laoar.shao@gmail.com>



On 05/29/2018 06:35 AM, Yafang Shao wrote:
> This is additional to the commit ea1627c20c34 ("tcp: minor optimizations around tcp_hdr() usage").
> At this point, skb->data is same with tcp_hdr() as tcp header has not
> been pulled yet.
> Remove the third parameter of tcp_rcv_established() and put it into
> the function body.
> 
> Cc: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> 
> ---
> v4: remove the third parameter of tcp_rcv_established()
> ---
>  include/net/tcp.h    | 3 +--
>  net/ipv4/tcp_input.c | 4 ++--
>  net/ipv4/tcp_ipv4.c  | 2 +-
>  net/ipv6/tcp_ipv6.c  | 2 +-
>  4 files changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 952d842..029a51b 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -334,8 +334,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
>  void tcp_delack_timer_handler(struct sock *sk);
>  int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
>  int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
> -void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
> -			 const struct tcphdr *th);
> +void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
>  void tcp_rcv_space_adjust(struct sock *sk);
>  int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
>  void tcp_twsk_destructor(struct sock *sk);
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 1191cac..1d70dab 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -5390,11 +5390,11 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
>   *	the rest is checked inline. Fast processing is turned on in
>   *	tcp_data_queue when everything is OK.
>   */
> -void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
> -			 const struct tcphdr *th)
> +void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
>  {
>  	unsigned int len = skb->len;
>  	struct tcp_sock *tp = tcp_sk(sk);
> +	const struct tcphdr *th = (const struct tcphdr *)skb->data;
>  
>

Please reorder list to get a reverse christmas tree.

const struct tcphdr *th = (const struct tcphdr *)skb->data;
struct tcp_sock *tp = tcp_sk(sk);
unsigned int len = skb->len;

^ permalink raw reply

* Re: [PATCH net-next] net: remove unnecessary genlmsg_cancel() calls
From: David Miller @ 2018-05-29 13:54 UTC (permalink / raw)
  To: yuehaibing
  Cc: jiri, netdev, linux-kernel, johannes, kvalo, kuznet, yoshfuji,
	sameo, linux-wireless
In-Reply-To: <20180526111548.17184-1-yuehaibing@huawei.com>

From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 26 May 2018 19:15:48 +0800

> the message be freed immediately, no need to trim it
> back to the previous size.
> 
> Inspired by commit 7a9b3ec1e19f ("nl80211: remove unnecessary genlmsg_cancel() calls")
> 
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>

Applied.

^ permalink raw reply

* Re: [PATCH] atm: zatm: fix memcmp casting
From: David Miller @ 2018-05-29 14:00 UTC (permalink / raw)
  To: brnkv.i1; +Cc: 3chas3, linux-atm-general, netdev, linux-kernel
In-Reply-To: <20180525174952.21965-1-brnkv.i1@gmail.com>

From: Ivan Bornyakov <brnkv.i1@gmail.com>
Date: Fri, 25 May 2018 20:49:52 +0300

> memcmp() returns int, but eprom_try_esi() cast it to unsigned char. One
> can lose significant bits and get 0 from non-0 value returned by the
> memcmp().
> 
> Signed-off-by: Ivan Bornyakov <brnkv.i1@gmail.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net] net: sched: check netif_xmit_frozen_or_stopped() in sch_direct_xmit()
From: David Miller @ 2018-05-29 14:02 UTC (permalink / raw)
  To: songliubraving; +Cc: netdev, kernel-team, john.fastabend
In-Reply-To: <20180525181144.224395-1-songliubraving@fb.com>

From: Song Liu <songliubraving@fb.com>
Date: Fri, 25 May 2018 11:11:44 -0700

> Summary:
> 
> At the end of sch_direct_xmit(), we are in the else path of
> !dev_xmit_complete(ret), which means ret == NETDEV_TX_OK. The following
> condition will always fail and netif_xmit_frozen_or_stopped() is not
> checked at all.
> 
>     if (ret && netif_xmit_frozen_or_stopped(txq))
>          return false;
> 
> In this patch, this condition is fixed as:
> 
>     if (netif_xmit_frozen_or_stopped(txq))
>          return false;
> 
> and further simplifies the code as:
> 
>     return !netif_xmit_frozen_or_stopped(txq);
> 
> Fixes: 29b86cdac00a ("net: sched: remove remaining uses for qdisc_qlen in xmit path")
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: David S. Miller <davem@davemloft.net>
> Signed-off-by: Song Liu <songliubraving@fb.com>

I expect a new version of this patch which removes the test entirely.

^ permalink raw reply

* Re: [PATCH, net-next] net/mlx5e: fix TLS dependency
From: David Miller @ 2018-05-29 14:04 UTC (permalink / raw)
  To: arnd
  Cc: saeedm, leon, borisp, ilant, ogerlitz, ilyal, ferasda, netdev,
	linux-rdma, linux-kernel
In-Reply-To: <20180525213630.2119214-1-arnd@arndb.de>

From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 25 May 2018 23:36:06 +0200

> With CONFIG_TLS=m and MLX5_CORE_EN=y, we get a link failure:
> 
> drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.o: In function `mlx5e_tls_handle_ooo':
> tls_rxtx.c:(.text+0x24c): undefined reference to `tls_get_record'
> drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.o: In function `mlx5e_tls_handle_tx_skb':
> tls_rxtx.c:(.text+0x9a8): undefined reference to `tls_device_sk_destruct'
> 
> This narrows down the dependency to only allow the configurations
> that will actually work. The existing dependency on TLS_DEVICE is
> not sufficient here since MLX5_EN_TLS is a 'bool' symbol.
> 
> Fixes: c83294b9efa5 ("net/mlx5e: TLS, Add Innova TLS TX support")
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Applied, thank you.

^ permalink raw reply

* Re: [PATCH] net: qcom/emac: fix device tree initialization
From: David Miller @ 2018-05-29 14:06 UTC (permalink / raw)
  To: timur; +Cc: arnd, hpuranik, netdev
In-Reply-To: <1527384554-14493-1-git-send-email-timur@codeaurora.org>

From: Timur Tabi <timur@codeaurora.org>
Date: Sat, 26 May 2018 20:29:14 -0500

> Commit "net: qcom/emac: Encapsulate sgmii ops under one structure"
> introduced the sgmii_ops structure, but did not correctly initialize
> it on device tree platforms.  This resulted in compiler warnings when
> ACPI is not enabled.
> 
> Reported-by: Arnd Bergmann <arnd@arndb.de>
> Signed-off-by: Timur Tabi <timur@codeaurora.org>

Applied to net-next, thank you.

^ permalink raw reply

* Re: [PATCH net] mlxsw: spectrum: Forbid creation of VLAN 1 over port/LAG
From: David Miller @ 2018-05-29 14:08 UTC (permalink / raw)
  To: idosch; +Cc: netdev, jiri, petrm, mlxsw
In-Reply-To: <20180527064841.32199-1-idosch@mellanox.com>

From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 27 May 2018 09:48:41 +0300

> From: Petr Machata <petrm@mellanox.com>
> 
> VLAN 1 is internally used for untagged traffic. Prevent creation of
> explicit netdevice for that VLAN, because that currently isn't supported
> and leads to the NULL pointer dereference cited below.
> 
> Fix by preventing creation of VLAN devices with VID of 1 over mlxsw
> devices or LAG devices that involve mlxsw devices.
 ...
> Fixes: 9589a7b5d7d9 ("mlxsw: spectrum: Handle VLAN devices linking / unlinking")
> Suggested-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Petr Machata <petrm@mellanox.com>
> Signed-off-by: Ido Schimmel <idosch@mellanox.com>

Applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH net-next 0/8] nfp: offload LAG for tc flower egress
From: John Hurley @ 2018-05-29 14:08 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Jiri Pirko, David Miller, Linux Netdev List, oss-drivers,
	Jay Vosburgh, Veaceslav Falico, Andy Gospodarek
In-Reply-To: <20180525194728.7aa4a116@cakuba>

On Sat, May 26, 2018 at 3:47 AM, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> On Fri, 25 May 2018 08:48:09 +0200, Jiri Pirko wrote:
>> Thu, May 24, 2018 at 04:22:47AM CEST, jakub.kicinski@netronome.com wrote:
>> >Hi!
>> >
>> >This series from John adds bond offload to the nfp driver.  Patch 5
>> >exposes the hash type for NETDEV_LAG_TX_TYPE_HASH to make sure nfp
>> >hashing matches that of the software LAG.  This may be unnecessarily
>> >conservative, let's see what LAG maintainers think :)
>>
>> So you need to restrict offload to only certain hash algo? In mlxsw, we
>> just ignore the lag setting and do some hw default hashing. Would that not
>> be enough? Note that there's a good reason for it, as you see, in team, the
>> hashing is done in a BPF function and could be totally arbitrary.
>> Your patchset effectively disables team offload for nfp.
>
> My understanding is that the project requirements only called for L3/L4
> hash algorithm offload, hence the temptation to err on the side of
> caution and not offload all the bond configurations.  John can provide
> more details.  Not being able to offload team is unfortunate indeed.

Hi Jiri,
Yes, as Jakub mentions, we restrict ourselves to L3/L4 hash algorithm
as this is currently what is supported in fw.
Hopefully this will change as fw features are expanded.
I understand the issue this presents with offloading team.
Perhaps resorting to a default hw hash for team is acceptable.
John

^ permalink raw reply

* Re: [PATCH net-next 0/3] mlxsw: use MRSR register for FW reset
From: David Miller @ 2018-05-29 14:10 UTC (permalink / raw)
  To: idosch; +Cc: netdev, jiri, mlxsw
In-Reply-To: <20180527065615.1329-1-idosch@mellanox.com>

From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 27 May 2018 09:56:12 +0300

> Jiri says:
> 
> Introduce a MRSR register definition and use it to do FW reset instead
> of existing mechanism using PCI BAR0 register.

Series applied to net-next.

^ permalink raw reply

* Re: [PATCH net-next 0/7] net: Add address attribute to control metric of prefix route
From: David Miller @ 2018-05-29 14:13 UTC (permalink / raw)
  To: dsahern; +Cc: netdev, roopa, dsahern
In-Reply-To: <20180527151000.30488-1-dsahern@kernel.org>

From: dsahern@kernel.org
Date: Sun, 27 May 2018 08:09:52 -0700

> For use cases such as VRR (Virtual Router Redundancy) interface managers
> want efficient control over the order of prefix routes when multiple
> interfaces have addresses with overlapping/duplicate subnets.
> 
> Currently, if two interfaces have addresses in the same subnet, the order
> of the prefix route entries is determined by the order in which the
> addresses are assigned or the links brought up. Any actions like cycling
> an interface up and down changes that order. This set adds a new attribute
> for addresses to allow a user to specify the metric of the prefix route
> associated with an address giving interface managers better and more
> efficient control of the order of prefix routes.

Looks great, series applied, thanks David.

^ permalink raw reply

* Re: [PATCH v2 net-next] tcp: use data length instead of skb->len in tcp_probe
From: David Miller @ 2018-05-29 14:15 UTC (permalink / raw)
  To: laoar.shao; +Cc: songliubraving, netdev, linux-kernel
In-Reply-To: <1527243245-29582-1-git-send-email-laoar.shao@gmail.com>

From: Yafang Shao <laoar.shao@gmail.com>
Date: Fri, 25 May 2018 18:14:05 +0800

> skb->len is meaningless to user.
> data length could be more helpful, with which we can easily filter out
> the packet without payload.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>

Applied, thank you.

^ permalink raw reply

* [PATCH] can: m_can: Move accessing of message ram to after clocks are enabled
From: Faiz Abbas @ 2018-05-29 14:19 UTC (permalink / raw)
  To: linux-can, netdev, linux-kernel, linux-omap; +Cc: mkl, wg, faiz_abbas

MCAN message ram should only be accessed once clocks are enabled.
Therefore, move the call to parse/init the message ram to after
clocks are enabled.

Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
---
 drivers/net/can/m_can/m_can.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index b397a33f3d32..a9fbf81ac3d4 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1642,8 +1642,6 @@ static int m_can_plat_probe(struct platform_device *pdev)
 	priv->can.clock.freq = clk_get_rate(cclk);
 	priv->mram_base = mram_addr;
 
-	m_can_of_parse_mram(priv, mram_config_vals);
-
 	platform_set_drvdata(pdev, dev);
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
@@ -1666,6 +1664,8 @@ static int m_can_plat_probe(struct platform_device *pdev)
 		goto clk_disable;
 	}
 
+	m_can_of_parse_mram(priv, mram_config_vals);
+
 	devm_can_led_init(dev);
 
 	of_can_transceiver(dev);
@@ -1715,8 +1715,6 @@ static __maybe_unused int m_can_resume(struct device *dev)
 
 	pinctrl_pm_select_default_state(dev);
 
-	m_can_init_ram(priv);
-
 	priv->can.state = CAN_STATE_ERROR_ACTIVE;
 
 	if (netif_running(ndev)) {
@@ -1726,6 +1724,7 @@ static __maybe_unused int m_can_resume(struct device *dev)
 		if (ret)
 			return ret;
 
+		m_can_init_ram(priv);
 		m_can_start(ndev);
 		netif_device_attach(ndev);
 		netif_start_queue(ndev);
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH] PCI: allow drivers to limit the number of VFs to 0
From: Don Dutile @ 2018-05-29 14:29 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Jakub Kicinski, Bjorn Helgaas, linux-pci, netdev, Sathya Perla,
	Felix Manlunas, alexander.duyck, john.fastabend, Jacob Keller,
	oss-drivers, Christoph Hellwig
In-Reply-To: <20180525204651.GA92995@bhelgaas-glaptop.roam.corp.google.com>

On 05/25/2018 04:46 PM, Bjorn Helgaas wrote:
> On Fri, May 25, 2018 at 03:27:52PM -0400, Don Dutile wrote:
>> On 05/25/2018 10:02 AM, Bjorn Helgaas wrote:
>>> On Thu, May 24, 2018 at 06:20:15PM -0700, Jakub Kicinski wrote:
>>>> Hi Bjorn!
>>>>
>>>> On Thu, 24 May 2018 18:57:48 -0500, Bjorn Helgaas wrote:
>>>>> On Mon, Apr 02, 2018 at 03:46:52PM -0700, Jakub Kicinski wrote:
>>>>>> Some user space depends on enabling sriov_totalvfs number of VFs
>>>>>> to not fail, e.g.:
>>>>>>
>>>>>> $ cat .../sriov_totalvfs > .../sriov_numvfs
>>>>>>
>>>>>> For devices whose VF support depends on loaded FW we have the
>>>>>> pci_sriov_{g,s}et_totalvfs() API.  However, this API uses 0 as
>>>>>> a special "unset" value, meaning drivers can't limit sriov_totalvfs
>>>>>> to 0.  Remove the special values completely and simply initialize
>>>>>> driver_max_VFs to total_VFs.  Then always use driver_max_VFs.
>>>>>> Add a helper for drivers to reset the VF limit back to total.
>>>>>
>>>>> I still can't really make sense out of the changelog.
>>>>>
>>>>> I think part of the reason it's confusing is because there are two
>>>>> things going on:
>>>>>
>>>>>     1) You want this:
>>>>>          pci_sriov_set_totalvfs(dev, 0);
>>>>>          x = pci_sriov_get_totalvfs(dev)
>>>>>
>>>>>        to return 0 instead of total_VFs.  That seems to connect with
>>>>>        your subject line.  It means "sriov_totalvfs" in sysfs could be
>>>>>        0, but I don't know how that is useful (I'm sure it is; just
>>>>>        educate me :))
>>>>
>>>> Let me just quote the bug report that got filed on our internal bug
>>>> tracker :)
>>>>
>>>>     When testing Juju Openstack with Ubuntu 18.04, enabling SR-IOV causes
>>>>     errors because Juju gets the sriov_totalvfs for SR-IOV-capable device
>>>>     then tries to set that as the sriov_numvfs parameter.
>>>>
>>>>     For SR-IOV incapable FW, the sriov_totalvfs parameter should be 0,
>>>>     but it's set to max.  When FW is switched to flower*, the correct
>>>>     sriov_totalvfs value is presented.
>>>>
>>>> * flower is a project name
>>>
>>>   From the point of view of the PCI core (which knows nothing about
>>> device firmware and relies on the architected config space described
>>> by the PCIe spec), this sounds like an erratum: with some firmware
>>> installed, the device is not capable of SR-IOV, but still advertises
>>> an SR-IOV capability with "TotalVFs > 0".
>>>
>>> Regardless of whether that's an erratum, we do allow PF drivers to use
>>> pci_sriov_set_totalvfs() to limit the number of VFs that may be
>>> enabled by writing to the PF's "sriov_numvfs" sysfs file.
>>>
>> +1.
>>
>>> But the current implementation does not allow a PF driver to limit VFs
>>> to 0, and that does seem nonsensical.
>>>
>> Well, not really -- claiming to support VFs, and then wanting it to be 0...
>> I could certainly argue is non-sensical.
>>  From a sw perspective, sure, see if we can set VFs to 0 (and reset to another value later).
>>
>> /me wishes that implementers would follow the architecture vs torquing it into strange shapes.
>>
>>>> My understanding is OpenStack uses sriov_totalvfs to determine how many
>>>> VFs can be enabled, looks like this is the code:
>>>>
>>>> http://git.openstack.org/cgit/openstack/charm-neutron-openvswitch/tree/hooks/neutron_ovs_utils.py#n464
>>>>
>>>>>     2) You're adding the pci_sriov_reset_totalvfs() interface.  I'm not
>>>>>        sure what you intend for this.  Is *every* driver supposed to
>>>>>        call it in .remove()?  Could/should this be done in the core
>>>>>        somehow instead of depending on every driver?
>>>>
>>>> Good question, I was just thinking yesterday we may want to call it
>>>> from the core, but I don't think it's strictly necessary nor always
>>>> sufficient (we may reload FW without re-probing).
>>>>
>>>> We have a device which supports different number of VFs based on the FW
>>>> loaded.  Some legacy FWs do not inform the driver how many VFs they can
>>>> support, because they support max.  So the flow in our driver is this:
>>>>
>>>> load_fw(dev);
>>>> ...
>>>> max_vfs = ask_fw_for_max_vfs(dev);
>>>> if (max_vfs >= 0)
>>>> 	return pci_sriov_set_totalvfs(dev, max_vfs);
>>>> else /* FW didn't tell us, assume max */
>>>> 	return pci_sriov_reset_totalvfs(dev);
>>>>
>>>> We also reset the max on device remove, but that's not strictly
>>>> necessary.
>>>>
>>>> Other users of pci_sriov_set_totalvfs() always know the value to set
>>>> the total to (either always get it from FW or it's a constant).
>>>>
>>>> If you prefer we can work out the correct max for those legacy cases in
>>>> the driver as well, although it seemed cleaner to just ask the core,
>>>> since it already has total_VFs value handy :)
>>>>
>>>>> I'm also having a hard time connecting your user-space command example
>>>>> with the rest of this.  Maybe it will make more sense to me tomorrow
>>>>> after some coffee.
>>>>
>>>> OpenStack assumes it will always be able to set sriov_numvfs to
>>>> sriov_totalvfs, see this 'if':
>>>>
>>>> http://git.openstack.org/cgit/openstack/charm-neutron-openvswitch/tree/hooks/neutron_ovs_utils.py#n512
>>>
>>> Thanks for educating me.  I think there are two issues here that we
>>> can separate.  I extracted the patch below for the first.
>>>
>>> The second is the question of resetting driver_max_VFs.  I think we
>>> currently have a general issue in the core:
>>>
>>>     - load PF driver 1
>>>     - driver calls pci_sriov_set_totalvfs() to reduce driver_max_VFs
>>>     - unload PF driver 1
>>>     - load PF driver 2
>>>
>>> Now driver_max_VFs is still stuck at the lower value set by driver 1.
>>> I don't think that's the way this should work.
>>>
>>> I guess this is partly a consequence of setting driver_max_VFs in
>>> sriov_init(), which is called before driver attach and should only
>> um, if it's at sriov_init() how is max changed by a PF driver?
>> or am I missing something subtle (a new sysfs param) as to what is being changed?
> 
> sriov_init() basically just sets the default driver_max_VFs to Total_VFs.
> 
> If the PF driver later calls pci_sriov_set_totalvfs(), it can reduce
> driver_max_VFs.
> 
> My concern is that there's nothing that resets driver_max_VFs back to
> Total_VFs if we unload and reload the PF driver.
> 
ok, gotcha.
any complication of this non-arch quirk. :-/

>>> depend on hardware characteristics, so it is related to the patch
>>> below.  But I think we should fix it in general, not just for
>>> netronome.
>>>
>>>
>>> commit 4a338bc6f94b9ad824ac944f5dfc249d6838719c
>>> Author: Jakub Kicinski <jakub.kicinski@netronome.com>
>>> Date:   Fri May 25 08:18:34 2018 -0500
>>>
>>>       PCI/IOV: Allow PF drivers to limit total_VFs to 0
>>>       Some SR-IOV PF drivers implement .sriov_configure(), which allows
>>>       user-space to enable VFs by writing the desired number of VFs to the sysfs
>>>       "sriov_numvfs" file (see sriov_numvfs_store()).
>>>       The PCI core limits the number of VFs to the TotalVFs advertised by the
>>>       device in its SR-IOV capability.  The PF driver can limit the number of VFs
>>>       to even fewer (it may have pre-allocated data structures or knowledge of
>>>       device limitations) by calling pci_sriov_set_totalvfs(), but previously it
>>>       could not limit the VFs to 0.
>>>       Change pci_sriov_get_totalvfs() so it always respects the VF limit imposed
>>>       by the PF driver, even if the limit is 0.
>>>       This sequence:
>>>         pci_sriov_set_totalvfs(dev, 0);
>>>         x = pci_sriov_get_totalvfs(dev);
>>>       previously set "x" to TotalVFs from the SR-IOV capability.  Now it will set
>>>       "x" to 0.
>>>       Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
>>>       Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
>>>
>>> diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>>> index 192b82898a38..d0d73dbbd5ca 100644
>>> --- a/drivers/pci/iov.c
>>> +++ b/drivers/pci/iov.c
>>> @@ -469,6 +469,7 @@ static int sriov_init(struct pci_dev *dev, int pos)
>>>    	iov->nres = nres;
>>>    	iov->ctrl = ctrl;
>>>    	iov->total_VFs = total;
>>> +	iov->driver_max_VFs = total;
>>>    	pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &iov->vf_device);
>>>    	iov->pgsz = pgsz;
>>>    	iov->self = dev;
>>> @@ -827,10 +828,7 @@ int pci_sriov_get_totalvfs(struct pci_dev *dev)
>>>    	if (!dev->is_physfn)
>>>    		return 0;
>>> -	if (dev->sriov->driver_max_VFs)
>>> -		return dev->sriov->driver_max_VFs;
>>> -
>>> -	return dev->sriov->total_VFs;
>>> +	return dev->sriov->driver_max_VFs;
>>>    }
>>>    EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs);
>>>
>>

^ permalink raw reply

* Re: [PATCH] net: davinci: fix building davinci mdio code without CONFIG_OF
From: Arnd Bergmann @ 2018-05-29 14:31 UTC (permalink / raw)
  To: Sekhar Nori
  Cc: David S. Miller, Grygorii Strashko, Florian Fainelli, linux-omap,
	Networking, Linux Kernel Mailing List
In-Reply-To: <3b99eb56-f303-98b8-cb1e-7ff134769c43@ti.com>

On Tue, May 29, 2018 at 3:25 PM, Sekhar Nori <nsekhar@ti.com> wrote:

>> @@ -374,7 +372,7 @@ static int davinci_mdio_probe(struct platform_device *pdev)
>>               return -ENOMEM;
>>       }
>>
>> -     if (dev->of_node) {
>> +     if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
>>               const struct of_device_id       *of_id;
>>
>>               ret = davinci_mdio_probe_dt(&data->pdata, pdev);
>
> I was expecting this one change to fix the issue since the if() block
> should be compiled away removing references to davinci_mdio_probe_dt().
>
> The code does get compiled out and there are no references to
> davinci_mdio_probe_dt() in the final object when !CONFIG_OF.
>
> But the compile error remains if the #ifdefs you removed above are
> installed back. Not sure why.

The way that "if (IS_ENABLED())" works, everything gets compiled at
first, but then the compiler uses dead code elimination to remove it
from the output after verifying that everything builds.

       Arnd

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox