Netdev List
 help / color / mirror / Atom feed
* [PATCH rdma-next 3/3] IB/mlx5: Add page fault handler for DC initiator WQE
From: Leon Romanovsky @ 2019-08-01 12:21 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Michael Guralnik, Moni Shoua,
	Saeed Mahameed, linux-netdev
In-Reply-To: <20190801122139.25224-1-leon@kernel.org>

From: Michael Guralnik <michaelgur@mellanox.com>

Parsing DC initiator WQEs upon page fault requires skipping an address
vector segment, as in UD WQEs.

Signed-off-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/odp.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 5e87a5e25574..6f1de5edbe8e 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1065,6 +1065,12 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 	case IB_QPT_UD:
 		transport_caps = dev->odp_caps.per_transport_caps.ud_odp_caps;
 		break;
+	case IB_QPT_DRIVER:
+		if (qp->qp_sub_type == MLX5_IB_QPT_DCI) {
+			transport_caps = dev->dc_odp_caps;
+			break;
+		}
+		/* fall through */
 	default:
 		mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport 0x%x\n",
 			    qp->ibqp.qp_type);
@@ -1078,7 +1084,8 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 		return -EFAULT;
 	}
 
-	if (qp->ibqp.qp_type == IB_QPT_UD) {
+	if (qp->ibqp.qp_type == IB_QPT_UD ||
+	    qp->qp_sub_type == MLX5_IB_QPT_DCI) {
 		av = *wqe;
 		if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
 			*wqe += sizeof(struct mlx5_av);
-- 
2.20.1


^ permalink raw reply related

* [PATCH rdma-next 2/3] IB/mlx5: Expose ODP for DC capabilities to user
From: Leon Romanovsky @ 2019-08-01 12:21 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Michael Guralnik, Moni Shoua,
	Saeed Mahameed, linux-netdev
In-Reply-To: <20190801122139.25224-1-leon@kernel.org>

From: Michael Guralnik <michaelgur@mellanox.com>

Return ODP capabilities for DC to user in alloc_context.

Signed-off-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c | 6 ++++++
 include/uapi/rdma/mlx5-abi.h      | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4a3d700cd783..a53e0dc7c17f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1954,6 +1954,12 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 		resp.response_length += sizeof(resp.dump_fill_mkey);
 	}
 
+	if (field_avail(typeof(resp), dc_odp_caps, udata->outlen)) {
+		resp.dc_odp_caps = dev->dc_odp_caps;
+		resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DC_ODP_CAPS;
+		resp.response_length += sizeof(resp.dc_odp_caps);
+	}
+
 	err = ib_copy_to_udata(udata, &resp, resp.response_length);
 	if (err)
 		goto out_mdev;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 624f5b53eb1f..12abc585f2ac 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -98,6 +98,7 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
 enum mlx5_ib_alloc_ucontext_resp_mask {
 	MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
 	MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY    = 1UL << 1,
+	MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DC_ODP_CAPS	   = 1UL << 2,
 };
 
 enum mlx5_user_cmds_supp_uhw {
@@ -147,6 +148,7 @@ struct mlx5_ib_alloc_ucontext_resp {
 	__u32	num_uars_per_page;
 	__u32	num_dyn_bfregs;
 	__u32	dump_fill_mkey;
+	__u32	dc_odp_caps;
 };
 
 struct mlx5_ib_alloc_pd_resp {
-- 
2.20.1


^ permalink raw reply related

* [PATCH mlx5-next 1/3] IB/mlx5: Query ODP capabilities for DC
From: Leon Romanovsky @ 2019-08-01 12:21 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Michael Guralnik, Moni Shoua,
	Saeed Mahameed, linux-netdev
In-Reply-To: <20190801122139.25224-1-leon@kernel.org>

From: Michael Guralnik <michaelgur@mellanox.com>

Set current capabilities of ODP for DC to max capabilities and cache
them in mlx5_ib.

Signed-off-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h           |  1 +
 drivers/infiniband/hw/mlx5/odp.c               | 18 ++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  6 ++++++
 include/linux/mlx5/mlx5_ifc.h                  |  4 +++-
 4 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index cb41a7e6255a..f99c71b3c876 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -967,6 +967,7 @@ struct mlx5_ib_dev {
 	struct mutex			slow_path_mutex;
 	int				fill_delay;
 	struct ib_odp_caps	odp_caps;
+	uint32_t		dc_odp_caps;
 	u64			odp_max_size;
 	struct mlx5_ib_pf_eq	odp_pf_eq;
 
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index b0c5de39d186..5e87a5e25574 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -353,6 +353,24 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 	if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.srq_receive))
 		caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
 
+	if (MLX5_CAP_ODP(dev->mdev, dc_odp_caps.send))
+		dev->dc_odp_caps |= IB_ODP_SUPPORT_SEND;
+
+	if (MLX5_CAP_ODP(dev->mdev, dc_odp_caps.receive))
+		dev->dc_odp_caps |= IB_ODP_SUPPORT_RECV;
+
+	if (MLX5_CAP_ODP(dev->mdev, dc_odp_caps.write))
+		dev->dc_odp_caps |= IB_ODP_SUPPORT_WRITE;
+
+	if (MLX5_CAP_ODP(dev->mdev, dc_odp_caps.read))
+		dev->dc_odp_caps |= IB_ODP_SUPPORT_READ;
+
+	if (MLX5_CAP_ODP(dev->mdev, dc_odp_caps.atomic))
+		dev->dc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
+
+	if (MLX5_CAP_ODP(dev->mdev, dc_odp_caps.srq_receive))
+		dev->dc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
 	if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
 	    MLX5_CAP_GEN(dev->mdev, null_mkey) &&
 	    MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b15b27a497fc..3995fc6d4d34 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -495,6 +495,12 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.send);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.receive);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.write);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
 
 	if (do_set)
 		err = set_caps(dev, set_ctx, set_sz,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ec571fd7fcf8..5eae8d734435 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -944,7 +944,9 @@ struct mlx5_ifc_odp_cap_bits {
 
 	struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps;
 
-	u8         reserved_at_100[0x700];
+	struct mlx5_ifc_odp_per_transport_service_cap_bits dc_odp_caps;
+
+	u8         reserved_at_100[0x6E0];
 };
 
 struct mlx5_ifc_calc_op {
-- 
2.20.1


^ permalink raw reply related

* [PATCH rdma-next 0/3] ODP support for mlx5 DC QPs
From: Leon Romanovsky @ 2019-08-01 12:21 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Michael Guralnik, Moni Shoua,
	Saeed Mahameed, linux-netdev

From: Leon Romanovsky <leonro@mellanox.com>

From Michael,

The series adds support for on-demand paging for DC transport.
Adding handling of DC WQE parsing upon page faults and exposing
capabilities.

As DC is mlx-only transport, the capabilities are exposed to the user
using the direct-verbs mechanism. Namely through the mlx5dv_query_device.

Thanks

Michael Guralnik (3):
  IB/mlx5: Query ODP capabilities for DC
  IB/mlx5: Expose ODP for DC capabilities to user
  IB/mlx5: Add page fault handler for DC initiator WQE

 drivers/infiniband/hw/mlx5/main.c             |  6 +++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h          |  1 +
 drivers/infiniband/hw/mlx5/odp.c              | 27 ++++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/main.c    |  6 +++++
 include/linux/mlx5/mlx5_ifc.h                 |  4 ++-
 include/uapi/rdma/mlx5-abi.h                  |  2 ++
 6 files changed, 44 insertions(+), 2 deletions(-)

--
2.20.1


^ permalink raw reply

* [PATCH net v2] mvpp2: fix panic on module removal
From: Matteo Croce @ 2019-08-01 12:13 UTC (permalink / raw)
  To: netdev
  Cc: Miquel Raynal, linux-kernel, Lorenzo Bianconi, Antoine Tenart,
	Maxime Chevallier, David S. Miller, Marcin Wojtas, Stefan Chulski

mvpp2 uses a delayed workqueue to gather traffic statistics.
On module removal the workqueue can be destroyed before calling
cancel_delayed_work_sync() on its works.
Fix it by moving the destroy_workqueue() call after mvpp2_port_remove().
Also remove an unneeded call to flush_workqueue()

    # rmmod mvpp2
    [ 2743.311722] mvpp2 f4000000.ethernet eth1: phy link down 10gbase-kr/10Gbps/Full
    [ 2743.320063] mvpp2 f4000000.ethernet eth1: Link is Down
    [ 2743.572263] mvpp2 f4000000.ethernet eth2: phy link down sgmii/1Gbps/Full
    [ 2743.580076] mvpp2 f4000000.ethernet eth2: Link is Down
    [ 2744.102169] mvpp2 f2000000.ethernet eth0: phy link down 10gbase-kr/10Gbps/Full
    [ 2744.110441] mvpp2 f2000000.ethernet eth0: Link is Down
    [ 2744.115614] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
    [ 2744.115615] Mem abort info:
    [ 2744.115616]   ESR = 0x96000005
    [ 2744.115617]   Exception class = DABT (current EL), IL = 32 bits
    [ 2744.115618]   SET = 0, FnV = 0
    [ 2744.115619]   EA = 0, S1PTW = 0
    [ 2744.115620] Data abort info:
    [ 2744.115621]   ISV = 0, ISS = 0x00000005
    [ 2744.115622]   CM = 0, WnR = 0
    [ 2744.115624] user pgtable: 4k pages, 39-bit VAs, pgdp=0000000422681000
    [ 2744.115626] [0000000000000000] pgd=0000000000000000, pud=0000000000000000
    [ 2744.115630] Internal error: Oops: 96000005 [#1] SMP
    [ 2744.115632] Modules linked in: mvpp2(-) algif_hash af_alg nls_iso8859_1 nls_cp437 vfat fat xhci_plat_hcd m25p80 spi_nor xhci_hcd mtd usbcore i2c_mv64xxx sfp usb_common marvell10g phy_generic spi_orion mdio_i2c i2c_core mvmdio phylink sbsa_gwdt ip_tables x_tables autofs4 [last unloaded: mvpp2]
    [ 2744.115654] CPU: 3 PID: 8357 Comm: kworker/3:2 Not tainted 5.3.0-rc2 #1
    [ 2744.115655] Hardware name: Marvell 8040 MACCHIATOBin Double-shot (DT)
    [ 2744.115665] Workqueue: events_power_efficient phylink_resolve [phylink]
    [ 2744.115669] pstate: a0000085 (NzCv daIf -PAN -UAO)
    [ 2744.115675] pc : __queue_work+0x9c/0x4d8
    [ 2744.115677] lr : __queue_work+0x170/0x4d8
    [ 2744.115678] sp : ffffff801001bd50
    [ 2744.115680] x29: ffffff801001bd50 x28: ffffffc422597600
    [ 2744.115684] x27: ffffff80109ae6f0 x26: ffffff80108e4018
    [ 2744.115688] x25: 0000000000000003 x24: 0000000000000004
    [ 2744.115691] x23: ffffff80109ae6e0 x22: 0000000000000017
    [ 2744.115694] x21: ffffffc42c030000 x20: ffffffc42209e8f8
    [ 2744.115697] x19: 0000000000000000 x18: 0000000000000000
    [ 2744.115699] x17: 0000000000000000 x16: 0000000000000000
    [ 2744.115701] x15: 0000000000000010 x14: ffffffffffffffff
    [ 2744.115702] x13: ffffff8090e2b95f x12: ffffff8010e2b967
    [ 2744.115704] x11: ffffff8010906000 x10: 0000000000000040
    [ 2744.115706] x9 : ffffff80109223b8 x8 : ffffff80109223b0
    [ 2744.115707] x7 : ffffffc42bc00068 x6 : 0000000000000000
    [ 2744.115709] x5 : ffffffc42bc00000 x4 : 0000000000000000
    [ 2744.115710] x3 : 0000000000000000 x2 : 0000000000000000
    [ 2744.115712] x1 : 0000000000000008 x0 : ffffffc42c030000
    [ 2744.115714] Call trace:
    [ 2744.115716]  __queue_work+0x9c/0x4d8
    [ 2744.115718]  delayed_work_timer_fn+0x28/0x38
    [ 2744.115722]  call_timer_fn+0x3c/0x180
    [ 2744.115723]  expire_timers+0x60/0x168
    [ 2744.115724]  run_timer_softirq+0xbc/0x1e8
    [ 2744.115727]  __do_softirq+0x128/0x320
    [ 2744.115731]  irq_exit+0xa4/0xc0
    [ 2744.115734]  __handle_domain_irq+0x70/0xc0
    [ 2744.115735]  gic_handle_irq+0x58/0xa8
    [ 2744.115737]  el1_irq+0xb8/0x140
    [ 2744.115738]  console_unlock+0x3a0/0x568
    [ 2744.115740]  vprintk_emit+0x200/0x2a0
    [ 2744.115744]  dev_vprintk_emit+0x1c8/0x1e4
    [ 2744.115747]  dev_printk_emit+0x6c/0x7c
    [ 2744.115751]  __netdev_printk+0x104/0x1d8
    [ 2744.115752]  netdev_printk+0x60/0x70
    [ 2744.115756]  phylink_resolve+0x38c/0x3c8 [phylink]
    [ 2744.115758]  process_one_work+0x1f8/0x448
    [ 2744.115760]  worker_thread+0x54/0x500
    [ 2744.115762]  kthread+0x12c/0x130
    [ 2744.115764]  ret_from_fork+0x10/0x1c
    [ 2744.115768] Code: aa1403e0 97fffbbe aa0003f5 b4000700 (f9400261)

Fixes: 118d6298f6f0 ("net: mvpp2: add ethtool GOP statistics")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index c51f1d5b550b..ad42cc0a2b4a 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -5759,9 +5759,6 @@ static int mvpp2_remove(struct platform_device *pdev)
 
 	mvpp2_dbgfs_cleanup(priv);
 
-	flush_workqueue(priv->stats_queue);
-	destroy_workqueue(priv->stats_queue);
-
 	fwnode_for_each_available_child_node(fwnode, port_fwnode) {
 		if (priv->port_list[i]) {
 			mutex_destroy(&priv->port_list[i]->gather_stats_lock);
@@ -5770,6 +5767,8 @@ static int mvpp2_remove(struct platform_device *pdev)
 		i++;
 	}
 
+	destroy_workqueue(priv->stats_queue);
+
 	for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
 		struct mvpp2_bm_pool *bm_pool = &priv->bm_pools[i];
 
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH net] mvpp2: fix panic on module removal
From: Antoine Tenart @ 2019-08-01 12:00 UTC (permalink / raw)
  To: Matteo Croce
  Cc: Antoine Tenart, netdev, Miquel Raynal, LKML, Lorenzo Bianconi,
	Maxime Chevallier, David S. Miller
In-Reply-To: <CAGnkfhyx7MHaG=YNhS7VrzsBqhVCPw5VeHPM7SFpiLeq3cb5Gw@mail.gmail.com>

On Thu, Aug 01, 2019 at 01:46:39PM +0200, Matteo Croce wrote:
> On Thu, Aug 1, 2019 at 9:18 AM Antoine Tenart
> <antoine.tenart@bootlin.com> wrote:
> > On Wed, Jul 31, 2019 at 08:31:16PM +0200, Matteo Croce wrote:
> > >
> > > diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > index c51f1d5b550b..5002d51fc9d6 100644
> > > --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > > @@ -5760,7 +5760,6 @@ static int mvpp2_remove(struct platform_device *pdev)
> > >       mvpp2_dbgfs_cleanup(priv);
> > >
> > >       flush_workqueue(priv->stats_queue);
> > > -     destroy_workqueue(priv->stats_queue);
> > >
> > >       fwnode_for_each_available_child_node(fwnode, port_fwnode) {
> > >               if (priv->port_list[i]) {
> > > @@ -5770,6 +5769,8 @@ static int mvpp2_remove(struct platform_device *pdev)
> > >               i++;
> > >       }
> >
> > Shouldn't you also move flush_workqueue() here?
> 
> I think that that flush it's unneeded at all, as all port remove calls
> cancel_delayed_work_sync().
> 
> I tried removing it and it doesn't crash on rmmod.

I was wondering this, and looking at the documentation it seems to me
removing flush_workqueue() should be fine.

Thanks!
Antoine

-- 
Antoine Ténart, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com

^ permalink raw reply

* [PATCH net-next v5 0/5] enetc: Add mdio bus driver for the PCIe MDIO endpoint
From: Claudiu Manoil @ 2019-08-01 11:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: andrew, Rob Herring, Li Yang, alexandru.marginean, netdev,
	devicetree, linux-arm-kernel, linux-kernel

First patch fixes a sparse issue and cleans up accessors to avoid
casting to __iomem.  The second one cleans up the Makefile, to make
it easier to add new entries.

Third patch just registers the PCIe endpoint device containing
the MDIO registers as a standalone MDIO bus driver, to provide
an alternative way to control the MDIO bus.  The same code used
by the ENETC ports (eth controllers) to manage MDIO via local
registers applies and is reused.

Bindings are provided for the new MDIO node, similarly to ENETC
port nodes bindings.

Last patch enables the ENETC port 1 and its RGMII PHY on the
LS1028A QDS board, where the MDIO muxing configuration relies
on the MDIO support provided in the first patch.

Changes since v0:
v1 - fixed mdio bus allocation
v2 - cleaned up accessors to avoid casting
v3 - fixed spelling (mostly commit message)
v4 - fixed err path check blunder
v5 - fixed loadble module build, provided separate kbuild module
     for the driver


Claudiu Manoil (5):
  enetc: Clean up local mdio bus allocation
  enetc: Clean up makefile
  enetc: Add mdio bus driver for the PCIe MDIO endpoint
  dt-bindings: net: fsl: enetc: Add bindings for the central MDIO PCIe
    endpoint
  arm64: dts: fsl: ls1028a: Enable eth port1 on the ls1028a QDS board

 .../devicetree/bindings/net/fsl-enetc.txt     |  42 +++++++-
 .../boot/dts/freescale/fsl-ls1028a-qds.dts    |  40 +++++++
 .../arm64/boot/dts/freescale/fsl-ls1028a.dtsi |   6 ++
 drivers/net/ethernet/freescale/enetc/Kconfig  |   9 ++
 drivers/net/ethernet/freescale/enetc/Makefile |  19 ++--
 .../net/ethernet/freescale/enetc/enetc_mdio.c |  97 ++++++++---------
 .../net/ethernet/freescale/enetc/enetc_mdio.h |  12 +++
 .../ethernet/freescale/enetc/enetc_pci_mdio.c | 101 ++++++++++++++++++
 .../net/ethernet/freescale/enetc/enetc_pf.c   |   5 +-
 9 files changed, 264 insertions(+), 67 deletions(-)
 create mode 100644 drivers/net/ethernet/freescale/enetc/enetc_mdio.h
 create mode 100644 drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c

-- 
2.17.1


^ permalink raw reply

* [PATCH net-next v5 5/5] arm64: dts: fsl: ls1028a: Enable eth port1 on the ls1028a QDS board
From: Claudiu Manoil @ 2019-08-01 11:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: andrew, Rob Herring, Li Yang, alexandru.marginean, netdev,
	devicetree, linux-arm-kernel, linux-kernel
In-Reply-To: <1564660373-4607-1-git-send-email-claudiu.manoil@nxp.com>

LS1028a has one Ethernet management interface. On the QDS board, the
MDIO signals are multiplexed to either on-board AR8035 PHY device or
to 4 PCIe slots allowing for SGMII cards.
To enable the Ethernet ENETC Port 1, which can only be connected to a
RGMII PHY, the multiplexer needs to be configured to route the MDIO to
the AR8035 PHY.  The MDIO/MDC routing is controlled by bits 7:4 of FPGA
board config register 0x54, and value 0 selects the on-board RGMII PHY.
The FPGA board config registers are accessible on the i2c bus, at address
0x66.

The PF3 MDIO PCIe integrated endpoint device allows for centralized access
to the MDIO bus.  Add the corresponding devicetree node and set it to be
the MDIO bus parent.

Signed-off-by: Alex Marginean <alexandru.marginean@nxp.com>
Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
v1-v5 - none

 .../boot/dts/freescale/fsl-ls1028a-qds.dts    | 40 +++++++++++++++++++
 .../arm64/boot/dts/freescale/fsl-ls1028a.dtsi |  6 +++
 2 files changed, 46 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
index de6ef39f3118..663c4b728c07 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
@@ -85,6 +85,26 @@
 			system-clock-frequency = <25000000>;
 		};
 	};
+
+	mdio-mux {
+		compatible = "mdio-mux-multiplexer";
+		mux-controls = <&mux 0>;
+		mdio-parent-bus = <&enetc_mdio_pf3>;
+		#address-cells=<1>;
+		#size-cells = <0>;
+
+		/* on-board RGMII PHY */
+		mdio@0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0>;
+
+			qds_phy1: ethernet-phy@5 {
+				/* Atheros 8035 */
+				reg = <5>;
+			};
+		};
+	};
 };
 
 &duart0 {
@@ -164,6 +184,26 @@
 			};
 		};
 	};
+
+	fpga@66 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,ls1028aqds-fpga", "fsl,fpga-qixis-i2c",
+			     "simple-mfd";
+		reg = <0x66>;
+
+		mux: mux-controller {
+			compatible = "reg-mux";
+			#mux-control-cells = <1>;
+			mux-reg-masks = <0x54 0xf0>; /* 0: reg 0x54, bits 7:4 */
+		};
+	};
+
+};
+
+&enetc_port1 {
+	phy-handle = <&qds_phy1>;
+	phy-connection-type = "rgmii-id";
 };
 
 &sai1 {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 7975519b4f56..de71153fda00 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -536,6 +536,12 @@
 				compatible = "fsl,enetc";
 				reg = <0x000100 0 0 0 0>;
 			};
+			enetc_mdio_pf3: mdio@0,3 {
+				compatible = "fsl,enetc-mdio";
+				reg = <0x000300 0 0 0 0>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
 			ethernet@0,4 {
 				compatible = "fsl,enetc-ptp";
 				reg = <0x000400 0 0 0 0>;
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next v5 3/5] enetc: Add mdio bus driver for the PCIe MDIO endpoint
From: Claudiu Manoil @ 2019-08-01 11:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: andrew, Rob Herring, Li Yang, alexandru.marginean, netdev,
	devicetree, linux-arm-kernel, linux-kernel
In-Reply-To: <1564660373-4607-1-git-send-email-claudiu.manoil@nxp.com>

ENETC ports can manage the MDIO bus via local register
interface.  However there's also a centralized way
to manage the MDIO bus, via the MDIO PCIe endpoint
device integrated by the same root complex that also
integrates the ENETC ports (eth controllers).

Depending on board design and use case, centralized
access to MDIO may be better than using local ENETC
port registers.  For instance, on the LS1028A QDS board
where MDIO muxing is required.  Also, the LS1028A on-chip
switch doesn't have a local MDIO register interface.

The current patch registers the above PCIe endpoint as a
separate MDIO bus and provides a driver for it by re-using
the code used for local MDIO access.  It also allows the
ENETC port PHYs to be managed by this driver if the local
"mdio" node is missing from the ENETC port node.

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
v1 - fixed mdio bus allocation
   - requested only BAR0 region, as it's the only one used by the driver
v2 - reworked accessors as per Andrew Lunn's request
v3 - none
v4 - err path check fix
v5 - refactored as separate kbuild module, to fix loadable module build

 drivers/net/ethernet/freescale/enetc/Kconfig  |   9 ++
 drivers/net/ethernet/freescale/enetc/Makefile |   3 +
 .../net/ethernet/freescale/enetc/enetc_mdio.c |  11 +-
 .../net/ethernet/freescale/enetc/enetc_mdio.h |  12 +++
 .../ethernet/freescale/enetc/enetc_pci_mdio.c | 101 ++++++++++++++++++
 .../net/ethernet/freescale/enetc/enetc_pf.c   |   5 +-
 6 files changed, 132 insertions(+), 9 deletions(-)
 create mode 100644 drivers/net/ethernet/freescale/enetc/enetc_mdio.h
 create mode 100644 drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c

diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index ed0d010c7cf2..9c530f75134f 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -18,6 +18,15 @@ config FSL_ENETC_VF
 
 	  If compiled as module (M), the module name is fsl-enetc-vf.
 
+config FSL_ENETC_MDIO
+	tristate "ENETC MDIO driver"
+	depends on PCI && (ARCH_LAYERSCAPE || COMPILE_TEST)
+	help
+	  This driver supports NXP ENETC Central MDIO controller as a PCIe
+	  physical function (PF) device.
+
+	  If compiled as module (M), the module name is fsl-enetc-mdio.
+
 config FSL_ENETC_PTP_CLOCK
 	tristate "ENETC PTP clock driver"
 	depends on PTP_1588_CLOCK_QORIQ && (FSL_ENETC || FSL_ENETC_VF)
diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile
index 164453a5dc1d..d200c27c3bf6 100644
--- a/drivers/net/ethernet/freescale/enetc/Makefile
+++ b/drivers/net/ethernet/freescale/enetc/Makefile
@@ -9,5 +9,8 @@ fsl-enetc-$(CONFIG_PCI_IOV) += enetc_msg.o
 obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o
 fsl-enetc-vf-y := enetc_vf.o $(common-objs)
 
+obj-$(CONFIG_FSL_ENETC_MDIO) += fsl-enetc-mdio.o
+fsl-enetc-mdio-y := enetc_pci_mdio.o enetc_mdio.o
+
 obj-$(CONFIG_FSL_ENETC_PTP_CLOCK) += fsl-enetc-ptp.o
 fsl-enetc-ptp-y := enetc_ptp.o
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
index 05094601ece8..149883c8f0b8 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
@@ -6,7 +6,7 @@
 #include <linux/iopoll.h>
 #include <linux/of.h>
 
-#include "enetc_pf.h"
+#include "enetc_mdio.h"
 
 #define	ENETC_MDIO_REG_OFFSET	0x1c00
 #define	ENETC_MDIO_CFG	0x0	/* MDIO configuration and status */
@@ -20,10 +20,6 @@
 	enetc_port_wr(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET, val)
 #define enetc_mdio_rd_reg(off)	enetc_mdio_rd(hw, off)
 
-struct enetc_mdio_priv {
-	struct enetc_hw *hw;
-};
-
 #define ENETC_MDC_DIV		258
 
 #define MDIO_CFG_CLKDIV(x)	((((x) >> 1) & 0xff) << 8)
@@ -47,8 +43,7 @@ static int enetc_mdio_wait_complete(struct enetc_hw *hw)
 				  !(val & MDIO_CFG_BSY), 10, 10 * TIMEOUT);
 }
 
-static int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum,
-			    u16 value)
+int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value)
 {
 	struct enetc_mdio_priv *mdio_priv = bus->priv;
 	struct enetc_hw *hw = mdio_priv->hw;
@@ -95,7 +90,7 @@ static int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum,
 	return 0;
 }
 
-static int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
+int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 {
 	struct enetc_mdio_priv *mdio_priv = bus->priv;
 	struct enetc_hw *hw = mdio_priv->hw;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.h b/drivers/net/ethernet/freescale/enetc/enetc_mdio.h
new file mode 100644
index 000000000000..60c9a3889824
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2019 NXP */
+
+#include <linux/phy.h>
+#include "enetc_pf.h"
+
+struct enetc_mdio_priv {
+	struct enetc_hw *hw;
+};
+
+int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value);
+int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
new file mode 100644
index 000000000000..fbd41ce01f06
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2019 NXP */
+#include <linux/of_mdio.h>
+#include "enetc_mdio.h"
+
+#define ENETC_MDIO_DEV_ID	0xee01
+#define ENETC_MDIO_DEV_NAME	"FSL PCIe IE Central MDIO"
+#define ENETC_MDIO_BUS_NAME	ENETC_MDIO_DEV_NAME " Bus"
+#define ENETC_MDIO_DRV_NAME	ENETC_MDIO_DEV_NAME " driver"
+
+static int enetc_pci_mdio_probe(struct pci_dev *pdev,
+				const struct pci_device_id *ent)
+{
+	struct enetc_mdio_priv *mdio_priv;
+	struct device *dev = &pdev->dev;
+	struct enetc_hw *hw;
+	struct mii_bus *bus;
+	int err;
+
+	hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL);
+	if (!hw)
+		return -ENOMEM;
+
+	bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
+	if (!bus)
+		return -ENOMEM;
+
+	bus->name = ENETC_MDIO_BUS_NAME;
+	bus->read = enetc_mdio_read;
+	bus->write = enetc_mdio_write;
+	bus->parent = dev;
+	mdio_priv = bus->priv;
+	mdio_priv->hw = hw;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
+
+	pcie_flr(pdev);
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(dev, "device enable failed\n");
+		return err;
+	}
+
+	err = pci_request_region(pdev, 0, KBUILD_MODNAME);
+	if (err) {
+		dev_err(dev, "pci_request_region failed\n");
+		goto err_pci_mem_reg;
+	}
+
+	hw->port = pci_iomap(pdev, 0, 0);
+	if (!hw->port) {
+		err = -ENXIO;
+		dev_err(dev, "iomap failed\n");
+		goto err_ioremap;
+	}
+
+	err = of_mdiobus_register(bus, dev->of_node);
+	if (err)
+		goto err_mdiobus_reg;
+
+	pci_set_drvdata(pdev, bus);
+
+	return 0;
+
+err_mdiobus_reg:
+	iounmap(mdio_priv->hw->port);
+err_ioremap:
+	pci_release_mem_regions(pdev);
+err_pci_mem_reg:
+	pci_disable_device(pdev);
+
+	return err;
+}
+
+static void enetc_pci_mdio_remove(struct pci_dev *pdev)
+{
+	struct mii_bus *bus = pci_get_drvdata(pdev);
+	struct enetc_mdio_priv *mdio_priv;
+
+	mdiobus_unregister(bus);
+	mdio_priv = bus->priv;
+	iounmap(mdio_priv->hw->port);
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static const struct pci_device_id enetc_pci_mdio_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_MDIO_DEV_ID) },
+	{ 0, } /* End of table. */
+};
+MODULE_DEVICE_TABLE(pci, enetc_pci_mdio_id_table);
+
+static struct pci_driver enetc_pci_mdio_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = enetc_pci_mdio_id_table,
+	.probe = enetc_pci_mdio_probe,
+	.remove = enetc_pci_mdio_remove,
+};
+module_pci_driver(enetc_pci_mdio_driver);
+
+MODULE_DESCRIPTION(ENETC_MDIO_DRV_NAME);
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 258b3cb38a6f..7d6513ff8507 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -750,6 +750,7 @@ static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
 {
 	struct enetc_pf *pf = enetc_si_priv(priv->si);
 	struct device_node *np = priv->dev->of_node;
+	struct device_node *mdio_np;
 	int err;
 
 	if (!np) {
@@ -773,7 +774,9 @@ static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
 		priv->phy_node = of_node_get(np);
 	}
 
-	if (!of_phy_is_fixed_link(np)) {
+	mdio_np = of_get_child_by_name(np, "mdio");
+	if (mdio_np) {
+		of_node_put(mdio_np);
 		err = enetc_mdio_probe(pf);
 		if (err) {
 			of_node_put(priv->phy_node);
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next v5 1/5] enetc: Clean up local mdio bus allocation
From: Claudiu Manoil @ 2019-08-01 11:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: andrew, Rob Herring, Li Yang, alexandru.marginean, netdev,
	devicetree, linux-arm-kernel, linux-kernel
In-Reply-To: <1564660373-4607-1-git-send-email-claudiu.manoil@nxp.com>

What's needed is basically a pointer to the mdio registers.
This is one way to store it inside bus->priv allocated space,
without upsetting sparse.
Reworked accessors to avoid __iomem casting.
Used devm_* variant to further clean up the init error /
remove paths.

Fixes following sparse warning:
 warning: incorrect type in assignment (different address spaces)
    expected void *priv
    got struct enetc_mdio_regs [noderef] <asn:2>*[assigned] regs

Fixes: ebfcb23d62ab ("enetc: Add ENETC PF level external MDIO support")

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
v1 - added this patch
v2 - reworked accessors as per Andrew Lunn's request
v3 - cleaned up commit message
v4 - none
v5 - none

 .../net/ethernet/freescale/enetc/enetc_mdio.c | 94 +++++++++----------
 1 file changed, 46 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
index 77b9cd10ba2b..05094601ece8 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
@@ -8,16 +8,22 @@
 
 #include "enetc_pf.h"
 
-struct enetc_mdio_regs {
-	u32	mdio_cfg;	/* MDIO configuration and status */
-	u32	mdio_ctl;	/* MDIO control */
-	u32	mdio_data;	/* MDIO data */
-	u32	mdio_addr;	/* MDIO address */
+#define	ENETC_MDIO_REG_OFFSET	0x1c00
+#define	ENETC_MDIO_CFG	0x0	/* MDIO configuration and status */
+#define	ENETC_MDIO_CTL	0x4	/* MDIO control */
+#define	ENETC_MDIO_DATA	0x8	/* MDIO data */
+#define	ENETC_MDIO_ADDR	0xc	/* MDIO address */
+
+#define enetc_mdio_rd(hw, off) \
+	enetc_port_rd(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET)
+#define enetc_mdio_wr(hw, off, val) \
+	enetc_port_wr(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET, val)
+#define enetc_mdio_rd_reg(off)	enetc_mdio_rd(hw, off)
+
+struct enetc_mdio_priv {
+	struct enetc_hw *hw;
 };
 
-#define bus_to_enetc_regs(bus)	(struct enetc_mdio_regs __iomem *)((bus)->priv)
-
-#define ENETC_MDIO_REG_OFFSET	0x1c00
 #define ENETC_MDC_DIV		258
 
 #define MDIO_CFG_CLKDIV(x)	((((x) >> 1) & 0xff) << 8)
@@ -33,18 +39,19 @@ struct enetc_mdio_regs {
 #define MDIO_DATA(x)		((x) & 0xffff)
 
 #define TIMEOUT	1000
-static int enetc_mdio_wait_complete(struct enetc_mdio_regs __iomem *regs)
+static int enetc_mdio_wait_complete(struct enetc_hw *hw)
 {
 	u32 val;
 
-	return readx_poll_timeout(enetc_rd_reg, &regs->mdio_cfg, val,
+	return readx_poll_timeout(enetc_mdio_rd_reg, MDIO_CFG, val,
 				  !(val & MDIO_CFG_BSY), 10, 10 * TIMEOUT);
 }
 
 static int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum,
 			    u16 value)
 {
-	struct enetc_mdio_regs __iomem *regs = bus_to_enetc_regs(bus);
+	struct enetc_mdio_priv *mdio_priv = bus->priv;
+	struct enetc_hw *hw = mdio_priv->hw;
 	u32 mdio_ctl, mdio_cfg;
 	u16 dev_addr;
 	int ret;
@@ -59,29 +66,29 @@ static int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum,
 		mdio_cfg &= ~MDIO_CFG_ENC45;
 	}
 
-	enetc_wr_reg(&regs->mdio_cfg, mdio_cfg);
+	enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg);
 
-	ret = enetc_mdio_wait_complete(regs);
+	ret = enetc_mdio_wait_complete(hw);
 	if (ret)
 		return ret;
 
 	/* set port and dev addr */
 	mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
-	enetc_wr_reg(&regs->mdio_ctl, mdio_ctl);
+	enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl);
 
 	/* set the register address */
 	if (regnum & MII_ADDR_C45) {
-		enetc_wr_reg(&regs->mdio_addr, regnum & 0xffff);
+		enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff);
 
-		ret = enetc_mdio_wait_complete(regs);
+		ret = enetc_mdio_wait_complete(hw);
 		if (ret)
 			return ret;
 	}
 
 	/* write the value */
-	enetc_wr_reg(&regs->mdio_data, MDIO_DATA(value));
+	enetc_mdio_wr(hw, MDIO_DATA, MDIO_DATA(value));
 
-	ret = enetc_mdio_wait_complete(regs);
+	ret = enetc_mdio_wait_complete(hw);
 	if (ret)
 		return ret;
 
@@ -90,7 +97,8 @@ static int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum,
 
 static int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 {
-	struct enetc_mdio_regs __iomem *regs = bus_to_enetc_regs(bus);
+	struct enetc_mdio_priv *mdio_priv = bus->priv;
+	struct enetc_hw *hw = mdio_priv->hw;
 	u32 mdio_ctl, mdio_cfg;
 	u16 dev_addr, value;
 	int ret;
@@ -104,41 +112,41 @@ static int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 		mdio_cfg &= ~MDIO_CFG_ENC45;
 	}
 
-	enetc_wr_reg(&regs->mdio_cfg, mdio_cfg);
+	enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg);
 
-	ret = enetc_mdio_wait_complete(regs);
+	ret = enetc_mdio_wait_complete(hw);
 	if (ret)
 		return ret;
 
 	/* set port and device addr */
 	mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
-	enetc_wr_reg(&regs->mdio_ctl, mdio_ctl);
+	enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl);
 
 	/* set the register address */
 	if (regnum & MII_ADDR_C45) {
-		enetc_wr_reg(&regs->mdio_addr, regnum & 0xffff);
+		enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff);
 
-		ret = enetc_mdio_wait_complete(regs);
+		ret = enetc_mdio_wait_complete(hw);
 		if (ret)
 			return ret;
 	}
 
 	/* initiate the read */
-	enetc_wr_reg(&regs->mdio_ctl, mdio_ctl | MDIO_CTL_READ);
+	enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl | MDIO_CTL_READ);
 
-	ret = enetc_mdio_wait_complete(regs);
+	ret = enetc_mdio_wait_complete(hw);
 	if (ret)
 		return ret;
 
 	/* return all Fs if nothing was there */
-	if (enetc_rd_reg(&regs->mdio_cfg) & MDIO_CFG_RD_ER) {
+	if (enetc_mdio_rd(hw, MDIO_CFG) & MDIO_CFG_RD_ER) {
 		dev_dbg(&bus->dev,
 			"Error while reading PHY%d reg at %d.%hhu\n",
 			phy_id, dev_addr, regnum);
 		return 0xffff;
 	}
 
-	value = enetc_rd_reg(&regs->mdio_data) & 0xffff;
+	value = enetc_mdio_rd(hw, MDIO_DATA) & 0xffff;
 
 	return value;
 }
@@ -146,12 +154,12 @@ static int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 int enetc_mdio_probe(struct enetc_pf *pf)
 {
 	struct device *dev = &pf->si->pdev->dev;
-	struct enetc_mdio_regs __iomem *regs;
+	struct enetc_mdio_priv *mdio_priv;
 	struct device_node *np;
 	struct mii_bus *bus;
-	int ret;
+	int err;
 
-	bus = mdiobus_alloc_size(sizeof(regs));
+	bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
 	if (!bus)
 		return -ENOMEM;
 
@@ -159,41 +167,31 @@ int enetc_mdio_probe(struct enetc_pf *pf)
 	bus->read = enetc_mdio_read;
 	bus->write = enetc_mdio_write;
 	bus->parent = dev;
+	mdio_priv = bus->priv;
+	mdio_priv->hw = &pf->si->hw;
 	snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
 
-	/* store the enetc mdio base address for this bus */
-	regs = pf->si->hw.port + ENETC_MDIO_REG_OFFSET;
-	bus->priv = regs;
-
 	np = of_get_child_by_name(dev->of_node, "mdio");
 	if (!np) {
 		dev_err(dev, "MDIO node missing\n");
-		ret = -EINVAL;
-		goto err_registration;
+		return -EINVAL;
 	}
 
-	ret = of_mdiobus_register(bus, np);
-	if (ret) {
+	err = of_mdiobus_register(bus, np);
+	if (err) {
 		of_node_put(np);
 		dev_err(dev, "cannot register MDIO bus\n");
-		goto err_registration;
+		return err;
 	}
 
 	of_node_put(np);
 	pf->mdio = bus;
 
 	return 0;
-
-err_registration:
-	mdiobus_free(bus);
-
-	return ret;
 }
 
 void enetc_mdio_remove(struct enetc_pf *pf)
 {
-	if (pf->mdio) {
+	if (pf->mdio)
 		mdiobus_unregister(pf->mdio);
-		mdiobus_free(pf->mdio);
-	}
 }
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next v5 2/5] enetc: Clean up makefile
From: Claudiu Manoil @ 2019-08-01 11:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: andrew, Rob Herring, Li Yang, alexandru.marginean, netdev,
	devicetree, linux-arm-kernel, linux-kernel
In-Reply-To: <1564660373-4607-1-git-send-email-claudiu.manoil@nxp.com>

Clean up overcomplicated makefile to make it more maintainable.
Basically, there's a set of common objects shared between
the PF and VF driver modules.  This can be implemented in a
simpler way, without conditionals, less repetition, allowing
also for easier updates in the future.

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
---
v5 - added this patch

 drivers/net/ethernet/freescale/enetc/Makefile | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile
index 7139e414dccf..164453a5dc1d 100644
--- a/drivers/net/ethernet/freescale/enetc/Makefile
+++ b/drivers/net/ethernet/freescale/enetc/Makefile
@@ -1,19 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0
+
+common-objs := enetc.o enetc_cbdr.o enetc_ethtool.o
+
 obj-$(CONFIG_FSL_ENETC) += fsl-enetc.o
-fsl-enetc-$(CONFIG_FSL_ENETC) += enetc.o enetc_cbdr.o enetc_ethtool.o \
-				 enetc_mdio.o
+fsl-enetc-y := enetc_pf.o enetc_mdio.o $(common-objs)
 fsl-enetc-$(CONFIG_PCI_IOV) += enetc_msg.o
-fsl-enetc-objs := enetc_pf.o $(fsl-enetc-y)
 
 obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o
-
-ifeq ($(CONFIG_FSL_ENETC)$(CONFIG_FSL_ENETC_VF), yy)
-fsl-enetc-vf-objs := enetc_vf.o
-else
-fsl-enetc-vf-$(CONFIG_FSL_ENETC_VF) += enetc.o enetc_cbdr.o \
-				       enetc_ethtool.o
-fsl-enetc-vf-objs := enetc_vf.o $(fsl-enetc-vf-y)
-endif
+fsl-enetc-vf-y := enetc_vf.o $(common-objs)
 
 obj-$(CONFIG_FSL_ENETC_PTP_CLOCK) += fsl-enetc-ptp.o
-fsl-enetc-ptp-$(CONFIG_FSL_ENETC_PTP_CLOCK) += enetc_ptp.o
+fsl-enetc-ptp-y := enetc_ptp.o
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next v5 4/5] dt-bindings: net: fsl: enetc: Add bindings for the central MDIO PCIe endpoint
From: Claudiu Manoil @ 2019-08-01 11:52 UTC (permalink / raw)
  To: David S . Miller
  Cc: andrew, Rob Herring, Li Yang, alexandru.marginean, netdev,
	devicetree, linux-arm-kernel, linux-kernel
In-Reply-To: <1564660373-4607-1-git-send-email-claudiu.manoil@nxp.com>

The on-chip PCIe root complex that integrates the ENETC ethernet
controllers also integrates a PCIe endpoint for the MDIO controller
providing for centralized control of the ENETC mdio bus.
Add bindings for this "central" MDIO Integrated PCIe Endpoint.

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
v1 - none
v2 - none
v3 - fixed spelling (commit message mostly)
v4 - none
v4 - none

 .../devicetree/bindings/net/fsl-enetc.txt     | 42 +++++++++++++++++--
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/fsl-enetc.txt b/Documentation/devicetree/bindings/net/fsl-enetc.txt
index 25fc687419db..b7034ccbc1bd 100644
--- a/Documentation/devicetree/bindings/net/fsl-enetc.txt
+++ b/Documentation/devicetree/bindings/net/fsl-enetc.txt
@@ -11,7 +11,9 @@ Required properties:
 		  to parent node bindings.
 - compatible	: Should be "fsl,enetc".
 
-1) The ENETC external port is connected to a MDIO configurable phy:
+1. The ENETC external port is connected to a MDIO configurable phy
+
+1.1. Using the local ENETC Port MDIO interface
 
 In this case, the ENETC node should include a "mdio" sub-node
 that in turn should contain the "ethernet-phy" node describing the
@@ -47,8 +49,42 @@ Example:
 		};
 	};
 
-2) The ENETC port is an internal port or has a fixed-link external
-connection:
+1.2. Using the central MDIO PCIe endpoint device
+
+In this case, the mdio node should be defined as another PCIe
+endpoint node, at the same level with the ENETC port nodes.
+
+Required properties:
+
+- reg		: Specifies PCIe Device Number and Function
+		  Number of the ENETC endpoint device, according
+		  to parent node bindings.
+- compatible	: Should be "fsl,enetc-mdio".
+
+The remaining required mdio bus properties are standard, their bindings
+already defined in Documentation/devicetree/bindings/net/mdio.txt.
+
+Example:
+
+	ethernet@0,0 {
+		compatible = "fsl,enetc";
+		reg = <0x000000 0 0 0 0>;
+		phy-handle = <&sgmii_phy0>;
+		phy-connection-type = "sgmii";
+	};
+
+	mdio@0,3 {
+		compatible = "fsl,enetc-mdio";
+		reg = <0x000300 0 0 0 0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		sgmii_phy0: ethernet-phy@2 {
+			reg = <0x2>;
+		};
+	};
+
+2. The ENETC port is an internal port or has a fixed-link external
+connection
 
 In this case, the ENETC port node defines a fixed link connection,
 as specified by Documentation/devicetree/bindings/net/fixed-link.txt.
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH net] mvpp2: fix panic on module removal
From: Matteo Croce @ 2019-08-01 11:46 UTC (permalink / raw)
  To: Antoine Tenart
  Cc: netdev, Miquel Raynal, LKML, Lorenzo Bianconi, Maxime Chevallier,
	David S. Miller
In-Reply-To: <20190801071801.GF3579@kwain>

On Thu, Aug 1, 2019 at 9:18 AM Antoine Tenart
<antoine.tenart@bootlin.com> wrote:
>
> Hi Matteo,
>
> On Wed, Jul 31, 2019 at 08:31:16PM +0200, Matteo Croce wrote:
> >
> > diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > index c51f1d5b550b..5002d51fc9d6 100644
> > --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> > @@ -5760,7 +5760,6 @@ static int mvpp2_remove(struct platform_device *pdev)
> >       mvpp2_dbgfs_cleanup(priv);
> >
> >       flush_workqueue(priv->stats_queue);
> > -     destroy_workqueue(priv->stats_queue);
> >
> >       fwnode_for_each_available_child_node(fwnode, port_fwnode) {
> >               if (priv->port_list[i]) {
> > @@ -5770,6 +5769,8 @@ static int mvpp2_remove(struct platform_device *pdev)
> >               i++;
> >       }
>
> Shouldn't you also move flush_workqueue() here?
>

I think that that flush it's unneeded at all, as all port remove calls
cancel_delayed_work_sync().

I tried removing it and it doesn't crash on rmmod.

--
Matteo Croce
per aspera ad upstream

^ permalink raw reply

* Re: [PATCH net-next] net/mlx5e: Allow dropping specific tunnel packets
From: Roi Dayan @ 2019-08-01 11:40 UTC (permalink / raw)
  To: xiangxia.m.yue@gmail.com, Saeed Mahameed; +Cc: netdev@vger.kernel.org
In-Reply-To: <1564648859-17369-1-git-send-email-xiangxia.m.yue@gmail.com>



On 2019-08-01 11:40 AM, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> 
> In some case, we don't want to allow specific tunnel packets
> to host that can avoid to take up high CPU (e.g network attacks).
> But other tunnel packets which not matched in hardware will be
> sent to host too.
> 
>     $ tc filter add dev vxlan_sys_4789 \
> 	    protocol ip chain 0 parent ffff: prio 1 handle 1 \
> 	    flower dst_ip 1.1.1.100 ip_proto tcp dst_port 80 \
> 	    enc_dst_ip 2.2.2.100 enc_key_id 100 enc_dst_port 4789 \
> 	    action tunnel_key unset pipe action drop
> 
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> index f3ed028..25d423e 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> @@ -2485,7 +2485,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
>  
>  	if (flow_flag_test(flow, EGRESS) &&
>  	    !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
> -	      (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)))
> +	      (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
> +	      (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
>  		return false;
>  
>  	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
> 

thanks!

Reviewed-by: Roi Dayan <roid@mellanox.com>

^ permalink raw reply

* [PATCH net 1/2] net: sched: use major priority number as hardware priority
From: Pablo Neira Ayuso @ 2019-08-01 11:28 UTC (permalink / raw)
  To: netfilter-devel
  Cc: davem, netdev, jakub.kicinski, marcelo.leitner, jiri, wenxu,
	saeedm, paulb, gerlitz.or
In-Reply-To: <20190801112817.24976-1-pablo@netfilter.org>

tc transparently maps the software priority number to hardware. Update
it to pass the major priority which is what most drivers expect. Update
drivers too so they do not need to lshift the priority field of the
flow_cls_common_offload object. The stmmac driver is an exception, since
this code assumes the tc software priority is fine, therefore, lshift it
just to be conservative.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
v1: initial version.

 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c      |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c   |  2 +-
 drivers/net/ethernet/mscc/ocelot_flower.c            | 12 +++---------
 drivers/net/ethernet/netronome/nfp/flower/qos_conf.c |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c      |  2 +-
 include/net/pkt_cls.h                                |  2 +-
 6 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index cc096f6011d9..744c0c640c10 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3158,7 +3158,7 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
 
 	esw_attr->parse_attr = parse_attr;
 	esw_attr->chain = f->common.chain_index;
-	esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
+	esw_attr->prio = f->common.prio;
 
 	esw_attr->in_rep = in_rep;
 	esw_attr->in_mdev = in_mdev;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index e8ac90564dbe..84a87d059333 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -471,7 +471,7 @@ int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei)
 void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
 				 unsigned int priority)
 {
-	rulei->priority = priority >> 16;
+	rulei->priority = priority;
 }
 
 void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei,
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 59487d446a09..b894bc0c9c16 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -13,12 +13,6 @@ struct ocelot_port_block {
 	struct ocelot_port *port;
 };
 
-static u16 get_prio(u32 prio)
-{
-	/* prio starts from 0x1000 while the ids starts from 0 */
-	return prio >> 16;
-}
-
 static int ocelot_flower_parse_action(struct flow_cls_offload *f,
 				      struct ocelot_ace_rule *rule)
 {
@@ -168,7 +162,7 @@ static int ocelot_flower_parse(struct flow_cls_offload *f,
 	}
 
 finished_key_parsing:
-	ocelot_rule->prio = get_prio(f->common.prio);
+	ocelot_rule->prio = f->common.prio;
 	ocelot_rule->id = f->cookie;
 	return ocelot_flower_parse_action(f, ocelot_rule);
 }
@@ -218,7 +212,7 @@ static int ocelot_flower_destroy(struct flow_cls_offload *f,
 	struct ocelot_ace_rule rule;
 	int ret;
 
-	rule.prio = get_prio(f->common.prio);
+	rule.prio = f->common.prio;
 	rule.port = port_block->port;
 	rule.id = f->cookie;
 
@@ -236,7 +230,7 @@ static int ocelot_flower_stats_update(struct flow_cls_offload *f,
 	struct ocelot_ace_rule rule;
 	int ret;
 
-	rule.prio = get_prio(f->common.prio);
+	rule.prio = f->common.prio;
 	rule.port = port_block->port;
 	rule.id = f->cookie;
 	ret = ocelot_ace_rule_stats_update(&rule);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
index 86e968cd5ffd..124a43dc136a 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
@@ -93,7 +93,7 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev,
 		return -EOPNOTSUPP;
 	}
 
-	if (flow->common.prio != (1 << 16)) {
+	if (flow->common.prio != 1) {
 		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires highest priority");
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 58ea18af9813..5cd040215469 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -94,7 +94,7 @@ static int tc_fill_entry(struct stmmac_priv *priv,
 	struct stmmac_tc_entry *entry, *frag = NULL;
 	struct tc_u32_sel *sel = cls->knode.sel;
 	u32 off, data, mask, real_off, rem;
-	u32 prio = cls->common.prio;
+	u32 prio = cls->common.prio << 16;
 	int ret;
 
 	/* Only 1 match per entry */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e429809ca90d..98be18ef1ed3 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -646,7 +646,7 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
 {
 	cls_common->chain_index = tp->chain->index;
 	cls_common->protocol = tp->protocol;
-	cls_common->prio = tp->prio;
+	cls_common->prio = tp->prio >> 16;
 	if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE)
 		cls_common->extack = extack;
 }
-- 
2.11.0


^ permalink raw reply related

* [PATCH net 2/2,v3] netfilter: nf_tables: map basechain priority to hardware priority
From: Pablo Neira Ayuso @ 2019-08-01 11:28 UTC (permalink / raw)
  To: netfilter-devel
  Cc: davem, netdev, jakub.kicinski, marcelo.leitner, jiri, wenxu,
	saeedm, paulb, gerlitz.or
In-Reply-To: <20190801112817.24976-1-pablo@netfilter.org>

This patch adds initial support for offloading basechains using
the priority range from -32767 to 32767. This is restricting the
netfilter priority range to 16-bit integer since this is what most
drivers assume so far from tc.

The software to hardware priority mapping is not exposed to userspace.
Hence, it should be possible to extend this range of supported priorities
later on once drivers are updated to support for 32-bit integer
priorities.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
v3: update mapping not to skip tc priority space, each subsystem has
    its own priority space. - Jakub Kicinski.

 include/net/netfilter/nf_tables_offload.h |  2 ++
 net/netfilter/nf_tables_api.c             |  4 ++++
 net/netfilter/nf_tables_offload.c         | 18 +++++++++++++++---
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 3196663a10e3..3c31e9d55028 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -73,4 +73,6 @@ int nft_flow_rule_offload_commit(struct net *net);
 	(__reg)->key		= __key;				\
 	memset(&(__reg)->mask, 0xff, (__reg)->len);
 
+u32 nft_chain_offload_priority(struct nft_base_chain *basechain);
+
 #endif
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 605a7cfe7ca7..9cf0fecf5cb9 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1662,6 +1662,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 
 		chain->flags |= NFT_BASE_CHAIN | flags;
 		basechain->policy = NF_ACCEPT;
+		if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+		    !nft_chain_offload_priority(basechain))
+			return -EOPNOTSUPP;
+
 		flow_block_init(&basechain->flow_block);
 	} else {
 		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 64f5fd5f240e..81d636fac571 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -103,10 +103,11 @@ void nft_offload_update_dependency(struct nft_offload_ctx *ctx,
 }
 
 static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
-					 __be16 proto,
-					struct netlink_ext_ack *extack)
+					 __be16 proto, int priority,
+					 struct netlink_ext_ack *extack)
 {
 	common->protocol = proto;
+	common->prio = priority;
 	common->extack = extack;
 }
 
@@ -124,6 +125,15 @@ static int nft_setup_cb_call(struct nft_base_chain *basechain,
 	return 0;
 }
 
+u32 nft_chain_offload_priority(struct nft_base_chain *basechain)
+{
+	if (basechain->ops.priority < SHRT_MIN ||
+	    basechain->ops.priority > SHRT_MAX)
+		return 0;
+
+	return basechain->ops.priority + abs(SHRT_MIN);
+}
+
 static int nft_flow_offload_rule(struct nft_trans *trans,
 				 enum flow_cls_command command)
 {
@@ -142,7 +152,9 @@ static int nft_flow_offload_rule(struct nft_trans *trans,
 	if (flow)
 		proto = flow->proto;
 
-	nft_flow_offload_common_init(&cls_flow.common, proto, &extack);
+	nft_flow_offload_common_init(&cls_flow.common, proto,
+				     nft_chain_offload_priority(basechain),
+				     &extack);
 	cls_flow.command = command;
 	cls_flow.cookie = (unsigned long) rule;
 	if (flow)
-- 
2.11.0


^ permalink raw reply related

* [PATCH net 0/2] flow_offload hardware priority fixes
From: Pablo Neira Ayuso @ 2019-08-01 11:28 UTC (permalink / raw)
  To: netfilter-devel
  Cc: davem, netdev, jakub.kicinski, marcelo.leitner, jiri, wenxu,
	saeedm, paulb, gerlitz.or

Hi,

This patchset contains two updates for the flow_offload users:

1) Pass the major tc priority to drivers so they do not have to
   lshift it. This is a preparation patch for the fix coming in
   patch #2.

2) Set the hardware priority from the netfilter basechain priority,
   some drivers break when using the existing hardware priority
   number that is set to zero.

Please, apply, thank you.

Pablo Neira Ayuso (2):
  net: sched: use major priority number as hardware priority
  netfilter: nf_tables: map basechain priority to hardware priority

 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c      |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c   |  2 +-
 drivers/net/ethernet/mscc/ocelot_flower.c            | 12 +++---------
 drivers/net/ethernet/netronome/nfp/flower/qos_conf.c |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c      |  2 +-
 include/net/netfilter/nf_tables_offload.h            |  2 ++
 include/net/pkt_cls.h                                |  2 +-
 net/netfilter/nf_tables_api.c                        |  4 ++++
 net/netfilter/nf_tables_offload.c                    | 18 +++++++++++++++---
 9 files changed, 29 insertions(+), 17 deletions(-)

-- 
2.11.0


^ permalink raw reply

* Re: [PATCH] net: sctp: Rename fallthrough label to unhandled
From: Neil Horman @ 2019-08-01 10:50 UTC (permalink / raw)
  To: Joe Perches
  Cc: Vlad Yasevich, Marcelo Ricardo Leitner, David S. Miller,
	linux-sctp, netdev, linux-kernel
In-Reply-To: <d68403ce9f7e8a68fff09d6b17e5d1327eb1e12d.camel@perches.com>

On Wed, Jul 31, 2019 at 03:23:46PM -0700, Joe Perches wrote:
> On Wed, 2019-07-31 at 16:58 -0400, Neil Horman wrote:
> > On Wed, Jul 31, 2019 at 09:35:31AM -0700, Joe Perches wrote:
> > > On Wed, 2019-07-31 at 08:16 -0400, Neil Horman wrote:
> > > > On Wed, Jul 31, 2019 at 04:32:43AM -0700, Joe Perches wrote:
> > > > > On Wed, 2019-07-31 at 07:19 -0400, Neil Horman wrote:
> > > > > > On Tue, Jul 30, 2019 at 10:04:37PM -0700, Joe Perches wrote:
> > > > > > > fallthrough may become a pseudo reserved keyword so this only use of
> > > > > > > fallthrough is better renamed to allow it.
> > > > > > > 
> > > > > > > Signed-off-by: Joe Perches <joe@perches.com>
> > > > > > Are you referring to the __attribute__((fallthrough)) statement that gcc
> > > > > > supports?  If so the compiler should by all rights be able to differentiate
> > > > > > between a null statement attribute and a explicit goto and label without the
> > > > > > need for renaming here.  Or are you referring to something else?
> > > > > 
> > > > > Hi.
> > > > > 
> > > > > I sent after this a patch that adds
> > > > > 
> > > > > # define fallthrough                    __attribute__((__fallthrough__))
> > > > > 
> > > > > https://lore.kernel.org/patchwork/patch/1108577/
> > > > > 
> > > > > So this rename is a prerequisite to adding this #define.
> > > > > 
> > > > why not just define __fallthrough instead, like we do for all the other
> > > > attributes we alias (i.e. __read_mostly, __protected_by, __unused, __exception,
> > > > etc)
> > > 
> > > Because it's not as intelligible when used as a statement.
> > I think thats somewhat debatable.  __fallthrough to me looks like an internal
> > macro, whereas fallthrough looks like a comment someone forgot to /* */
> 
> 
> I'd rather see:
> 
> 	switch (foo) {
> 	case FOO:
> 		bar |= baz;
> 		fallthrough;
> 	case BAR:
> 		bar |= qux;
> 		break;
> 	default:
> 		error();
> 	}
> 
> than
> 
> 	switch (foo) {
> 	case FOO:
> 		bar |= baz;
> 		__fallthrough;
> 	case BAR:
> 		bar |= qux;
> 		break;
> 	default:
> 		error();
> 	}
> 
> or esoecially
> 
> 	switch (foo) {
> 	case FOO:
> 		bar |= baz;
> 		/* fallthrough
> */;
> 	case BAR:
> 		bar |= qux;
> 		break;
> 	default:
> 		error();
> 	}
> 
> but <shrug>, bikeshed ahoy!...
You can say that if you want, but you made the point that your think the macro
as you have written is more readable.  You example illustrates though that /*
fallthrough */ is a pretty common comment, and not prefixing it makes it look
like someone didn't add a comment that they meant to.  The __ prefix is standard
practice for defining macros to attributes (212 instances of it by my count).  I
don't mind rewriting the goto labels at all, but I think consistency is
valuable.

Neil

> 
> 
> 

^ permalink raw reply

* Re: [PATCH v4 1/5] vsock/virtio: limit the memory used per-socket
From: Stefano Garzarella @ 2019-08-01 10:47 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, linux-kernel, Stefan Hajnoczi, David S. Miller,
	virtualization, Jason Wang, kvm
In-Reply-To: <20190730163807-mutt-send-email-mst@kernel.org>

On Tue, Jul 30, 2019 at 04:42:25PM -0400, Michael S. Tsirkin wrote:
> On Tue, Jul 30, 2019 at 11:35:39AM +0200, Stefano Garzarella wrote:

(...)

> > 
> > The problem here is the compatibility. Before this series virtio-vsock
> > and vhost-vsock modules had the RX buffer size hard-coded
> > (VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE = 4K). So, if we send a buffer smaller
> > of 4K, there might be issues.
> 
> Shouldn't be if they are following the spec. If not let's fix
> the broken parts.
> 
> > 
> > Maybe it is the time to add add 'features' to virtio-vsock device.
> > 
> > Thanks,
> > Stefano
> 
> Why would a remote care about buffer sizes?
> 
> Let's first see what the issues are. If they exist
> we can either fix the bugs, or code the bug as a feature in spec.
> 

The vhost_transport '.stream_enqueue' callback
[virtio_transport_stream_enqueue()] calls the virtio_transport_send_pkt_info(),
passing the user message. This function allocates a new packet, copying
the user message, but (before this series) it limits the packet size to
the VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (4K):

static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
					  struct virtio_vsock_pkt_info *info)
{
 ...
	/* we can send less than pkt_len bytes */
	if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
		pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;

	/* virtio_transport_get_credit might return less than pkt_len credit */
	pkt_len = virtio_transport_get_credit(vvs, pkt_len);

	/* Do not send zero length OP_RW pkt */
	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
		return pkt_len;
 ...
}

then it queues the packet for the TX worker calling .send_pkt()
[vhost_transport_send_pkt() in the vhost_transport case]

The main function executed by the TX worker is
vhost_transport_do_send_pkt() that picks up a buffer from the virtqueue
and it tries to copy the packet (up to 4K) on it.  If the buffer
allocated from the guest will be smaller then 4K, I think here it will
be discarded with an error:

static void
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
				struct vhost_virtqueue *vq)
{
 ...
		nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
		if (nbytes != pkt->len) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Faulted on copying pkt buf\n");
			break;
		}
 ...
}


This series changes this behavior since now we will split the packet in
vhost_transport_do_send_pkt() depending on the buffer found in the
virtqueue.

We didn't change the buffer size in this series, so we still backward
compatible, but if we will use buffers smaller than 4K, we should
encounter the error described above.

How do you suggest we proceed if we want to change the buffer size?
Maybe adding a feature to "support any buffer size"?

Thanks,
Stefano

^ permalink raw reply

* Re: [PATCH net] ibmveth: use net_err_ratelimited when set_multicast_list
From: Joe Perches @ 2019-08-01 10:28 UTC (permalink / raw)
  To: Hangbin Liu, netdev; +Cc: Thomas Falcon, David S . Miller
In-Reply-To: <20190801090347.8258-1-liuhangbin@gmail.com>

On Thu, 2019-08-01 at 17:03 +0800, Hangbin Liu wrote:
> When setting lots of multicast list on ibmveth, e.g. add 3000 membership on a
> multicast group, the following error message flushes our log file
> 
> 8507    [  901.478251] ibmveth 30000003 env3: h_multicast_ctrl rc=4 when adding an entry to the filter table
> ...
> 1718386 [ 5636.808658] ibmveth 30000003 env3: h_multicast_ctrl rc=4 when adding an entry to the filter table
> 
> We got 1.5 million lines of messages in 1.3h. Let's replace netdev_err() by
> net_err_ratelimited() to avoid this issue. I don't use netdev_err_once() in
> case h_multicast_ctrl() return different lpar_rc types.
> 
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
>  drivers/net/ethernet/ibm/ibmveth.c | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
> index d654c234aaf7..3b9406a4ca92 100644
> --- a/drivers/net/ethernet/ibm/ibmveth.c
> +++ b/drivers/net/ethernet/ibm/ibmveth.c
> @@ -1446,9 +1446,11 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
>  						   IbmVethMcastAddFilter,
>  						   mcast_addr);
>  			if (lpar_rc != H_SUCCESS) {
> -				netdev_err(netdev, "h_multicast_ctrl rc=%ld "
> -					   "when adding an entry to the filter "
> -					   "table\n", lpar_rc);
> +				net_err_ratelimited("%s %s%s: h_multicast_ctrl rc=%ld when adding an entry to the filter table\n",
> +						    ibmveth_driver_name,
> +						    netdev_name(netdev),
> +						    netdev_reg_state(netdev),
> +						    lpar_rc);

Perhaps add the netdev_<level>_ratelimited variants and use that instead

Somthing like:

---
 include/linux/netdevice.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 88292953aa6f..37116019e14f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4737,6 +4737,60 @@ do {								\
 #define netdev_info_once(dev, fmt, ...) \
 	netdev_level_once(KERN_INFO, dev, fmt, ##__VA_ARGS__)
 
+#define netdev_level_ratelimited(netdev_level, dev, fmt, ...)		\
+do {									\
+	static DEFINE_RATELIMIT_STATE(_rs,				\
+				      DEFAULT_RATELIMIT_INTERVAL,	\
+				      DEFAULT_RATELIMIT_BURST);		\
+	if (__ratelimit(&_rs))						\
+		netdev_level(dev, fmt, ##__VA_ARGS__);			\
+} while (0)
+
+#define netdev_emerg_ratelimited(dev, fmt, ...)				\
+	netdev_level_ratelimited(netdev_emerg, dev, fmt, ##__VA_ARGS__)
+#define netdev_alert_ratelimited(dev, fmt, ...)				\
+	netdev_level_ratelimited(netdev_alert, dev, fmt, ##__VA_ARGS__)
+#define netdev_crit_ratelimited(dev, fmt, ...)				\
+	netdev_level_ratelimited(netdev_crit, dev, fmt, ##__VA_ARGS__)
+#define netdev_err_ratelimited(dev, fmt, ...)				\
+	netdev_level_ratelimited(netdev_err, dev, fmt, ##__VA_ARGS__)
+#define netdev_warn_ratelimited(dev, fmt, ...)				\
+	netdev_level_ratelimited(netdev_warn, dev, fmt, ##__VA_ARGS__)
+#define netdev_notice_ratelimited(dev, fmt, ...)			\
+	netdev_level_ratelimited(netdev_notice, dev, fmt, ##__VA_ARGS__)
+#define netdev_info_ratelimited(dev, fmt, ...)				\
+	netdev_level_ratelimited(netdev_info, dev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define netdev_dbg_ratelimited(dev, fmt, ...)				\
+do {									\
+	static DEFINE_RATELIMIT_STATE(_rs,				\
+				      DEFAULT_RATELIMIT_INTERVAL,	\
+				      DEFAULT_RATELIMIT_BURST);		\
+	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
+	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&				\
+	    __ratelimit(&_rs))						\
+		__dynamic_netdev_dbg(&descriptor, dev, fmt,		\
+				     ##__VA_ARGS__);			\
+} while (0)
+#elif defined(DEBUG)
+#define netdev_dbg_ratelimited(dev, fmt, ...)				\
+do {									\
+	static DEFINE_RATELIMIT_STATE(_rs,				\
+				      DEFAULT_RATELIMIT_INTERVAL,	\
+				      DEFAULT_RATELIMIT_BURST);		\
+	if (__ratelimit(&_rs))						\
+		netdev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);	\
+} while (0)
+#else
+#define netdev_dbg_ratelimited(dev, fmt, ...)				\
+do {									\
+	if (0)								\
+		netdev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);	\
+} while (0)
+#endif
+
 #define MODULE_ALIAS_NETDEV(device) \
 	MODULE_ALIAS("netdev-" device)
 



^ permalink raw reply related

* [PATCH iproute2-next] ip tunnel: add json output
From: Andrea Claudi @ 2019-08-01 10:12 UTC (permalink / raw)
  To: netdev; +Cc: stephen, dsahern

Add json support on iptunnel and ip6tunnel.
The plain text output format should remain the same.

Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
---
 ip/ip6tunnel.c | 82 +++++++++++++++++++++++++++++++--------------
 ip/iptunnel.c  | 90 +++++++++++++++++++++++++++++++++-----------------
 ip/tunnel.c    | 42 +++++++++++++++++------
 3 files changed, 148 insertions(+), 66 deletions(-)

diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c
index d7684a673fdc4..f2b9710c1320f 100644
--- a/ip/ip6tunnel.c
+++ b/ip/ip6tunnel.c
@@ -71,57 +71,90 @@ static void usage(void)
 static void print_tunnel(const void *t)
 {
 	const struct ip6_tnl_parm2 *p = t;
-	char s1[1024];
-	char s2[1024];
+	SPRINT_BUF(b1);
 
 	/* Do not use format_host() for local addr,
 	 * symbolic name will not be useful.
 	 */
-	printf("%s: %s/ipv6 remote %s local %s",
-	       p->name,
-	       tnl_strproto(p->proto),
-	       format_host_r(AF_INET6, 16, &p->raddr, s1, sizeof(s1)),
-	       rt_addr_n2a_r(AF_INET6, 16, &p->laddr, s2, sizeof(s2)));
+	open_json_object(NULL);
+	print_string(PRINT_ANY, "ifname", "%s: ", p->name);
+	snprintf(b1, sizeof(b1), "%s/ipv6", tnl_strproto(p->proto));
+	print_string(PRINT_ANY, "mode", "%s ", b1);
+	print_string(PRINT_ANY,
+		     "remote",
+		     "remote %s ",
+		     format_host_r(AF_INET6, 16, &p->raddr, b1, sizeof(b1)));
+	print_string(PRINT_ANY,
+		     "local",
+		     "local %s",
+		     rt_addr_n2a_r(AF_INET6, 16, &p->laddr, b1, sizeof(b1)));
+
 	if (p->link) {
 		const char *n = ll_index_to_name(p->link);
 
 		if (n)
-			printf(" dev %s", n);
+			print_string(PRINT_ANY, "link", " dev %s", n);
 	}
 
 	if (p->flags & IP6_TNL_F_IGN_ENCAP_LIMIT)
-		printf(" encaplimit none");
+		print_bool(PRINT_ANY,
+			   "ip6_tnl_f_ign_encap_limit",
+			   " encaplimit none",
+			   true);
 	else
-		printf(" encaplimit %u", p->encap_limit);
+		print_uint(PRINT_ANY,
+			   "encap_limit",
+			   " encaplimit %u",
+			   p->encap_limit);
 
 	if (p->hop_limit)
-		printf(" hoplimit %u", p->hop_limit);
+		print_uint(PRINT_ANY, "hoplimit", " hoplimit %u", p->hop_limit);
 	else
-		printf(" hoplimit inherit");
-
-	if (p->flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		printf(" tclass inherit");
-	else {
+		print_string(PRINT_FP, "hoplimit", " hoplimit %s", "inherit");
+
+	if (p->flags & IP6_TNL_F_USE_ORIG_TCLASS) {
+		print_bool(PRINT_ANY,
+			   "ip6_tnl_f_use_orig_tclass",
+			   " tclass inherit",
+			   true);
+	} else {
 		__u32 val = ntohl(p->flowinfo & IP6_FLOWINFO_TCLASS);
 
-		printf(" tclass 0x%02x", (__u8)(val >> 20));
+		snprintf(b1, sizeof(b1), "0x%02x", (__u8)(val >> 20));
+		print_string(PRINT_ANY, "tclass", " tclass %s", b1);
 	}
 
-	if (p->flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
-		printf(" flowlabel inherit");
-	else
-		printf(" flowlabel 0x%05x", ntohl(p->flowinfo & IP6_FLOWINFO_FLOWLABEL));
+	if (p->flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) {
+		print_bool(PRINT_ANY,
+			   "ip6_tnl_f_use_orig_flowlabel",
+			   " flowlabel inherit",
+			   true);
+	} else {
+		__u32 val = ntohl(p->flowinfo & IP6_FLOWINFO_FLOWLABEL);
 
-	printf(" (flowinfo 0x%08x)", ntohl(p->flowinfo));
+		snprintf(b1, sizeof(b1), "0x%05x", val);
+		print_string(PRINT_ANY, "flowlabel", " flowlabel %s", b1);
+	}
+
+	snprintf(b1, sizeof(b1), "0x%08x", ntohl(p->flowinfo));
+	print_string(PRINT_ANY, "flowinfo", " (flowinfo %s)", b1);
 
 	if (p->flags & IP6_TNL_F_RCV_DSCP_COPY)
-		printf(" dscp inherit");
+		print_bool(PRINT_ANY,
+			   "ip6_tnl_f_rcv_dscp_copy",
+			   " dscp inherit",
+			   true);
 
 	if (p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE)
-		printf(" allow-localremote");
+		print_bool(PRINT_ANY,
+			   "ip6_tnl_f_allow_local_remote",
+			   " allow-localremote",
+			   true);
 
 	tnl_print_gre_flags(p->proto, p->i_flags, p->o_flags,
 			    p->i_key, p->o_key);
+
+	close_json_object();
 }
 
 static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
@@ -357,7 +390,6 @@ static int do_show(int argc, char **argv)
 		return -1;
 
 	print_tunnel(&p);
-	fputc('\n', stdout);
 	return 0;
 }
 
diff --git a/ip/iptunnel.c b/ip/iptunnel.c
index 66929e75c7448..84f708c727dc7 100644
--- a/ip/iptunnel.c
+++ b/ip/iptunnel.c
@@ -289,17 +289,23 @@ static void print_tunnel(const void *t)
 {
 	const struct ip_tunnel_parm *p = t;
 	struct ip_tunnel_6rd ip6rd = {};
-	char s1[1024];
-	char s2[1024];
+	SPRINT_BUF(b1);
 
 	/* Do not use format_host() for local addr,
 	 * symbolic name will not be useful.
 	 */
-	printf("%s: %s/ip remote %s local %s",
-	       p->name,
-	       tnl_strproto(p->iph.protocol),
-	       p->iph.daddr ? format_host_r(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any",
-	       p->iph.saddr ? rt_addr_n2a_r(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any");
+	open_json_object(NULL);
+	print_string(PRINT_ANY, "ifname", "%s: ", p->name);
+	snprintf(b1, sizeof(b1), "%s/ip", tnl_strproto(p->iph.protocol));
+	print_string(PRINT_ANY, "mode", "%s ", b1);
+	print_string(PRINT_ANY, "remote", "remote %s ",
+		     p->iph.daddr || is_json_context()
+			? format_host_r(AF_INET, 4, &p->iph.daddr, b1, sizeof(b1))
+			: "any");
+	print_string(PRINT_ANY, "local", "local %s",
+		     p->iph.saddr || is_json_context()
+			? rt_addr_n2a_r(AF_INET, 4, &p->iph.saddr, b1, sizeof(b1))
+			: "any");
 
 	if (p->iph.protocol == IPPROTO_IPV6 && (p->i_flags & SIT_ISATAP)) {
 		struct ip_tunnel_prl prl[16] = {};
@@ -308,54 +314,78 @@ static void print_tunnel(const void *t)
 		prl[0].datalen = sizeof(prl) - sizeof(prl[0]);
 		prl[0].addr = htonl(INADDR_ANY);
 
-		if (!tnl_prl_ioctl(SIOCGETPRL, p->name, prl))
+		if (!tnl_prl_ioctl(SIOCGETPRL, p->name, prl)) {
 			for (i = 1; i < ARRAY_SIZE(prl); i++) {
-				if (prl[i].addr != htonl(INADDR_ANY)) {
-					printf(" %s %s ",
-					       (prl[i].flags & PRL_DEFAULT) ? "pdr" : "pr",
-					       format_host(AF_INET, 4, &prl[i].addr));
-				}
+				if (prl[i].addr == htonl(INADDR_ANY))
+					continue;
+				if (prl[i].flags & PRL_DEFAULT)
+					print_string(PRINT_ANY,
+						     "pdr",
+						     " pdr %s",
+						     format_host(AF_INET, 4, &prl[i].addr));
+				else
+					print_string(PRINT_ANY,
+						     "pr",
+						     " pr %s",
+						     format_host(AF_INET, 4, &prl[i].addr));
 			}
+		}
 	}
 
 	if (p->link) {
 		const char *n = ll_index_to_name(p->link);
 
 		if (n)
-			printf(" dev %s", n);
+			print_string(PRINT_ANY, "dev", " dev %s", n);
 	}
 
 	if (p->iph.ttl)
-		printf(" ttl %u", p->iph.ttl);
+		print_uint(PRINT_ANY, "ttl", " ttl %u", p->iph.ttl);
 	else
-		printf(" ttl inherit");
+		print_string(PRINT_FP, "ttl", " ttl %s", "inherit");
 
 	if (p->iph.tos) {
-		SPRINT_BUF(b1);
-		printf(" tos");
-		if (p->iph.tos & 1)
-			printf(" inherit");
-		if (p->iph.tos & ~1)
-			printf("%c%s ", p->iph.tos & 1 ? '/' : ' ',
-			       rtnl_dsfield_n2a(p->iph.tos & ~1, b1, sizeof(b1)));
+		SPRINT_BUF(b2);
+
+		if (p->iph.tos != 1) {
+			if (!is_json_context() && p->iph.tos & 1)
+				snprintf(b2, sizeof(b2), "%s%s",
+					 p->iph.tos & 1 ? "inherit/" : "",
+					 rtnl_dsfield_n2a(p->iph.tos & ~1, b1, sizeof(b1)));
+			else
+				snprintf(b2, sizeof(b2), "%s",
+					 rtnl_dsfield_n2a(p->iph.tos, b1, sizeof(b1)));
+			print_string(PRINT_ANY, "tos", " tos %s", b2);
+		} else {
+			print_string(PRINT_FP, NULL, " tos %s", "inherit");
+		}
 	}
 
 	if (!(p->iph.frag_off & htons(IP_DF)))
-		printf(" nopmtudisc");
+		print_bool(PRINT_ANY, "nopmtudisc", " nopmtudisc", true);
 
 	if (p->iph.protocol == IPPROTO_IPV6 && !tnl_ioctl_get_6rd(p->name, &ip6rd) && ip6rd.prefixlen) {
-		printf(" 6rd-prefix %s/%u",
-		       inet_ntop(AF_INET6, &ip6rd.prefix, s1, sizeof(s1)),
-		       ip6rd.prefixlen);
+		print_string(PRINT_ANY,
+			     "6rd-prefix",
+			     " 6rd-prefix %s",
+			     inet_ntop(AF_INET6, &ip6rd.prefix, b1, sizeof(b1)));
+		print_uint(PRINT_ANY, "6rd-prefixlen", "/%u", ip6rd.prefixlen);
 		if (ip6rd.relay_prefix) {
-			printf(" 6rd-relay_prefix %s/%u",
-			       format_host(AF_INET, 4, &ip6rd.relay_prefix),
-			       ip6rd.relay_prefixlen);
+			print_string(PRINT_ANY,
+				     "6rd-relay_prefix",
+				     " 6rd-relay_prefix %s",
+				     format_host(AF_INET, 4, &ip6rd.relay_prefix));
+			print_uint(PRINT_ANY,
+				   "6rd-relay_prefixlen",
+				   "/%u",
+				   ip6rd.relay_prefixlen);
 		}
 	}
 
 	tnl_print_gre_flags(p->iph.protocol, p->i_flags, p->o_flags,
 			    p->i_key, p->o_key);
+
+	close_json_object();
 }
 
 
diff --git a/ip/tunnel.c b/ip/tunnel.c
index 41b0ef3165fdc..c2c0b9bc94356 100644
--- a/ip/tunnel.c
+++ b/ip/tunnel.c
@@ -314,24 +314,42 @@ void tnl_print_gre_flags(__u8 proto,
 {
 	if ((i_flags & GRE_KEY) && (o_flags & GRE_KEY) &&
 	    o_key == i_key) {
-		printf(" key %u", ntohl(i_key));
+		print_uint(PRINT_ANY, "key", " key %u", ntohl(i_key));
 	} else {
 		if (i_flags & GRE_KEY)
-			printf(" ikey %u", ntohl(i_key));
+			print_uint(PRINT_ANY, "ikey", " ikey %u", ntohl(i_key));
 		if (o_flags & GRE_KEY)
-			printf(" okey %u", ntohl(o_key));
+			print_uint(PRINT_ANY, "okey", " okey %u", ntohl(o_key));
 	}
 
+	open_json_array(PRINT_JSON, "flags");
 	if (proto == IPPROTO_GRE) {
-		if (i_flags & GRE_SEQ)
-			printf("%s  Drop packets out of sequence.", _SL_);
-		if (i_flags & GRE_CSUM)
-			printf("%s  Checksum in received packet is required.", _SL_);
-		if (o_flags & GRE_SEQ)
-			printf("%s  Sequence packets on output.", _SL_);
-		if (o_flags & GRE_CSUM)
-			printf("%s  Checksum output packets.", _SL_);
+		if (i_flags & GRE_SEQ) {
+			if (is_json_context())
+				print_string(PRINT_JSON, NULL, "%s", "rx_drop_ooseq");
+			else
+				printf("%s  Drop packets out of sequence.", _SL_);
+		}
+		if (i_flags & GRE_CSUM) {
+			if (is_json_context())
+				print_string(PRINT_JSON, NULL, "%s", "rx_csum");
+			else
+				printf("%s  Checksum in received packet is required.", _SL_);
+		}
+		if (o_flags & GRE_SEQ) {
+			if (is_json_context())
+				print_string(PRINT_JSON, NULL, "%s", "tx_seq");
+			else
+				printf("%s  Sequence packets on output.", _SL_);
+		}
+		if (o_flags & GRE_CSUM) {
+			if (is_json_context())
+				print_string(PRINT_JSON, NULL, "%s", "tx_csum");
+			else
+				printf("%s  Checksum output packets.", _SL_);
+		}
 	}
+	close_json_array(PRINT_JSON, NULL);
 }
 
 static void tnl_print_stats(const struct rtnl_link_stats64 *s)
@@ -417,6 +435,7 @@ static int print_nlmsg_tunnel(struct nlmsghdr *n, void *arg)
 
 int do_tunnels_list(struct tnl_print_nlmsg_info *info)
 {
+	new_json_obj(json);
 	if (rtnl_linkdump_req(&rth, preferred_family) < 0) {
 		perror("Cannot send dump request\n");
 		return -1;
@@ -426,6 +445,7 @@ int do_tunnels_list(struct tnl_print_nlmsg_info *info)
 		fprintf(stderr, "Dump terminated\n");
 		return -1;
 	}
+	delete_json_obj();
 
 	return 0;
 }
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH bpf-next v4 07/11] mlx5e: modify driver for handling offsets
From: Maxim Mikityanskiy @ 2019-08-01 10:05 UTC (permalink / raw)
  To: Kevin Laatz
  Cc: netdev@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
	bjorn.topel@intel.com, magnus.karlsson@intel.com,
	jakub.kicinski@netronome.com, jonathan.lemon@gmail.com,
	Saeed Mahameed, stephen@networkplumber.org,
	bruce.richardson@intel.com, ciara.loftus@intel.com,
	bpf@vger.kernel.org, intel-wired-lan@lists.osuosl.org
In-Reply-To: <20190730085400.10376-8-kevin.laatz@intel.com>

On 2019-07-30 11:53, Kevin Laatz wrote:
> With the addition of the unaligned chunks option, we need to make sure we
> handle the offsets accordingly based on the mode we are currently running
> in. This patch modifies the driver to appropriately mask the address for
> each case.
> 
> Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>

Please note that this patch doesn't actually add the support for the new 
feature, because the validation checks in mlx5e_rx_get_linear_frag_sz 
and mlx5e_validate_xsk_param need to be relaxed. Currently the frame 
size of PAGE_SIZE is forced, and the fragment size is increased to 
PAGE_SIZE in case of XDP (including XSK).

After making the changes required to permit frame sizes smaller than 
PAGE_SIZE, our Striding RQ feature will be used in a way we haven't used 
it before, so we need to verify with the hardware team that this usage 
is legitimate.

> ---
> v3:
>    - Use new helper function to handle offset
> 
> v4:
>    - fixed headroom addition to handle. Using xsk_umem_adjust_headroom()
>      now.
> ---
>   drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c    | 8 ++++++--
>   drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 3 ++-
>   2 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> index b0b982cf69bb..d5245893d2c8 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> @@ -122,6 +122,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
>   		      void *va, u16 *rx_headroom, u32 *len, bool xsk)
>   {
>   	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
> +	struct xdp_umem *umem = rq->umem;
>   	struct xdp_buff xdp;
>   	u32 act;
>   	int err;
> @@ -138,8 +139,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
>   	xdp.rxq = &rq->xdp_rxq;
>   
>   	act = bpf_prog_run_xdp(prog, &xdp);
> -	if (xsk)
> -		xdp.handle += xdp.data - xdp.data_hard_start;
> +	if (xsk) {
> +		u64 off = xdp.data - xdp.data_hard_start;
> +
> +		xdp.handle = xsk_umem_handle_offset(umem, xdp.handle, off);
> +	}
>   	switch (act) {
>   	case XDP_PASS:
>   		*rx_headroom = xdp.data - xdp.data_hard_start;
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
> index 6a55573ec8f2..7c49a66d28c9 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
> @@ -24,7 +24,8 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
>   	if (!xsk_umem_peek_addr_rq(umem, &handle))
>   		return -ENOMEM;
>   
> -	dma_info->xsk.handle = handle + rq->buff.umem_headroom;
> +	dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle,
> +						      rq->buff.umem_headroom);
>   	dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
>   
>   	/* No need to add headroom to the DMA address. In striding RQ case, we
> 


^ permalink raw reply

* Re: next-20190723: bpf/seccomp - systemd/journald issue?
From: Sedat Dilek @ 2019-08-01  9:35 UTC (permalink / raw)
  To: Yonghong Song, Josh Poimboeuf
  Cc: Alexei Starovoitov, Daniel Borkmann, Martin Lau, Song Liu,
	netdev@vger.kernel.org, bpf@vger.kernel.org, Clang-Built-Linux ML,
	Kees Cook, Nick Desaulniers, Nathan Chancellor
In-Reply-To: <CA+icZUW1YQqDjFCX5Ek100SbveX0Zevr7T5gbtdpcmZD+kCuZg@mail.gmail.com>

On Thu, Aug 1, 2019 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> Hi,
>
> just want to let you know that I did a git bisect with Linux v5.3-rc2
> (where the problem also exists) and the result (details see [1]):
>
> e55a73251da335873a6e87d68fb17e5aabb8978e is the first bad commit
> commit e55a73251da335873a6e87d68fb17e5aabb8978e
> Author: Josh Poimboeuf <jpoimboe@redhat.com>
> Date:   Thu Jun 27 20:50:47 2019 -0500
>
>     bpf: Fix ORC unwinding in non-JIT BPF code
>
>     Objtool previously ignored ___bpf_prog_run() because it didn't understand
>     the jump table.  This resulted in the ORC unwinder not being able to unwind
>     through non-JIT BPF code.
>
>     Now that objtool knows how to read jump tables, remove the whitelist and
>     annotate the jump table so objtool can recognize it.
>
>     Also add an additional "const" to the jump table definition to clarify that
>     the text pointers are constant.  Otherwise GCC sets the section writable
>     flag and the assembler spits out warnings.
>
>     Fixes: d15d356887e7 ("perf/x86: Make perf callchains work without
> CONFIG_FRAME_POINTER")
>     Reported-by: Song Liu <songliubraving@fb.com>
>     Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
>     Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>     Acked-by: Alexei Starovoitov <ast@kernel.org>
>     Cc: Peter Zijlstra <peterz@infradead.org>
>     Cc: Kairui Song <kasong@redhat.com>
>     Cc: Steven Rostedt <rostedt@goodmis.org>
>     Cc: Borislav Petkov <bp@alien8.de>
>     Cc: Daniel Borkmann <daniel@iogearbox.net>
>     Link: https://lkml.kernel.org/r/881939122b88f32be4c374d248c09d7527a87e35.1561685471.git.jpoimboe@redhat.com
>     Signed-off-by: Ingo Molnar <mingo@kernel.org>
>
> :040000 040000 4735e9d14fa416c1c361ec3923440a3d586a627d
> 31de80b85c7b0292e47a719ecb6b1a451de2f8ef M      kernel
>
> Maybe you want to look at this, too.
>
> The object files are attached in [2].
>
> Thanks,
> - Sedat -
>
> [0] https://github.com/ClangBuiltLinux/linux/issues/619
> [1] https://github.com/ClangBuiltLinux/linux/issues/619#issuecomment-517152467
> [2] https://github.com/ClangBuiltLinux/linux/issues/619#issuecomment-517159635

After reverting above commit I can boot into Linux v5.3-rc2 built with
clang-9.0.0-rc1 and lld with no issues.

- Sedat -

^ permalink raw reply

* [PATCH net-next] net/mlx5e: Allow dropping specific tunnel packets
From: xiangxia.m.yue @ 2019-08-01  8:40 UTC (permalink / raw)
  To: roid, saeedm; +Cc: netdev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

In some case, we don't want to allow specific tunnel packets
to host that can avoid to take up high CPU (e.g network attacks).
But other tunnel packets which not matched in hardware will be
sent to host too.

    $ tc filter add dev vxlan_sys_4789 \
	    protocol ip chain 0 parent ffff: prio 1 handle 1 \
	    flower dst_ip 1.1.1.100 ip_proto tcp dst_port 80 \
	    enc_dst_ip 2.2.2.100 enc_key_id 100 enc_dst_port 4789 \
	    action tunnel_key unset pipe action drop

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f3ed028..25d423e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2485,7 +2485,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
 
 	if (flow_flag_test(flow, EGRESS) &&
 	    !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
-	      (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)))
+	      (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
+	      (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
 		return false;
 
 	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v2 net-next] be2net: disable bh with spin_lock in be_process_mcc
From: Denis Kirjanov @ 2019-08-01  9:24 UTC (permalink / raw)
  To: sathya.perla, ajit.khaparde, sriharsha.basavapatna; +Cc: netdev, Denis Kirjanov

be_process_mcc() is invoked in 3 different places and
always with BHs disabled except the be_poll function
but since it's invoked from softirq with BHs
disabled it won't hurt.

v1->v2: added explanation to the patch

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c | 4 ++--
 drivers/net/ethernet/emulex/benet/be_main.c | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index ef5d61d57597..9365218f4d3b 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -550,7 +550,7 @@ int be_process_mcc(struct be_adapter *adapter)
 	int num = 0, status = 0;
 	struct be_mcc_obj *mcc_obj = &adapter->mcc_obj;
 
-	spin_lock(&adapter->mcc_cq_lock);
+	spin_lock_bh(&adapter->mcc_cq_lock);
 
 	while ((compl = be_mcc_compl_get(adapter))) {
 		if (compl->flags & CQE_FLAGS_ASYNC_MASK) {
@@ -566,7 +566,7 @@ int be_process_mcc(struct be_adapter *adapter)
 	if (num)
 		be_cq_notify(adapter, mcc_obj->cq.id, mcc_obj->rearm_cq, num);
 
-	spin_unlock(&adapter->mcc_cq_lock);
+	spin_unlock_bh(&adapter->mcc_cq_lock);
 	return status;
 }
 
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 2edb86ec9fe9..4d8e40ac66d2 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5630,9 +5630,7 @@ static void be_worker(struct work_struct *work)
 	 * mcc completions
 	 */
 	if (!netif_running(adapter->netdev)) {
-		local_bh_disable();
 		be_process_mcc(adapter);
-		local_bh_enable();
 		goto reschedule;
 	}
 
-- 
2.12.3


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox