Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH 2/7] net: dsa: mv88e6xxx: add ability to set default queue priorities per port
From: Vivien Didelot @ 2019-09-10 16:43 UTC (permalink / raw)
  To: Robert Beckett
  Cc: netdev, Robert Beckett, Andrew Lunn, Florian Fainelli,
	David S. Miller
In-Reply-To: <20190910154238.9155-3-bob.beckett@collabora.com>

Hi Robert,

On Tue, 10 Sep 2019 16:41:48 +0100, Robert Beckett <bob.beckett@collabora.com> wrote:
> +static int mv88e6xxx_set_port_defqpri(struct mv88e6xxx_chip *chip, int port)
> +{
> +	struct dsa_switch *ds = chip->ds;
> +	struct device_node *dn = ds->ports[port].dn;
> +	int err;
> +	u32 pri;
> +
> +	if (!dn || !chip->info->ops->port_set_defqpri)
> +		return 0;
> +
> +	err = of_property_read_u32(dn, "defqpri", &pri);
> +	if (err < 0)
> +		return 0;
> +
> +	return chip->info->ops->port_set_defqpri(chip, port, (u16)pri);
> +}
> +
>  static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
>  {
>  	struct dsa_switch *ds = chip->ds;
> @@ -2176,6 +2193,10 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
>  			return err;
>  	}
>  
> +	err = mv88e6xxx_set_port_defqpri(chip, port);
> +	if (err)
> +		return err;
> +
>  	/* Port Association Vector: when learning source addresses
>  	 * of packets, add the address to the address database using
>  	 * a port bitmap that has only the bit for this port set and
> @@ -3107,6 +3128,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
>  	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
>  	.port_set_ether_type = mv88e6351_port_set_ether_type,
>  	.port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
> +	.port_set_defqpri = mv88e6xxx_port_set_defqpri,

Please use a reference model, like mv88e6352_port_set_defqpri to avoid
confusion with a generic wrapper or implementation.

>  	.port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
>  	.port_pause_limit = mv88e6097_port_pause_limit,
>  	.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
> @@ -3190,6 +3212,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
>  	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
>  	.port_set_ether_type = mv88e6351_port_set_ether_type,
>  	.port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
> +	.port_set_defqpri = mv88e6xxx_port_set_defqpri,
>  	.port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
>  	.port_pause_limit = mv88e6097_port_pause_limit,
>  	.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
> @@ -3407,6 +3430,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
>  	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
>  	.port_set_ether_type = mv88e6351_port_set_ether_type,
>  	.port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
> +	.port_set_defqpri = mv88e6xxx_port_set_defqpri,
>  	.port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
>  	.port_pause_limit = mv88e6097_port_pause_limit,
>  	.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
> @@ -3750,6 +3774,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
>  	.port_set_egress_floods = mv88e6352_port_set_egress_floods,
>  	.port_set_ether_type = mv88e6351_port_set_ether_type,
>  	.port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
> +	.port_set_defqpri = mv88e6xxx_port_set_defqpri,
>  	.port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
>  	.port_pause_limit = mv88e6097_port_pause_limit,
>  	.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
> diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
> index 4646e46d47f2..2d2c24f5a79d 100644
> --- a/drivers/net/dsa/mv88e6xxx/chip.h
> +++ b/drivers/net/dsa/mv88e6xxx/chip.h
> @@ -383,6 +383,7 @@ struct mv88e6xxx_ops {
>  				   u16 etype);
>  	int (*port_set_jumbo_size)(struct mv88e6xxx_chip *chip, int port,
>  				   size_t size);
> +	int (*port_set_defqpri)(struct mv88e6xxx_chip *chip, int port, u16 pri);

The default queue priority seems to be an integer in the [0:3] range, not
a register mask or value per-se. So an unsigned int seems more appropriate.

>  
>  	int (*port_egress_rate_limiting)(struct mv88e6xxx_chip *chip, int port);
>  	int (*port_pause_limit)(struct mv88e6xxx_chip *chip, int port, u8 in,
> diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
> index 04309ef0a1cc..3a45fcd5cd9c 100644
> --- a/drivers/net/dsa/mv88e6xxx/port.c
> +++ b/drivers/net/dsa/mv88e6xxx/port.c
> @@ -1147,6 +1147,25 @@ int mv88e6165_port_set_jumbo_size(struct mv88e6xxx_chip *chip, int port,
>  	return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, reg);
>  }
>  
> +int mv88e6xxx_port_set_defqpri(struct mv88e6xxx_chip *chip, int port, u16 pri)
> +{
> +	u16 reg;
> +	int err;
> +
> +	if (pri > 3)
> +		return -EINVAL;
> +
> +	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL2, &reg);
> +	if (err)
> +		return err;
> +
> +	reg &= ~MV88E6XXX_PORT_CTL2_DEFQPRI_MASK;
> +	reg |= pri << MV88E6XXX_PORT_CTL2_DEFQPRI_SHIFT;

                      __bf_shf(MV88E6XXX_PORT_CTL2_DEFQPRI_MASK)

> +	reg |= MV88E6XXX_PORT_CTL2_USE_DEFQPRI;
> +
> +	return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, reg);
> +}
> +
>  /* Offset 0x09: Port Rate Control */
>  
>  int mv88e6095_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port)
> diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
> index 8d5a6cd6fb19..03884bbaa762 100644
> --- a/drivers/net/dsa/mv88e6xxx/port.h
> +++ b/drivers/net/dsa/mv88e6xxx/port.h
> @@ -197,6 +197,9 @@
>  #define MV88E6XXX_PORT_CTL2_DEFAULT_FORWARD		0x0040
>  #define MV88E6XXX_PORT_CTL2_EGRESS_MONITOR		0x0020
>  #define MV88E6XXX_PORT_CTL2_INGRESS_MONITOR		0x0010
> +#define MV88E6XXX_PORT_CTL2_USE_DEFQPRI		0x0008
> +#define MV88E6XXX_PORT_CTL2_DEFQPRI_MASK		0x0006
> +#define MV88E6XXX_PORT_CTL2_DEFQPRI_SHIFT		1

No shift macro needed, MV88E6XXX_PORT_CTL2_DEFQPRI_MASK is enough.

>  #define MV88E6095_PORT_CTL2_CPU_PORT_MASK		0x000f
>  
>  /* Offset 0x09: Egress Rate Control */
> @@ -326,6 +329,7 @@ int mv88e6xxx_port_set_message_port(struct mv88e6xxx_chip *chip, int port,
>  				    bool message_port);
>  int mv88e6165_port_set_jumbo_size(struct mv88e6xxx_chip *chip, int port,
>  				  size_t size);
> +int mv88e6xxx_port_set_defqpri(struct mv88e6xxx_chip *chip, int port, u16 pri);
>  int mv88e6095_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port);
>  int mv88e6097_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port);
>  int mv88e6097_port_pause_limit(struct mv88e6xxx_chip *chip, int port, u8 in,

Thanks,

	Vivien

^ permalink raw reply

* Re: [PATCH 3/7] dt-bindings: mv88e6xxx: add ability to set default queue priorities per port
From: Florian Fainelli @ 2019-09-10 16:42 UTC (permalink / raw)
  To: Robert Beckett, netdev
  Cc: Andrew Lunn, Vivien Didelot, David S. Miller, Rob Herring,
	Mark Rutland, devicetree, Jiri Pirko, Ido Schimmel
In-Reply-To: <20190910154238.9155-4-bob.beckett@collabora.com>

On 9/10/19 8:41 AM, Robert Beckett wrote:
> Document a new setting for Marvell switch chips to set the default queue
> priorities per port.
> 
> Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
> ---
>  Documentation/devicetree/bindings/net/dsa/marvell.txt | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
> index 6f9538974bb9..e097c3c52eac 100644
> --- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
> +++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
> @@ -47,6 +47,10 @@ Optional properties:
>  			  bus. The node must contains a compatible string of
>  			  "marvell,mv88e6xxx-mdio-external"
>  
> +Optional properties for ports:
> +- defqpri=<n>		: Enforced default queue priority for the given port.
> +			  Valid range is 0..3

This is a vendor specific driver/property,
marvell,default-queue-priority (why be cheapskate on words) would be
more readable. But still, I have some more fundamental issues with the
general approach, see my response in the cover letter.
-- 
Florian

^ permalink raw reply

* Re: VRF Issue Since kernel 5
From: David Ahern @ 2019-09-10 16:39 UTC (permalink / raw)
  To: Gowen, netdev@vger.kernel.org
In-Reply-To: <CWLP265MB1554308A1373D9ECE68CB854FDB70@CWLP265MB1554.GBRP265.PROD.OUTLOOK.COM>

On 9/9/19 8:46 AM, Gowen wrote:
> 
> I can run:
> 
> 
> Admin@NETM06:~$ host www.google.co.uk
> www.google.co.uk has address 172.217.169.3
> www.google.co.uk has IPv6 address 2a00:1450:4009:80d::2003
> 
> 
> but I get a timeout for:
> 
> 
> sudo ip vrf  exec mgmt-vrf host www.google.co.uk

sudo perf record -e fib:*  ip vrf  exec mgmt-vrf host www.google.co.uk
sudo perf script

I am guessing the table is wrong for your setup, but maybe the output
(or ordering of events) sheds some light on the problem.

^ permalink raw reply

* Re: VRF Issue Since kernel 5
From: David Ahern @ 2019-09-10 16:36 UTC (permalink / raw)
  To: Alexis Bauvin, Gowen; +Cc: netdev@vger.kernel.org
In-Reply-To: <7CAF2F23-5D88-4BE7-B703-06B71D1EDD11@online.net>

On 9/9/19 1:01 PM, Alexis Bauvin wrote:
> Could you try swapping the local and l3mdev rules?
> 
> `ip rule del pref 0; ip rule add from all lookup local pref 1001`

yes, the rules should be re-ordered so that local rule is after l3mdev
rule (VRF is implemented as policy routing). In general, I would reverse
the order of those commands to ensure no breakage.

Also, 5.0 I think it was (too many kernel versions) added a new l3mdev
sysctl (raw_l3mdev_accept). Check all 3 of them and make sure they are
set properly for your use case.

These slides do not cover 5.0 changes but are still the best collection
of notes on VRF:
http://schd.ws/hosted_files/ossna2017/fe/vrf-tutorial-oss.pdf

^ permalink raw reply

* [net-next 00/14][pull request] Intel Wired LAN Driver Updates 2019-09-10
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, nhorman, sassmann

This series contains updates to i40e, ixgbe/vf and iavf.

Wenwen Wang fixes a potential memory leak where 3 allocated variables
are not properly cleaned up on failure for ixgbe.

Stefan Assmann fixes a potential kernel panic found when repeatedly
spawning and destroying VFs in i40e when a NULL pointer is dereferenced
due to a race condition.  Fixed up the i40e driver to clear the
__I40E_VIRTCHNL_OP_PENDING bit before returning after an invalid
minimum transmit rate is requested.  Updates the iavf driver to only
apply the MAC address change when the PF ACK's the requested change.

Tonghao Zhang updates ixgbe to use the skb_get_queue_mapping() API call
instead of the driver accessing the queue mapping directly.

Jake updates i40e to use ktime_get_real_ts64() instead of
ktime_to_timespec64().  Removes the define for bit 0x0001 for cloud
filters, since it is a reserved bit and not a valid type.  Also added
code comments to clearly state which bits are reserved and should not be
used or defined for cloud filter adminq command.  Clarify the macros
used to specify the cloud filter fields are individual bits, so use the
BIT() macro.

Aleksandr fixes up the print_link_message() to include the "negotiated"
FEC status for i40e.

Czeslaw also adds additional log message for devices without FEC in the
print_link_message() for i40e.

Alex fixes up the adaptive ITR scheme for ixgbe which could result in a
value that was either 0 or something less than 10 which was causing
issues with hardware features, like RSC, that do not function well with
ITR values that low.

Colin Ian King reduces the object code size by making the array API
static constant.

Magnus fixes a potential receive buffer starvation issue for AF_XDP by
kicking the NAPI context of any queue with an attached AF_XDP zero-copy
socket.

The following are changes since commit c21815f1c199a2ffb77aa862206b0f8d94263d14:
  net/mlx4_en: ethtool: make array modes static const, makes object smaller
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 40GbE

Aleksandr Loktionov (1):
  i40e: fix missed "Negotiated" string in i40e_print_link_message()

Alexander Duyck (1):
  ixgbe: Prevent u8 wrapping of ITR value to something less than 10us

Colin Ian King (1):
  net/ixgbevf: make array api static const, makes object smaller

Czeslaw Zagorski (1):
  i40e: Fix message for other card without FEC.

Jacob Keller (4):
  i40e: use ktime_get_real_ts64 instead of ktime_to_timespec64
  i40e: remove I40E_AQC_ADD_CLOUD_FILTER_OIP
  i40e: mark additional missing bits as reserved
  i40e: use BIT macro to specify the cloud filter field flags

Magnus Karlsson (1):
  i40e: fix potential RX buffer starvation for AF_XDP

Stefan Assmann (3):
  i40e: check __I40E_VF_DISABLE bit in i40e_sync_filters_subtask
  i40e: clear __I40E_VIRTCHNL_OP_PENDING on invalid min Tx rate
  iavf: fix MAC address setting for VFs when filter is rejected

Tonghao Zhang (1):
  ixgbe: use skb_get_queue_mapping in tx path

Wenwen Wang (1):
  ixgbe: fix memory leaks

 drivers/net/ethernet/intel/i40e/i40e.h        | 10 +++----
 .../net/ethernet/intel/i40e/i40e_adminq_cmd.h |  5 +++-
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 30 ++++++++++++-------
 drivers/net/ethernet/intel/i40e/i40e_ptp.c    |  2 +-
 .../ethernet/intel/i40e/i40e_virtchnl_pf.c    |  3 +-
 drivers/net/ethernet/intel/i40e/i40e_xsk.c    |  5 ++++
 drivers/net/ethernet/intel/iavf/iavf_main.c   |  1 -
 .../net/ethernet/intel/iavf/iavf_virtchnl.c   |  7 +++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 10 +++++--
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 14 +++++----
 10 files changed, 60 insertions(+), 27 deletions(-)

-- 
2.21.0

^ permalink raw reply

* [net-next 04/14] i40e: use ktime_get_real_ts64 instead of ktime_to_timespec64
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Remove a call to ktime_to_timespec64 by calling ktime_get_real_ts64
directly.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ptp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 11394a52e21c..9bf1ad4319f5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -725,7 +725,7 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf)
 	pf->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
 
 	/* Set the previous "reset" time to the current Kernel clock time */
-	pf->ptp_prev_hw_time = ktime_to_timespec64(ktime_get_real());
+	ktime_get_real_ts64(&pf->ptp_prev_hw_time);
 	pf->ptp_reset_start = ktime_get();
 
 	return 0;
-- 
2.21.0


^ permalink raw reply related

* [net-next 02/14] i40e: check __I40E_VF_DISABLE bit in i40e_sync_filters_subtask
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem
  Cc: Stefan Assmann, netdev, nhorman, sassmann, stable, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Stefan Assmann <sassmann@kpanic.de>

While testing VF spawn/destroy the following panic occurred.

BUG: unable to handle kernel NULL pointer dereference at 0000000000000029
[...]
Workqueue: i40e i40e_service_task [i40e]
RIP: 0010:i40e_sync_vsi_filters+0x6fd/0xc60 [i40e]
[...]
Call Trace:
 ? __switch_to_asm+0x35/0x70
 ? __switch_to_asm+0x41/0x70
 ? __switch_to_asm+0x35/0x70
 ? _cond_resched+0x15/0x30
 i40e_sync_filters_subtask+0x56/0x70 [i40e]
 i40e_service_task+0x382/0x11b0 [i40e]
 ? __switch_to_asm+0x41/0x70
 ? __switch_to_asm+0x41/0x70
 process_one_work+0x1a7/0x3b0
 worker_thread+0x30/0x390
 ? create_worker+0x1a0/0x1a0
 kthread+0x112/0x130
 ? kthread_bind+0x30/0x30
 ret_from_fork+0x35/0x40

Investigation revealed a race where pf->vf[vsi->vf_id].trusted may get
accessed by the watchdog via i40e_sync_filters_subtask() although
i40e_free_vfs() already free'd pf->vf.
To avoid this the call to i40e_sync_vsi_filters() in
i40e_sync_filters_subtask() needs to be guarded by __I40E_VF_DISABLE,
which is also used by i40e_free_vfs().

Note: put the __I40E_VF_DISABLE check after the
__I40E_MACVLAN_SYNC_PENDING check as the latter is more likely to
trigger.

CC: stable@vger.kernel.org
Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e9f2f276bf27..3e2e465f43f9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2592,6 +2592,10 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
 		return;
 	if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state))
 		return;
+	if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) {
+		set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
+		return;
+	}
 
 	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (pf->vsi[v] &&
@@ -2606,6 +2610,7 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
 			}
 		}
 	}
+	clear_bit(__I40E_VF_DISABLE, pf->state);
 }
 
 /**
-- 
2.21.0


^ permalink raw reply related

* [net-next 06/14] i40e: mark additional missing bits as reserved
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Mark bits 0xD through 0xF for the command flags of a cloud filter as
reserved. These bits are not yet defined and are considered as reserved
in the data sheet.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 7ff768761659..530613f31527 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1394,6 +1394,9 @@ struct i40e_aqc_cloud_filters_element_data {
 #define I40E_AQC_ADD_CLOUD_FILTER_IMAC			0x000A
 #define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC	0x000B
 #define I40E_AQC_ADD_CLOUD_FILTER_IIP			0x000C
+/* 0x000D reserved */
+/* 0x000E reserved */
+/* 0x000F reserved */
 /* 0x0010 to 0x0017 is for custom filters */
 #define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT		0x0010 /* Dest IP + L4 Port */
 #define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT		0x0011 /* Dest MAC + L4 Port */
-- 
2.21.0


^ permalink raw reply related

* [net-next 13/14] net/ixgbevf: make array api static const, makes object smaller
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem
  Cc: Colin Ian King, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Colin Ian King <colin.king@canonical.com>

Don't populate the array API on the stack but instead make it
static const. Makes the object code smaller by 58 bytes.

Before:
   text	   data	    bss	    dec	    hex	filename
  82969	   9763	    256	  92988	  16b3c	ixgbevf/ixgbevf_main.o

After:
   text	   data	    bss	    dec	    hex	filename
  82815	   9859	    256	  92930	  16b02	ixgbevf/ixgbevf_main.o

(gcc version 9.2.1, amd64)

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 75e849a64db7..75e93ce2ed99 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -2261,12 +2261,14 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter)
 static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	int api[] = { ixgbe_mbox_api_14,
-		      ixgbe_mbox_api_13,
-		      ixgbe_mbox_api_12,
-		      ixgbe_mbox_api_11,
-		      ixgbe_mbox_api_10,
-		      ixgbe_mbox_api_unknown };
+	static const int api[] = {
+		ixgbe_mbox_api_14,
+		ixgbe_mbox_api_13,
+		ixgbe_mbox_api_12,
+		ixgbe_mbox_api_11,
+		ixgbe_mbox_api_10,
+		ixgbe_mbox_api_unknown
+	};
 	int err, idx = 0;
 
 	spin_lock_bh(&adapter->mbx_lock);
-- 
2.21.0


^ permalink raw reply related

* [net-next 14/14] i40e: fix potential RX buffer starvation for AF_XDP
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem
  Cc: Magnus Karlsson, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Magnus Karlsson <magnus.karlsson@intel.com>

When the RX rings are created they are also populated with buffers
so that packets can be received. Usually these are kernel buffers,
but for AF_XDP in zero-copy mode, these are user-space buffers and
in this case the application might not have sent down any buffers
to the driver at this point. And if no buffers are allocated at ring
creation time, no packets can be received and no interrupts will be
generated so the NAPI poll function that allocates buffers to the
rings will never get executed.

To rectify this, we kick the NAPI context of any queue with an
attached AF_XDP zero-copy socket in two places in the code. Once
after an XDP program has loaded and once after the umem is registered.
This takes care of both cases: XDP program gets loaded first then AF_XDP
socket is created, and the reverse, AF_XDP socket is created first,
then XDP program is loaded.

Fixes: 0a714186d3c0 ("i40e: add AF_XDP zero-copy Rx support")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_xsk.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 0373bc6c7e61..feb5bd54d840 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -157,6 +157,11 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
 		err = i40e_queue_pair_enable(vsi, qid);
 		if (err)
 			return err;
+
+		/* Kick start the NAPI context so that receiving will start */
+		err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX);
+		if (err)
+			return err;
 	}

 	return 0;
-- 
2.21.0

^ permalink raw reply related

* [net-next 12/14] iavf: fix MAC address setting for VFs when filter is rejected
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem
  Cc: Stefan Assmann, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Stefan Assmann <sassmann@kpanic.de>

Currently iavf unconditionally applies MAC address change requests. This
brings the VF into a state where it is no longer able to pass traffic if
the PF rejects a MAC filter change for the VF.
A typical scenario for a rejected MAC filter is for an untrusted VF to
request to change the MAC address when an administratively set MAC is
present.

To keep iavf working in this scenario the MAC filter handling in iavf
needs to act on the PF reply regarding the MAC filter change. In the
case of an ack the new MAC address gets set, whereas in the case of a
nack the previous MAC address needs to stay in place.

Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c     | 1 -
 drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 7 +++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 07f5541a0f01..8f310e520b06 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -804,7 +804,6 @@ static int iavf_set_mac(struct net_device *netdev, void *p)
 
 	if (f) {
 		ether_addr_copy(hw->mac.addr, addr->sa_data);
-		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
 	}
 
 	return (f == NULL) ? -ENOMEM : 0;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index d49d58a6de80..c46770eba320 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -1252,6 +1252,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 		case VIRTCHNL_OP_ADD_ETH_ADDR:
 			dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n",
 				iavf_stat_str(&adapter->hw, v_retval));
+			/* restore administratively set MAC address */
+			ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
 			break;
 		case VIRTCHNL_OP_DEL_VLAN:
 			dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n",
@@ -1319,6 +1321,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 		}
 	}
 	switch (v_opcode) {
+	case VIRTCHNL_OP_ADD_ETH_ADDR: {
+		if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr))
+			ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+		}
+		break;
 	case VIRTCHNL_OP_GET_STATS: {
 		struct iavf_eth_stats *stats =
 			(struct iavf_eth_stats *)msg;
-- 
2.21.0


^ permalink raw reply related

* [net-next 09/14] i40e: use BIT macro to specify the cloud filter field flags
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

The macros used to specify the cloud filter fields are intended to be
individual bits. Declare them using the BIT() macro to make their
intention a little more clear.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index f1a1bd324b50..2af9f6308f84 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -243,11 +243,11 @@ struct i40e_fdir_filter {
 	u32 fd_id;
 };
 
-#define I40E_CLOUD_FIELD_OMAC	0x01
-#define I40E_CLOUD_FIELD_IMAC	0x02
-#define I40E_CLOUD_FIELD_IVLAN	0x04
-#define I40E_CLOUD_FIELD_TEN_ID	0x08
-#define I40E_CLOUD_FIELD_IIP	0x10
+#define I40E_CLOUD_FIELD_OMAC		BIT(0)
+#define I40E_CLOUD_FIELD_IMAC		BIT(1)
+#define I40E_CLOUD_FIELD_IVLAN		BIT(2)
+#define I40E_CLOUD_FIELD_TEN_ID		BIT(3)
+#define I40E_CLOUD_FIELD_IIP		BIT(4)
 
 #define I40E_CLOUD_FILTER_FLAGS_OMAC	I40E_CLOUD_FIELD_OMAC
 #define I40E_CLOUD_FILTER_FLAGS_IMAC	I40E_CLOUD_FIELD_IMAC
-- 
2.21.0


^ permalink raw reply related

* [net-next 10/14] i40e: clear __I40E_VIRTCHNL_OP_PENDING on invalid min Tx rate
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem
  Cc: Stefan Assmann, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Stefan Assmann <sassmann@kpanic.de>

In the case of an invalid min Tx rate being requested
i40e_ndo_set_vf_bw() immediately returns -EINVAL instead of releasing
__I40E_VIRTCHNL_OP_PENDING first.

Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index f8aa4deceb5e..3d2440838822 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -4263,7 +4263,8 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
 	if (min_tx_rate) {
 		dev_err(&pf->pdev->dev, "Invalid min tx rate (%d) (greater than 0) specified for VF %d.\n",
 			min_tx_rate, vf_id);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto error;
 	}
 
 	vf = &pf->vf[vf_id];
-- 
2.21.0


^ permalink raw reply related

* [net-next 11/14] ixgbe: Prevent u8 wrapping of ITR value to something less than 10us
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem
  Cc: Alexander Duyck, netdev, nhorman, sassmann, Gregg Leventhal,
	Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@linux.intel.com>

There were a couple cases where the ITR value generated via the adaptive
ITR scheme could exceed 126. This resulted in the value becoming either 0
or something less than 10. Switching back and forth between a value less
than 10 and a value greater than 10 can cause issues as certain hardware
features such as RSC do not function well when the ITR value has dropped
that low.

Reported-by: Gregg Leventhal <gleventhal@janestreet.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index dc034f4e8cf6..a5398b691aa8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2623,7 +2623,7 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
 		/* 16K ints/sec to 9.2K ints/sec */
 		avg_wire_size *= 15;
 		avg_wire_size += 11452;
-	} else if (avg_wire_size <= 1980) {
+	} else if (avg_wire_size < 1968) {
 		/* 9.2K ints/sec to 8K ints/sec */
 		avg_wire_size *= 5;
 		avg_wire_size += 22420;
@@ -2656,6 +2656,8 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
 	case IXGBE_LINK_SPEED_2_5GB_FULL:
 	case IXGBE_LINK_SPEED_1GB_FULL:
 	case IXGBE_LINK_SPEED_10_FULL:
+		if (avg_wire_size > 8064)
+			avg_wire_size = 8064;
 		itr += DIV_ROUND_UP(avg_wire_size,
 				    IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
 		       IXGBE_ITR_ADAPTIVE_MIN_INC;
-- 
2.21.0


^ permalink raw reply related

* [net-next 08/14] i40e: Fix message for other card without FEC.
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem
  Cc: Czeslaw Zagorski, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Czeslaw Zagorski <czeslawx.zagorski@intel.com>

When the variables "req_fec", "fec" and "an" are empty,
dmesg shows log with "Requested FEC: , Negotiated FEC: , Autoneg:".
Add link dmesg log for cards without FEC.

Signed-off-by: Czeslaw Zagorski <czeslawx.zagorski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 700f38ec8e91..6031223eafab 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6594,11 +6594,15 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 			else
 				req_fec = "CL74 FC-FEC/BASE-R";
 		}
+		netdev_info(vsi->netdev,
+			    "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
+			    speed, req_fec, fec, an, fc);
+	} else {
+		netdev_info(vsi->netdev,
+			    "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n",
+			    speed, fc);
 	}
 
-	netdev_info(vsi->netdev,
-		    "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
-		    speed, req_fec, fec, an, fc);
 }
 
 /**
-- 
2.21.0


^ permalink raw reply related

* [net-next 07/14] i40e: fix missed "Negotiated" string in i40e_print_link_message()
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem
  Cc: Aleksandr Loktionov, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Aleksandr Loktionov <aleksandr.loktionov@intel.com>

The "Negotiated" string was missing from the i40e_print_link_message() output.
Add this string to the dmesg message and do a small refactoring that removes
common substrings and unifies the link status message format.
Without this patch it was not clear that the reported FEC is the negotiated FEC.

Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 3e2e465f43f9..700f38ec8e91 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6569,19 +6569,19 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 	}
 
 	if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
-		req_fec = ", Requested FEC: None";
-		fec = ", FEC: None";
-		an = ", Autoneg: False";
+		req_fec = "None";
+		fec = "None";
+		an = "False";
 
 		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
-			an = ", Autoneg: True";
+			an = "True";
 
 		if (pf->hw.phy.link_info.fec_info &
 		    I40E_AQ_CONFIG_FEC_KR_ENA)
-			fec = ", FEC: CL74 FC-FEC/BASE-R";
+			fec = "CL74 FC-FEC/BASE-R";
 		else if (pf->hw.phy.link_info.fec_info &
 			 I40E_AQ_CONFIG_FEC_RS_ENA)
-			fec = ", FEC: CL108 RS-FEC";
+			fec = "CL108 RS-FEC";
 
 		/* 'CL108 RS-FEC' should be displayed when RS is requested, or
 		 * both RS and FC are requested
@@ -6590,13 +6590,14 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 		    (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
 			if (vsi->back->hw.phy.link_info.req_fec_info &
 			    I40E_AQ_REQUEST_FEC_RS)
-				req_fec = ", Requested FEC: CL108 RS-FEC";
+				req_fec = "CL108 RS-FEC";
 			else
-				req_fec = ", Requested FEC: CL74 FC-FEC/BASE-R";
+				req_fec = "CL74 FC-FEC/BASE-R";
 		}
 	}
 
-	netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s%s, Flow Control: %s\n",
+	netdev_info(vsi->netdev,
+		    "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
 		    speed, req_fec, fec, an, fc);
 }
 
-- 
2.21.0


^ permalink raw reply related

* [net-next 05/14] i40e: remove I40E_AQC_ADD_CLOUD_FILTER_OIP
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

The bit 0x0001 used in the cloud filters adminq command is reserved, and
is not actually a valid type.

The Linux driver has never used this type, and it's not clear if any
driver ever has.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 21cccec328e3..7ff768761659 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1382,7 +1382,7 @@ struct i40e_aqc_cloud_filters_element_data {
 #define I40E_AQC_ADD_CLOUD_FILTER_MASK	(0x3F << \
 					I40E_AQC_ADD_CLOUD_FILTER_SHIFT)
 /* 0x0000 reserved */
-#define I40E_AQC_ADD_CLOUD_FILTER_OIP			0x0001
+/* 0x0001 reserved */
 /* 0x0002 reserved */
 #define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN		0x0003
 #define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID	0x0004
-- 
2.21.0


^ permalink raw reply related

* [net-next 03/14] ixgbe: use skb_get_queue_mapping in tx path
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem; +Cc: Tonghao Zhang, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Use the common api, and don't access queue_mapping directly.

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 95c0827dfd4c..dc034f4e8cf6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8748,7 +8748,7 @@ static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb,
 	if (skb_put_padto(skb, 17))
 		return NETDEV_TX_OK;
 
-	tx_ring = ring ? ring : adapter->tx_ring[skb->queue_mapping];
+	tx_ring = ring ? ring : adapter->tx_ring[skb_get_queue_mapping(skb)];
 	if (unlikely(test_bit(__IXGBE_TX_DISABLED, &tx_ring->state)))
 		return NETDEV_TX_BUSY;
 
-- 
2.21.0


^ permalink raw reply related

* [net-next 01/14] ixgbe: fix memory leaks
From: Jeff Kirsher @ 2019-09-10 16:34 UTC (permalink / raw)
  To: davem; +Cc: Wenwen Wang, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190910163434.2449-1-jeffrey.t.kirsher@intel.com>

From: Wenwen Wang <wenwen@cs.uga.edu>

In ixgbe_configure_clsu32(), 'jump', 'input', and 'mask' are allocated
through kzalloc() respectively in a for loop body. Then,
ixgbe_clsu32_build_input() is invoked to build the input. If this process
fails, next iteration of the for loop will be executed. However, the
allocated 'jump', 'input', and 'mask' are not deallocated on this execution
path, leading to memory leaks.

Signed-off-by: Wenwen Wang <wenwen@cs.uga.edu>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 99df595abfba..95c0827dfd4c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9490,6 +9490,10 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
 				jump->mat = nexthdr[i].jump;
 				adapter->jump_tables[link_uhtid] = jump;
 				break;
+			} else {
+				kfree(mask);
+				kfree(input);
+				kfree(jump);
 			}
 		}
 		return 0;
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH net-next v2 00/11] nfp: implement firmware loading policy
From: David Miller @ 2019-09-10 16:30 UTC (permalink / raw)
  To: simon.horman; +Cc: jakub.kicinski, netdev, oss-drivers, dirk.vandermerwe
In-Reply-To: <20190908235427.9757-1-simon.horman@netronome.com>

From: Simon Horman <simon.horman@netronome.com>
Date: Mon,  9 Sep 2019 00:54:16 +0100

> Dirk says:
> 
> This series adds configuration capabilities to the firmware loading policy of
> the NFP driver.
> 
> NFP firmware loading is controlled via three HWinfo keys which can be set per
> device: 'abi_drv_reset', 'abi_drv_load_ifc' and 'app_fw_from_flash'.
> Refer to patch #11 for more detail on how these control the firmware loading.
> 
> In order to configure the full extent of FW loading policy, a new devlink
> parameter has been introduced, 'reset_dev_on_drv_probe', which controls if the
> driver should reset the device when it's probed. This, in conjunction with the
> existing 'fw_load_policy' (extended to include a 'disk' option) provides the
> means to tweak the NFP HWinfo keys as required by users.
> 
> Patches 1 and 2 add the devlink modifications and patches 3 through 9 add the
> support into the NFP driver. Furthermore, the last 2 patches are documentation
> only.
> 
> v2:
>   Renamed all 'reset_dev_on_drv_probe' defines the same as the devlink parameter
>   name (Jiri)

Series applied, thanks Simon.

^ permalink raw reply

* Re: [PATCH 1/7] net/dsa: configure autoneg for CPU port
From: Vivien Didelot @ 2019-09-10 16:14 UTC (permalink / raw)
  To: Robert Beckett
  Cc: netdev, Robert Beckett, Andrew Lunn, Florian Fainelli,
	David S. Miller
In-Reply-To: <20190910154238.9155-2-bob.beckett@collabora.com>

Hi Robert,

On Tue, 10 Sep 2019 16:41:47 +0100, Robert Beckett <bob.beckett@collabora.com> wrote:
> Configure autoneg for phy connected CPU ports.
> This allows us to use autoneg between the CPU port's phy and the link
> partner's phy.
> This enables us to negotiate pause frame transmission to prioritise
> packet delivery over throughput.
> 
> Signed-off-by: Robert Beckett <bob.beckett@collabora.com>
> ---
>  net/dsa/port.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/net/dsa/port.c b/net/dsa/port.c
> index f071acf2842b..1b6832eac2c5 100644
> --- a/net/dsa/port.c
> +++ b/net/dsa/port.c
> @@ -538,10 +538,20 @@ static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
>  		return PTR_ERR(phydev);
>  
>  	if (enable) {
> +		phydev->supported = PHY_GBIT_FEATURES | SUPPORTED_MII |
> +				    SUPPORTED_AUI | SUPPORTED_FIBRE |
> +				    SUPPORTED_BNC | SUPPORTED_Pause |
> +				    SUPPORTED_Asym_Pause;
> +		phydev->advertising = phydev->supported;
> +

This seems a bit intrusive to me. I'll get back to you.

>  		err = genphy_config_init(phydev);
>  		if (err < 0)
>  			goto err_put_dev;
>  
> +		err = genphy_config_aneg(phydev);
> +		if (err < 0)
> +			goto err_put_dev;
> +
>  		err = genphy_resume(phydev);
>  		if (err < 0)
>  			goto err_put_dev;

^ permalink raw reply

* Re: [PATCH net-next 0/3] net: dsa: mv88e6xxx: add PCL support
From: David Miller @ 2019-09-10 15:53 UTC (permalink / raw)
  To: vivien.didelot; +Cc: netdev, f.fainelli, andrew
In-Reply-To: <20190907200049.25273-1-vivien.didelot@gmail.com>

From: Vivien Didelot <vivien.didelot@gmail.com>
Date: Sat,  7 Sep 2019 16:00:46 -0400

> This small series implements the ethtool RXNFC operations in the
> mv88e6xxx DSA driver to configure a port's Layer 2 Policy Control List
> (PCL) supported by models such as 88E6352 and 88E6390 and equivalent.
> 
> This allows configuring a port to discard frames based on a configured
> destination or source MAC address and an optional VLAN, with e.g.:
> 
>     # ethtool --config-nfc lan1 flow-type ether src 00:11:22:33:44:55 action -1

Series applied, thanks.

^ permalink raw reply

* Re: Is bug 200755 in anyone's queue??
From: Willem de Bruijn @ 2019-09-10 15:52 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Steve Zabele, Eric Dumazet, Mark KEATON, Network Development,
	shum@canndrew.org, vladimir116@gmail.com, saifi.khan@strikr.in,
	Daniel Borkmann, on2k16nm@gmail.com, Stephen Hemminger,
	Craig Gallek
In-Reply-To: <CA+FuTSf24VrjOxS9Kg3+DFEYn7ihe6vMj5o7rggOz_6KH_rNpQ@mail.gmail.com>

On Wed, Sep 4, 2019 at 11:46 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Wed, Sep 4, 2019 at 10:51 AM Steve Zabele <zabele@comcast.net> wrote:
> >
> > I think a dual table approach makes a lot of sense here, especially if we look at the different use cases. For the DNS server example, almost certainly there will not be any connected sockets using the server port, so a test of whether the connected table is empty (maybe a boolean stored with the unconnected table?) should get to the existing code very quickly and not require accessing the memory holding the connected table. For our use case, the connected sockets persist for long periods (at network timescales at least) and so any rehashing should be infrequent and so have limited impact on performance overall.
> >
> > So does a dual table approach seem workable to other folks that know the internals?
>
> Let me take a stab and compare. A dual table does bring it more in
> line with how the TCP code is structured.

On closer look, I think two tables is too much code churn and risk for
a stable fix. It requires lookup changes across ipv4 and ipv6 unicast,
multicast, early demux, .. Though I'm happy to be proven wrong, of
course.

One variant, which is easy to verify only modifies behavior for reuseport
groups with connections, is to mark those groups as such:

"
@@ -21,7 +21,8 @@ struct sock_reuseport {
        unsigned int            synq_overflow_ts;
        /* ID stays the same even after the size of socks[] grows. */
        unsigned int            reuseport_id;
-       bool                    bind_inany;
+       unsigned int            bind_inany:1;
+       unsigned int            has_conns:1;
        struct bpf_prog __rcu   *prog;          /* optional BPF sock selector */
        struct sock             *socks[0];      /* array of sock pointers */
 };
@@ -37,6 +38,23 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 extern int reuseport_detach_prog(struct sock *sk);

+static inline bool reuseport_has_conns(struct sock *sk, bool set)
+{
+       struct sock_reuseport *reuse;
+       bool ret = false;
+
+       rcu_read_lock();
+       reuse = rcu_dereference(sk->sk_reuseport_cb);
+       if (reuse) {
+               if (set)
+                       reuse->has_conns = 1;
+               ret = reuse->has_conns;
+       }
+       rcu_read_unlock();
+
+       return ret;
+}

@@ -67,6 +68,7 @@ int __ip4_datagram_connect(struct sock *sk, struct
sockaddr *uaddr, int addr_len
                if (sk->sk_prot->rehash)
                        sk->sk_prot->rehash(sk);
        }
+       reuseport_has_conns(sk, true);
        inet->inet_daddr = fl4->daddr;
        inet->inet_dport = usin->sin_port;
        sk->sk_state = TCP_ESTABLISHED;
"

Then at lookup treat connected reuseport sockets as regular sockets
and do not return early on a reuseport match if there may be higher
scoring connections:

"
@@ -423,13 +423,15 @@ static struct sock *udp4_lib_lookup2(struct net *net,
                score = compute_score(sk, net, saddr, sport,
                                      daddr, hnum, dif, sdif);
                if (score > badness) {
-                       if (sk->sk_reuseport) {
+                       if (sk->sk_reuseport &&
+                           sk->sk_state != TCP_ESTABLISHED) {
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
                                result = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (result)
+                               if (result && !reuseport_has_conns(sk, false))
                                        return result;
+                               sk = result ? : sk;
                        }
                        badness = score;
                        result = sk;
"

and finally for reuseport matches only return unconnected sockets in the group:

"
@@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk,

 select_by_hash:
                /* no bpf or invalid bpf result: fall back to hash usage */
-               if (!sk2)
-                       sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+               if (!sk2) {
+                       int i, j;
+
+                       i = j = reciprocal_scale(hash, socks);
+                       while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+                               i++;
+                               if (i == reuse->num_socks)
+                                       i = 0;
+                               if (i == j)
+                                       goto out;
+                       }
+                       sk2 = reuse->socks[i];
+               }
        }
"

This is hardly a short patch, but the behavioral change is contained.

I also coded up the alternative to rely on order in the list: entries
are inserted at the head and the list is traversed from the head. To keep
all connections within a group ahead of all the unconnected sockets in
a group (1) rehash on connect and (2) do a more complex
insert-after-connected-reuseport for new reuseport sockets:

"
@@ -67,12 +68,16 @@ int __ip4_datagram_connect(struct sock *sk, struct
sockaddr *uaddr, int addr_len
                if (sk->sk_prot->rehash)
                        sk->sk_prot->rehash(sk);
        }
+
        inet->inet_daddr = fl4->daddr;
        inet->inet_dport = usin->sin_port;
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
        inet->inet_id = jiffies;

+       if (rcu_access_pointer(sk->sk_reuseport_cb) && sk->sk_prot->rehash)
+               sk->sk_prot->rehash(sk);
+
        sk_dst_set(sk, &rt->dst);
        err = 0;

@@ -323,7 +323,21 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
                    sk->sk_family == AF_INET6)
                        hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node,
                                           &hslot2->head);
-               else
+               else if (sk->sk_reuseport) {
+                       struct sock *cur, *last_conn = NULL;
+
+                       udp_portaddr_for_each_entry_rcu(cur, &hslot2->head) {
+                               if (cur->sk_state == TCP_ESTABLISHED &&
+                                   rcu_access_pointer(cur->sk_reuseport_cb))
+                                       last_conn = cur;
+                       }
+                       if (last_conn)
+
hlist_add_behind_rcu(&udp_sk(sk)->udp_portaddr_node,
+
&udp_sk(last_conn)->udp_portaddr_node);
+                       else
+
hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+                                                        &hslot2->head);
+               } else
                        hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
                                           &hslot2->head);

@@ -423,7 +437,8 @@ static struct sock *udp4_lib_lookup2(struct net *net,
                score = compute_score(sk, net, saddr, sport,
                                      daddr, hnum, dif, sdif);
                if (score > badness) {
-                       if (sk->sk_reuseport) {
+                       if (sk->sk_reuseport &&
+                           sk->sk_state != TCP_ESTABLISHED) {
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
                                result = reuseport_select_sock(sk, hash, skb,
@@ -1891,10 +1906,12 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
                                             udp_sk(sk)->udp_port_hash);
                        /* we must lock primary chain too */
                        spin_lock_bh(&hslot->lock);
+#if 0
                        /* TODO: differentiate inet_rcv_saddr change
from regular connect */
                        if (rcu_access_pointer(sk->sk_reuseport_cb))
                                reuseport_detach_sock(sk);
+#endif

-                       if (hslot2 != nhslot2) {
+                       if (1) {
                                spin_lock(&hslot2->lock);

hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
                                hslot2->count--;
"

This clearly has some loose ends and is no shorter or simpler. So
unless anyone has comments or a different solution, I'll finish
up the first variant.

^ permalink raw reply

* Re: [PATCH v3 2/2] PTP: add support for one-shot output
From: Richard Cochran @ 2019-09-10 15:47 UTC (permalink / raw)
  To: Felipe Balbi; +Cc: Christopher S Hall, netdev, linux-kernel
In-Reply-To: <20190909075940.12843-2-felipe.balbi@linux.intel.com>

On Mon, Sep 09, 2019 at 10:59:40AM +0300, Felipe Balbi wrote:

>  /*
>   * Bits of the ptp_perout_request.flags field:
>   */
> -#define PTP_PEROUT_VALID_FLAGS (~0)
> -
> +#define PTP_PEROUT_ONE_SHOT (1<<0)
> +#define PTP_PEROUT_VALID_FLAGS	(~PTP_PEROUT_ONE_SHOT)

Here also, the bitwise not is backwards. ^

Thanks,
Richard

^ permalink raw reply

* Re: [PATCH] net: stmmac: socfpga: re-use the `interface` parameter from platform data
From: David Miller @ 2019-09-10 15:46 UTC (permalink / raw)
  To: alexandru.ardelean
  Cc: netdev, linux-stm32, linux-arm-kernel, linux-kernel,
	peppe.cavallaro, alexandre.torgue, joabreu, mcoquelin.stm32
In-Reply-To: <20190910.174544.945128884852877943.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Tue, 10 Sep 2019 17:45:44 +0200 (CEST)

> From: Alexandru Ardelean <alexandru.ardelean@analog.com>
> Date: Fri, 6 Sep 2019 15:30:54 +0300
> 
>> The socfpga sub-driver defines an `interface` field in the `socfpga_dwmac`
>> struct and parses it on init.
>> 
>> The shared `stmmac_probe_config_dt()` function also parses this from the
>> device-tree and makes it available on the returned `plat_data` (which is
>> the same data available via `netdev_priv()`).
>> 
>> All that's needed now is to dig that information out, via some
>> `dev_get_drvdata()` && `netdev_priv()` calls and re-use it.
>> 
>> Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
> 
> This doesn't build even on net-next.

Specifically:

drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c: In function ‘socfpga_gen5_set_phy_mode’:
drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c:264:44: error: ‘phymode’ undeclared (first use in this function); did you mean ‘phy_modes’?
  264 |   dev_err(dwmac->dev, "bad phy mode %d\n", phymode);
      |                                            ^~~~~~~
./include/linux/device.h:1499:32: note: in definition of macro ‘dev_err’
 1499 |  _dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__)
      |                                ^~~~~~~~~~~
drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c:264:44: note: each undeclared identifier is reported only once for each function it appears in
  264 |   dev_err(dwmac->dev, "bad phy mode %d\n", phymode);
      |                                            ^~~~~~~
./include/linux/device.h:1499:32: note: in definition of macro ‘dev_err’
 1499 |  _dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__)
      |                                ^~~~~~~~~~~
drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c: In function ‘socfpga_gen10_set_phy_mode’:
drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c:340:6: error: ‘phymode’ undeclared (first use in this function); did you mean ‘phy_modes’?
  340 |      phymode == PHY_INTERFACE_MODE_MII ||
      |      ^~~~~~~
      |      phy_modes

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox