Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH net-next 2/2] intel: ixgbevf: Support Windows hosts (Hyper-V)
From: kbuild test robot @ 2016-04-14 22:34 UTC (permalink / raw)
  To: K. Y. Srinivasan
  Cc: kbuild-all, davem, netdev, linux-kernel, devel, olaf, apw,
	jasowang, eli, jackm, yevgenyp, john.ronciak, intel-wired-lan
In-Reply-To: <1460678142-30353-2-git-send-email-kys@microsoft.com>

[-- Attachment #1: Type: text/plain, Size: 2086 bytes --]

Hi Srinivasan,

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/K-Y-Srinivasan/ethernet-intel-Add-the-device-ID-s-presented-while-running-on-Hyper-V/20160415-061821
config: sparc64-allyesconfig (attached as .config)
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=sparc64 

All errors (new ones prefixed by >>):

   drivers/net/ethernet/intel/ixgbevf/vf.c: In function 'ixgbevf_rlpml_set_vf':
>> drivers/net/ethernet/intel/ixgbevf/vf.c:754:6: error: 'x86_hyper' undeclared (first use in this function)
     if (x86_hyper == &x86_hyper_ms_hyperv) {
         ^
   drivers/net/ethernet/intel/ixgbevf/vf.c:754:6: note: each undeclared identifier is reported only once for each function it appears in
>> drivers/net/ethernet/intel/ixgbevf/vf.c:754:20: error: 'x86_hyper_ms_hyperv' undeclared (first use in this function)
     if (x86_hyper == &x86_hyper_ms_hyperv) {
                       ^
   drivers/net/ethernet/intel/ixgbevf/vf.c: In function 'ixgbevf_negotiate_api_version':
   drivers/net/ethernet/intel/ixgbevf/vf.c:781:6: error: 'x86_hyper' undeclared (first use in this function)
     if (x86_hyper == &x86_hyper_ms_hyperv) {
         ^
   drivers/net/ethernet/intel/ixgbevf/vf.c:781:20: error: 'x86_hyper_ms_hyperv' undeclared (first use in this function)
     if (x86_hyper == &x86_hyper_ms_hyperv) {
                       ^

vim +/x86_hyper +754 drivers/net/ethernet/intel/ixgbevf/vf.c

   748	 **/
   749	void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size)
   750	{
   751		u32 msgbuf[2];
   752		u32 reg;
   753	
 > 754		if (x86_hyper == &x86_hyper_ms_hyperv) {
   755			/*
   756			 * If we are on Hyper-V, we implement
   757			 * this functionality differently.

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 45930 bytes --]

^ permalink raw reply

* Re: [PATCH] dsa: mv88e6xxx: Kill the REG_READ and REG_WRITE macros
From: Vivien Didelot @ 2016-04-14 22:16 UTC (permalink / raw)
  To: Andrew Lunn, David Miller; +Cc: Florian Fainelli, netdev, Andrew Lunn
In-Reply-To: <1460670432-23801-1-git-send-email-andrew@lunn.ch>

Andrew Lunn <andrew@lunn.ch> writes:

> These macros hide a ds variable and a return statement on error, which
> can lead to locking issues. Kill them off.
>
> Signed-off-by: Andrew Lunn <andrew@lunn.ch>
> ---
>
> As requested by Vivien, this patch has been split out of the series.
>
> v2: Use the existing ret variable
>     Hold the lock for the whole function, unlock on exit.
>     Removed duplicate error test
>     Fixed indentation.
> ---
>  drivers/net/dsa/mv88e6123.c |  13 ++-
>  drivers/net/dsa/mv88e6131.c |  41 ++++----
>  drivers/net/dsa/mv88e6171.c |  16 +--
>  drivers/net/dsa/mv88e6352.c |  15 +--
>  drivers/net/dsa/mv88e6xxx.c | 241 +++++++++++++++++++++++++++++++-------------
>  drivers/net/dsa/mv88e6xxx.h |  21 ----
>  6 files changed, 224 insertions(+), 123 deletions(-)
>
> diff --git a/drivers/net/dsa/mv88e6123.c b/drivers/net/dsa/mv88e6123.c
> index c34283d929c4..140e44e50e8a 100644
> --- a/drivers/net/dsa/mv88e6123.c
> +++ b/drivers/net/dsa/mv88e6123.c
> @@ -52,7 +52,9 @@ static int mv88e6123_setup_global(struct dsa_switch *ds)
>  	 * external PHYs to poll), don't discard packets with
>  	 * excessive collisions, and mask all interrupt sources.
>  	 */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, 0x0000);
> +	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL, 0x0000);
> +	if (ret)
> +		return ret;
>  
>  	/* Configure the upstream port, and configure the upstream
>  	 * port as the port to which ingress and egress monitor frames
> @@ -61,14 +63,15 @@ static int mv88e6123_setup_global(struct dsa_switch *ds)
>  	reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT |
>  		upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT |
>  		upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT;
> -	REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
> +	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
> +	if (ret)
> +		return ret;
>  
>  	/* Disable remote management for now, and set the switch's
>  	 * DSA device number.
>  	 */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f);
> -
> -	return 0;
> +	return mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL_2,
> +				   ds->index & 0x1f);
>  }
>  
>  static int mv88e6123_setup(struct dsa_switch *ds)
> diff --git a/drivers/net/dsa/mv88e6131.c b/drivers/net/dsa/mv88e6131.c
> index f5d75fce1e96..34d297b65040 100644
> --- a/drivers/net/dsa/mv88e6131.c
> +++ b/drivers/net/dsa/mv88e6131.c
> @@ -49,11 +49,16 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
>  	 * to arbitrate between packet queues, set the maximum frame
>  	 * size to 1632, and mask all interrupt sources.
>  	 */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL,
> -		  GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_MAX_FRAME_1632);
> +	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL,
> +				  GLOBAL_CONTROL_PPU_ENABLE |
> +				  GLOBAL_CONTROL_MAX_FRAME_1632);
> +	if (ret)
> +		return ret;
>  
>  	/* Set the VLAN ethertype to 0x8100. */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_CORE_TAG_TYPE, 0x8100);
> +	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CORE_TAG_TYPE, 0x8100);
> +	if (ret)
> +		return ret;
>  
>  	/* Disable ARP mirroring, and configure the upstream port as
>  	 * the port to which ingress and egress monitor frames are to
> @@ -62,31 +67,33 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
>  	reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT |
>  		upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT |
>  		GLOBAL_MONITOR_CONTROL_ARP_DISABLED;
> -	REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
> +	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
> +	if (ret)
> +		return ret;
>  
>  	/* Disable cascade port functionality unless this device
>  	 * is used in a cascade configuration, and set the switch's
>  	 * DSA device number.
>  	 */
>  	if (ds->dst->pd->nr_chips > 1)
> -		REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2,
> -			  GLOBAL_CONTROL_2_MULTIPLE_CASCADE |
> -			  (ds->index & 0x1f));
> +		ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL_2,
> +					  GLOBAL_CONTROL_2_MULTIPLE_CASCADE |
> +					  (ds->index & 0x1f));
>  	else
> -		REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2,
> -			  GLOBAL_CONTROL_2_NO_CASCADE |
> -			  (ds->index & 0x1f));
> +		ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL_2,
> +					  GLOBAL_CONTROL_2_NO_CASCADE |
> +					  (ds->index & 0x1f));
> +	if (ret)
> +		return ret;
>  
>  	/* Force the priority of IGMP/MLD snoop frames and ARP frames
>  	 * to the highest setting.
>  	 */
> -	REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE,
> -		  GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP |
> -		  7 << GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT |
> -		  GLOBAL2_PRIO_OVERRIDE_FORCE_ARP |
> -		  7 << GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT);
> -
> -	return 0;
> +	return mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE,
> +				   GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP |
> +				   7 << GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT |
> +				   GLOBAL2_PRIO_OVERRIDE_FORCE_ARP |
> +				   7 << GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT);
>  }
>  
>  static int mv88e6131_setup(struct dsa_switch *ds)
> diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
> index f5622506cdfa..b7af2b78f8ee 100644
> --- a/drivers/net/dsa/mv88e6171.c
> +++ b/drivers/net/dsa/mv88e6171.c
> @@ -46,8 +46,11 @@ static int mv88e6171_setup_global(struct dsa_switch *ds)
>  	/* Discard packets with excessive collisions, mask all
>  	 * interrupt sources, enable PPU.
>  	 */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL,
> -		  GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_DISCARD_EXCESS);
> +	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL,
> +				  GLOBAL_CONTROL_PPU_ENABLE |
> +				  GLOBAL_CONTROL_DISCARD_EXCESS);
> +	if (ret)
> +		return ret;
>  
>  	/* Configure the upstream port, and configure the upstream
>  	 * port as the port to which ingress and egress monitor frames
> @@ -57,14 +60,15 @@ static int mv88e6171_setup_global(struct dsa_switch *ds)
>  		upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT |
>  		upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT |
>  		upstream_port << GLOBAL_MONITOR_CONTROL_MIRROR_SHIFT;
> -	REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
> +	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
> +	if (ret)
> +		return ret;
>  
>  	/* Disable remote management for now, and set the switch's
>  	 * DSA device number.
>  	 */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f);
> -
> -	return 0;
> +	return mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL_2,
> +				   ds->index & 0x1f);
>  }
>  
>  static int mv88e6171_setup(struct dsa_switch *ds)
> diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
> index e54ee27db129..e8cb03fad21a 100644
> --- a/drivers/net/dsa/mv88e6352.c
> +++ b/drivers/net/dsa/mv88e6352.c
> @@ -59,8 +59,11 @@ static int mv88e6352_setup_global(struct dsa_switch *ds)
>  	/* Discard packets with excessive collisions,
>  	 * mask all interrupt sources, enable PPU (bit 14, undocumented).
>  	 */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL,
> -		  GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_DISCARD_EXCESS);
> +	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL,
> +				  GLOBAL_CONTROL_PPU_ENABLE |
> +				  GLOBAL_CONTROL_DISCARD_EXCESS);
> +	if (ret)
> +		return ret;
>  
>  	/* Configure the upstream port, and configure the upstream
>  	 * port as the port to which ingress and egress monitor frames
> @@ -69,14 +72,14 @@ static int mv88e6352_setup_global(struct dsa_switch *ds)
>  	reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT |
>  		upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT |
>  		upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT;
> -	REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
> +	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
> +	if (ret)
> +		return ret;
>  
>  	/* Disable remote management for now, and set the switch's
>  	 * DSA device number.
>  	 */
> -	REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f);
> -
> -	return 0;
> +	return mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1c, ds->index & 0x1f);
>  }
>  
>  static int mv88e6352_setup(struct dsa_switch *ds)
> diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
> index 9985a0cf31f1..b018f20829fb 100644
> --- a/drivers/net/dsa/mv88e6xxx.c
> +++ b/drivers/net/dsa/mv88e6xxx.c
> @@ -180,28 +180,44 @@ int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
>  
>  int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr)
>  {
> -	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 8) | addr[1]);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]);
> +	int err;
>  
> -	return 0;
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MAC_01,
> +				  (addr[0] << 8) | addr[1]);
> +	if (err)
> +		return err;
> +
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MAC_23,
> +				  (addr[2] << 8) | addr[3]);
> +	if (err)
> +		return err;
> +
> +	return mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MAC_45,
> +				   (addr[4] << 8) | addr[5]);
>  }
>  
>  int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr)
>  {
> -	int i;
>  	int ret;
> +	int i;

Unnecessary.

>  
>  	for (i = 0; i < 6; i++) {
>  		int j;
>  
>  		/* Write the MAC address byte. */
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_SWITCH_MAC,
> -			  GLOBAL2_SWITCH_MAC_BUSY | (i << 8) | addr[i]);
> +		ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SWITCH_MAC,
> +					  GLOBAL2_SWITCH_MAC_BUSY |
> +					  (i << 8) | addr[i]);
> +		if (ret)
> +			return ret;
>  
>  		/* Wait for the write to complete. */
>  		for (j = 0; j < 16; j++) {
> -			ret = REG_READ(REG_GLOBAL2, GLOBAL2_SWITCH_MAC);
> +			ret = mv88e6xxx_reg_read(ds, REG_GLOBAL2,
> +						 GLOBAL2_SWITCH_MAC);
> +			if (ret < 0)
> +				return ret;
> +
>  			if ((ret & GLOBAL2_SWITCH_MAC_BUSY) == 0)
>  				break;
>  		}
> @@ -233,13 +249,21 @@ static int mv88e6xxx_ppu_disable(struct dsa_switch *ds)
>  	int ret;
>  	unsigned long timeout;
>  
> -	ret = REG_READ(REG_GLOBAL, GLOBAL_CONTROL);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL,
> -		  ret & ~GLOBAL_CONTROL_PPU_ENABLE);
> +	ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_CONTROL);
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL,
> +				  ret & ~GLOBAL_CONTROL_PPU_ENABLE);
> +	if (ret)
> +		return ret;
>  
>  	timeout = jiffies + 1 * HZ;
>  	while (time_before(jiffies, timeout)) {
> -		ret = REG_READ(REG_GLOBAL, GLOBAL_STATUS);
> +		ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATUS);
> +		if (ret < 0)
> +			return ret;
> +
>  		usleep_range(1000, 2000);
>  		if ((ret & GLOBAL_STATUS_PPU_MASK) !=
>  		    GLOBAL_STATUS_PPU_POLLING)
> @@ -251,15 +275,24 @@ static int mv88e6xxx_ppu_disable(struct dsa_switch *ds)
>  
>  static int mv88e6xxx_ppu_enable(struct dsa_switch *ds)
>  {
> -	int ret;
> +	int ret, err;
>  	unsigned long timeout;
>  
> -	ret = REG_READ(REG_GLOBAL, GLOBAL_CONTROL);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, ret | GLOBAL_CONTROL_PPU_ENABLE);
> +	ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_CONTROL);
> +	if (ret < 0)
> +		return ret;
> +
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL,
> +				  ret | GLOBAL_CONTROL_PPU_ENABLE);
> +	if (err)
> +		return err;

You could've kept ret here.

>  
>  	timeout = jiffies + 1 * HZ;
>  	while (time_before(jiffies, timeout)) {
> -		ret = REG_READ(REG_GLOBAL, GLOBAL_STATUS);
> +		ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATUS);
> +		if (ret < 0)
> +			return ret;
> +
>  		usleep_range(1000, 2000);
>  		if ((ret & GLOBAL_STATUS_PPU_MASK) ==
>  		    GLOBAL_STATUS_PPU_POLLING)
> @@ -2667,7 +2700,9 @@ int mv88e6xxx_setup_common(struct dsa_switch *ds)
>  	ps->ds = ds;
>  	mutex_init(&ps->smi_mutex);
>  
> -	ps->id = REG_READ(REG_PORT(0), PORT_SWITCH_ID) & 0xfff0;
> +	ps->id = mv88e6xxx_reg_read(ds, REG_PORT(0), PORT_SWITCH_ID) & 0xfff0;
> +	if (ps->id < 0)
> +		return ps->id;
>  
>  	INIT_WORK(&ps->bridge_work, mv88e6xxx_bridge_work);
>  
> @@ -2677,42 +2712,67 @@ int mv88e6xxx_setup_common(struct dsa_switch *ds)
>  int mv88e6xxx_setup_global(struct dsa_switch *ds)
>  {
>  	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
> -	int ret;
> +	int err;
>  	int i;
>  
> +	mutex_lock(&ps->smi_mutex);
>  	/* Set the default address aging time to 5 minutes, and
>  	 * enable address learn messages to be sent to all message
>  	 * ports.
>  	 */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
> -		  0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL);
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_CONTROL,
> +				   0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL);
> +	if (err)
> +		goto unlock;
>  
>  	/* Configure the IP ToS mapping registers. */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff);
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000);
> +	if (err)
> +		goto unlock;
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000);
> +	if (err)
> +		goto unlock;
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555);
> +	if (err)
> +		goto unlock;
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555);
> +	if (err)
> +		goto unlock;
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa);
> +	if (err)
> +		goto unlock;
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa);
> +	if (err)
> +		goto unlock;
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff);
> +	if (err)
> +		goto unlock;
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff);
> +	if (err)
> +		goto unlock;
>  
>  	/* Configure the IEEE 802.1p priority mapping register. */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41);
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41);
> +	if (err)
> +		goto unlock;
>  
>  	/* Send all frames with destination addresses matching
>  	 * 01:80:c2:00:00:0x to the CPU port.
>  	 */
> -	REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff);
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff);
> +	if (err)
> +		goto unlock;
>  
>  	/* Ignore removed tag data on doubly tagged packets, disable
>  	 * flow control messages, force flow control priority to the
>  	 * highest, and send all special multicast frames to the CPU
>  	 * port at the highest priority.
>  	 */
> -	REG_WRITE(REG_GLOBAL2, GLOBAL2_SWITCH_MGMT,
> -		  0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 |
> -		  GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI);
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT,
> +				   0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 |
> +				   GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI);
> +	if (err)
> +		goto unlock;
>  
>  	/* Program the DSA routing table. */
>  	for (i = 0; i < 32; i++) {
> @@ -2722,23 +2782,35 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
>  		    i != ds->index && i < ds->dst->pd->nr_chips)
>  			nexthop = ds->pd->rtable[i] & 0x1f;
>  
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING,
> -			  GLOBAL2_DEVICE_MAPPING_UPDATE |
> -			  (i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) |
> -			  nexthop);
> +		err = _mv88e6xxx_reg_write(
> +			ds, REG_GLOBAL2,
> +			GLOBAL2_DEVICE_MAPPING,
> +			GLOBAL2_DEVICE_MAPPING_UPDATE |
> +			(i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) | nexthop);
> +		if (err)
> +			goto unlock;
>  	}
>  
>  	/* Clear all trunk masks. */
> -	for (i = 0; i < 8; i++)
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MASK,
> -			  0x8000 | (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) |
> -			  ((1 << ps->num_ports) - 1));
> +	for (i = 0; i < 8; i++) {
> +		err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_TRUNK_MASK,
> +					   0x8000 |
> +					   (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) |
> +					   ((1 << ps->num_ports) - 1));
> +		if (err)
> +			goto unlock;
> +	}
>  
>  	/* Clear all trunk mappings. */
> -	for (i = 0; i < 16; i++)
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING,
> -			  GLOBAL2_TRUNK_MAPPING_UPDATE |
> -			  (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT));
> +	for (i = 0; i < 16; i++) {
> +		err = _mv88e6xxx_reg_write(
> +			ds, REG_GLOBAL2,
> +			GLOBAL2_TRUNK_MAPPING,
> +			GLOBAL2_TRUNK_MAPPING_UPDATE |
> +			(i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT));
> +		if (err)
> +			goto unlock;
> +	}
>  
>  	if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
>  	    mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
> @@ -2746,17 +2818,27 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
>  		/* Send all frames with destination addresses matching
>  		 * 01:80:c2:00:00:2x to the CPU port.
>  		 */
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, 0xffff);
> +		err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2,
> +					   GLOBAL2_MGMT_EN_2X, 0xffff);
> +		if (err)
> +			goto unlock;
>  
>  		/* Initialise cross-chip port VLAN table to reset
>  		 * defaults.
>  		 */
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_PVT_ADDR, 0x9000);
> +		err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2,
> +					   GLOBAL2_PVT_ADDR, 0x9000);
> +		if (err)
> +			goto unlock;
>  
>  		/* Clear the priority override table. */
> -		for (i = 0; i < 16; i++)
> -			REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE,
> -				  0x8000 | (i << 8));
> +		for (i = 0; i < 16; i++) {
> +			err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2,
> +						   GLOBAL2_PRIO_OVERRIDE,
> +						   0x8000 | (i << 8));
> +			if (err)
> +				goto unlock;
> +		}
>  	}
>  
>  	if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
> @@ -2767,31 +2849,37 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
>  		 * ingress rate limit registers to their initial
>  		 * state.
>  		 */
> -		for (i = 0; i < ps->num_ports; i++)
> -			REG_WRITE(REG_GLOBAL2, GLOBAL2_INGRESS_OP,
> -				  0x9000 | (i << 8));
> +		for (i = 0; i < ps->num_ports; i++) {
> +			err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2,
> +						   GLOBAL2_INGRESS_OP,
> +						   0x9000 | (i << 8));
> +			if (err)
> +				goto unlock;
> +		}
>  	}
>  
>  	/* Clear the statistics counters for all ports */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_STATS_OP, GLOBAL_STATS_OP_FLUSH_ALL);
> +	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP,
> +				   GLOBAL_STATS_OP_FLUSH_ALL);
> +	if (err)
> +		goto unlock;
>  
>  	/* Wait for the flush to complete. */
> -	mutex_lock(&ps->smi_mutex);
> -	ret = _mv88e6xxx_stats_wait(ds);
> -	if (ret < 0)
> +	err = _mv88e6xxx_stats_wait(ds);
> +	if (err < 0)
>  		goto unlock;
>  
>  	/* Clear all ATU entries */
> -	ret = _mv88e6xxx_atu_flush(ds, 0, true);
> -	if (ret < 0)
> +	err = _mv88e6xxx_atu_flush(ds, 0, true);
> +	if (err < 0)
>  		goto unlock;
>  
>  	/* Clear all the VTU and STU entries */
> -	ret = _mv88e6xxx_vtu_stu_flush(ds);
> +	err = _mv88e6xxx_vtu_stu_flush(ds);
>  unlock:
>  	mutex_unlock(&ps->smi_mutex);
>  
> -	return ret;
> +	return err;
>  }
>  
>  int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active)
> @@ -2803,10 +2891,18 @@ int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active)
>  	int ret;
>  	int i;
>  
> +	mutex_lock(&ps->smi_mutex);
> +
>  	/* Set all ports to the disabled state. */
>  	for (i = 0; i < ps->num_ports; i++) {
> -		ret = REG_READ(REG_PORT(i), PORT_CONTROL);
> -		REG_WRITE(REG_PORT(i), PORT_CONTROL, ret & 0xfffc);
> +		ret = _mv88e6xxx_reg_read(ds, REG_PORT(i), PORT_CONTROL);
> +		if (ret < 0)
> +			goto unlock;
> +
> +		ret = _mv88e6xxx_reg_write(ds, REG_PORT(i), PORT_CONTROL,
> +					   ret & 0xfffc);
> +		if (ret)
> +			goto unlock;
>  	}
>  
>  	/* Wait for transmit queues to drain. */
> @@ -2825,22 +2921,31 @@ int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active)
>  	 * through global registers 0x18 and 0x19.
>  	 */
>  	if (ppu_active)
> -		REG_WRITE(REG_GLOBAL, 0x04, 0xc000);
> +		ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x04, 0xc000);
>  	else
> -		REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
> +		ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x04, 0xc400);
> +	if (ret)
> +		goto unlock;
>  
>  	/* Wait up to one second for reset to complete. */
>  	timeout = jiffies + 1 * HZ;
>  	while (time_before(jiffies, timeout)) {
> -		ret = REG_READ(REG_GLOBAL, 0x00);
> +		ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x00);
> +		if (ret < 0)
> +			goto unlock;
> +
>  		if ((ret & is_reset) == is_reset)
>  			break;
>  		usleep_range(1000, 2000);
>  	}
>  	if (time_after(jiffies, timeout))
> -		return -ETIMEDOUT;
> +		ret = -ETIMEDOUT;
> +	else
> +		ret = 0;
> +unlock:
> +	mutex_unlock(&ps->smi_mutex);
>  
> -	return 0;
> +	return ret;
>  }
>  
>  int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg)
> diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
> index 5d27decc85cb..0debb9f3cf0a 100644
> --- a/drivers/net/dsa/mv88e6xxx.h
> +++ b/drivers/net/dsa/mv88e6xxx.h
> @@ -542,25 +542,4 @@ extern struct dsa_switch_driver mv88e6123_switch_driver;
>  extern struct dsa_switch_driver mv88e6352_switch_driver;
>  extern struct dsa_switch_driver mv88e6171_switch_driver;
>  
> -#define REG_READ(addr, reg)						\
> -	({								\
> -		int __ret;						\
> -									\
> -		__ret = mv88e6xxx_reg_read(ds, addr, reg);		\
> -		if (__ret < 0)						\
> -			return __ret;					\
> -		__ret;							\
> -	})
> -
> -#define REG_WRITE(addr, reg, val)					\
> -	({								\
> -		int __ret;						\
> -									\
> -		__ret = mv88e6xxx_reg_write(ds, addr, reg, val);	\
> -		if (__ret < 0)						\
> -			return __ret;					\
> -	})
> -
> -
> -
>  #endif
> -- 
> 2.7.0

But the idea is here, we need these macro killed.

Tested-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>

Thank you,
Vivien

^ permalink raw reply

* Re: [PATCH 1/2] [v4] net: emac: emac gigabit ethernet controller driver
From: Vikram Sethi @ 2016-04-14 22:00 UTC (permalink / raw)
  To: Florian Fainelli, Timur Tabi, netdev, linux-kernel, devicetree,
	linux-arm-msm, sdharia, Shanker Donthineni, Greg Kroah-Hartman,
	cov, gavidov, Rob Herring, andrew, bjorn.andersson,
	Mark Langsdorf, Jon Masters, Andy Gross, David S. Miller
In-Reply-To: <57100962.40404@gmail.com>

A couple of clarifications on the SGMII internal PHY and the DMA capability of the EMAC inline.

On 04/14/2016 04:19 PM, Florian Fainelli wrote:
> On 14/04/16 13:19, Timur Tabi wrote:
>> Florian Fainelli wrote:
>>> On 13/04/16 10:59, Timur Tabi wrote:
>>>> From: Gilad Avidov <gavidov@codeaurora.org>
>>>>
>>>> Add supports for ethernet controller HW on Qualcomm Technologies,
>>>> Inc. SoC.
>>>> This driver supports the following features:
>>>> 1) Checksum offload.
>>>> 2) Runtime power management support.
>>>> 3) Interrupt coalescing support.
>>>> 4) SGMII phy.
>>>> 5) SGMII direct connection without external phy.
>>>
>>>
>>> [snip]
>>>
>>>> +- qcom,no-external-phy      : Indicates there is no external PHY
>>>> connected to
>>>> +                  EMAC. Include this only if the EMAC is directly
>>>> +                  connected to the peer end without EPHY.
>>> How is the internal PHY accessed, is it responding on the MDIO bus at a
>>> particular address?
>> There is a set of memory-mapped registers.  It's not connected via MDIO
>> at all.  It's mapped via the "sgmii" addresses in the device tree (see
>> function emac_sgmii_config).
>>
>>> If so, standard MDIO scanning/probing works, and you
>>> can have your PHY driver flag this device has internal. Worst case, you
>>> can do what BCMGENET does, and have a special "phy-mode" value set to
>>> "internal" when this knowledge needs to exist prior to MDIO bus scanning
>>> (e.g: to power on the PHY).
>> So the internal phy is not a real phy.  It's not capable of driving an
>> RJ45 port (there's no analog part).  It's an SGMII-like device that is
>> hard-wired to the EMAC itself.
There *is* an analog part to the internal SGMII PHY. Please check the SGMII specification. The only non-standard part is that it's not on MDIO.

> OK, that explains things a bit, thanks, this is quite a bit of important
> detail actually.
>
>> In theory, the internal PHY is optional.  You could design an SOC that
>> has just the EMAC connected via normal MDIO to an external phy.  I
>> really wish our hardware designers has done that.  But unfortunately,
>> there are no SOCs like that, and so we have to treat the internal phy as
>> an extension of the EMAC.
>>
>> My preference would be to get rid of the "qcom,no-external-phy" property
>> and have an external phy be required, at least until Qualcomm creates an
>> SOC without the internal phy (which may never happen, for all I know).
>>
> Can we just say that, an absence of PHY specified in the Device Tree (no
> phy-handle property and PHY not a child node of the MDIO bus), means
> that there is no external PHY?
>
> [snip]
>
>
[snip]
>>>> +    dev_set_drvdata(&pdev->dev, netdev);
>>>> +    SET_NETDEV_DEV(netdev, &pdev->dev);
>>>> +
>>>> +    adpt = netdev_priv(netdev);
>>>> +    adpt->netdev = netdev;
>>>> +    phy = &adpt->phy;
>>>> +    adpt->msg_enable = netif_msg_init(debug, EMAC_MSG_DEFAULT);
>>>> +
>>>> +    dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
>>> Really, is not that supposed to run on ARM64 servers?
>> Well, this version of the driver isn't, which is why it supports DT and
>> not ACPI.  I'm planning on adding that support in a later patch.
>> However, I'll add support for 64-bit masks in the next version of this
>> patch.
>>
>> Would this be okay:
>>
>>     retval = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
>>     if (retval) {
>>         dev_err(&pdev->dev, "failed to set DMA mask err %d\n", retval);
>>         goto err_res;
>>     }

How can you set the mask to 64 bits when the EMAC IP on FSM9900 and QDF2432 can only do 32 bit DMA?
The mask in that API is a bit mask describing which bits of an address your device supports.

>> I've seen code like this in other drivers:
>>
>>         ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
>>         if (ret) {
>>                 ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
>>                 if (ret) {
>>                         dev_err(dev, "failed to set dma mask\n");
>>                         return ret;
>>                 }
>>         }
>>
>> and I've never understood why it's necessary to fall back to 32-bits if
>> 64 bits fails.  Isn't 64 bits a superset of 32 bits?  The driver is
>> saying that the hardware supports all of DDR.  How could fail, and how
>> could 32-bit succeed if 64-bits fails?
> I believe there could be cases where the HW is capable of addressing
> more physical memory than the CPU itself (usually unlikely, but it
> could), there could be cases where the HW is behind an IOMMMU which only
> has a window into the DDR, and that could prevent a higher DMA_BIT_MASK
> from being successfully configured.


-- 
Vikram Sethi
Qualcomm Technologies Inc, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project

^ permalink raw reply

* Re: [PATCH net-next v4] packet: uses kfree_skb() for errors.
From: David Miller @ 2016-04-14 21:53 UTC (permalink / raw)
  To: weongyo.linux; +Cc: netdev, willemb
In-Reply-To: <1460668204-27112-1-git-send-email-weongyo.linux@gmail.com>

From: Weongyo Jeong <weongyo.linux@gmail.com>
Date: Thu, 14 Apr 2016 14:10:04 -0700

> consume_skb() isn't for error cases that kfree_skb() is more proper
> one.  At this patch, it fixed tpacket_rcv() and packet_rcv() to be
> consistent for error or non-error cases letting perf trace its event
> properly.
> 
> Signed-off-by: Weongyo Jeong <weongyo.linux@gmail.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next 1/8] dsa: mv88e6xxx: Prepare for turning this into a library module
From: David Miller @ 2016-04-14 21:49 UTC (permalink / raw)
  To: f.fainelli; +Cc: vivien.didelot, andrew, netdev
In-Reply-To: <57100B0F.4040909@gmail.com>

From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 14 Apr 2016 14:26:39 -0700

> On 14/04/16 13:22, Vivien Didelot wrote:
>> Hi Andrew,
>> 
>> Andrew Lunn <andrew@lunn.ch> writes:
>> 
>>> Export all the functions so that we can later turn the module into a
>>> library module.
>>>
>>> Signed-off-by: Andrew Lunn <andrew@lunn.ch>
>> 
>> Sorry but I don't like this. We don't want one module per 88E6xxx switch
>> model. We need one driver supporting them all, like any other driver.
> 
> Are you sure this is a good model moving forward? This means the library
> needs to know about every new switch added and all its little gory
> details, whereas the point is that it represents *most* of what is
> needed, defines a good enough, generic model, but does not have to deal
> (too much) with HW-specifics, see below.

I also think all of the mv88e6xxx drivers are insanely similar, and could
be driven by one monolithic driver.  It's not going to be that big at all.

Symbol exporting from a library and having several small similar
drivers reference those symbols, on the other hand, tends to be messy
in my opinion.

^ permalink raw reply

* [PATCH] dsa: mv88e6xxx: Kill the REG_READ and REG_WRITE macros
From: Andrew Lunn @ 2016-04-14 21:47 UTC (permalink / raw)
  To: David Miller; +Cc: Florian Fainelli, netdev, Vivien Didelot, Andrew Lunn

These macros hide a ds variable and a return statement on error, which
can lead to locking issues. Kill them off.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
---

As requested by Vivien, this patch has been split out of the series.

v2: Use the existing ret variable
    Hold the lock for the whole function, unlock on exit.
    Removed duplicate error test
    Fixed indentation.
---
 drivers/net/dsa/mv88e6123.c |  13 ++-
 drivers/net/dsa/mv88e6131.c |  41 ++++----
 drivers/net/dsa/mv88e6171.c |  16 +--
 drivers/net/dsa/mv88e6352.c |  15 +--
 drivers/net/dsa/mv88e6xxx.c | 241 +++++++++++++++++++++++++++++++-------------
 drivers/net/dsa/mv88e6xxx.h |  21 ----
 6 files changed, 224 insertions(+), 123 deletions(-)

diff --git a/drivers/net/dsa/mv88e6123.c b/drivers/net/dsa/mv88e6123.c
index c34283d929c4..140e44e50e8a 100644
--- a/drivers/net/dsa/mv88e6123.c
+++ b/drivers/net/dsa/mv88e6123.c
@@ -52,7 +52,9 @@ static int mv88e6123_setup_global(struct dsa_switch *ds)
 	 * external PHYs to poll), don't discard packets with
 	 * excessive collisions, and mask all interrupt sources.
 	 */
-	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, 0x0000);
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL, 0x0000);
+	if (ret)
+		return ret;
 
 	/* Configure the upstream port, and configure the upstream
 	 * port as the port to which ingress and egress monitor frames
@@ -61,14 +63,15 @@ static int mv88e6123_setup_global(struct dsa_switch *ds)
 	reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT |
 		upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT |
 		upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT;
-	REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
+	if (ret)
+		return ret;
 
 	/* Disable remote management for now, and set the switch's
 	 * DSA device number.
 	 */
-	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f);
-
-	return 0;
+	return mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL_2,
+				   ds->index & 0x1f);
 }
 
 static int mv88e6123_setup(struct dsa_switch *ds)
diff --git a/drivers/net/dsa/mv88e6131.c b/drivers/net/dsa/mv88e6131.c
index f5d75fce1e96..34d297b65040 100644
--- a/drivers/net/dsa/mv88e6131.c
+++ b/drivers/net/dsa/mv88e6131.c
@@ -49,11 +49,16 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
 	 * to arbitrate between packet queues, set the maximum frame
 	 * size to 1632, and mask all interrupt sources.
 	 */
-	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL,
-		  GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_MAX_FRAME_1632);
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL,
+				  GLOBAL_CONTROL_PPU_ENABLE |
+				  GLOBAL_CONTROL_MAX_FRAME_1632);
+	if (ret)
+		return ret;
 
 	/* Set the VLAN ethertype to 0x8100. */
-	REG_WRITE(REG_GLOBAL, GLOBAL_CORE_TAG_TYPE, 0x8100);
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CORE_TAG_TYPE, 0x8100);
+	if (ret)
+		return ret;
 
 	/* Disable ARP mirroring, and configure the upstream port as
 	 * the port to which ingress and egress monitor frames are to
@@ -62,31 +67,33 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
 	reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT |
 		upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT |
 		GLOBAL_MONITOR_CONTROL_ARP_DISABLED;
-	REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
+	if (ret)
+		return ret;
 
 	/* Disable cascade port functionality unless this device
 	 * is used in a cascade configuration, and set the switch's
 	 * DSA device number.
 	 */
 	if (ds->dst->pd->nr_chips > 1)
-		REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2,
-			  GLOBAL_CONTROL_2_MULTIPLE_CASCADE |
-			  (ds->index & 0x1f));
+		ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL_2,
+					  GLOBAL_CONTROL_2_MULTIPLE_CASCADE |
+					  (ds->index & 0x1f));
 	else
-		REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2,
-			  GLOBAL_CONTROL_2_NO_CASCADE |
-			  (ds->index & 0x1f));
+		ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL_2,
+					  GLOBAL_CONTROL_2_NO_CASCADE |
+					  (ds->index & 0x1f));
+	if (ret)
+		return ret;
 
 	/* Force the priority of IGMP/MLD snoop frames and ARP frames
 	 * to the highest setting.
 	 */
-	REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE,
-		  GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP |
-		  7 << GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT |
-		  GLOBAL2_PRIO_OVERRIDE_FORCE_ARP |
-		  7 << GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT);
-
-	return 0;
+	return mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE,
+				   GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP |
+				   7 << GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT |
+				   GLOBAL2_PRIO_OVERRIDE_FORCE_ARP |
+				   7 << GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT);
 }
 
 static int mv88e6131_setup(struct dsa_switch *ds)
diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index f5622506cdfa..b7af2b78f8ee 100644
--- a/drivers/net/dsa/mv88e6171.c
+++ b/drivers/net/dsa/mv88e6171.c
@@ -46,8 +46,11 @@ static int mv88e6171_setup_global(struct dsa_switch *ds)
 	/* Discard packets with excessive collisions, mask all
 	 * interrupt sources, enable PPU.
 	 */
-	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL,
-		  GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_DISCARD_EXCESS);
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL,
+				  GLOBAL_CONTROL_PPU_ENABLE |
+				  GLOBAL_CONTROL_DISCARD_EXCESS);
+	if (ret)
+		return ret;
 
 	/* Configure the upstream port, and configure the upstream
 	 * port as the port to which ingress and egress monitor frames
@@ -57,14 +60,15 @@ static int mv88e6171_setup_global(struct dsa_switch *ds)
 		upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT |
 		upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT |
 		upstream_port << GLOBAL_MONITOR_CONTROL_MIRROR_SHIFT;
-	REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
+	if (ret)
+		return ret;
 
 	/* Disable remote management for now, and set the switch's
 	 * DSA device number.
 	 */
-	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f);
-
-	return 0;
+	return mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL_2,
+				   ds->index & 0x1f);
 }
 
 static int mv88e6171_setup(struct dsa_switch *ds)
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index e54ee27db129..e8cb03fad21a 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -59,8 +59,11 @@ static int mv88e6352_setup_global(struct dsa_switch *ds)
 	/* Discard packets with excessive collisions,
 	 * mask all interrupt sources, enable PPU (bit 14, undocumented).
 	 */
-	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL,
-		  GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_DISCARD_EXCESS);
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL,
+				  GLOBAL_CONTROL_PPU_ENABLE |
+				  GLOBAL_CONTROL_DISCARD_EXCESS);
+	if (ret)
+		return ret;
 
 	/* Configure the upstream port, and configure the upstream
 	 * port as the port to which ingress and egress monitor frames
@@ -69,14 +72,14 @@ static int mv88e6352_setup_global(struct dsa_switch *ds)
 	reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT |
 		upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT |
 		upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT;
-	REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg);
+	if (ret)
+		return ret;
 
 	/* Disable remote management for now, and set the switch's
 	 * DSA device number.
 	 */
-	REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f);
-
-	return 0;
+	return mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1c, ds->index & 0x1f);
 }
 
 static int mv88e6352_setup(struct dsa_switch *ds)
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 9985a0cf31f1..b018f20829fb 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -180,28 +180,44 @@ int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
 
 int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr)
 {
-	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 8) | addr[1]);
-	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]);
-	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]);
+	int err;
 
-	return 0;
+	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MAC_01,
+				  (addr[0] << 8) | addr[1]);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MAC_23,
+				  (addr[2] << 8) | addr[3]);
+	if (err)
+		return err;
+
+	return mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_MAC_45,
+				   (addr[4] << 8) | addr[5]);
 }
 
 int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr)
 {
-	int i;
 	int ret;
+	int i;
 
 	for (i = 0; i < 6; i++) {
 		int j;
 
 		/* Write the MAC address byte. */
-		REG_WRITE(REG_GLOBAL2, GLOBAL2_SWITCH_MAC,
-			  GLOBAL2_SWITCH_MAC_BUSY | (i << 8) | addr[i]);
+		ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SWITCH_MAC,
+					  GLOBAL2_SWITCH_MAC_BUSY |
+					  (i << 8) | addr[i]);
+		if (ret)
+			return ret;
 
 		/* Wait for the write to complete. */
 		for (j = 0; j < 16; j++) {
-			ret = REG_READ(REG_GLOBAL2, GLOBAL2_SWITCH_MAC);
+			ret = mv88e6xxx_reg_read(ds, REG_GLOBAL2,
+						 GLOBAL2_SWITCH_MAC);
+			if (ret < 0)
+				return ret;
+
 			if ((ret & GLOBAL2_SWITCH_MAC_BUSY) == 0)
 				break;
 		}
@@ -233,13 +249,21 @@ static int mv88e6xxx_ppu_disable(struct dsa_switch *ds)
 	int ret;
 	unsigned long timeout;
 
-	ret = REG_READ(REG_GLOBAL, GLOBAL_CONTROL);
-	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL,
-		  ret & ~GLOBAL_CONTROL_PPU_ENABLE);
+	ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_CONTROL);
+	if (ret < 0)
+		return ret;
+
+	ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL,
+				  ret & ~GLOBAL_CONTROL_PPU_ENABLE);
+	if (ret)
+		return ret;
 
 	timeout = jiffies + 1 * HZ;
 	while (time_before(jiffies, timeout)) {
-		ret = REG_READ(REG_GLOBAL, GLOBAL_STATUS);
+		ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATUS);
+		if (ret < 0)
+			return ret;
+
 		usleep_range(1000, 2000);
 		if ((ret & GLOBAL_STATUS_PPU_MASK) !=
 		    GLOBAL_STATUS_PPU_POLLING)
@@ -251,15 +275,24 @@ static int mv88e6xxx_ppu_disable(struct dsa_switch *ds)
 
 static int mv88e6xxx_ppu_enable(struct dsa_switch *ds)
 {
-	int ret;
+	int ret, err;
 	unsigned long timeout;
 
-	ret = REG_READ(REG_GLOBAL, GLOBAL_CONTROL);
-	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, ret | GLOBAL_CONTROL_PPU_ENABLE);
+	ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_CONTROL);
+	if (ret < 0)
+		return ret;
+
+	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_CONTROL,
+				  ret | GLOBAL_CONTROL_PPU_ENABLE);
+	if (err)
+		return err;
 
 	timeout = jiffies + 1 * HZ;
 	while (time_before(jiffies, timeout)) {
-		ret = REG_READ(REG_GLOBAL, GLOBAL_STATUS);
+		ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATUS);
+		if (ret < 0)
+			return ret;
+
 		usleep_range(1000, 2000);
 		if ((ret & GLOBAL_STATUS_PPU_MASK) ==
 		    GLOBAL_STATUS_PPU_POLLING)
@@ -2667,7 +2700,9 @@ int mv88e6xxx_setup_common(struct dsa_switch *ds)
 	ps->ds = ds;
 	mutex_init(&ps->smi_mutex);
 
-	ps->id = REG_READ(REG_PORT(0), PORT_SWITCH_ID) & 0xfff0;
+	ps->id = mv88e6xxx_reg_read(ds, REG_PORT(0), PORT_SWITCH_ID) & 0xfff0;
+	if (ps->id < 0)
+		return ps->id;
 
 	INIT_WORK(&ps->bridge_work, mv88e6xxx_bridge_work);
 
@@ -2677,42 +2712,67 @@ int mv88e6xxx_setup_common(struct dsa_switch *ds)
 int mv88e6xxx_setup_global(struct dsa_switch *ds)
 {
 	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-	int ret;
+	int err;
 	int i;
 
+	mutex_lock(&ps->smi_mutex);
 	/* Set the default address aging time to 5 minutes, and
 	 * enable address learn messages to be sent to all message
 	 * ports.
 	 */
-	REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
-		  0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL);
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_CONTROL,
+				   0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL);
+	if (err)
+		goto unlock;
 
 	/* Configure the IP ToS mapping registers. */
-	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000);
-	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000);
-	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555);
-	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555);
-	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa);
-	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa);
-	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff);
-	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff);
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000);
+	if (err)
+		goto unlock;
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000);
+	if (err)
+		goto unlock;
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555);
+	if (err)
+		goto unlock;
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555);
+	if (err)
+		goto unlock;
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa);
+	if (err)
+		goto unlock;
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa);
+	if (err)
+		goto unlock;
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff);
+	if (err)
+		goto unlock;
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff);
+	if (err)
+		goto unlock;
 
 	/* Configure the IEEE 802.1p priority mapping register. */
-	REG_WRITE(REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41);
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41);
+	if (err)
+		goto unlock;
 
 	/* Send all frames with destination addresses matching
 	 * 01:80:c2:00:00:0x to the CPU port.
 	 */
-	REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff);
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff);
+	if (err)
+		goto unlock;
 
 	/* Ignore removed tag data on doubly tagged packets, disable
 	 * flow control messages, force flow control priority to the
 	 * highest, and send all special multicast frames to the CPU
 	 * port at the highest priority.
 	 */
-	REG_WRITE(REG_GLOBAL2, GLOBAL2_SWITCH_MGMT,
-		  0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 |
-		  GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI);
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT,
+				   0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 |
+				   GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI);
+	if (err)
+		goto unlock;
 
 	/* Program the DSA routing table. */
 	for (i = 0; i < 32; i++) {
@@ -2722,23 +2782,35 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
 		    i != ds->index && i < ds->dst->pd->nr_chips)
 			nexthop = ds->pd->rtable[i] & 0x1f;
 
-		REG_WRITE(REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING,
-			  GLOBAL2_DEVICE_MAPPING_UPDATE |
-			  (i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) |
-			  nexthop);
+		err = _mv88e6xxx_reg_write(
+			ds, REG_GLOBAL2,
+			GLOBAL2_DEVICE_MAPPING,
+			GLOBAL2_DEVICE_MAPPING_UPDATE |
+			(i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) | nexthop);
+		if (err)
+			goto unlock;
 	}
 
 	/* Clear all trunk masks. */
-	for (i = 0; i < 8; i++)
-		REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MASK,
-			  0x8000 | (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) |
-			  ((1 << ps->num_ports) - 1));
+	for (i = 0; i < 8; i++) {
+		err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_TRUNK_MASK,
+					   0x8000 |
+					   (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) |
+					   ((1 << ps->num_ports) - 1));
+		if (err)
+			goto unlock;
+	}
 
 	/* Clear all trunk mappings. */
-	for (i = 0; i < 16; i++)
-		REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING,
-			  GLOBAL2_TRUNK_MAPPING_UPDATE |
-			  (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT));
+	for (i = 0; i < 16; i++) {
+		err = _mv88e6xxx_reg_write(
+			ds, REG_GLOBAL2,
+			GLOBAL2_TRUNK_MAPPING,
+			GLOBAL2_TRUNK_MAPPING_UPDATE |
+			(i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT));
+		if (err)
+			goto unlock;
+	}
 
 	if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
 	    mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
@@ -2746,17 +2818,27 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
 		/* Send all frames with destination addresses matching
 		 * 01:80:c2:00:00:2x to the CPU port.
 		 */
-		REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, 0xffff);
+		err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2,
+					   GLOBAL2_MGMT_EN_2X, 0xffff);
+		if (err)
+			goto unlock;
 
 		/* Initialise cross-chip port VLAN table to reset
 		 * defaults.
 		 */
-		REG_WRITE(REG_GLOBAL2, GLOBAL2_PVT_ADDR, 0x9000);
+		err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2,
+					   GLOBAL2_PVT_ADDR, 0x9000);
+		if (err)
+			goto unlock;
 
 		/* Clear the priority override table. */
-		for (i = 0; i < 16; i++)
-			REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE,
-				  0x8000 | (i << 8));
+		for (i = 0; i < 16; i++) {
+			err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2,
+						   GLOBAL2_PRIO_OVERRIDE,
+						   0x8000 | (i << 8));
+			if (err)
+				goto unlock;
+		}
 	}
 
 	if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
@@ -2767,31 +2849,37 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
 		 * ingress rate limit registers to their initial
 		 * state.
 		 */
-		for (i = 0; i < ps->num_ports; i++)
-			REG_WRITE(REG_GLOBAL2, GLOBAL2_INGRESS_OP,
-				  0x9000 | (i << 8));
+		for (i = 0; i < ps->num_ports; i++) {
+			err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2,
+						   GLOBAL2_INGRESS_OP,
+						   0x9000 | (i << 8));
+			if (err)
+				goto unlock;
+		}
 	}
 
 	/* Clear the statistics counters for all ports */
-	REG_WRITE(REG_GLOBAL, GLOBAL_STATS_OP, GLOBAL_STATS_OP_FLUSH_ALL);
+	err = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP,
+				   GLOBAL_STATS_OP_FLUSH_ALL);
+	if (err)
+		goto unlock;
 
 	/* Wait for the flush to complete. */
-	mutex_lock(&ps->smi_mutex);
-	ret = _mv88e6xxx_stats_wait(ds);
-	if (ret < 0)
+	err = _mv88e6xxx_stats_wait(ds);
+	if (err < 0)
 		goto unlock;
 
 	/* Clear all ATU entries */
-	ret = _mv88e6xxx_atu_flush(ds, 0, true);
-	if (ret < 0)
+	err = _mv88e6xxx_atu_flush(ds, 0, true);
+	if (err < 0)
 		goto unlock;
 
 	/* Clear all the VTU and STU entries */
-	ret = _mv88e6xxx_vtu_stu_flush(ds);
+	err = _mv88e6xxx_vtu_stu_flush(ds);
 unlock:
 	mutex_unlock(&ps->smi_mutex);
 
-	return ret;
+	return err;
 }
 
 int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active)
@@ -2803,10 +2891,18 @@ int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active)
 	int ret;
 	int i;
 
+	mutex_lock(&ps->smi_mutex);
+
 	/* Set all ports to the disabled state. */
 	for (i = 0; i < ps->num_ports; i++) {
-		ret = REG_READ(REG_PORT(i), PORT_CONTROL);
-		REG_WRITE(REG_PORT(i), PORT_CONTROL, ret & 0xfffc);
+		ret = _mv88e6xxx_reg_read(ds, REG_PORT(i), PORT_CONTROL);
+		if (ret < 0)
+			goto unlock;
+
+		ret = _mv88e6xxx_reg_write(ds, REG_PORT(i), PORT_CONTROL,
+					   ret & 0xfffc);
+		if (ret)
+			goto unlock;
 	}
 
 	/* Wait for transmit queues to drain. */
@@ -2825,22 +2921,31 @@ int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active)
 	 * through global registers 0x18 and 0x19.
 	 */
 	if (ppu_active)
-		REG_WRITE(REG_GLOBAL, 0x04, 0xc000);
+		ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x04, 0xc000);
 	else
-		REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
+		ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x04, 0xc400);
+	if (ret)
+		goto unlock;
 
 	/* Wait up to one second for reset to complete. */
 	timeout = jiffies + 1 * HZ;
 	while (time_before(jiffies, timeout)) {
-		ret = REG_READ(REG_GLOBAL, 0x00);
+		ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x00);
+		if (ret < 0)
+			goto unlock;
+
 		if ((ret & is_reset) == is_reset)
 			break;
 		usleep_range(1000, 2000);
 	}
 	if (time_after(jiffies, timeout))
-		return -ETIMEDOUT;
+		ret = -ETIMEDOUT;
+	else
+		ret = 0;
+unlock:
+	mutex_unlock(&ps->smi_mutex);
 
-	return 0;
+	return ret;
 }
 
 int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg)
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 5d27decc85cb..0debb9f3cf0a 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -542,25 +542,4 @@ extern struct dsa_switch_driver mv88e6123_switch_driver;
 extern struct dsa_switch_driver mv88e6352_switch_driver;
 extern struct dsa_switch_driver mv88e6171_switch_driver;
 
-#define REG_READ(addr, reg)						\
-	({								\
-		int __ret;						\
-									\
-		__ret = mv88e6xxx_reg_read(ds, addr, reg);		\
-		if (__ret < 0)						\
-			return __ret;					\
-		__ret;							\
-	})
-
-#define REG_WRITE(addr, reg, val)					\
-	({								\
-		int __ret;						\
-									\
-		__ret = mv88e6xxx_reg_write(ds, addr, reg, val);	\
-		if (__ret < 0)						\
-			return __ret;					\
-	})
-
-
-
 #endif
-- 
2.7.0

^ permalink raw reply related

* Re: [PATCH net-next 1/8] dsa: mv88e6xxx: Prepare for turning this into a library module
From: Andrew Lunn @ 2016-04-14 21:37 UTC (permalink / raw)
  To: Florian Fainelli; +Cc: Vivien Didelot, David Miller, netdev
In-Reply-To: <57100B0F.4040909@gmail.com>

> I believe this is currently the case for most of what is being done by
> mv88e6xxx.c, Andrew's patches are not making things worse.

Hi Florian

That was my aim. I did the minimum restructuring needed to make these
DSA drivers classical Linux drivers. Nothing i've done would prevent
them being merged into one later. I don't want to do such a merge now,
because that is just a distraction from the bigger picture of
reworking how probe works.

	  Andrew

^ permalink raw reply

* Re: [PATCH net-next 1/8] dsa: mv88e6xxx: Prepare for turning this into a library module
From: Andrew Lunn @ 2016-04-14 21:32 UTC (permalink / raw)
  To: Vivien Didelot; +Cc: David Miller, Florian Fainelli, netdev
In-Reply-To: <874mb454b3.fsf@ketchup.mtl.sfl>

Hi Vivien

> I'm working on a few patches right away to factorize this and lighten up
> that part from your current refactoring of DSA.

Please take a look at the full series of 40 patches, before deciding
what you want to clean up. It is too many to post at once, so i'm
breaking them up into chunks. But that does mean some of the 'why' is
missing, which i'm trying to add back via a good description in the
cover note.

https://github.com/lunn/linux.git v5.6-rc2-net-next-dsa-probe

> Here's an example of duplicated code fixed for the 6131 PHY access code:
> 
>     http://ix.io/wJm

I've done this already.

     Andrew

^ permalink raw reply

* Re: [PATCH net-next 1/8] dsa: mv88e6xxx: Prepare for turning this into a library module
From: Florian Fainelli @ 2016-04-14 21:26 UTC (permalink / raw)
  To: Vivien Didelot, Andrew Lunn, David Miller; +Cc: netdev
In-Reply-To: <874mb454b3.fsf@ketchup.mtl.sfl>

On 14/04/16 13:22, Vivien Didelot wrote:
> Hi Andrew,
> 
> Andrew Lunn <andrew@lunn.ch> writes:
> 
>> Export all the functions so that we can later turn the module into a
>> library module.
>>
>> Signed-off-by: Andrew Lunn <andrew@lunn.ch>
> 
> Sorry but I don't like this. We don't want one module per 88E6xxx switch
> model. We need one driver supporting them all, like any other driver.

Are you sure this is a good model moving forward? This means the library
needs to know about every new switch added and all its little gory
details, whereas the point is that it represents *most* of what is
needed, defines a good enough, generic model, but does not have to deal
(too much) with HW-specifics, see below.

> 
> Multiple modules will continue to confuse us with duplicated code. For
> instance, every specific mv88e6*_setup_global functions program the
> switch's DSA device number with something like:
> 
>     REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f);
> 
> Looking at every drivers/net/dsa/mv88e6*.c file, there are only few
> differences in their dsa_switch_driver structures:
> 
> The .setup function is always specific, but easily factorizable in a
> mv88e6xxx_setup function. The .probe function can be merged once we have
> a single driver. mv88e6131 has different phy_{read,write} functions
> which can be abstracted in mv88e6xxx_phy_{read,write}. Only mv88e6352
> has support for the EEPROM, which is simple to abstract too.
> 
> I'm working on a few patches right away to factorize this and lighten up
> that part from your current refactoring of DSA.
> 
> Here's an example of duplicated code fixed for the 6131 PHY access code:
> 
>     http://ix.io/wJm

The cost of maintaining a smallish piece of driver code that deals with
things that are extremely specific to a given switch HW seems like a
reasonable thing to do. The library should ideally be mostly
HW-independent in the sense that it should only deal with switch HW
properties that are shared and common (number of ports, number of
FIB/VTUs etc.) and the indidivual switch drivers need to deal with all
the ad-hoc stuff that has no place everywhere else.

I believe this is currently the case for most of what is being done by
mv88e6xxx.c, Andrew's patches are not making things worse.
-- 
Florian

^ permalink raw reply

* Re: [PATCH 1/2] [v4] net: emac: emac gigabit ethernet controller driver
From: Florian Fainelli @ 2016-04-14 21:19 UTC (permalink / raw)
  To: Timur Tabi, netdev, linux-kernel, devicetree, linux-arm-msm,
	sdharia, Shanker Donthineni, Greg Kroah-Hartman, vikrams, cov,
	gavidov, Rob Herring, andrew, bjorn.andersson, Mark Langsdorf,
	Jon Masters, Andy Gross, David S. Miller
In-Reply-To: <570FFB6B.5060305@codeaurora.org>

On 14/04/16 13:19, Timur Tabi wrote:
> Florian Fainelli wrote:
>> On 13/04/16 10:59, Timur Tabi wrote:
>>> From: Gilad Avidov <gavidov@codeaurora.org>
>>>
>>> Add supports for ethernet controller HW on Qualcomm Technologies,
>>> Inc. SoC.
>>> This driver supports the following features:
>>> 1) Checksum offload.
>>> 2) Runtime power management support.
>>> 3) Interrupt coalescing support.
>>> 4) SGMII phy.
>>> 5) SGMII direct connection without external phy.
>>
>> I think you should shoot for more simple for an initial submission:
>>
>> - no offload
>> - no timestamping
>>
>> get that accepted, and then add features one by one, it sure is more
>> work, but it helps with the review, and makes you work off a solid base.
> 
> Unfortunately, I didn't write this driver initially, so I'm not sure how
> to remove these features from it.  Variants of this driver have been
> bouncing around Qualcomm for years, and even the author of this patch
> (Gilad) is no longer around.

Well, good luck :)

> 
> So although I have a lot of experience upstreaming code, I have little
> experience and knowledge with network drivers.  I'm going to need a lot
> of hand-holding.  I hope you will be patient with me.
> 
> Timestamping support seems to be just a few lines of code, so I can
> probably remove that.  I don't know where offloading is in the driver,
> however.  I don't know how offloading in netdev drivers works.

Based on what the driver seems to do right now, it would be located in
the transmit and receive paths, and would have to access
mac/network/transport offsets and deal with checksums, so anything that
deals with checksums, provided that the HW does not require that to
transmit/receive packets, could be eliminated entirely for now and be
added later.

It is not the biggest part that needs to be slightly re-architected
though, the SGMII/PHY/MDIO stuff is more important as it impacts the
Device Tree binding, see below.

> 
>> You will see below, but a pet peeve of mine is authors reimplementing
>> code that exists in PHYLIB.
> 
> I can understand that, but the PHYs on these SOCs are non-standard.  The
> "internal PHY" (for lack of a better name) is part of the EMAC itself,
> and it acts as a middle-man for the external PHY.  There is an MDIO bus,
> but it's hard-wired to the EMAC, and most of the time you don't touch it
> directly.  Instead you let the EMAC and/or the internal PHY send/receive
> commands/data to the external PHY on your behalf.  The internal phy
> talks to the external phy via SGMII only.  Only the EMAC uses the mdio bus.

Humm OK, this PHY proxy, provided that this is really how it works,
seems a bit unusual, but, is not necessarily a roadblock to having a
proper MDIO implementation here which is standard and will allow you to
utilize re-usable drivers and facilities that are already there.

> 
> I will look at PHYLIB, but I can't tell you whether it will work with
> this hardware (Gilad previously claim that it wouldn't work well).

Well, PHYLIB does prefer using MDIO accesses to "speak" to PHYs,
built-in or external, but there is always the option of investing into
some custom development with the subsystem to make it play nicely with
your HW.

> 
>>> diff --git a/Documentation/devicetree/bindings/net/qcom-emac.txt
>>> b/Documentation/devicetree/bindings/net/qcom-emac.txt
>>> new file mode 100644
>>> index 0000000..df5e7c0
>>> --- /dev/null
>>> +++ b/Documentation/devicetree/bindings/net/qcom-emac.txt
>>> @@ -0,0 +1,65 @@
>>> +Qualcomm EMAC Gigabit Ethernet Controller
>>> +
>>> +Required properties:
>>> +- compatible : Should be "qcom,emac".
>>> +- reg : Offset and length of the register regions for the device
>>> +- reg-names : Register region names referenced in 'reg' above.
>>> +    Required register resource entries are:
>>> +    "base"   : EMAC controller base register block.
>>> +    "csr"    : EMAC wrapper register block.
>>> +    Optional register resource entries are:
>>> +    "ptp"    : EMAC PTP (1588) register block.
>>> +           Required if 'qcom,emac-tstamp-en' is present.
>>> +    "sgmii"  : EMAC SGMII PHY register block.
>>> +- interrupts : Interrupt numbers used by this controller
>>> +- interrupt-names : Interrupt resource names referenced in
>>> 'interrupts' above.
>>> +    Required interrupt resource entries are:
>>> +    "emac_core0"   : EMAC core0 interrupt.
>>> +    "sgmii_irq"   : EMAC SGMII interrupt.
>>> +- phy-addr            : Specifies phy address on MDIO bus.
>>> +            Required if the optional property "qcom,no-external-phy"
>>> +            is not specified.
>>
>> This is not the standard way to represent an Ethernet PHY hanging off a
>> MDIO bus see ethernet.txt and phy.txt in D/dt/bindings/net/
> 
> The MDIO bus on these chips is not accessible as a separate entity.  It
> is melded (for lack of a better word) into the EMAC itself.  That's why
> there is a "qcom,no-external-phy" property.  You could, in theory, wire
> the internal phy of one SOC directly to the internal phy of another SOC,
> and use that as in interconnect between SOCs.  I don't know of any such
> use-cases however.

The fact the MDIO bus is built-into the MAC is really not a problem
here, there are tons of drivers that deal with that just fine, yet, the
DT binding needs to reflect that properly by having a sub-node of the
Ethernet MAC which is a MDIO bus controller node. If external or
internal PHYs are accessible through that MDIO bus, they also need to
appear as child-nodes of that MDIO bus controller node.

BTW, wiring two PHYs internally is a waste of HW resource at best, if
not just asking for trouble, you can do an Ethernet MAC to MAC
connection, tons of HW do that too.

[snip]

>>> +- qcom,no-external-phy      : Indicates there is no external PHY
>>> connected to
>>> +                  EMAC. Include this only if the EMAC is directly
>>> +                  connected to the peer end without EPHY.
>>
>> How is the internal PHY accessed, is it responding on the MDIO bus at a
>> particular address?
> 
> There is a set of memory-mapped registers.  It's not connected via MDIO
> at all.  It's mapped via the "sgmii" addresses in the device tree (see
> function emac_sgmii_config).
> 
>> If so, standard MDIO scanning/probing works, and you
>> can have your PHY driver flag this device has internal. Worst case, you
>> can do what BCMGENET does, and have a special "phy-mode" value set to
>> "internal" when this knowledge needs to exist prior to MDIO bus scanning
>> (e.g: to power on the PHY).
> 
> So the internal phy is not a real phy.  It's not capable of driving an
> RJ45 port (there's no analog part).  It's an SGMII-like device that is
> hard-wired to the EMAC itself.

OK, that explains things a bit, thanks, this is quite a bit of important
detail actually.

> 
> In theory, the internal PHY is optional.  You could design an SOC that
> has just the EMAC connected via normal MDIO to an external phy.  I
> really wish our hardware designers has done that.  But unfortunately,
> there are no SOCs like that, and so we have to treat the internal phy as
> an extension of the EMAC.
> 
> My preference would be to get rid of the "qcom,no-external-phy" property
> and have an external phy be required, at least until Qualcomm creates an
> SOC without the internal phy (which may never happen, for all I know).
> 

Can we just say that, an absence of PHY specified in the Device Tree (no
phy-handle property and PHY not a child node of the MDIO bus), means
that there is no external PHY?

[snip]

>> Do you need to maintain these flags when most, if not all of them
>> already exist in dev->flags or dev->features?
> 
> So you're saying that, for example, in emac_set_features() I should
> remove this:
> 
>     if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
>         set_bit(EMAC_STATUS_VLANSTRIP_EN, &adpt->status);
>     else
>         clear_bit(EMAC_STATUS_VLANSTRIP_EN, &adpt->status);
> 
> and then in emac_mac_mode_config(), I should do this instead:
> 
> void emac_mac_mode_config(struct emac_adapter *adpt)
> {
>     struct net_device *netdev = adpt->netdev;
> 
>     if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
>         mac |= VLAN_STRIP;
>     else
>         mac &= ~VLAN_STRIP;
> 
> 
> If so, then what do I do in emac_rx_mode_set()?  Should I delete this
> entire block:
> 
>     /* Check for Promiscuous and All Multicast modes */
>     if (netdev->flags & IFF_PROMISC) {
>         set_bit(EMAC_STATUS_PROMISC_EN, &adpt->status);
>     } else if (netdev->flags & IFF_ALLMULTI) {
>         set_bit(EMAC_STATUS_MULTIALL_EN, &adpt->status);
>         clear_bit(EMAC_STATUS_PROMISC_EN, &adpt->status);
>     } else {
>         clear_bit(EMAC_STATUS_MULTIALL_EN, &adpt->status);
>         clear_bit(EMAC_STATUS_PROMISC_EN, &adpt->status);
>     }
> 
> It does look like Gilad is just mirroring the flags/features variable
> into adpt->status.  What I can't figure out is why.  It seems completely
> redundant, but I have a nagging feeling that there is a good reason.

Yes, I think your set_features and set_rx_mode functions would be
greatly simplified, if each of them did take care of programming the HW
immediately based on function arguments/flags. Unless absolutely
required (e.g: suspend/resume, outside of the scope of the function
etc..) having bookeeping variables is always something that can be out
of sync, so better avoid them as much as possible.

[snip]

>>> +    napi_enable(&adpt->rx_q.napi);
>>> +
>>> +    /* enable mac irq */
>>> +    writel(~DIS_INT, adpt->base + EMAC_INT_STATUS);
>>> +    writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK);
>>> +
>>> +    netif_start_queue(netdev);
>>
>> Starting the TX queue is typically the last ting you want to do, to
>> avoid a transient state where the TX queue is enabled, and the link is
>> not (which is okay if your driver is properly implemented and reflects
>> carrier changes anyway).
> 
> So I should move the netif_start_queue() to the end of this function?
> Sorry if that's a stupid question, but I know little about the MAC side
> of network drivers.

That's fine, yes moving netif_start_queue() at the far end of the
function is a good change.

[snip]

>>
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +/* Bring down the interface/HW */
>>> +void emac_mac_down(struct emac_adapter *adpt, bool reset)
>>> +{
>>> +    struct net_device *netdev = adpt->netdev;
>>> +    struct emac_phy *phy = &adpt->phy;
>>> +    unsigned long flags;
>>> +
>>> +    set_bit(EMAC_STATUS_DOWN, &adpt->status);
>>
>> Do you need to maintain that? Would not netif_running() tell you what
>> you want if you reflect the carrier state properly?
> 
> I think that emac_work_thread_link_check() handles this.  It's a timer
> thread that polls the link state and calls netif_carrier_off() if the
> link is down.  Is that sufficient?
> 

Probably, then again, with PHYLIB you have the option of either
switching the PHY to interrupt mode (thsus saving the polling_), or it
polls the PHY for link statuses every HZ.

[snip]

>>> +    if (skb_network_offset(skb) != ETH_HLEN)
>>> +        TPD_TYP_SET(&tpd, 1);
>>> +
>>> +    emac_tx_fill_tpd(adpt, tx_q, skb, &tpd);
>>> +
>>> +    netdev_sent_queue(adpt->netdev, skb->len);
>>> +
>>> +    /* update produce idx */
>>> +    prod_idx = (tx_q->tpd.produce_idx << tx_q->produce_shift) &
>>> +            tx_q->produce_mask;
>>> +    emac_reg_update32(adpt->base + tx_q->produce_reg,
>>> +              tx_q->produce_mask, prod_idx);
>>
>> Since you have a producer index, you should consider checking
>> skb->xmit_more to know whether you can update the register now, or
>> later, which could save some expensive operation and batch TX.
> 
> I'll have to figure out what means and get back to you.  When would
> "later" be?

After the driver gets accepted mainline for instance would seem fine.
Considering how this seems to work, something like this is usally all
that is needed:

if (!skb->xmit_more || netif_xmit_stopped(txq)
	/* write producer index to get HW to transmit */

[snip]

>>> +static int debug = -1;
>>> +module_param(debug, int, S_IRUGO | S_IWUSR | S_IWGRP);
>>
>> ethtool -s <iface> msglvl provides you with that already.
> 
> I'll remove it.  There's no ethtool support in this driver anyway, but
> there's no code that uses this parameter.

Adding support for changing message levels is really trivial, and will
probably help you while developing this driver.

[snip]

>>
>>> +}
>>> +
>>> +irqreturn_t emac_isr(int _irq, void *data)
>>> +{
>>> +    struct emac_irq *irq = data;
>>> +    struct emac_adapter *adpt = container_of(irq, struct
>>> emac_adapter, irq);
>>> +    struct emac_rx_queue *rx_q = &adpt->rx_q;
>>> +
>>> +    int max_ints = 1;
>>> +    u32 isr, status;
>>> +
>>> +    /* disable the interrupt */
>>> +    writel(0, adpt->base + EMAC_INT_MASK);
>>> +
>>> +    do {
>>
>> With max_ints = 1, this is essentially the same as no loop, so just
>> inline it to reduce the indentation.
> 
> In another internal version of this driver, max_ints is set to 5.  Could
> this be some way of processing multiple packets in one interrupt?  Isn't
> that something that NAPI already takes care of, anyway?

Yes, NAPI is going to mitigate the cost of taking an interrupt and
scheduling your bottom-half/soft IRQ for actual packet processing, it is
the recommended way to mitigate the number of interrupts in the receive
path (and transmit for that matter).

> 
>>> +        isr = readl_relaxed(adpt->base + EMAC_INT_STATUS);
>>> +        status = isr & irq->mask;
>>> +
>>> +        if (status == 0)
>>> +            break;
>>> +
>>> +        if (status & ISR_ERROR) {
>>> +            netif_warn(adpt,  intr, adpt->netdev,
>>> +                   "warning: error irq status 0x%lx\n",
>>> +                   status & ISR_ERROR);
>>> +            /* reset MAC */
>>> +            set_bit(EMAC_STATUS_TASK_REINIT_REQ, &adpt->status);
>>> +            emac_work_thread_reschedule(adpt);
>>> +        }
>>> +
>>> +        /* Schedule the napi for receive queue with interrupt
>>> +         * status bit set
>>> +         */
>>> +        if ((status & rx_q->intr)) {
>>> +            if (napi_schedule_prep(&rx_q->napi)) {
>>> +                irq->mask &= ~rx_q->intr;
>>> +                __napi_schedule(&rx_q->napi);
>>> +            }
>>> +        }
>>> +
>>> +        if (status & TX_PKT_INT)
>>> +            emac_mac_tx_process(adpt, &adpt->tx_q);
>>
>> You should consider using a NAPI instance for reclaiming TX buffers as
>> well.
> 
> I'll have to figure out what means and get back to you.

drivers/net/ethernet/broadcom/bcmsysport.c is an example driver that
reclaims transmitted buffers in NAPI. What that means is, take the TX
completion interrupt, schedule a NAPI instance to run, and this NAPI
instance cleans up the entire TX queue (it is not bounded, like the RX
NAPI instance). It is really just moving the freeing of SKBs into
softIRQ context vs. hardIRQ.

[snip]

>>> +/* Configure VLAN tag strip/insert feature */
>>> +static int emac_set_features(struct net_device *netdev,
>>> +                 netdev_features_t features)
>>> +{
>>> +    struct emac_adapter *adpt = netdev_priv(netdev);
>>> +
>>> +    netdev_features_t changed = features ^ netdev->features;
>>> +
>>> +    if (!(changed & (NETIF_F_HW_VLAN_CTAG_TX |
>>> NETIF_F_HW_VLAN_CTAG_RX)))
>>> +        return 0;
>>> +
>>> +    netdev->features = features;
>>> +    if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
>>> +        set_bit(EMAC_STATUS_VLANSTRIP_EN, &adpt->status);
>>> +    else
>>> +        clear_bit(EMAC_STATUS_VLANSTRIP_EN, &adpt->status);
>>
>> What about TX vlan offload?
> 
> I don't know what that is.

TX VLAN offload would be that you can specify the VLAN id somewhere in a
packet's descriptor and have the HW automatically build an Ethernet
frame with the correct VLAN id, and all the Ethernet frame payload
appropriately placed at the correct offsets, with no cost for the CPU
but indicating that information (and not having to do a memmove() to
insert the 802.1Q tag).

[snip]

>>> +/* Probe function */
>>> +static int emac_probe(struct platform_device *pdev)
>>> +{
>>> +    struct net_device *netdev;
>>> +    struct emac_adapter *adpt;
>>> +    struct emac_phy *phy;
>>> +    int ret = 0;
>>> +    u32 hw_ver;
>>> +    u32 extended_irq_mask = emac_irq_use_extended ? IMR_EXTENDED_MASK :
>>> +                            IMR_NORMAL_MASK;
>>> +
>>> +    netdev = alloc_etherdev(sizeof(struct emac_adapter));
>>> +    if (!netdev)
>>> +        return -ENOMEM;
>>
>> There are references to multiple queues in the code, so why not
>> alloc_etherdev_mq() here with the correct number of queues?
> 
> That support was removed from the driver, and on our SOC, we hard-code
> the number of queues to 1 anyway.  I'm planning on adding multiple queue
> support (much) later.

Sounds like a good thing to do later, yes.

> 
>>> +    dev_set_drvdata(&pdev->dev, netdev);
>>> +    SET_NETDEV_DEV(netdev, &pdev->dev);
>>> +
>>> +    adpt = netdev_priv(netdev);
>>> +    adpt->netdev = netdev;
>>> +    phy = &adpt->phy;
>>> +    adpt->msg_enable = netif_msg_init(debug, EMAC_MSG_DEFAULT);
>>> +
>>> +    dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
>>
>> Really, is not that supposed to run on ARM64 servers?
> 
> Well, this version of the driver isn't, which is why it supports DT and
> not ACPI.  I'm planning on adding that support in a later patch.
> However, I'll add support for 64-bit masks in the next version of this
> patch.
> 
> Would this be okay:
> 
>     retval = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
>     if (retval) {
>         dev_err(&pdev->dev, "failed to set DMA mask err %d\n", retval);
>         goto err_res;
>     }
> 
> I've seen code like this in other drivers:
> 
>         ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
>         if (ret) {
>                 ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
>                 if (ret) {
>                         dev_err(dev, "failed to set dma mask\n");
>                         return ret;
>                 }
>         }
> 
> and I've never understood why it's necessary to fall back to 32-bits if
> 64 bits fails.  Isn't 64 bits a superset of 32 bits?  The driver is
> saying that the hardware supports all of DDR.  How could fail, and how
> could 32-bit succeed if 64-bits fails?

I believe there could be cases where the HW is capable of addressing
more physical memory than the CPU itself (usually unlikely, but it
could), there could be cases where the HW is behind an IOMMMU which only
has a window into the DDR, and that could prevent a higher DMA_BIT_MASK
from being successfully configured.
-- 
Florian

^ permalink raw reply

* Re: [PATCH net-next v3] packet: uses kfree_skb() for errors.
From: Weongyo Jeong @ 2016-04-14 21:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, willemb
In-Reply-To: <20160413.225601.721902514935757078.davem@davemloft.net>

On Wed, Apr 13, 2016 at 10:56:01PM -0400, David Miller wrote:
> From: Weongyo Jeong <weongyo.linux@gmail.com>
> Date: Fri,  8 Apr 2016 09:25:48 -0700
> 
> > diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> > index 1ecfa71..4e054bb 100644
> > --- a/net/packet/af_packet.c
> > +++ b/net/packet/af_packet.c
> > @@ -2042,6 +2042,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
> >  	u8 *skb_head = skb->data;
> >  	int skb_len = skb->len;
> >  	unsigned int snaplen, res;
> > +	bool err = false;
> >  
> >  	if (skb->pkt_type == PACKET_LOOPBACK)
> >  		goto drop;
> 
> It is non-canonical to use a variable named 'err' as a boolean.
> Instead, all programmers expect a variable named 'err' to be an
> integer type and to hold an error code.
> 
> Therefore please use a more appropriate name for this variable.
> 
> Name it in a way which describes the exact important condition
> which is true or false.
> 
> Thank you.

Thank you for your opinion.  I'd fixed it to proper name and
submitted v4 patch.

Regards,
Weongyo Jeong

^ permalink raw reply

* [PATCH net-next v4] packet: uses kfree_skb() for errors.
From: Weongyo Jeong @ 2016-04-14 21:10 UTC (permalink / raw)
  To: netdev; +Cc: Weongyo Jeong, David S. Miller, Willem de Bruijn

consume_skb() isn't for error cases that kfree_skb() is more proper
one.  At this patch, it fixed tpacket_rcv() and packet_rcv() to be
consistent for error or non-error cases letting perf trace its event
properly.

Signed-off-by: Weongyo Jeong <weongyo.linux@gmail.com>
---
 net/packet/af_packet.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1ecfa71..3df29df 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2042,6 +2042,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	u8 *skb_head = skb->data;
 	int skb_len = skb->len;
 	unsigned int snaplen, res;
+	bool is_drop_n_account = false;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -2130,6 +2131,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	return 0;
 
 drop_n_acct:
+	is_drop_n_account = true;
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.stats1.tp_drops++;
 	atomic_inc(&sk->sk_drops);
@@ -2141,7 +2143,10 @@ drop_n_restore:
 		skb->len = skb_len;
 	}
 drop:
-	consume_skb(skb);
+	if (!is_drop_n_account)
+		consume_skb(skb);
+	else
+		kfree_skb(skb);
 	return 0;
 }
 
@@ -2160,6 +2165,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct sk_buff *copy_skb = NULL;
 	struct timespec ts;
 	__u32 ts_status;
+	bool is_drop_n_account = false;
 
 	/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
 	 * We may add members to them until current aligned size without forcing
@@ -2367,10 +2373,14 @@ drop_n_restore:
 		skb->len = skb_len;
 	}
 drop:
-	kfree_skb(skb);
+	if (!is_drop_n_account)
+		consume_skb(skb);
+	else
+		kfree_skb(skb);
 	return 0;
 
 drop_n_account:
+	is_drop_n_account = true;
 	po->stats.stats1.tp_drops++;
 	spin_unlock(&sk->sk_receive_queue.lock);
 
-- 
2.1.3

^ permalink raw reply related

* [net-next PATCH] netdev_features: Add NETIF_F_TSO_MANGLEID to NETIF_F_ALL_TSO
From: Alexander Duyck @ 2016-04-14 21:04 UTC (permalink / raw)
  To: jesse, netdev, davem, alexander.duyck, tom

I realized that when I added NETIF_F_TSO_MANGLEID as a TSO type I forgot to
add it to NETIF_F_ALL_TSO.  This patch corrects that so the flag will be
included correctly.

The result should be minor as it was only used by a few drivers and in a
few specific cases such as when NETIF_F_SG was not supported on a device so
the TSO flags were cleared.

Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
---
 include/linux/netdev_features.h |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 9fc79df0e561..15eb0b12fff9 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -164,7 +164,8 @@ enum {
 #define NETIF_F_CSUM_MASK	(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \
 				 NETIF_F_HW_CSUM)
 
-#define NETIF_F_ALL_TSO 	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
+#define NETIF_F_ALL_TSO 	(NETIF_F_TSO | NETIF_F_TSO6 | \
+				 NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID)
 
 #define NETIF_F_ALL_FCOE	(NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \
 				 NETIF_F_FSO)

^ permalink raw reply related

* Re: [PATCH net-next v1 1/1] tipc: fix a race condition leading to subscriber refcnt bug
From: David Miller @ 2016-04-14 20:47 UTC (permalink / raw)
  To: parthasarathy.bhuvaragan; +Cc: netdev, tipc-discussion
In-Reply-To: <1460459121-2403-1-git-send-email-parthasarathy.bhuvaragan@ericsson.com>

From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Date: Tue, 12 Apr 2016 13:05:21 +0200

> Until now, the requests sent to topology server are queued
> to a workqueue by the generic server framework.
> These messages are processed by worker threads and trigger the
> registered callbacks.
> To reduce latency on uniprocessor systems, explicit rescheduling
> is performed using cond_resched() after MAX_RECV_MSG_COUNT(25)
> messages.
> 
> This implementation on SMP systems leads to an subscriber refcnt
> error as described below:
> When a worker thread yields by calling cond_resched() in a SMP
> system, a new worker is created on another CPU to process the
> pending workitem. Sometimes the sleeping thread wakes up before
> the new thread finishes execution.
> This breaks the assumption on ordering and being single threaded.
> The fault is more frequent when MAX_RECV_MSG_COUNT is lowered.
> 
> If the first thread was processing subscription create and the
> second thread processing close(), the close request will free
> the subscriber and the create request oops as follows:
 ...
> In this commit, we
> - rename tipc_conn_shutdown() to tipc_conn_release().
> - move connection release callback execution from tipc_close_conn()
>   to a new function tipc_sock_release(), which is executed before
>   we free the connection.
> Thus we release the subscriber during connection release procedure
> rather than connection shutdown procedure.
> 
> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
> Acked-by: Ying Xue <ying.xue@windriver.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH V3] net: mediatek: update the IRQ part of the binding document
From: David Miller @ 2016-04-14 20:45 UTC (permalink / raw)
  To: blogic-p3rKhJxN3npAfugRpC6u6w
  Cc: nbd-p3rKhJxN3npAfugRpC6u6w, matthias.bgg-Re5JQEeQqe8AvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	devicetree-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1460442918-6070-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

From: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
Date: Tue, 12 Apr 2016 08:35:18 +0200

> The current binding document only describes a single interrupt. Update the
> document by adding the 2 other interrupts.
> 
> The driver currently only uses a single interrupt. The HW is however able
> to using IRQ grouping to split TX and RX onto separate GIC irqs.
> 
> Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 net 0/4] ipv6: datagram: Update dst cache of a connected udp sk during pmtu update
From: David Miller @ 2016-04-14 20:45 UTC (permalink / raw)
  To: kafai; +Cc: netdev, xiyou.wangcong, edumazet, weiwan, kernel-team
In-Reply-To: <1460413777-177662-1-git-send-email-kafai@fb.com>

From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 11 Apr 2016 15:29:33 -0700

> v2:
> ~ Protect __sk_dst_get() operations with rcu_read_lock in
>   release_cb() because another thread may do ip6_dst_store()
>   for a udp sk without taking the sk lock (e.g. in sendmsg).
> ~ Do a ipv6_addr_v4mapped(&sk->sk_v6_daddr) check before
>   calling ip6_datagram_dst_update() in patch 3 and 4.  It is
>   similar to how __ip6_datagram_connect handles it.
> ~ One fix in ip6_datagram_dst_update() in patch 2.  It needs
>   to check (np->flow_label & IPV6_FLOWLABEL_MASK) before
>   doing fl6_sock_lookup.  I was confused with the naming
>   of IPV6_FLOWLABEL_MASK and IPV6_FLOWINFO_MASK.
> ~ Check dst->obsolete just on the safe side, although I think it
>   should at least have DST_OBSOLETE_FORCE_CHK by now.
> ~ Add Fixes tag to patch 3 and 4
> ~ Add some points from the previous discussion about holding
>   sk lock to the commit message in patch 3.
> 
> v1:
> There is a case in connected UDP socket such that
> getsockopt(IPV6_MTU) will return a stale MTU value. The reproducible
> sequence could be the following:
> 1. Create a connected UDP socket
> 2. Send some datagrams out
> 3. Receive a ICMPV6_PKT_TOOBIG
> 4. No new outgoing datagrams to trigger the sk_dst_check()
>    logic to update the sk->sk_dst_cache.
> 5. getsockopt(IPV6_MTU) returns the mtu from the invalid
>    sk->sk_dst_cache instead of the newly created RTF_CACHE clone.
> 
> Patch 1 and 2 are the prep work.
> Patch 3 and 4 are the fixes.

Series applied, thanks Martin.

^ permalink raw reply

* Re: [net-next PATCH v2 0/5] GRO Fixed IPv4 ID support and GSO partial support
From: David Miller @ 2016-04-14 20:24 UTC (permalink / raw)
  To: aduyck; +Cc: herbert, tom, jesse, alexander.duyck, edumazet, netdev
In-Reply-To: <20160411013855.11189.56567.stgit@ahduyck-xeon-server>

From: Alexander Duyck <aduyck@mirantis.com>
Date: Sun, 10 Apr 2016 21:44:38 -0400

> This patch series sets up a few different things.
> 
> First it adds support for GRO of frames with a fixed IP ID value.  This
> will allow us to perform GRO for frames that go through things like an IPv6
> to IPv4 header translation.
> 
> The second item we add is support for segmenting frames that are generated
> this way.  Most devices only support an incrementing IP ID value, and in
> the case of TCP the IP ID can be ignored in many cases since the DF bit
> should be set.  So we can technically segment these frames using existing
> TSO if we are willing to allow the IP ID to be mangled.  As such I have
> added a matching feature for the new form of GRO/GSO called TCP IPv4 ID
> mangling.  With this enabled we can assemble and disassemble a frame with
> the sequence number fixed and the only ill effect will be that the IPv4 ID
> will be altered which may or may not have any noticeable effect.  As such I
> have defaulted the feature to disabled.
> 
> The third item this patch series adds is support for partial GSO
> segmentation.  Partial GSO segmentation allows us to split a large frame
> into two pieces.  The first piece will have an even multiple of MSS worth
> of data and the headers before the one pointed to by csum_start will have
> been updated so that they are correct for if the data payload had already
> been segmented.  By doing this we can do things such as precompute the
> outer header checksums for a frame to be segmented allowing us to perform
> TSO on devices that don't support tunneling, or tunneling with outer header
> checksums.
> 
> This patch set is based on the net-next tree, but I included "net: remove
> netdevice gso_min_segs" in my tree as I assume it is likely to be applied
> before this patch set will and I wanted to avoid a merge conflict.
> 
> v2: Fixed items reported by Jesse Gross
> 	fixed missing GSO flag in MPLS check
> 	adding DF check for MANGLEID
>     Moved extra GSO feature checks into gso_features_check
>     Rebased batches to account for "net: remove netdevice gso_min_segs"
> 
> Driver patches from the first patch set should still be compatible.  However
> I do have a few changes in them so I will submit a v2 of those to Jeff
> Kirsher once these patches are accepted into net-next.
> 
> Example driver patches for i40e, ixgbe, and igb:
> https://patchwork.ozlabs.org/patch/608221/
> https://patchwork.ozlabs.org/patch/608224/
> https://patchwork.ozlabs.org/patch/608225/

Series applied, thanks Alexander.

^ permalink raw reply

* Re: [PATCH net-next 1/8] dsa: mv88e6xxx: Prepare for turning this into a library module
From: Vivien Didelot @ 2016-04-14 20:22 UTC (permalink / raw)
  To: Andrew Lunn, David Miller; +Cc: Florian Fainelli, netdev, Andrew Lunn
In-Reply-To: <1460591998-20598-2-git-send-email-andrew@lunn.ch>

Hi Andrew,

Andrew Lunn <andrew@lunn.ch> writes:

> Export all the functions so that we can later turn the module into a
> library module.
>
> Signed-off-by: Andrew Lunn <andrew@lunn.ch>

Sorry but I don't like this. We don't want one module per 88E6xxx switch
model. We need one driver supporting them all, like any other driver.

Multiple modules will continue to confuse us with duplicated code. For
instance, every specific mv88e6*_setup_global functions program the
switch's DSA device number with something like:

    REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f);

Looking at every drivers/net/dsa/mv88e6*.c file, there are only few
differences in their dsa_switch_driver structures:

The .setup function is always specific, but easily factorizable in a
mv88e6xxx_setup function. The .probe function can be merged once we have
a single driver. mv88e6131 has different phy_{read,write} functions
which can be abstracted in mv88e6xxx_phy_{read,write}. Only mv88e6352
has support for the EEPROM, which is simple to abstract too.

I'm working on a few patches right away to factorize this and lighten up
that part from your current refactoring of DSA.

Here's an example of duplicated code fixed for the 6131 PHY access code:

    http://ix.io/wJm

Thanks,
Vivien

^ permalink raw reply

* Re: [PATCH 1/2] [v4] net: emac: emac gigabit ethernet controller driver
From: Timur Tabi @ 2016-04-14 20:19 UTC (permalink / raw)
  To: Florian Fainelli, netdev, linux-kernel, devicetree, linux-arm-msm,
	sdharia, Shanker Donthineni, Greg Kroah-Hartman, vikrams, cov,
	gavidov, Rob Herring, andrew, bjorn.andersson, Mark Langsdorf,
	Jon Masters, Andy Gross, David S. Miller
In-Reply-To: <570EC541.6080603@gmail.com>

Florian Fainelli wrote:
> On 13/04/16 10:59, Timur Tabi wrote:
>> From: Gilad Avidov <gavidov@codeaurora.org>
>>
>> Add supports for ethernet controller HW on Qualcomm Technologies, Inc. SoC.
>> This driver supports the following features:
>> 1) Checksum offload.
>> 2) Runtime power management support.
>> 3) Interrupt coalescing support.
>> 4) SGMII phy.
>> 5) SGMII direct connection without external phy.
>
> I think you should shoot for more simple for an initial submission:
>
> - no offload
> - no timestamping
>
> get that accepted, and then add features one by one, it sure is more
> work, but it helps with the review, and makes you work off a solid base.

Unfortunately, I didn't write this driver initially, so I'm not sure how 
to remove these features from it.  Variants of this driver have been 
bouncing around Qualcomm for years, and even the author of this patch 
(Gilad) is no longer around.

So although I have a lot of experience upstreaming code, I have little 
experience and knowledge with network drivers.  I'm going to need a lot 
of hand-holding.  I hope you will be patient with me.

Timestamping support seems to be just a few lines of code, so I can 
probably remove that.  I don't know where offloading is in the driver, 
however.  I don't know how offloading in netdev drivers works.

> You will see below, but a pet peeve of mine is authors reimplementing
> code that exists in PHYLIB.

I can understand that, but the PHYs on these SOCs are non-standard.  The 
"internal PHY" (for lack of a better name) is part of the EMAC itself, 
and it acts as a middle-man for the external PHY.  There is an MDIO bus, 
but it's hard-wired to the EMAC, and most of the time you don't touch it 
directly.  Instead you let the EMAC and/or the internal PHY send/receive 
commands/data to the external PHY on your behalf.  The internal phy 
talks to the external phy via SGMII only.  Only the EMAC uses the mdio bus.

I will look at PHYLIB, but I can't tell you whether it will work with 
this hardware (Gilad previously claim that it wouldn't work well).

>> diff --git a/Documentation/devicetree/bindings/net/qcom-emac.txt b/Documentation/devicetree/bindings/net/qcom-emac.txt
>> new file mode 100644
>> index 0000000..df5e7c0
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/net/qcom-emac.txt
>> @@ -0,0 +1,65 @@
>> +Qualcomm EMAC Gigabit Ethernet Controller
>> +
>> +Required properties:
>> +- compatible : Should be "qcom,emac".
>> +- reg : Offset and length of the register regions for the device
>> +- reg-names : Register region names referenced in 'reg' above.
>> +	Required register resource entries are:
>> +	"base"   : EMAC controller base register block.
>> +	"csr"    : EMAC wrapper register block.
>> +	Optional register resource entries are:
>> +	"ptp"    : EMAC PTP (1588) register block.
>> +		   Required if 'qcom,emac-tstamp-en' is present.
>> +	"sgmii"  : EMAC SGMII PHY register block.
>> +- interrupts : Interrupt numbers used by this controller
>> +- interrupt-names : Interrupt resource names referenced in 'interrupts' above.
>> +	Required interrupt resource entries are:
>> +	"emac_core0"   : EMAC core0 interrupt.
>> +	"sgmii_irq"   : EMAC SGMII interrupt.
>> +- phy-addr            : Specifies phy address on MDIO bus.
>> +			Required if the optional property "qcom,no-external-phy"
>> +			is not specified.
>
> This is not the standard way to represent an Ethernet PHY hanging off a
> MDIO bus see ethernet.txt and phy.txt in D/dt/bindings/net/

The MDIO bus on these chips is not accessible as a separate entity.  It 
is melded (for lack of a better word) into the EMAC itself.  That's why 
there is a "qcom,no-external-phy" property.  You could, in theory, wire 
the internal phy of one SOC directly to the internal phy of another SOC, 
and use that as in interconnect between SOCs.  I don't know of any such 
use-cases however.

>> +Optional properties:
>> +- qcom,emac-tstamp-en       : Enables the PTP (1588) timestamping feature.
>> +			      Include this only if PTP (1588) timestamping
>> +			      feature is needed. If included, "ptp" register
>> +			      base should be specified.
>
> If the "ptp" register range is not specified, then PTP gets disabled, so
> is a boolean really required here, considering that this looks like a
> policy decision more than anything.

It is, and I forget to remove it, since this is apparently handled via 
ethtool (which the driver does not currently support).

>> +- mac-address               : The 6-byte MAC address. If present, it is the
>> +			      default MAC address.
>
> This property is pretty much mandatory

Ok.

>> +- qcom,no-external-phy      : Indicates there is no external PHY connected to
>> +			      EMAC. Include this only if the EMAC is directly
>> +			      connected to the peer end without EPHY.
>
> How is the internal PHY accessed, is it responding on the MDIO bus at a
> particular address?

There is a set of memory-mapped registers.  It's not connected via MDIO 
at all.  It's mapped via the "sgmii" addresses in the device tree (see 
function emac_sgmii_config).

 > If so, standard MDIO scanning/probing works, and you
> can have your PHY driver flag this device has internal. Worst case, you
> can do what BCMGENET does, and have a special "phy-mode" value set to
> "internal" when this knowledge needs to exist prior to MDIO bus scanning
> (e.g: to power on the PHY).

So the internal phy is not a real phy.  It's not capable of driving an 
RJ45 port (there's no analog part).  It's an SGMII-like device that is 
hard-wired to the EMAC itself.

In theory, the internal PHY is optional.  You could design an SOC that 
has just the EMAC connected via normal MDIO to an external phy.  I 
really wish our hardware designers has done that.  But unfortunately, 
there are no SOCs like that, and so we have to treat the internal phy as 
an extension of the EMAC.

My preference would be to get rid of the "qcom,no-external-phy" property 
and have an external phy be required, at least until Qualcomm creates an 
SOC without the internal phy (which may never happen, for all I know).

>> +/* Config MAC modes */
>> +void emac_mac_mode_config(struct emac_adapter *adpt)
>> +{
>> +	u32 mac;
>> +
>> +	mac = readl(adpt->base + EMAC_MAC_CTRL);
>> +
>> +	if (test_bit(EMAC_STATUS_VLANSTRIP_EN, &adpt->status))
>> +		mac |= VLAN_STRIP;
>> +	else
>> +		mac &= ~VLAN_STRIP;
>> +
>> +	if (test_bit(EMAC_STATUS_PROMISC_EN, &adpt->status))
>> +		mac |= PROM_MODE;
>> +	else
>> +		mac &= ~PROM_MODE;
>> +
>> +	if (test_bit(EMAC_STATUS_MULTIALL_EN, &adpt->status))
>> +		mac |= MULTI_ALL;
>> +	else
>> +		mac &= ~MULTI_ALL;
>> +
>> +	if (test_bit(EMAC_STATUS_LOOPBACK_EN, &adpt->status))
>> +		mac |= MAC_LP_EN;
>> +	else
>> +		mac &= ~MAC_LP_EN;
>
> Do you need to maintain these flags when most, if not all of them
> already exist in dev->flags or dev->features?

So you're saying that, for example, in emac_set_features() I should 
remove this:

	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
		set_bit(EMAC_STATUS_VLANSTRIP_EN, &adpt->status);
	else
		clear_bit(EMAC_STATUS_VLANSTRIP_EN, &adpt->status);

and then in emac_mac_mode_config(), I should do this instead:

void emac_mac_mode_config(struct emac_adapter *adpt)
{
	struct net_device *netdev = adpt->netdev;

	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
		mac |= VLAN_STRIP;
	else
		mac &= ~VLAN_STRIP;


If so, then what do I do in emac_rx_mode_set()?  Should I delete this 
entire block:

	/* Check for Promiscuous and All Multicast modes */
	if (netdev->flags & IFF_PROMISC) {
		set_bit(EMAC_STATUS_PROMISC_EN, &adpt->status);
	} else if (netdev->flags & IFF_ALLMULTI) {
		set_bit(EMAC_STATUS_MULTIALL_EN, &adpt->status);
		clear_bit(EMAC_STATUS_PROMISC_EN, &adpt->status);
	} else {
		clear_bit(EMAC_STATUS_MULTIALL_EN, &adpt->status);
		clear_bit(EMAC_STATUS_PROMISC_EN, &adpt->status);
	}

It does look like Gilad is just mirroring the flags/features variable 
into adpt->status.  What I can't figure out is why.  It seems completely 
redundant, but I have a nagging feeling that there is a good reason.

>> +	/* setup link speed */
>> +	mac &= ~SPEED_BMSK;
>> +	switch (phy->link_speed) {
>> +	case EMAC_LINK_SPEED_1GB_FULL:
>> +		mac |= ((emac_mac_speed_1000 << SPEED_SHFT) & SPEED_BMSK);
>> +		csr1 |= FREQ_MODE;
>> +		break;
>> +	default:
>> +		mac |= ((emac_mac_speed_10_100 << SPEED_SHFT) & SPEED_BMSK);
>> +		csr1 &= ~FREQ_MODE;
>> +		break;
>> +	}
>
> If you implement the driver using PHYLIB, which you should in order to
> support arbitrary or internal PHYs, then this function gets invoked
> whenever there is a link parameter change (auto-neg, forcing,
> duplex/speed/no link etc.).

Ok, I'll probably understand this better once I figure out how to 
implement phylib.

>> +	napi_enable(&adpt->rx_q.napi);
>> +
>> +	/* enable mac irq */
>> +	writel(~DIS_INT, adpt->base + EMAC_INT_STATUS);
>> +	writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK);
>> +
>> +	netif_start_queue(netdev);
>
> Starting the TX queue is typically the last ting you want to do, to
> avoid a transient state where the TX queue is enabled, and the link is
> not (which is okay if your driver is properly implemented and reflects
> carrier changes anyway).

So I should move the netif_start_queue() to the end of this function? 
Sorry if that's a stupid question, but I know little about the MAC side 
of network drivers.

>> +	clear_bit(EMAC_STATUS_DOWN, &adpt->status);
>> +
>> +	/* check link status */
>> +	set_bit(EMAC_STATUS_TASK_LSC_REQ, &adpt->status);
>> +	adpt->link_chk_timeout = jiffies + EMAC_TRY_LINK_TIMEOUT;
>> +	mod_timer(&adpt->timers, jiffies);
>
> Please implement a PHYLIB driver and use phy_start() here.

Ok, I'll try it.

>
>> +
>> +	return 0;
>> +}
>> +
>> +/* Bring down the interface/HW */
>> +void emac_mac_down(struct emac_adapter *adpt, bool reset)
>> +{
>> +	struct net_device *netdev = adpt->netdev;
>> +	struct emac_phy *phy = &adpt->phy;
>> +	unsigned long flags;
>> +
>> +	set_bit(EMAC_STATUS_DOWN, &adpt->status);
>
> Do you need to maintain that? Would not netif_running() tell you what
> you want if you reflect the carrier state properly?

I think that emac_work_thread_link_check() handles this.  It's a timer 
thread that polls the link state and calls netif_carrier_off() if the 
link is down.  Is that sufficient?

>> +
>> +	netif_stop_queue(netdev);
>> +	netif_carrier_off(netdev);
>
> phy_stop() would take care of the latter.

I'm beginning to see how phylib support would be useful.

>> +/* Process transmit event */
>> +void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q)
>> +{
>> +	struct emac_buffer *tpbuf;
>> +	u32 hw_consume_idx;
>> +	u32 pkts_compl = 0, bytes_compl = 0;
>> +	u32 reg = readl_relaxed(adpt->base + tx_q->consume_reg);
>> +
>> +	hw_consume_idx = (reg & tx_q->consume_mask) >> tx_q->consume_shift;
>> +
>> +	while (tx_q->tpd.consume_idx != hw_consume_idx) {
>> +		tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx);
>> +		if (tpbuf->dma_addr) {
>> +			dma_unmap_single(adpt->netdev->dev.parent,
>> +					 tpbuf->dma_addr, tpbuf->length,
>> +					 DMA_TO_DEVICE);
>> +			tpbuf->dma_addr = 0;
>> +		}
>> +
>> +		if (tpbuf->skb) {
>> +			pkts_compl++;
>> +			bytes_compl += tpbuf->skb->len;
>> +			dev_kfree_skb_irq(tpbuf->skb);
>> +			tpbuf->skb = NULL;
>> +		}
>> +
>> +		if (++tx_q->tpd.consume_idx == tx_q->tpd.count)
>> +			tx_q->tpd.consume_idx = 0;
>> +	}
>> +
>> +	if (pkts_compl || bytes_compl)
>> +		netdev_completed_queue(adpt->netdev, pkts_compl, bytes_compl);
>
> The condition can be eliminated.

Ok.

>> +	if (skb_network_offset(skb) != ETH_HLEN)
>> +		TPD_TYP_SET(&tpd, 1);
>> +
>> +	emac_tx_fill_tpd(adpt, tx_q, skb, &tpd);
>> +
>> +	netdev_sent_queue(adpt->netdev, skb->len);
>> +
>> +	/* update produce idx */
>> +	prod_idx = (tx_q->tpd.produce_idx << tx_q->produce_shift) &
>> +		    tx_q->produce_mask;
>> +	emac_reg_update32(adpt->base + tx_q->produce_reg,
>> +			  tx_q->produce_mask, prod_idx);
>
> Since you have a producer index, you should consider checking
> skb->xmit_more to know whether you can update the register now, or
> later, which could save some expensive operation and batch TX.

I'll have to figure out what means and get back to you.  When would 
"later" be?

>> diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
>> new file mode 100644
>> index 0000000..7d18de3
>> --- /dev/null
>> +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
>
> This file is really really ugly, and duplicates a lot of functionality
> provided by PHYLIB, you really need to implement a PHYLIB MDIO driver
> and eventually a small PHY driver for your internal PHY if it needs some
> baby sitting.

I'll try.

>> diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
>> new file mode 100644
>> index 0000000..ce328f5
>> --- /dev/null
>> +++ b/drivers/net/ethernet/qualcomm/emac/emac.c
>> @@ -0,0 +1,1206 @@
>> +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 and
>> + * only version 2 as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +/* Qualcomm Technologies, Inc. EMAC Gigabit Ethernet Driver
>> + * The EMAC driver supports following features:
>> + * 1) Receive Side Scaling (RSS).
>> + * 2) Checksum offload.
>> + * 3) Multiple PHY support on MDIO bus.
>> + * 4) Runtime power management support.
>> + * 5) Interrupt coalescing support.
>> + * 6) SGMII phy.
>> + * 7) SGMII direct connection (without external phy).
>> + */
>> +
>> +#include <linux/if_ether.h>
>> +#include <linux/if_vlan.h>
>> +#include <linux/interrupt.h>
>> +#include <linux/io.h>
>> +#include <linux/module.h>
>> +#include <linux/of.h>
>> +#include <linux/of_net.h>
>> +#include <linux/phy.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/pm_runtime.h>
>> +#include "emac.h"
>> +#include "emac-mac.h"
>> +#include "emac-phy.h"
>> +#include "emac-sgmii.h"
>> +
>> +#define DRV_VERSION "1.3.0.0"
>> +
>> +static int debug = -1;
>> +module_param(debug, int, S_IRUGO | S_IWUSR | S_IWGRP);
>
> ethtool -s <iface> msglvl provides you with that already.

I'll remove it.  There's no ethtool support in this driver anyway, but 
there's no code that uses this parameter.

>
>> +
>> +static int emac_irq_use_extended;
>> +module_param(emac_irq_use_extended, int, S_IRUGO | S_IWUSR | S_IWGRP);
>
> What is that module parameter used for?

Good question.  Apparently it's some IRQ mask.  I'll have to study the 
documentation and get back to you.  We don't ever set the parameter, so 
I think I'll just drop it.

>> +const char emac_drv_name[] = "qcom-emac";
>> +const char emac_drv_description[] =
>> +			"Qualcomm Technologies, Inc. EMAC Ethernet Driver";
>> +const char emac_drv_version[] = DRV_VERSION;
>
> Static all other the place?

Thanks for catching that.  I'll fix it.

>
> [snip]
>
>> +
>> +/* NAPI */
>> +static int emac_napi_rtx(struct napi_struct *napi, int budget)
>> +{
>> +	struct emac_rx_queue *rx_q = container_of(napi, struct emac_rx_queue,
>> +						   napi);
>> +	struct emac_adapter *adpt = netdev_priv(rx_q->netdev);
>> +	struct emac_irq *irq = rx_q->irq;
>> +
>> +	int work_done = 0;
>> +
>> +	/* Keep link state information with original netdev */
>> +	if (!netif_carrier_ok(adpt->netdev))
>> +		goto quit_polling;
>
> I do not think this is a condition that could occur?

I don't know what this code is trying to do.  I'll have to study it and 
get back to you.

>
>> +
>> +	emac_mac_rx_process(adpt, rx_q, &work_done, budget);
>> +
>> +	if (work_done < budget) {
>> +quit_polling:
>> +		napi_complete(napi);
>> +
>> +		irq->mask |= rx_q->intr;
>> +		writel(irq->mask, adpt->base + EMAC_INT_MASK);
>> +	}
>> +
>> +	return work_done;
>> +}
>> +
>> +/* Transmit the packet */
>> +static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev)
>> +{
>> +	struct emac_adapter *adpt = netdev_priv(netdev);
>> +
>> +	return emac_mac_tx_buf_send(adpt, &adpt->tx_q, skb);
>
> I would inline emac_mac_tx_buf_send()'s body here to make it much easier
> to read and audit...

Ok.

>
>> +}
>> +
>> +irqreturn_t emac_isr(int _irq, void *data)
>> +{
>> +	struct emac_irq *irq = data;
>> +	struct emac_adapter *adpt = container_of(irq, struct emac_adapter, irq);
>> +	struct emac_rx_queue *rx_q = &adpt->rx_q;
>> +
>> +	int max_ints = 1;
>> +	u32 isr, status;
>> +
>> +	/* disable the interrupt */
>> +	writel(0, adpt->base + EMAC_INT_MASK);
>> +
>> +	do {
>
> With max_ints = 1, this is essentially the same as no loop, so just
> inline it to reduce the indentation.

In another internal version of this driver, max_ints is set to 5.  Could 
this be some way of processing multiple packets in one interrupt?  Isn't 
that something that NAPI already takes care of, anyway?

>> +		isr = readl_relaxed(adpt->base + EMAC_INT_STATUS);
>> +		status = isr & irq->mask;
>> +
>> +		if (status == 0)
>> +			break;
>> +
>> +		if (status & ISR_ERROR) {
>> +			netif_warn(adpt,  intr, adpt->netdev,
>> +				   "warning: error irq status 0x%lx\n",
>> +				   status & ISR_ERROR);
>> +			/* reset MAC */
>> +			set_bit(EMAC_STATUS_TASK_REINIT_REQ, &adpt->status);
>> +			emac_work_thread_reschedule(adpt);
>> +		}
>> +
>> +		/* Schedule the napi for receive queue with interrupt
>> +		 * status bit set
>> +		 */
>> +		if ((status & rx_q->intr)) {
>> +			if (napi_schedule_prep(&rx_q->napi)) {
>> +				irq->mask &= ~rx_q->intr;
>> +				__napi_schedule(&rx_q->napi);
>> +			}
>> +		}
>> +
>> +		if (status & TX_PKT_INT)
>> +			emac_mac_tx_process(adpt, &adpt->tx_q);
>
> You should consider using a NAPI instance for reclaiming TX buffers as well.

I'll have to figure out what means and get back to you.

>> +		if (status & ISR_OVER)
>> +			netif_warn(adpt, intr, adpt->netdev,
>> +				   "warning: TX/RX overflow status 0x%lx\n",
>> +				   status & ISR_OVER);
>
> This should be ratelimited presumably

Ok.

>
>> +
>> +		/* link event */
>> +		if (status & (ISR_GPHY_LINK | SW_MAN_INT)) {
>> +			emac_lsc_schedule_check(adpt);
>> +			break;
>> +		}
>> +	} while (--max_ints > 0);
>> +
>> +	/* enable the interrupt */
>> +	writel(irq->mask, adpt->base + EMAC_INT_MASK);
>> +
>> +	return IRQ_HANDLED;
>> +}
>> +
>> +/* Configure VLAN tag strip/insert feature */
>> +static int emac_set_features(struct net_device *netdev,
>> +			     netdev_features_t features)
>> +{
>> +	struct emac_adapter *adpt = netdev_priv(netdev);
>> +
>> +	netdev_features_t changed = features ^ netdev->features;
>> +
>> +	if (!(changed & (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX)))
>> +		return 0;
>> +
>> +	netdev->features = features;
>> +	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
>> +		set_bit(EMAC_STATUS_VLANSTRIP_EN, &adpt->status);
>> +	else
>> +		clear_bit(EMAC_STATUS_VLANSTRIP_EN, &adpt->status);
>
> What about TX vlan offload?

I don't know what that is.

>> +/* Called when the network interface is made active */
>> +static int emac_open(struct net_device *netdev)
>> +{
>> +	struct emac_adapter *adpt = netdev_priv(netdev);
>> +	int ret;
>> +
>> +	netif_carrier_off(netdev);
>
> That seems unnecessary here because your close/down function does that,
> and with PHYLIB you would get it set correctly anyway.

Ok.  I'll see what I can do about it.

>> +/* PHY related IOCTLs */
>> +static int emac_mii_ioctl(struct net_device *netdev,
>> +			  struct ifreq *ifr, int cmd)
>> +{
>> +	struct emac_adapter *adpt = netdev_priv(netdev);
>> +	struct emac_phy *phy = &adpt->phy;
>> +	struct mii_ioctl_data *data = if_mii(ifr);
>> +
>> +	switch (cmd) {
>> +	case SIOCGMIIPHY:
>> +		data->phy_id = phy->addr;
>> +		return 0;
>> +
>> +	case SIOCGMIIREG:
>> +		if (!capable(CAP_NET_ADMIN))
>> +			return -EPERM;
>> +
>> +		if (data->reg_num & ~(0x1F))
>> +			return -EFAULT;
>> +
>> +		if (data->phy_id >= PHY_MAX_ADDR)
>> +			return -EFAULT;
>> +
>> +		if (phy->external && data->phy_id != phy->addr)
>> +			return -EFAULT;
>> +
>> +		return emac_phy_read(adpt, data->phy_id, data->reg_num,
>> +				     &data->val_out);
>> +
>> +	case SIOCSMIIREG:
>> +		if (!capable(CAP_NET_ADMIN))
>> +			return -EPERM;
>> +
>> +		if (data->reg_num & ~(0x1F))
>> +			return -EFAULT;
>> +
>> +		if (data->phy_id >= PHY_MAX_ADDR)
>> +			return -EFAULT;
>> +
>> +		if (phy->external && data->phy_id != phy->addr)
>> +			return -EFAULT;
>> +
>> +		return emac_phy_write(adpt, data->phy_id, data->reg_num,
>> +				      data->val_in);
>> +	default:
>> +		return -EFAULT;
>> +	}
>
> All of that can be eliminated with a PHYLIB implementation too.

Ok.

>
> [snip]
>
>> +/* Provide network statistics info for the interface */
>> +struct rtnl_link_stats64 *emac_get_stats64(struct net_device *netdev,
>> +					   struct rtnl_link_stats64 *net_stats)
>> +{
>> +	struct emac_adapter *adpt = netdev_priv(netdev);
>> +	struct emac_stats *stats = &adpt->stats;
>> +	u16 addr = REG_MAC_RX_STATUS_BIN;
>> +	u64 *stats_itr = &adpt->stats.rx_ok;
>> +	u32 val;
>> +
>> +	while (addr <= REG_MAC_RX_STATUS_END) {
>> +		val = readl_relaxed(adpt->base + addr);
>> +		*stats_itr += val;
>> +		++stats_itr;
>> +		addr += sizeof(u32);
>> +	}
>
> There is no reader locking here, what happens if two applications read
> the statistics at the same time?

Ah, even though the readl is atomic, it's reading a bunch of them in a 
row.  I'll add a lock or something.

>> +/* Get the resources */
>> +static int emac_probe_resources(struct platform_device *pdev,
>> +				struct emac_adapter *adpt)
>> +{
>> +	struct net_device *netdev = adpt->netdev;
>> +	struct device_node *node = pdev->dev.of_node;
>> +	struct resource *res;
>> +	const void *maddr;
>> +	int ret = 0;
>> +	int i;
>> +
>> +	/* get time stamp enable flag */
>> +	adpt->timestamp_en = of_property_read_bool(node, "qcom,emac-tstamp-en");
>> +
>> +	/* get mac address */
>> +	maddr = of_get_mac_address(node);
>> +	if (!maddr)
>> +		return -ENODEV;
>
> No, generate a random one, continue, but warn,

Ok.

>
>> +
>> +	memcpy(adpt->mac_perm_addr, maddr, netdev->addr_len);
>> +
>> +	ret = platform_get_irq_byname(pdev, EMAC_MAC_IRQ_RES);
>> +	if (ret < 0) {
>> +		netdev_err(adpt->netdev,
>> +			   "error: missing %s resource\n", EMAC_MAC_IRQ_RES);
>> +		return ret;
>> +	}
>> +	adpt->irq.irq = ret;
>> +
>> +	ret = emac_clks_get(pdev, adpt);
>> +	if (ret)
>> +		return ret;
>> +
>> +	/* get register addresses */
>> +	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base");
>> +	if (!res) {
>> +		netdev_err(adpt->netdev, "error: missing 'base' resource\n");
>> +		ret = -ENXIO;
>> +		goto err_reg_res;
>> +	}
>> +
>> +	adpt->base = devm_ioremap_resource(&pdev->dev, res);
>> +	if (!adpt->base) {
>> +		ret = -ENOMEM;
>> +		goto err_reg_res;
>> +	}
>> +
>> +	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "csr");
>> +	if (!res) {
>> +		netdev_err(adpt->netdev, "error: missing 'csr' resource\n");
>> +		ret = -ENXIO;
>> +		goto err_reg_res;
>> +	}
>
> No need to check that, devm_ioremap_resource() does it too.

Ok.

>> +/* Probe function */
>> +static int emac_probe(struct platform_device *pdev)
>> +{
>> +	struct net_device *netdev;
>> +	struct emac_adapter *adpt;
>> +	struct emac_phy *phy;
>> +	int ret = 0;
>> +	u32 hw_ver;
>> +	u32 extended_irq_mask = emac_irq_use_extended ? IMR_EXTENDED_MASK :
>> +							IMR_NORMAL_MASK;
>> +
>> +	netdev = alloc_etherdev(sizeof(struct emac_adapter));
>> +	if (!netdev)
>> +		return -ENOMEM;
>
> There are references to multiple queues in the code, so why not
> alloc_etherdev_mq() here with the correct number of queues?

That support was removed from the driver, and on our SOC, we hard-code 
the number of queues to 1 anyway.  I'm planning on adding multiple queue 
support (much) later.

>> +	dev_set_drvdata(&pdev->dev, netdev);
>> +	SET_NETDEV_DEV(netdev, &pdev->dev);
>> +
>> +	adpt = netdev_priv(netdev);
>> +	adpt->netdev = netdev;
>> +	phy = &adpt->phy;
>> +	adpt->msg_enable = netif_msg_init(debug, EMAC_MSG_DEFAULT);
>> +
>> +	dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
>
> Really, is not that supposed to run on ARM64 servers?

Well, this version of the driver isn't, which is why it supports DT and 
not ACPI.  I'm planning on adding that support in a later patch. 
However, I'll add support for 64-bit masks in the next version of this 
patch.

Would this be okay:

	retval = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (retval) {
		dev_err(&pdev->dev, "failed to set DMA mask err %d\n", retval);
		goto err_res;
	}

I've seen code like this in other drivers:

         ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
         if (ret) {
                 ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
                 if (ret) {
                         dev_err(dev, "failed to set dma mask\n");
                         return ret;
                 }
         }

and I've never understood why it's necessary to fall back to 32-bits if 
64 bits fails.  Isn't 64 bits a superset of 32 bits?  The driver is 
saying that the hardware supports all of DDR.  How could fail, and how 
could 32-bit succeed if 64-bits fails?

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
Forum, a Linux Foundation collaborative project.

^ permalink raw reply

* Re: [PATCH v3 0/2] sctp: delay calls to sk_data_ready() as much as possible
From: marcelo.leitner @ 2016-04-14 20:19 UTC (permalink / raw)
  To: Neil Horman
  Cc: David Miller, netdev, vyasevich, linux-sctp, David.Laight, jkbs
In-Reply-To: <20160414200351.GA4632@hmsreliant.think-freely.org>

On Thu, Apr 14, 2016 at 04:03:51PM -0400, Neil Horman wrote:
> On Thu, Apr 14, 2016 at 02:59:16PM -0400, David Miller wrote:
> > From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> > Date: Thu, 14 Apr 2016 14:00:49 -0300
> > 
> > > Em 14-04-2016 10:03, Neil Horman escreveu:
> > >> On Wed, Apr 13, 2016 at 11:05:32PM -0400, David Miller wrote:
> > >>> From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> > >>> Date: Fri,  8 Apr 2016 16:41:26 -0300
> > >>>
> > >>>> 1st patch is a preparation for the 2nd. The idea is to not call
> > >>>> ->sk_data_ready() for every data chunk processed while processing
> > >>>> packets but only once before releasing the socket.
> > >>>>
> > >>>> v2: patchset re-checked, small changelog fixes
> > >>>> v3: on patch 2, make use of local vars to make it more readable
> > >>>
> > >>> Applied to net-next, but isn't this reduced overhead coming at the
> > >>> expense of latency?  What if that lower latency is important to the
> > >>> application and/or consumer?
> > >> Thats a fair point, but I'd make the counter argument that, as it
> > >> currently
> > >> stands, any latency introduced (or removed), is an artifact of our
> > >> implementation rather than a designed feature of it.  That is to say,
> > >> we make no
> > >> guarantees at the application level regarding how long it takes to
> > >> signal data
> > >> readines from the time we get data off the wire, so I would rather see
> > >> our
> > >> throughput raised if we can, as thats been sctp's more pressing
> > >> achilles heel.
> > >>
> > >>
> > >> Thats not to say I'd like to enable lower latency, but I'd rather have
> > >> this now,
> > >> and start pondering how to design that in.  Perhaps we can convert the
> > >> pending
> > >> flag to a counter to count the number of events we enqueue, and call
> > >> sk_data_ready every  time we reach a sysctl defined threshold.
> > > 
> > > That and also that there is no chance of the application reading the
> > > first chunks before all current ToDo's are performed by either the bh
> > > or backlog handlers for that packet. Socket lock won't be cycled in
> > > between chunks so the application is going to wait all the processing
> > > one way or another.
> > 
> > But it takes time to signal the wakeup to the remote cpu the process
> > was running on, schedule out the current process on that cpu (if it
> > has in fact lost it's timeslice), and then finally look at the socket
> > queue.
> > 
> > Of course this is all assuming the process was sleeping in the first
> > place, either in recv or more likely poll.
> > 
> > I really think signalling early helps performance.
> > 
> 
> Early, yes, often, not so much :).  Perhaps what would be adventageous would be
> to signal at the start of a set of enqueues, rather than at the end.  That would
> be equivalent in terms of not signaling more than needed, but would eliminate
> the signaling on every chunk.   Perhaps what you could do Marcelo would be to
> change the sense of the signal_ready flag to be a has_signaled flag.  e.g. call
> sk_data_ready in ulp_event_tail like we used to, but only if the has_signaled
> flag isn't set, then set the flag, and clear it at the end of the command
> interpreter.
> 
> That would be a best of both worlds solution, as long as theres no chance of
> race with user space reading from the socket before we were done enqueuing (i.e.
> you have to guarantee that the socket lock stays held, which I think we do).

That is my feeling too. Will work on it. Thanks :-)

  Marcelo

^ permalink raw reply

* Re: [PATCH v2 net-next 2/5] qed/qede: Add VXLAN tunnel slowpath configuration support
From: David Miller @ 2016-04-14 20:15 UTC (permalink / raw)
  To: Yuval.Mintz; +Cc: manish.chopra, netdev, Ariel.Elior
In-Reply-To: <SN2PR11MB0094C4E2F13DA03C1E06C99497970@SN2PR11MB0094.namprd11.prod.outlook.com>

From: Yuval Mintz <Yuval.Mintz@qlogic.com>
Date: Thu, 14 Apr 2016 19:52:44 +0000

>> Why do you call all of these offloads the "slowpath"?
> 
> We term by 'slowpath' all the non {data,fast}path configurations,
> I.e., everything that doesn't really involves the ingress/egress handling
> of packets but rather the infrastructure changes surrounding it.
> 
> That exactly what we have in this patch - adding the ndo hooks
> necessary for configuring the FW/HW in the proper mode to support
> vxlan tunnels.
> 
> Only the last patch in the series is actually adding the fastpath stuff.

Thanks for explaining.

^ permalink raw reply

* [mac80211-next:master] 71bbe25d01fa4f35551ff7bffc3e03ddd3e960cd BUILD SUCCESS
From: kbuild test robot @ 2016-04-14 20:09 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

https://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git  master
71bbe25d01fa4f35551ff7bffc3e03ddd3e960cd  Merge tag 'mac80211-next-for-davem-2016-04-13' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next

i386-tinyconfig vmlinux size:

+-------+--------------+---------------------------------------------------------------------------+
| DELTA |    SYMBOL    |                                  COMMIT                                   |
+-------+--------------+---------------------------------------------------------------------------+
|  -212 | TOTAL        | 05cf8077e54b..71bbe25d01fa (ALL COMMITS)                                  |
|  -200 | TOTAL        | ae95d7126104 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ne |
|  -125 | TEXT         | 05cf8077e54b..71bbe25d01fa (ALL COMMITS)                                  |
|  -108 | TEXT         | ae95d7126104 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ne |
|   -92 | DATA         | ae95d7126104 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ne |
|  -117 | init.text    | ae95d7126104 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ne |
|  -131 | setup_arch() | ae95d7126104 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/ne |
+-------+--------------+---------------------------------------------------------------------------+

elapsed time: 766m

configs tested: 420

alpha                               defconfig
parisc                            allnoconfig
parisc                        c3000_defconfig
parisc                              defconfig
m68k                       m5475evb_defconfig
m68k                        mvme147_defconfig
tile                             alldefconfig
i386                              allnoconfig
arm                         em_x270_defconfig
powerpc                     tqm8540_defconfig
sh                          sdk7780_defconfig
sh                           se7206_defconfig
i386                     randconfig-a0-201615
i386                   randconfig-a0-04142309
x86_64                 randconfig-a0-04141733
x86_64                 randconfig-a0-04141812
x86_64                 randconfig-a0-04141953
x86_64                 randconfig-a0-04142143
x86_64                 randconfig-a0-04142309
x86_64                 randconfig-a0-04150155
x86_64                             acpi-redef
x86_64                           allyesdebian
x86_64                                nfsroot
blackfin                BF526-EZBRD_defconfig
blackfin                BF533-EZKIT_defconfig
blackfin            BF561-EZKIT-SMP_defconfig
blackfin                  TCM-BF537_defconfig
cris                 etrax-100lx_v2_defconfig
arm                          imote2_defconfig
m68k                          hp300_defconfig
arm                              allmodconfig
m68k                                defconfig
sparc                               defconfig
blackfin              BF561-ACVILON_defconfig
mips                        qi_lb60_defconfig
powerpc                  mpc866_ads_defconfig
openrisc                    or1ksim_defconfig
powerpc                    mvme5100_defconfig
powerpc                      tqm8xx_defconfig
arm                       imx_v4_v5_defconfig
parisc                           allmodconfig
powerpc64                           defconfig
sh                                allnoconfig
sh                          rsk7269_defconfig
sh                  sh7785lcr_32bit_defconfig
sh                            titan_defconfig
i386                   randconfig-c0-04141908
i386                   randconfig-c0-04142058
i386                   randconfig-c0-04142235
i386                   randconfig-c0-04150046
i386                   randconfig-c0-04150133
i386                   randconfig-c0-04150226
i386                   randconfig-c0-04150323
x86_64                           allmodconfig
arm                               allnoconfig
arm                         at91_dt_defconfig
arm                           efm32_defconfig
arm                          exynos_defconfig
arm                        multi_v5_defconfig
arm                        multi_v7_defconfig
arm                        shmobile_defconfig
arm                           sunxi_defconfig
arm                            mps2_defconfig
mips                           ip28_defconfig
arm64                               defconfig
mips                          sead3_defconfig
sh                               allmodconfig
powerpc                           allnoconfig
powerpc                             defconfig
powerpc                       ppc64_defconfig
x86_64                 randconfig-v0-04141753
x86_64                 randconfig-v0-04150045
x86_64                 randconfig-v0-04150144
m32r                       m32104ut_defconfig
m32r                     mappi3.smp_defconfig
m32r                         opsput_defconfig
m32r                           usrv_defconfig
xtensa                       common_defconfig
xtensa                          iss_defconfig
parisc                         b180_defconfig
i386                             allmodconfig
arm                           acs5k_defconfig
m68k                             allyesconfig
s390                    performance_defconfig
xtensa                              defconfig
arm                       netwinder_defconfig
blackfin                    DNP5370_defconfig
microblaze                      mmu_defconfig
sh                          rsk7201_defconfig
sh                      rts7751r2d1_defconfig
frv                               allnoconfig
ia64                            sim_defconfig
mips                      bmips_stb_defconfig
mips                      malta_kvm_defconfig
arm                            lart_defconfig
avr32                            alldefconfig
sh                           se7619_defconfig
m68k                        m5272c3_defconfig
openrisc                            defconfig
powerpc                    adder875_defconfig
powerpc                 mpc832x_mds_defconfig
powerpc                 mpc834x_itx_defconfig
arm                            zeus_defconfig
ia64                         bigsur_defconfig
m68k                       m5275evb_defconfig
mips                           ip27_defconfig
mips                           xway_defconfig
openrisc                         allyesconfig
powerpc                     powernv_defconfig
arm                              allyesconfig
arm                        trizeps4_defconfig
m32r                   m32700ut.smp_defconfig
powerpc               corenet_basic_defconfig
arm                          badge4_defconfig
sh                        sh7785lcr_defconfig
sh                              ul2_defconfig
tile                         tilegx_defconfig
blackfin                BF609-EZKIT_defconfig
mips                             alldefconfig
mips                       rbtx49xx_defconfig
sh                          r7785rp_defconfig
sh                             sh03_defconfig
arm                          pxa168_defconfig
avr32                    hammerhead_defconfig
mips                         rt305x_defconfig
powerpc                      ppc40x_defconfig
sh                          polaris_defconfig
arm                          lpd270_defconfig
arm                       spear13xx_defconfig
frv                                 defconfig
powerpc                         wii_defconfig
arm                         lpc18xx_defconfig
blackfin                BF561-EZKIT_defconfig
powerpc                 mpc8313_rdb_defconfig
s390                        default_defconfig
sh                           se7724_defconfig
arm                          nuc950_defconfig
arm                            pleb_defconfig
blackfin                       SRV1_defconfig
mips                    maltaup_xpa_defconfig
arm                        mvebu_v5_defconfig
arm                          prima2_defconfig
ia64                              allnoconfig
powerpc                          allmodconfig
powerpc                      walnut_defconfig
arm                     am200epdkit_defconfig
arm                          ks8695_defconfig
arm                         s5pv210_defconfig
avr32                        merisc_defconfig
mips                          lasat_defconfig
x86_64                 randconfig-x010-201615
x86_64                 randconfig-x018-201615
x86_64                 randconfig-x011-201615
x86_64                 randconfig-x013-201615
x86_64                 randconfig-x019-201615
x86_64                 randconfig-x012-201615
x86_64                 randconfig-x014-201615
x86_64                 randconfig-x016-201615
x86_64                 randconfig-x015-201615
x86_64                 randconfig-x017-201615
i386                             alldefconfig
i386                                defconfig
m68k                          multi_defconfig
m68k                           sun3_defconfig
i386                     randconfig-s0-201615
i386                     randconfig-s1-201615
x86_64                 randconfig-s0-04141531
x86_64                 randconfig-s1-04141531
x86_64                 randconfig-s2-04141531
x86_64                 randconfig-s0-04141600
x86_64                 randconfig-s1-04141600
x86_64                 randconfig-s2-04141600
x86_64                 randconfig-s0-04141752
x86_64                 randconfig-s1-04141752
x86_64                 randconfig-s2-04141752
x86_64                 randconfig-s0-04141839
x86_64                 randconfig-s1-04141839
x86_64                 randconfig-s2-04141839
x86_64                 randconfig-s0-04141950
x86_64                 randconfig-s1-04141950
x86_64                 randconfig-s2-04141950
x86_64                 randconfig-s0-04142049
x86_64                 randconfig-s1-04142049
x86_64                 randconfig-s2-04142049
x86_64                 randconfig-s0-04142118
x86_64                 randconfig-s1-04142118
x86_64                 randconfig-s2-04142118
x86_64                 randconfig-s0-04142253
x86_64                 randconfig-s1-04142253
x86_64                 randconfig-s2-04142253
x86_64                 randconfig-s0-04150025
x86_64                 randconfig-s1-04150025
x86_64                 randconfig-s2-04150025
x86_64                 randconfig-s0-04150103
x86_64                 randconfig-s1-04150103
x86_64                 randconfig-s2-04150103
x86_64                 randconfig-s0-04150143
x86_64                 randconfig-s1-04150143
x86_64                 randconfig-s2-04150143
x86_64                 randconfig-s0-04150206
x86_64                 randconfig-s1-04150206
x86_64                 randconfig-s2-04150206
x86_64                 randconfig-s0-04150242
x86_64                 randconfig-s1-04150242
x86_64                 randconfig-s2-04150242
x86_64                 randconfig-s0-04150313
x86_64                 randconfig-s1-04150313
x86_64                 randconfig-s2-04150313
x86_64                 randconfig-s3-04141534
x86_64                 randconfig-s4-04141534
x86_64                 randconfig-s5-04141534
x86_64                 randconfig-s3-04141807
x86_64                 randconfig-s4-04141807
x86_64                 randconfig-s5-04141807
x86_64                 randconfig-s3-04141850
x86_64                 randconfig-s4-04141850
x86_64                 randconfig-s5-04141850
x86_64                 randconfig-s3-04141948
x86_64                 randconfig-s4-04141948
x86_64                 randconfig-s5-04141948
x86_64                 randconfig-s3-04142050
x86_64                 randconfig-s4-04142050
x86_64                 randconfig-s5-04142050
x86_64                 randconfig-s3-04142133
x86_64                 randconfig-s4-04142133
x86_64                 randconfig-s5-04142133
x86_64                 randconfig-s3-04142230
x86_64                 randconfig-s4-04142230
x86_64                 randconfig-s5-04142230
x86_64                 randconfig-s3-04142300
x86_64                 randconfig-s4-04142300
x86_64                 randconfig-s5-04142300
x86_64                 randconfig-s3-04150033
x86_64                 randconfig-s4-04150033
x86_64                 randconfig-s5-04150033
x86_64                 randconfig-s3-04150106
x86_64                 randconfig-s4-04150106
x86_64                 randconfig-s5-04150106
x86_64                 randconfig-s3-04150133
x86_64                 randconfig-s4-04150133
x86_64                 randconfig-s5-04150133
x86_64                 randconfig-s3-04150157
x86_64                 randconfig-s4-04150157
x86_64                 randconfig-s5-04150157
x86_64                 randconfig-s3-04150222
x86_64                 randconfig-s4-04150222
x86_64                 randconfig-s5-04150222
x86_64                 randconfig-s3-04150242
x86_64                 randconfig-s4-04150242
x86_64                 randconfig-s5-04150242
x86_64                 randconfig-s3-04150315
x86_64                 randconfig-s4-04150315
x86_64                 randconfig-s5-04150315
avr32                      atngw100_defconfig
avr32                     atstk1006_defconfig
mn10300                     asb2364_defconfig
um                             i386_defconfig
um                           x86_64_defconfig
microblaze                    nommu_defconfig
powerpc                     redwood_defconfig
um                                  defconfig
blackfin                BF537-STAMP_defconfig
powerpc                       ebony_defconfig
um                               allyesconfig
arm                             pxa_defconfig
arm                         vf610m4_defconfig
powerpc                     akebono_defconfig
sh                          lboxre2_defconfig
mips                       bmips_be_defconfig
sh                     sh7710voipgw_defconfig
xtensa                    smp_lx200_defconfig
arm                        clps711x_defconfig
arm                            netx_defconfig
arm                            qcom_defconfig
ia64                            zx1_defconfig
arm                            u300_defconfig
avr32        atngw100mkii_evklcd100_defconfig
mips                        nlm_xlp_defconfig
sh                 kfr2r09-romimage_defconfig
mips                              allnoconfig
mips                      fuloong2e_defconfig
mips                                   jz4740
mips                                     txx9
x86_64                   randconfig-i0-201615
i386                   randconfig-b0-04141812
i386                   randconfig-b0-04142049
i386                   randconfig-b0-04142310
i386                   randconfig-b0-04150044
i386                   randconfig-b0-04150119
i386                   randconfig-b0-04150147
i386                   randconfig-b0-04150216
i386                   randconfig-b0-04150307
i386                   randconfig-b0-04150335
sparc64                           allnoconfig
sparc64                             defconfig
arm                        neponset_defconfig
arm                         palmz72_defconfig
mips                       markeins_defconfig
sh                        apsh4ad0a_defconfig
arm                          ep93xx_defconfig
i386                     randconfig-i0-201615
i386                     randconfig-i1-201615
x86_64                 randconfig-b0-04141854
x86_64                 randconfig-b0-04142057
x86_64                 randconfig-b0-04142308
x86_64                 randconfig-b0-04150119
x86_64                 randconfig-b0-04150153
x86_64                 randconfig-b0-04150234
arm                         orion5x_defconfig
xtensa                  audio_kc705_defconfig
arm                             ezx_defconfig
mips                  mips_paravirt_defconfig
alpha                            allyesconfig
powerpc                      ppc6xx_defconfig
mips                      maltasmvp_defconfig
x86_64                              rhel_gcov
x86_64                 randconfig-n0-04141909
x86_64                 randconfig-n0-04141958
x86_64                 randconfig-n0-04142230
x86_64                 randconfig-n0-04142325
x86_64                 randconfig-n0-04150131
x86_64                 randconfig-n0-04150216
x86_64                 randconfig-n0-04150253
x86_64                 randconfig-n0-04150318
mips                       capcella_defconfig
s390                       zfcpdump_defconfig
blackfin                BF527-EZKIT_defconfig
blackfin                BF533-STAMP_defconfig
powerpc                     tqm8541_defconfig
arm                            mmp2_defconfig
arm                        mini2440_defconfig
sh                          rsk7264_defconfig
i386                   randconfig-x012-201615
i386                   randconfig-x013-201615
i386                   randconfig-x015-201615
i386                   randconfig-x019-201615
i386                   randconfig-x011-201615
i386                   randconfig-x018-201615
i386                   randconfig-x010-201615
i386                   randconfig-x016-201615
i386                   randconfig-x017-201615
i386                   randconfig-x014-201615
x86_64                                    lkp
x86_64                                   rhel
i386                     randconfig-n0-201615
i386                   randconfig-h0-04141949
i386                   randconfig-h1-04141949
i386                   randconfig-h0-04142053
i386                   randconfig-h1-04142053
i386                   randconfig-h0-04150130
i386                   randconfig-h1-04150130
i386                   randconfig-h0-04150226
i386                   randconfig-h1-04150226
alpha                            allmodconfig
m68k                          sun3x_defconfig
m32r                    m32700ut.up_defconfig
mips                        jmr3927_defconfig
tile                             allyesconfig
ia64                             alldefconfig
ia64                                defconfig
x86_64                 randconfig-h0-04141944
x86_64                 randconfig-h0-04142146
x86_64                 randconfig-h0-04150126
blackfin                            defconfig
mips                                defconfig
arm                          iop32x_defconfig
m68k                          atari_defconfig
openrisc                         alldefconfig
sh                           se7722_defconfig
arm                           stm32_defconfig
powerpc                  storcenter_defconfig
i386                  randconfig-sb0-04141510
i386                  randconfig-sb0-04142045
i386                  randconfig-sb0-04150049
i386                  randconfig-sb0-04150204
i386                  randconfig-sb0-04150319
arm                         lpc32xx_defconfig
mips                           ls1b_defconfig
powerpc                      virtex_defconfig
sh                          r7780mp_defconfig
arm                         lubbock_defconfig
m68k                            q40_defconfig
mips                          rb532_defconfig
mips                          ath79_defconfig
x86_64                 randconfig-x000-201615
x86_64                 randconfig-x001-201615
x86_64                 randconfig-x007-201615
x86_64                 randconfig-x005-201615
x86_64                 randconfig-x006-201615
x86_64                 randconfig-x004-201615
x86_64                 randconfig-x008-201615
x86_64                 randconfig-x009-201615
x86_64                 randconfig-x003-201615
x86_64                 randconfig-x002-201615
i386                     randconfig-r0-201615
x86_64                 randconfig-r0-04141850
x86_64                 randconfig-r0-04142230
x86_64                 randconfig-r0-04150047
x86_64                 randconfig-r0-04150223
arm                        realview_defconfig
powerpc                     kilauea_defconfig
sparc                             allnoconfig
arm                           omap1_defconfig
sparc                       sparc64_defconfig
i386                   randconfig-x000-201615
i386                   randconfig-x003-201615
i386                   randconfig-x009-201615
i386                   randconfig-x005-201615
i386                   randconfig-x004-201615
i386                   randconfig-x002-201615
i386                   randconfig-x006-201615
i386                   randconfig-x007-201615
i386                   randconfig-x008-201615
i386                   randconfig-x001-201615
i386                   randconfig-x0-04142123
i386                   randconfig-x0-04142257
i386                   randconfig-x0-04150020
i386                   randconfig-x0-04150149
i386                   randconfig-x0-04150306

Thanks,
Fengguang

^ permalink raw reply

* Re: [PATCH v3 0/2] sctp: delay calls to sk_data_ready() as much as possible
From: Neil Horman @ 2016-04-14 20:03 UTC (permalink / raw)
  To: David Miller
  Cc: marcelo.leitner, netdev, vyasevich, linux-sctp, David.Laight,
	jkbs
In-Reply-To: <20160414.145916.2286519059284215039.davem@davemloft.net>

On Thu, Apr 14, 2016 at 02:59:16PM -0400, David Miller wrote:
> From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> Date: Thu, 14 Apr 2016 14:00:49 -0300
> 
> > Em 14-04-2016 10:03, Neil Horman escreveu:
> >> On Wed, Apr 13, 2016 at 11:05:32PM -0400, David Miller wrote:
> >>> From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> >>> Date: Fri,  8 Apr 2016 16:41:26 -0300
> >>>
> >>>> 1st patch is a preparation for the 2nd. The idea is to not call
> >>>> ->sk_data_ready() for every data chunk processed while processing
> >>>> packets but only once before releasing the socket.
> >>>>
> >>>> v2: patchset re-checked, small changelog fixes
> >>>> v3: on patch 2, make use of local vars to make it more readable
> >>>
> >>> Applied to net-next, but isn't this reduced overhead coming at the
> >>> expense of latency?  What if that lower latency is important to the
> >>> application and/or consumer?
> >> Thats a fair point, but I'd make the counter argument that, as it
> >> currently
> >> stands, any latency introduced (or removed), is an artifact of our
> >> implementation rather than a designed feature of it.  That is to say,
> >> we make no
> >> guarantees at the application level regarding how long it takes to
> >> signal data
> >> readines from the time we get data off the wire, so I would rather see
> >> our
> >> throughput raised if we can, as thats been sctp's more pressing
> >> achilles heel.
> >>
> >>
> >> Thats not to say I'd like to enable lower latency, but I'd rather have
> >> this now,
> >> and start pondering how to design that in.  Perhaps we can convert the
> >> pending
> >> flag to a counter to count the number of events we enqueue, and call
> >> sk_data_ready every  time we reach a sysctl defined threshold.
> > 
> > That and also that there is no chance of the application reading the
> > first chunks before all current ToDo's are performed by either the bh
> > or backlog handlers for that packet. Socket lock won't be cycled in
> > between chunks so the application is going to wait all the processing
> > one way or another.
> 
> But it takes time to signal the wakeup to the remote cpu the process
> was running on, schedule out the current process on that cpu (if it
> has in fact lost it's timeslice), and then finally look at the socket
> queue.
> 
> Of course this is all assuming the process was sleeping in the first
> place, either in recv or more likely poll.
> 
> I really think signalling early helps performance.
> 

Early, yes, often, not so much :).  Perhaps what would be adventageous would be
to signal at the start of a set of enqueues, rather than at the end.  That would
be equivalent in terms of not signaling more than needed, but would eliminate
the signaling on every chunk.   Perhaps what you could do Marcelo would be to
change the sense of the signal_ready flag to be a has_signaled flag.  e.g. call
sk_data_ready in ulp_event_tail like we used to, but only if the has_signaled
flag isn't set, then set the flag, and clear it at the end of the command
interpreter.

That would be a best of both worlds solution, as long as theres no chance of
race with user space reading from the socket before we were done enqueuing (i.e.
you have to guarantee that the socket lock stays held, which I think we do).

Neil

^ permalink raw reply

* RE: [PATCH v2 net-next 2/5] qed/qede: Add VXLAN tunnel slowpath configuration support
From: Yuval Mintz @ 2016-04-14 19:52 UTC (permalink / raw)
  To: David Miller, Manish Chopra; +Cc: netdev, Ariel Elior
In-Reply-To: <20160414.152601.1422370571524886675.davem@davemloft.net>

> Why do you call all of these offloads the "slowpath"?

We term by 'slowpath' all the non {data,fast}path configurations,
I.e., everything that doesn't really involves the ingress/egress handling
of packets but rather the infrastructure changes surrounding it.

That exactly what we have in this patch - adding the ndo hooks
necessary for configuring the FW/HW in the proper mode to support
vxlan tunnels.

Only the last patch in the series is actually adding the fastpath stuff.

^ permalink raw reply

* [net-next PATCH 5/5] ip6gre: Add support for GSO
From: Alexander Duyck @ 2016-04-14 19:34 UTC (permalink / raw)
  To: jesse, netdev, davem, alexander.duyck, tom
In-Reply-To: <20160414192709.12934.82858.stgit@ahduyck-xeon-server>

This patch adds code borrowed from bits and pieces of other protocols to
the IPv6 GRE path so that we can support GSO over IPv6 based GRE tunnels.
By adding this support we are able to significantly improve the throughput
for GRE tunnels as we are able to make use of GSO.

Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
---
 net/ipv6/ip6_gre.c |   56 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 23 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1a5ad143be40..ca5a2c5675c5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -621,7 +621,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 	struct net *net = tunnel->net;
 	struct net_device *tdev;    /* Device to other host */
 	struct ipv6hdr  *ipv6h;     /* Our new IP header */
-	unsigned int max_headroom = 0; /* The extra header space needed */
+	unsigned int min_headroom = 0; /* The extra header space needed */
 	int    gre_hlen;
 	struct ipv6_tel_txoption opt;
 	int    mtu;
@@ -629,7 +629,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 	struct net_device_stats *stats = &tunnel->dev->stats;
 	int err = -1;
 	u8 proto;
-	struct sk_buff *new_skb;
 	__be16 protocol;
 
 	if (dev->type == ARPHRD_ETHER)
@@ -672,14 +671,14 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 
 	mtu = dst_mtu(dst) - sizeof(*ipv6h);
 	if (encap_limit >= 0) {
-		max_headroom += 8;
+		min_headroom += 8;
 		mtu -= 8;
 	}
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 	if (skb_dst(skb))
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-	if (skb->len > mtu) {
+	if (skb->len > mtu && !skb_is_gso(skb)) {
 		*pmtu = mtu;
 		err = -EMSGSIZE;
 		goto tx_err_dst_release;
@@ -697,20 +696,19 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 
 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
 
-	max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+	min_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
 
-	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
-	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
-		new_skb = skb_realloc_headroom(skb, max_headroom);
-		if (max_headroom > dev->needed_headroom)
-			dev->needed_headroom = max_headroom;
-		if (!new_skb)
-			goto tx_err_dst_release;
+	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+		int head_delta = SKB_DATA_ALIGN(min_headroom -
+						skb_headroom(skb) +
+						16);
 
-		if (skb->sk)
-			skb_set_owner_w(new_skb, skb->sk);
-		consume_skb(skb);
-		skb = new_skb;
+		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+				       0, GFP_ATOMIC);
+		if (min_headroom > dev->needed_headroom)
+			dev->needed_headroom = min_headroom;
+		if (unlikely(err))
+			goto tx_err_dst_release;
 	}
 
 	if (!fl6->flowi6_mark && ndst)
@@ -723,10 +721,11 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
 	}
 
-	if (likely(!skb->encapsulation)) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	err = iptunnel_handle_offloads(skb,
+				       (tunnel->parms.o_flags & GRE_CSUM) ?
+				       SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
+	if (err)
+		goto tx_err_dst_release;
 
 	skb_push(skb, gre_hlen);
 	skb_reset_network_header(skb);
@@ -760,7 +759,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 			*ptr = tunnel->parms.o_key;
 			ptr--;
 		}
-		if (tunnel->parms.o_flags&GRE_CSUM) {
+		if ((tunnel->parms.o_flags & GRE_CSUM) &&
+		    !(skb_shinfo(skb)->gso_type &
+		      (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
 			*ptr = 0;
 			*(__sum16 *)ptr = gre6_checksum(skb);
 		}
@@ -1559,9 +1560,18 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
 	dev->features		|= GRE6_FEATURES;
 	dev->hw_features	|= GRE6_FEATURES;
 
-	/* Can use a lockless transmit, unless we generate output sequences */
-	if (!(nt->parms.o_flags & GRE_SEQ))
+	if (!(nt->parms.o_flags & GRE_SEQ)) {
+		/* TCP segmentation offload is not supported when we
+		 * generate output sequences.
+		 */
+		dev->features    |= NETIF_F_GSO_SOFTWARE;
+		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+
+		/* Can use a lockless transmit, unless we generate
+		 * output sequences
+		 */
 		dev->features |= NETIF_F_LLTX;
+	}
 
 	err = register_netdevice(dev);
 	if (err)

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox