Netdev List
 help / color / mirror / Atom feed
* [PATCH net] ixgbe: check adapter->vfinfo before dereference
From: Thierry Herbelot @ 2014-10-08 12:05 UTC (permalink / raw)
  To: Jeff Kirsher, Jesse Brandeburg, Bruce Allan, netdev; +Cc: Thierry Herbelot

this protects against the following panic:
(before a VF was actually created on p96p1 PF Ethernet port)
ip link set p96p1 vf 0 spoofchk off
BUG: unable to handle kernel NULL pointer dereference at 0000000000000052
IP: [<ffffffffa044a1c1>] ixgbe_ndo_set_vf_spoofchk+0x51/0x150 [ixgbe]

Signed-off-by: Thierry Herbelot <thierry.herbelot@6wind.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c |   73 +++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index c14d4d8..c6c9c0a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -314,7 +314,7 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
 	int entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK)
 		       >> IXGBE_VT_MSGINFO_SHIFT;
 	u16 *hash_list = (u16 *)&msgbuf[1];
-	struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+	struct vf_data_storage *vfinfo;
 	struct ixgbe_hw *hw = &adapter->hw;
 	int i;
 	u32 vector_bit;
@@ -322,6 +322,11 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
 	u32 mta_reg;
 	u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
 
+	if (!adapter->vfinfo)
+		return -1;
+
+	vfinfo = &adapter->vfinfo[vf];
+
 	/* only so many hash values supported */
 	entries = min(entries, IXGBE_MAX_VF_MC_ENTRIES);
 
@@ -363,6 +368,9 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter)
 	u32 vector_reg;
 	u32 mta_reg;
 
+	if (!adapter->vfinfo)
+		return;
+
 	for (i = 0; i < adapter->num_vfs; i++) {
 		u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(i));
 		vfinfo = &adapter->vfinfo[i];
@@ -416,6 +424,9 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 		u32 reg_offset, vf_shift, vfre;
 		s32 err = 0;
 
+		if (!adapter->vfinfo)
+			return -1;
+
 #ifdef CONFIG_FCOE
 		if (dev->features & NETIF_F_FCOE_MTU)
 			pf_max_frame = max_t(int, pf_max_frame,
@@ -505,6 +516,9 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 	struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
 	u8 num_tcs = netdev_get_num_tc(adapter->netdev);
 
+	if (!adapter->vfinfo)
+		return;
+
 	/* add PF assigned VLAN or VLAN 0 */
 	ixgbe_set_vf_vlan(adapter, true, vfinfo->pf_vlan, vf);
 
@@ -541,6 +555,8 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
 			    int vf, unsigned char *mac_addr)
 {
+	if (!adapter->vfinfo)
+		return -1;
 	ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
 	memcpy(adapter->vfinfo[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
 	ixgbe_add_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
@@ -610,6 +626,9 @@ int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask)
 
 	bool enable = ((event_mask & 0x10000000U) != 0);
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	if (enable)
 		eth_zero_addr(adapter->vfinfo[vfn].vf_mac_addresses);
 
@@ -620,13 +639,18 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
 {
 	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
 	struct ixgbe_hw *hw = &adapter->hw;
-	unsigned char *vf_mac = adapter->vfinfo[vf].vf_mac_addresses;
+	unsigned char *vf_mac;
 	u32 reg, reg_offset, vf_shift;
 	u32 msgbuf[4] = {0, 0, 0, 0};
 	u8 *addr = (u8 *)(&msgbuf[1]);
 	u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
 	int i;
 
+	if (!adapter->vfinfo)
+		return -1;
+
+	vf_mac = adapter->vfinfo[vf].vf_mac_addresses;
+
 	e_info(probe, "VF Reset msg received from vf %d\n", vf);
 
 	/* reset the filters for the device */
@@ -721,6 +745,9 @@ static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter,
 {
 	u8 *new_mac = ((u8 *)(&msgbuf[1]));
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	if (!is_valid_ether_addr(new_mac)) {
 		e_warn(drv, "VF %d attempted to set invalid mac\n", vf);
 		return -1;
@@ -773,6 +800,9 @@ static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter,
 	u32 bits;
 	u8 tcs = netdev_get_num_tc(adapter->netdev);
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	if (adapter->vfinfo[vf].pf_vlan || tcs) {
 		e_warn(drv,
 		       "VF %d attempted to override administratively set VLAN configuration\n"
@@ -839,6 +869,9 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter,
 		    IXGBE_VT_MSGINFO_SHIFT;
 	int err;
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	if (adapter->vfinfo[vf].pf_set_mac && index > 0) {
 		e_warn(drv,
 		       "VF %d requested MACVLAN filter but is administratively denied\n",
@@ -875,6 +908,9 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter,
 {
 	int api = msgbuf[1];
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	switch (api) {
 	case ixgbe_mbox_api_10:
 	case ixgbe_mbox_api_11:
@@ -897,6 +933,9 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
 	unsigned int default_tc = 0;
 	u8 num_tcs = netdev_get_num_tc(dev);
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	/* verify the PF is supporting the correct APIs */
 	switch (adapter->vfinfo[vf].vf_api) {
 	case ixgbe_mbox_api_20:
@@ -935,6 +974,9 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 	struct ixgbe_hw *hw = &adapter->hw;
 	s32 retval;
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	retval = ixgbe_read_mbx(hw, msgbuf, mbx_size, vf);
 
 	if (retval) {
@@ -1008,6 +1050,9 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 msg = IXGBE_VT_MSGTYPE_NACK;
 
+	if (!adapter->vfinfo)
+		return;
+
 	/* if device isn't clear to send it shouldn't be reading either */
 	if (!adapter->vfinfo[vf].clear_to_send)
 		ixgbe_write_mbx(hw, &msg, 1, vf);
@@ -1051,6 +1096,9 @@ void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter)
 	u32 ping;
 	int i;
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	for (i = 0 ; i < adapter->num_vfs; i++) {
 		ping = IXGBE_PF_CONTROL_MSG;
 		if (adapter->vfinfo[i].clear_to_send)
@@ -1064,6 +1112,9 @@ int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	if (!is_valid_ether_addr(mac) || (vf >= adapter->num_vfs))
 		return -EINVAL;
+	if (!adapter->vfinfo)
+		return -1;
+
 	adapter->vfinfo[vf].pf_set_mac = true;
 	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
 	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
@@ -1083,6 +1134,9 @@ int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
 
+	if (!adapter->vfinfo)
+		return -1;
+
 	if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7))
 		return -EINVAL;
 	if (vlan || qos) {
@@ -1147,8 +1201,12 @@ static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 bcnrc_val = 0;
 	u16 queue, queues_per_pool;
-	u16 tx_rate = adapter->vfinfo[vf].tx_rate;
+	u16 tx_rate;
 
+	if (!adapter->vfinfo)
+		return;
+
+	tx_rate = adapter->vfinfo[vf].tx_rate;
 	if (tx_rate) {
 		/* start with base link speed value */
 		bcnrc_val = adapter->vf_rate_link_speed;
@@ -1197,6 +1255,9 @@ void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter)
 {
 	int i;
 
+	if (!adapter->vfinfo)
+		return;
+
 	/* VF Tx rate limit was not set */
 	if (!adapter->vf_rate_link_speed)
 		return;
@@ -1259,6 +1320,9 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 regval;
 
+	if (!adapter->vfinfo)
+		return;
+
 	adapter->vfinfo[vf].spoofchk_enabled = setting;
 
 	regval = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg));
@@ -1283,6 +1347,9 @@ int ixgbe_ndo_get_vf_config(struct net_device *netdev,
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	if (vf >= adapter->num_vfs)
 		return -EINVAL;
+	if (!adapter->vfinfo)
+		return -EINVAL;
+
 	ivi->vf = vf;
 	memcpy(&ivi->mac, adapter->vfinfo[vf].vf_mac_addresses, ETH_ALEN);
 	ivi->max_tx_rate = adapter->vfinfo[vf].tx_rate;
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH v2 net-next 15/15] tipc: remove old ASCII netlink API
From: Richard Alpe @ 2014-10-08 12:02 UTC (permalink / raw)
  To: David Miller, jon.maloy; +Cc: netdev, tipc-discussion
In-Reply-To: <20141006.152003.1261907320590776614.davem@davemloft.net>

On 10/06/2014 09:20 PM, David Miller wrote:
> From: Jon Maloy <jon.maloy@ericsson.com>
> Date: Mon, 6 Oct 2014 13:37:14 +0000
>
>> I disagree.  We did of course consider this before we posted the patch.
>> There is absolutely no reason to believe there are binaries "out there"
>> using this API. It is intended for and used only by our own tool "tipc-config",
>> and the half-baked documentation that exists about it is not even
>> correct. This is an TIPC-internal API/ABI, which has changed over the years,
>> and keeps changing.  To our best judgement there is no risk in removing the
>> old API.
>>
>> However, we took great care to not break any scripts that might be using
>> tipc-config. Therefore, we made this wrapper that provides the old
>> tipc-config command API to existing users.
>
> A user should be able to upgrade the kernel and keep using tipc-config
> (or _whatever_) that's on their machine.
>
> You cannot break old stuff like this, and just because I didn't notice
> you slipping in incompatible changes in the past doesn't mean I'm going
> to let you keep doing it.
>
> Backwards compatability must be maintained for every single interface
> exposed to the user, and this is non-negotiable, sorry.
Alright. Can we log a deprecated warning when using the old API and 
eventually phase it out?

Regards
Richard

^ permalink raw reply

* Re: r8168 is needed to enter P-state: Package State6(pc6)onHaswellhardware
From: Ceriel Jacobs @ 2014-10-08 11:40 UTC (permalink / raw)
  To: Hayes Wang, Francois Romieu; +Cc: nic_swsd, netdev@vger.kernel.org
In-Reply-To: <0835B3720019904CB8F7AA43166CEEB2526D20@RTITMBSV03.realtek.com.tw>


Hayes Wang schreef op 08-10-14 om 04:35:
>   Francois Romieu [mailto:romieu@fr.zoreil.com]
>> Sent: Wednesday, October 08, 2014 4:17 AM
> [...]
>>> When enabling the ASPM, it would influence the thrpughput.
>>> It is hard to
>>> choose performance or power saving. Therefore, we reserve the config
>>> to let the user determines it.
>>
>> Mmmm...
>>
>> How do Realtek's devices behave if Config{2, 3} + MISC registers are
>> configured for ASPM whereas PCI config space registers aren't ?
>
> The feature of the ASPM for the nic would be disabled.
> ASPM is enabled when both the internal settings and
> the PCI config space registers are enabled.

Have in mind a situation where the PCI config space registers are set 
manually (after user login).

At which point in time will be determined whether ASPM for the nic will 
be disabled or enabled? Once, when loading the kernel module? Or more often?

^ permalink raw reply

* Re: [patch 1/2 -next] cxgb4: clean up a type issue
From: Dan Carpenter @ 2014-10-08 10:18 UTC (permalink / raw)
  To: David Miller; +Cc: hariprasad, netdev, kernel-janitors
In-Reply-To: <20141003.154629.555967384624529643.davem@davemloft.net>

On Fri, Oct 03, 2014 at 03:46:29PM -0700, David Miller wrote:
> From: Dan Carpenter <dan.carpenter@oracle.com>
> Date: Thu, 2 Oct 2014 14:22:19 +0300
> 
> > The tx_desc struct hold 8 __be64 values.  The original code took a
> > tx_desc pointer then casted it to an int pointer and then casted it to a
> > u64 pointer.  It was confusing and triggered some static checker
> > warnings.
> > 
> > I have changed the cxgb_pio_copy() to only take tx_desc pointers.  This
> > isn't really a loss of flexibility because anything else was buggy to
> > begin with.
> > 
> > Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
> 
> Please address the feedback you've received, resubmit this series, and actually
> number this second change "2/2" instead of "1/2" :-)
> 

Yes.  Sorry for the delay.  I'll send that this afternoon.

regards,
dan carpenter

^ permalink raw reply

* Re: [PATCH v1 4/4] ARM: Documentation: Update fec dts binding doc
From: Richard Cochran @ 2014-10-08 10:13 UTC (permalink / raw)
  To: luwei.zhou@freescale.com
  Cc: davem@davemloft.net, netdev@vger.kernel.org, shawn.guo@linaro.org,
	bhutchings@solarflare.com, Fabio.Estevam@freescale.com,
	fugang.duan@freescale.com, Frank.Li@freescale.com,
	stephen@networkplumber.org
In-Reply-To: <d9c981e97dae416488b45b06f7384471@BY2PR03MB441.namprd03.prod.outlook.com>

On Wed, Oct 08, 2014 at 08:36:11AM +0000, luwei.zhou@freescale.com wrote:
> 
> I am not expert in DT.

(Me neither ;)

> I didn't get your point of using timer resource in the DT.  Yes, the timer resource is
> Part of FEC IP hardware. But if you want to choose one channel for using PPS, you have to add another property
> to specify. Do you mean the current code already have the DT property to specify the channel?

I see some timer bindings in Documentation/devicetree/bindings/timer.
So, you could and should add your SoC timer there and in the DTS
files.

Then, you have something like

	@fec {
		pps_timer = &timer;
	}

in your SoC's DTS file.

Thanks,
Richard

^ permalink raw reply

* Re: [PATCH 1/1 net-next] af_unix: remove NULL assignment on static
From: Michal Kubecek @ 2014-10-08  9:46 UTC (permalink / raw)
  To: David Laight
  Cc: 'Hannes Frederic Sowa', Fabian Frederick, Guenter Roeck,
	David Miller, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6D174C6CA5@AcuExch.aculab.com>

On Wed, Oct 08, 2014 at 09:10:23AM +0000, David Laight wrote:
> From: Hannes Frederic Sowa
> > I think David's concern was whether if 0 == false in all situations. It
> > is pretty clear that static memory is initialized to 0.
> 
> I'm not 100% sure about that.
> static pointers may be required to be initialised to NULL.

ISO C 99 says:

  If an object that has static storage duration is not initialized
  explicitly, then:
  - if it has pointer type, it is initialized to a null pointer;
  - if it has arithmetic type, it is initialized to (positive or
    unsigned) zero;
  - if it is an aggregate, every member is initialized (recursively)
    according to these rules;
  - if it is a union, the first named member is initialized
    (recursively) according to these rules.

Michal Kubeček

^ permalink raw reply

* Re: [PATCH 1/1 net-next] af_unix: remove NULL assignment on static
From: Hannes Frederic Sowa @ 2014-10-08  9:34 UTC (permalink / raw)
  To: David Laight
  Cc: Fabian Frederick, Guenter Roeck, David Miller,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6D174C6CA5@AcuExch.aculab.com>

On Mi, 2014-10-08 at 09:10 +0000, David Laight wrote:
> From: Hannes Frederic Sowa
> > I think David's concern was whether if 0 == false in all situations. It
> > is pretty clear that static memory is initialized to 0.
> 
> I'm not 100% sure about that.
> static pointers may be required to be initialised to NULL.

This isn't something the C standard specified but in case of Linux, I
think the ELF standard might be the right one to look at. Looking at the
kernel, I think this is an unwritten law. ;)

The standard allows NULL and static memory initialization to be
implementation defined, so NULL very well might not be 'all 0', that's
correct. Even static memory must only be initialized, when this happens
and how it is initialized is completely up to the implementation.

> If NULL isn't the 'all 0 bit pattern' then the memory would need
> to be initialised to a different pattern.

I haven't looked closely but I don't think that pointers need to be
initialized to NULL, but please don't quote me on this.

> Not that anyone is likely to implement such a system because far too
> much code will break.
> 
> The only system I knew where 'native' NULL pointers were 'all 1s'
> used 0 in its C compiler.

:)

Bye,
Hannes

^ permalink raw reply

* Re: [PATCH] net: fec: fix regression on i.MX28 introduced by rx_copybreak support
From: Russell King - ARM Linux @ 2014-10-08  9:12 UTC (permalink / raw)
  To: David Laight
  Cc: 'Sergei Shtylyov', Lothar Waßmann,
	netdev@vger.kernel.org, David S. Miller, Frank Li, Fabio Estevam,
	linux-kernel@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6D174C6C14@AcuExch.aculab.com>

On Wed, Oct 08, 2014 at 08:54:58AM +0000, David Laight wrote:
> Hmmm... in that case you may not want the compiler to convert the bit value
> to a 'bool' at all.
> 
> Passing 'id_entry->driver_data' through (that doesn't look like a field name for
> 'quirk flags) would generate better code.
> 
> Even better would be to reference the flag directly from 'ndev'.
> A pointer indirection for the test if probably faster then passing
> another argument.

A far better idea would be to copy the quirks into the fec_net_private
structure, storing them as a 'unsigned int' value, and test them from
there.  This is /much/ more efficient than jumping through the hoops to
retrieve id_entry, and then testing the 64-bit driver_data value.

I've had such a patch since about the beginning of the year (and patches
which add stuff like byte queue limits, which are really needed now that
we have a /huge/ transmit ring), but I can't keep up with rebasing the
patch set, and properly tested and performance impacts evaluated due to
the rate of FEC changes.  I never finished rebasing the set after Andy's
TSO work...

-- 
FTTC broadband for 0.8mile line: currently at 9.5Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply

* RE: [PATCH 1/1 net-next] af_unix: remove NULL assignment on static
From: David Laight @ 2014-10-08  9:10 UTC (permalink / raw)
  To: 'Hannes Frederic Sowa', Fabian Frederick
  Cc: Guenter Roeck, David Miller, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <1412715283.11600.11.camel@localhost>

From: Hannes Frederic Sowa
> I think David's concern was whether if 0 == false in all situations. It
> is pretty clear that static memory is initialized to 0.

I'm not 100% sure about that.
static pointers may be required to be initialised to NULL.
If NULL isn't the 'all 0 bit pattern' then the memory would need
to be initialised to a different pattern.
Not that anyone is likely to implement such a system because far too
much code will break.

The only system I knew where 'native' NULL pointers were 'all 1s'
used 0 in its C compiler.

	David


^ permalink raw reply

* Re: [PATCH] net: fec: fix regression on i.MX28 introduced by rx_copybreak support
From: Lothar Waßmann @ 2014-10-08  8:55 UTC (permalink / raw)
  To: David Laight
  Cc: 'Eric Dumazet', netdev@vger.kernel.org, David S. Miller,
	Russell King, Frank Li, Fabio Estevam,
	linux-kernel@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6D174C6BA0@AcuExch.aculab.com>

Hi,

David Laight wrote:
> From: Lothar Waßmann
> > David Laight wrote:
> > > From: Eric Dumazet
> > > > On Tue, 2014-10-07 at 15:19 +0200, Lothar Wamann wrote:
> > > > > commit 1b7bde6d659d ("net: fec: implement rx_copybreak to improve rx performance")
> > > > > introduced a regression for i.MX28. The swap_buffer() function doing
> > > > > the endian conversion of the received data on i.MX28 may access memory
> > > > > beyond the actual packet size in the DMA buffer. fec_enet_copybreak()
> > > > > does not copy those bytes, so that the last bytes of a packet may be
> > > > > filled with invalid data after swapping.
> > > > > This will likely lead to checksum errors on received packets.
> > > > > E.g. when trying to mount an NFS rootfs:
> > > > > UDP: bad checksum. From 192.168.1.225:111 to 192.168.100.73:44662 ulen 36
> > > > >
> > > > > Do the byte swapping and copying to the new skb in one go if
> > > > > necessary.
> > > > >
> > > > > Signed-off-by: Lothar Wamann <LW@KARO-electronics.de>
> > > > > ---
> > > > >  drivers/net/ethernet/freescale/fec_main.c |   25 +++++++++++++++++++++----
> > > > >  1 file changed, 21 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> > > > > index 87975b5..eaaebad 100644
> > > > > --- a/drivers/net/ethernet/freescale/fec_main.c
> > > > > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > > > > @@ -339,6 +339,18 @@ static void *swap_buffer(void *bufaddr, int len)
> > > > >  	return bufaddr;
> > > > >  }
> > > > >
> > > > > +static void *swap_buffer2(void *dst_buf, void *src_buf, int len)
> > > > > +{
> > > > > +	int i;
> > > > > +	unsigned int *src = src_buf;
> > > > > +	unsigned int *dst = dst_buf;
> > > > > +
> > > > > +	for (i = 0; i < DIV_ROUND_UP(len, 4); i++, src++, dst++)
> > > > > +		*dst = cpu_to_be32(*src);
> > > >
> > > > No need for the DIV :
> > > >
> > > > 	for (i = 0; i < len; i += sizeof(*dst), src++, dst++)
> > > > 		*dst = cpu_to_be32(*src);
> > > >
> > > > Also are you sure both src/dst are aligned to word boundaries, or is
> > > > this architecture OK with possible misalignment ?
> > >
> > > I wondered about that as well.
> > > I wouldn't have expected ppc to support misaligned transfers, and you'd also
> > > want to make sure that cpu_to_be(*src) was using a byte-swapping instruction.
> > > Hmmm... cpu_to_be() doesn't sound like the right 'swap' macro name.
> > >
> > ??? So what is cpu_to_be32() then?
> > The new swap function is an exact copy of the original one already in
> > use except for the fact that it uses distinct source and destination
> > buffers.
> 
> cpu_to_be32() is for converting a 'cpu' endianness value to 'big-endian'.
> Here you are processing receive data - so you probably want be32_to_cpu().
> (Yes, I know they are functionally identical....)
> 
> Alternatively, since these aren't actually 32bit numbers and you know
> whether you want to swap, something like the __swab32p() from swab.h
> - but I'm not entirely sure that one is expected to be used.
> 
OK, but that should probably be a separate patch to fix the original
swap_buffer() function as well.

> Clearly you are well inside the ppc 'endianness' hell.
>
No. i.MX28 is not PPC. It only has a "feature" in the ethernet
controller that stores all data in big endian format.


Lothar Waßmann
-- 
___________________________________________________________

Ka-Ro electronics GmbH | Pascalstraße 22 | D - 52076 Aachen
Phone: +49 2408 1402-0 | Fax: +49 2408 1402-10
Geschäftsführer: Matthias Kaussen
Handelsregistereintrag: Amtsgericht Aachen, HRB 4996

www.karo-electronics.de | info@karo-electronics.de
___________________________________________________________

^ permalink raw reply

* RE: [PATCH] net: fec: fix regression on i.MX28 introduced by rx_copybreak support
From: David Laight @ 2014-10-08  8:54 UTC (permalink / raw)
  To: 'Sergei Shtylyov', Lothar Waßmann,
	netdev@vger.kernel.org
  Cc: David S. Miller, Russell King, Frank Li, Fabio Estevam,
	linux-kernel@vger.kernel.org
In-Reply-To: <54340FA4.509@cogentembedded.com>

From: Sergei Shtylyov
> On 10/07/2014 05:19 PM, Lothar Wamann wrote:
> 
> > commit 1b7bde6d659d ("net: fec: implement rx_copybreak to improve rx performance")
> > introduced a regression for i.MX28. The swap_buffer() function doing
> > the endian conversion of the received data on i.MX28 may access memory
> > beyond the actual packet size in the DMA buffer. fec_enet_copybreak()
> > does not copy those bytes, so that the last bytes of a packet may be
> > filled with invalid data after swapping.
> > This will likely lead to checksum errors on received packets.
> > E.g. when trying to mount an NFS rootfs:
> > UDP: bad checksum. From 192.168.1.225:111 to 192.168.100.73:44662 ulen 36
> 
> > Do the byte swapping and copying to the new skb in one go if
> > necessary.
> 
> > Signed-off-by: Lothar Wamann <LW@KARO-electronics.de>
> > ---
> >   drivers/net/ethernet/freescale/fec_main.c |   25 +++++++++++++++++++++----
> >   1 file changed, 21 insertions(+), 4 deletions(-)
> 
> > diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> > index 87975b5..eaaebad 100644
> > --- a/drivers/net/ethernet/freescale/fec_main.c
> > +++ b/drivers/net/ethernet/freescale/fec_main.c
> [...]
> > @@ -1348,7 +1360,7 @@ fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct
> sk_buff
> >   }
> >
> >   static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
> > -			       struct bufdesc *bdp, u32 length)
> > +			       struct bufdesc *bdp, u32 length, int swap)
> 
>     'bool swap' perhaps?
> 
> > @@ -1393,6 +1408,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
> >   	u16	vlan_tag;
> >   	int	index = 0;
> >   	bool	is_copybreak;
> > +	bool need_swap = id_entry->driver_data & FEC_QUIRK_SWAP_FRAME;
> 
>     ... especially talking this into account...

Hmmm... in that case you may not want the compiler to convert the bit value
to a 'bool' at all.

Passing 'id_entry->driver_data' through (that doesn't look like a field name for
'quirk flags) would generate better code.

Even better would be to reference the flag directly from 'ndev'.
A pointer indirection for the test if probably faster then passing another argument.

	David


^ permalink raw reply

* RE: FW: [PATCH V1 net-next 1/2] pgtable: Add API to query if write combining is available
From: David Laight @ 2014-10-08  8:50 UTC (permalink / raw)
  To: 'Moshe Lazer', davem@davemloft.net
  Cc: Or Gerlitz, Jack Morgenstein, Tal Alon, Yevgeny Petrilin,
	netdev@vger.kernel.org, Amir Vadai
In-Reply-To: <5434F989.2040101@dev.mellanox.co.il>

From: Moshe Lazer
> > From: David Miller [mailto:davem@davemloft.net]
> > Sent: Tuesday, October 07, 2014 10:44 PM
> > To: Or Gerlitz
> > Cc: netdev@vger.kernel.org; Amir Vadai; jackm@dev.mellanox.co.il; Moshe Lazer; Tal Alon; Yevgeny
> Petrilin
> > Subject: Re: [PATCH V1 net-next 1/2] pgtable: Add API to query if write combining is available
> >
> > From: Or Gerlitz <ogerlitz@mellanox.com>
> > Date: Sun,  5 Oct 2014 11:22:21 +0300
> >
> >> From: Moshe Lazer <moshel@mellanox.com>
> >>
> >> Currently the kernel write-combining interface provides a best effort
> >> mechanism in which the caller simply invokes pgprot_writecombine().
> >>
> >> If write combining is available, the region is mapped for it,
> >> otherwise the region is (silently) mapped as non-cached.
> >>
> >> In some cases, however, the calling driver must know if write
> >> combining is available, so a silent best effort mechanism is not sufficient.
> >>
> >> Add writecombine_available(), which returns true if the system
> >> supports write combining and false if it doesn't.
> >>
> >> Signed-off-by: Moshe Lazer <moshel@mellanox.com>
> >> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
> >> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> > This needs some ACKs from MM developers.
> >
> > But also the situation is more complicated than a simple boolean test.
> >
> > On some platforms you have to test first whether the range you are trying to write combine can
> legally be marked in that way.  The DRM layer has all of these per-arch tests to do this properly.
> >
> > #if defined(__i386__) || defined(__x86_64__)
> > 	if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING))
> > 		tmp = pgprot_noncached(tmp);
> > 	else
> > 		tmp = pgprot_writecombine(tmp);
> > #elif defined(__powerpc__)
> > 	pgprot_val(tmp) |= _PAGE_NO_CACHE;
> > 	if (map->type == _DRM_REGISTERS)
> > 		pgprot_val(tmp) |= _PAGE_GUARDED;
> > #elif defined(__ia64__)
> > 	if (efi_range_is_wc(vma->vm_start, vma->vm_end -
> > 				    vma->vm_start))
> > 		tmp = pgprot_writecombine(tmp);
> > 	else
> > 		tmp = pgprot_noncached(tmp);
> > #elif defined(__sparc__) || defined(__arm__) || defined(__mips__)
> > 	tmp = pgprot_noncached(tmp);
> > #endif

> The idea was to provide an indication as for whether the arch supports
> write-combining in general.
> If we want to benefit from blue flame operations, we need to map the
> blue flame registers as write combining - otherwise there is no benefit.
> So we would like to know if write combining is supported by the system
> or not.

Sounds like the mapping functions should have a mode where 'write combining'
is mandatory, then you can try to map the registers as 'write combining' and
if the map request fails use the alternate scheme.

That moves the test into the architecture specific mapping code where
it belongs.

	David

^ permalink raw reply

* vxlan gro problem ?
From: yinpeijun @ 2014-10-08  8:46 UTC (permalink / raw)
  To: netdev, linux-kernel, ogerlitz; +Cc: lichunhe, wangfakai

Hi all,
        recently Linux 3.14 has been released and I find the networking has added udp gro and vxlan gro funtion, then I use the redhat 7.0(there is also add this funtion)
to test, I use kernel vxlan module and  create a vxlan device then attach the device to  ovs  bridge , the configure as follow:
       root@25:~$ ip link      
        15: vxlan0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue master ovs-system state UNKNOWN mode DEFAULT
            link/ether be:e1:ae:3d:8b:f2 brd ff:ff:ff:ff:ff:ff
        16: vnet0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1400 qdisc mq master ovs-system state UNKNOWN mode DEFAULT qlen 5000
       
       root@25:~$ ovs-vsctl show
        aa1294f3-9952-4393-b2b5-54e9a6eb76ee
        Bridge ovs-vx
            Port ovs-vx
                Interface ovs-vx
                    type: internal
            Port "vnet0"
                Interface "vnet0"
            Port "vxlan0"
                Interface "vxlan0"
        ovs_version: "2.0.2"

vnet0 is a vm backend device,  and the end is the same configuration. then I use netperf to test throughput  in vm (netperf -H **** -t TCP_STREAM -l 10 -- -m 1460), 
the result is 3-4 Gbit/sec, the  improvement  is not obvious,   and I also confused there is no aggregation  packets (length > mtu) in the end vm.   so I want to know what
wrong ?   or how to test the function ?

^ permalink raw reply

* RE: [PATCH] net: fec: fix regression on i.MX28 introduced by rx_copybreak support
From: David Laight @ 2014-10-08  8:45 UTC (permalink / raw)
  To: 'Lothar Waßmann'
  Cc: 'Eric Dumazet', netdev@vger.kernel.org, David S. Miller,
	Russell King, Frank Li, Fabio Estevam,
	linux-kernel@vger.kernel.org
In-Reply-To: <20141008070147.2f3b5319@ipc1.ka-ro>

From: Lothar Waßmann
> David Laight wrote:
> > From: Eric Dumazet
> > > On Tue, 2014-10-07 at 15:19 +0200, Lothar Wamann wrote:
> > > > commit 1b7bde6d659d ("net: fec: implement rx_copybreak to improve rx performance")
> > > > introduced a regression for i.MX28. The swap_buffer() function doing
> > > > the endian conversion of the received data on i.MX28 may access memory
> > > > beyond the actual packet size in the DMA buffer. fec_enet_copybreak()
> > > > does not copy those bytes, so that the last bytes of a packet may be
> > > > filled with invalid data after swapping.
> > > > This will likely lead to checksum errors on received packets.
> > > > E.g. when trying to mount an NFS rootfs:
> > > > UDP: bad checksum. From 192.168.1.225:111 to 192.168.100.73:44662 ulen 36
> > > >
> > > > Do the byte swapping and copying to the new skb in one go if
> > > > necessary.
> > > >
> > > > Signed-off-by: Lothar Wamann <LW@KARO-electronics.de>
> > > > ---
> > > >  drivers/net/ethernet/freescale/fec_main.c |   25 +++++++++++++++++++++----
> > > >  1 file changed, 21 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> > > > index 87975b5..eaaebad 100644
> > > > --- a/drivers/net/ethernet/freescale/fec_main.c
> > > > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > > > @@ -339,6 +339,18 @@ static void *swap_buffer(void *bufaddr, int len)
> > > >  	return bufaddr;
> > > >  }
> > > >
> > > > +static void *swap_buffer2(void *dst_buf, void *src_buf, int len)
> > > > +{
> > > > +	int i;
> > > > +	unsigned int *src = src_buf;
> > > > +	unsigned int *dst = dst_buf;
> > > > +
> > > > +	for (i = 0; i < DIV_ROUND_UP(len, 4); i++, src++, dst++)
> > > > +		*dst = cpu_to_be32(*src);
> > >
> > > No need for the DIV :
> > >
> > > 	for (i = 0; i < len; i += sizeof(*dst), src++, dst++)
> > > 		*dst = cpu_to_be32(*src);
> > >
> > > Also are you sure both src/dst are aligned to word boundaries, or is
> > > this architecture OK with possible misalignment ?
> >
> > I wondered about that as well.
> > I wouldn't have expected ppc to support misaligned transfers, and you'd also
> > want to make sure that cpu_to_be(*src) was using a byte-swapping instruction.
> > Hmmm... cpu_to_be() doesn't sound like the right 'swap' macro name.
> >
> ??? So what is cpu_to_be32() then?
> The new swap function is an exact copy of the original one already in
> use except for the fact that it uses distinct source and destination
> buffers.

cpu_to_be32() is for converting a 'cpu' endianness value to 'big-endian'.
Here you are processing receive data - so you probably want be32_to_cpu().
(Yes, I know they are functionally identical....)

Alternatively, since these aren't actually 32bit numbers and you know
whether you want to swap, something like the __swab32p() from swab.h
- but I'm not entirely sure that one is expected to be used.

Clearly you are well inside the ppc 'endianness' hell.

	David


^ permalink raw reply

* Re: FW: [PATCH V1 net-next 1/2] pgtable: Add API to query if write combining is available
From: Moshe Lazer @ 2014-10-08  8:44 UTC (permalink / raw)
  To: davem
  Cc: Or Gerlitz, Jack Morgenstein, Tal Alon, Yevgeny Petrilin, netdev,
	Amir Vadai
In-Reply-To: <925ad10b2ec44e228e69bf0cbe6c0a0e@AMSPR05MB002.eurprd05.prod.outlook.com>


> From: David Miller [mailto:davem@davemloft.net]
> Sent: Tuesday, October 07, 2014 10:44 PM
> To: Or Gerlitz
> Cc: netdev@vger.kernel.org; Amir Vadai; jackm@dev.mellanox.co.il; Moshe Lazer; Tal Alon; Yevgeny Petrilin
> Subject: Re: [PATCH V1 net-next 1/2] pgtable: Add API to query if write combining is available
>
> From: Or Gerlitz <ogerlitz@mellanox.com>
> Date: Sun,  5 Oct 2014 11:22:21 +0300
>
>> From: Moshe Lazer <moshel@mellanox.com>
>>
>> Currently the kernel write-combining interface provides a best effort
>> mechanism in which the caller simply invokes pgprot_writecombine().
>>
>> If write combining is available, the region is mapped for it,
>> otherwise the region is (silently) mapped as non-cached.
>>
>> In some cases, however, the calling driver must know if write
>> combining is available, so a silent best effort mechanism is not sufficient.
>>
>> Add writecombine_available(), which returns true if the system
>> supports write combining and false if it doesn't.
>>
>> Signed-off-by: Moshe Lazer <moshel@mellanox.com>
>> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
>> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> This needs some ACKs from MM developers.
>
> But also the situation is more complicated than a simple boolean test.
>
> On some platforms you have to test first whether the range you are trying to write combine can legally be marked in that way.  The DRM layer has all of these per-arch tests to do this properly.
>
> #if defined(__i386__) || defined(__x86_64__)
> 	if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING))
> 		tmp = pgprot_noncached(tmp);
> 	else
> 		tmp = pgprot_writecombine(tmp);
> #elif defined(__powerpc__)
> 	pgprot_val(tmp) |= _PAGE_NO_CACHE;
> 	if (map->type == _DRM_REGISTERS)
> 		pgprot_val(tmp) |= _PAGE_GUARDED;
> #elif defined(__ia64__)
> 	if (efi_range_is_wc(vma->vm_start, vma->vm_end -
> 				    vma->vm_start))
> 		tmp = pgprot_writecombine(tmp);
> 	else
> 		tmp = pgprot_noncached(tmp);
> #elif defined(__sparc__) || defined(__arm__) || defined(__mips__)
> 	tmp = pgprot_noncached(tmp);
> #endif
The idea was to provide an indication as for whether the arch supports 
write-combining in general.
If we want to benefit from blue flame operations, we need to map the 
blue flame registers as write combining - otherwise there is no benefit. 
So we would like to know if write combining is supported by the system 
or not.

^ permalink raw reply

* RE: [PATCH v1 4/4] ARM: Documentation: Update fec dts binding doc
From: luwei.zhou @ 2014-10-08  8:36 UTC (permalink / raw)
  To: Richard Cochran
  Cc: davem@davemloft.net, netdev@vger.kernel.org, shawn.guo@linaro.org,
	bhutchings@solarflare.com, Fabio.Estevam@freescale.com,
	fugang.duan@freescale.com, Frank.Li@freescale.com,
	stephen@networkplumber.org
In-Reply-To: <20141008080603.GA4648@netboy>

On Wed, Oct 8, 2014 at 4:06:00AM, Richard Cochran wrote:
> -----Original Message-----
> From: Richard Cochran [mailto:richardcochran@gmail.com]
> Sent: Wednesday, October 08, 2014 4:06 PM
> To: Zhou Luwei-B45643
> Cc: davem@davemloft.net; netdev@vger.kernel.org; shawn.guo@linaro.org;
> bhutchings@solarflare.com; Estevam Fabio-R49496; Duan Fugang-B38611; Li
> Frank-B20596; stephen@networkplumber.org
> Subject: Re: [PATCH v1 4/4] ARM: Documentation: Update fec dts binding
> doc
> 
> On Wed, Oct 08, 2014 at 03:15:08AM +0000, luwei.zhou@freescale.com wrote:
> > On Wed, Oct 1, 2014 at 11:59:00AM, Richard Cochran wrote:
> > > -----Original Message-----
> > > From: Richard Cochran [mailto:richardcochran@gmail.com]
> > > Sent: Wednesday, October 01, 2014 11:59 AM
> > > To: Zhou Luwei-B45643
> > > Cc: davem@davemloft.net; netdev@vger.kernel.org;
> > > shawn.guo@linaro.org; bhutchings@solarflare.com; Estevam
> > > Fabio-R49496; Duan Fugang-B38611; Li Frank-B20596;
> > > stephen@networkplumber.org
> > > Subject: Re: [PATCH v1 4/4] ARM: Documentation: Update fec dts
> > > binding doc
> > >
> > > On Thu, Sep 25, 2014 at 04:43:50PM +0200, Richard Cochran wrote:
> > > > On Thu, Sep 25, 2014 at 04:10:21PM +0800, Luwei Zhou wrote:
> > > > > This patch update fec devicetree binding doc that add Optional
> > > > > properties "pps-channel".
> > > >
> > > > Again, use the PTP pin interface. We don't need a random new FEC
> > > > DT property for this.
> > >
> > > BTW, if the "channel" only means an internal timer resource, please
> > > find a proper way to claim that resource. Do not add another random
> > > DT property. We have enough of those already.
> > >
> > > Thanks,
> > > Richard
> >
> > Okay. I will use the PTP pin interface to let user set the channel
> > index as V2 patch does. Sorry for the late reply caused by vocation.
> 
> Do not use the PTP pin control for an internal PPS event. Instead, just
> claim the timer as some kind of internal resource. It looks like timers
> are already part of DT, so why not use those?
> 
> Thanks,
> Richard

Hi Richard,

I am not expert in DT.I didn't get your point of using timer resource in the DT.  Yes, the timer resource is
Part of FEC IP hardware. But if you want to choose one channel for using PPS, you have to add another property
to specify. Do you mean the current code already have the DT property to specify the channel?

Thanks
Luwei

^ permalink raw reply

* Re: [PATCH v1 4/4] ARM: Documentation: Update fec dts binding doc
From: Richard Cochran @ 2014-10-08  8:06 UTC (permalink / raw)
  To: luwei.zhou@freescale.com
  Cc: davem@davemloft.net, netdev@vger.kernel.org, shawn.guo@linaro.org,
	bhutchings@solarflare.com, Fabio.Estevam@freescale.com,
	fugang.duan@freescale.com, Frank.Li@freescale.com,
	stephen@networkplumber.org
In-Reply-To: <39bcecf4ec1a4862bc7316dc845a535c@BY2PR03MB441.namprd03.prod.outlook.com>

On Wed, Oct 08, 2014 at 03:15:08AM +0000, luwei.zhou@freescale.com wrote:
> On Wed, Oct 1, 2014 at 11:59:00AM, Richard Cochran wrote:
> > -----Original Message-----
> > From: Richard Cochran [mailto:richardcochran@gmail.com]
> > Sent: Wednesday, October 01, 2014 11:59 AM
> > To: Zhou Luwei-B45643
> > Cc: davem@davemloft.net; netdev@vger.kernel.org; shawn.guo@linaro.org;
> > bhutchings@solarflare.com; Estevam Fabio-R49496; Duan Fugang-B38611; Li
> > Frank-B20596; stephen@networkplumber.org
> > Subject: Re: [PATCH v1 4/4] ARM: Documentation: Update fec dts binding
> > doc
> > 
> > On Thu, Sep 25, 2014 at 04:43:50PM +0200, Richard Cochran wrote:
> > > On Thu, Sep 25, 2014 at 04:10:21PM +0800, Luwei Zhou wrote:
> > > > This patch update fec devicetree binding doc that add Optional
> > > > properties "pps-channel".
> > >
> > > Again, use the PTP pin interface. We don't need a random new FEC DT
> > > property for this.
> > 
> > BTW, if the "channel" only means an internal timer resource, please find
> > a proper way to claim that resource. Do not add another random DT
> > property. We have enough of those already.
> > 
> > Thanks,
> > Richard
> 
> Okay. I will use the PTP pin interface to let user set the channel index as V2 patch does. Sorry for the
> late reply caused by vocation.

Do not use the PTP pin control for an internal PPS event. Instead,
just claim the timer as some kind of internal resource. It looks like
timers are already part of DT, so why not use those?

Thanks,
Richard

^ permalink raw reply

* Re: [PATCH] net: fec: fix regression on i.MX28 introduced by rx_copybreak support
From: Lothar Waßmann @ 2014-10-08  5:01 UTC (permalink / raw)
  To: David Laight
  Cc: 'Eric Dumazet', netdev@vger.kernel.org, David S. Miller,
	Russell King, Frank Li, Fabio Estevam,
	linux-kernel@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6D174C6116@AcuExch.aculab.com>

Hi,

David Laight wrote:
> From: Eric Dumazet
> > On Tue, 2014-10-07 at 15:19 +0200, Lothar Wamann wrote:
> > > commit 1b7bde6d659d ("net: fec: implement rx_copybreak to improve rx performance")
> > > introduced a regression for i.MX28. The swap_buffer() function doing
> > > the endian conversion of the received data on i.MX28 may access memory
> > > beyond the actual packet size in the DMA buffer. fec_enet_copybreak()
> > > does not copy those bytes, so that the last bytes of a packet may be
> > > filled with invalid data after swapping.
> > > This will likely lead to checksum errors on received packets.
> > > E.g. when trying to mount an NFS rootfs:
> > > UDP: bad checksum. From 192.168.1.225:111 to 192.168.100.73:44662 ulen 36
> > >
> > > Do the byte swapping and copying to the new skb in one go if
> > > necessary.
> > >
> > > Signed-off-by: Lothar Wamann <LW@KARO-electronics.de>
> > > ---
> > >  drivers/net/ethernet/freescale/fec_main.c |   25 +++++++++++++++++++++----
> > >  1 file changed, 21 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> > > index 87975b5..eaaebad 100644
> > > --- a/drivers/net/ethernet/freescale/fec_main.c
> > > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > > @@ -339,6 +339,18 @@ static void *swap_buffer(void *bufaddr, int len)
> > >  	return bufaddr;
> > >  }
> > >
> > > +static void *swap_buffer2(void *dst_buf, void *src_buf, int len)
> > > +{
> > > +	int i;
> > > +	unsigned int *src = src_buf;
> > > +	unsigned int *dst = dst_buf;
> > > +
> > > +	for (i = 0; i < DIV_ROUND_UP(len, 4); i++, src++, dst++)
> > > +		*dst = cpu_to_be32(*src);
> > 
> > No need for the DIV :
> > 
> > 	for (i = 0; i < len; i += sizeof(*dst), src++, dst++)
> > 		*dst = cpu_to_be32(*src);
> > 
> > Also are you sure both src/dst are aligned to word boundaries, or is
> > this architecture OK with possible misalignment ?
> 
> I wondered about that as well.
> I wouldn't have expected ppc to support misaligned transfers, and you'd also
> want to make sure that cpu_to_be(*src) was using a byte-swapping instruction.
> Hmmm... cpu_to_be() doesn't sound like the right 'swap' macro name.
> 
??? So what is cpu_to_be32() then?
The new swap function is an exact copy of the original one already in
use except for the fact that it uses distinct source and destination
buffers.


Lothar Waßmann
-- 
___________________________________________________________

Ka-Ro electronics GmbH | Pascalstraße 22 | D - 52076 Aachen
Phone: +49 2408 1402-0 | Fax: +49 2408 1402-10
Geschäftsführer: Matthias Kaussen
Handelsregistereintrag: Amtsgericht Aachen, HRB 4996

www.karo-electronics.de | info@karo-electronics.de
___________________________________________________________

^ permalink raw reply

* Re: [PATCH] net: fec: fix regression on i.MX28 introduced by rx_copybreak support
From: Lothar Waßmann @ 2014-10-08  4:59 UTC (permalink / raw)
  To: David Laight
  Cc: netdev@vger.kernel.org, David S. Miller, Russell King, Frank Li,
	Fabio Estevam, linux-kernel@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6D174C60B1@AcuExch.aculab.com>

Hi,

David Laight wrote:
> From: Lothar
> > David Laight wrote:
> > > From: Lothar Waßmann
> > > > commit 1b7bde6d659d ("net: fec: implement rx_copybreak to improve rx performance")
> > > > introduced a regression for i.MX28. The swap_buffer() function doing
> > > > the endian conversion of the received data on i.MX28 may access memory
> > > > beyond the actual packet size in the DMA buffer. fec_enet_copybreak()
> > > > does not copy those bytes, so that the last bytes of a packet may be
> > > > filled with invalid data after swapping.
> > > > This will likely lead to checksum errors on received packets.
> > > > E.g. when trying to mount an NFS rootfs:
> > > > UDP: bad checksum. From 192.168.1.225:111 to 192.168.100.73:44662 ulen 36
> > > >
> > > > Do the byte swapping and copying to the new skb in one go if
> > > > necessary.
> > >
> > > ISTM that if you need to do the 'swap' you should copy the data regardless
> > > of the length.
> > >
> > The swap function has to look at at most 3 bytes beyond the actual
> > packet length. That is what the original swap_buffer() function does and
> > what the new function swap_buffer2(), that does the endian swapping
> > while copying to the new buffer, also does.
> 
> I understood the bug.
> 
> The point I was making is that if you have to do a read-write of the received
> data (to byteswap it) then you might as well always copy it into a new skb that
> is just big enough for the actual receive frame.
> 
I wanted to use the least intrusive solution.


Lothar Waßmann
-- 
___________________________________________________________

Ka-Ro electronics GmbH | Pascalstraße 22 | D - 52076 Aachen
Phone: +49 2408 1402-0 | Fax: +49 2408 1402-10
Geschäftsführer: Matthias Kaussen
Handelsregistereintrag: Amtsgericht Aachen, HRB 4996

www.karo-electronics.de | info@karo-electronics.de
___________________________________________________________

^ permalink raw reply

* Re: [PATCH/RFC] datapath: offload hooks
From: Stephen Hemminger @ 2014-10-08  4:55 UTC (permalink / raw)
  To: Simon Horman
  Cc: dev, netdev, Pravin Shelar, Jesse Gross, Jiri Pirko, Thomas Graf,
	John Fastabend, Scott Feldman, Roopa Prabhu, Alexi Starovoitov,
	Florian Fainelli, Jamal Hadi Salim, Bert van Leeuwen
In-Reply-To: <1412728851-32534-1-git-send-email-simon.horman@netronome.com>

On Wed,  8 Oct 2014 09:40:51 +0900
Simon Horman <simon.horman@netronome.com> wrote:

> +
> +struct ovs_offload_ops {
> +	/* Flow offload functions  */
> +	/* Called when a flow entry is added to the flow table */
> +	void (*flow_new)(struct sw_flow *);
> +	/* Called when a flow entry is modified */
> +	void (*flow_set)(struct sw_flow *);
> +	/* Called when a flow entry is removed from the flow table */
> +	void (*flow_del)(struct sw_flow *);
> +	/* Called when flow stats are queried */
> +	void (*flow_stats_get)(const struct sw_flow *, struct ovs_flow_stats *,
> +			      unsigned long *used, __be16 *tcp_flags);
> +	/* Called when flow stats are removed */
> +	void (*flow_stats_clear)(struct sw_flow *);
> +
> +	/* Port offload functions  */
> +	/* Called when a vport is added to the datapath */
> +	void (*vport_new)(struct sk_buff *, struct vport *,
> +			  struct vport_parms *);
> +	/* Called when a vport is modified */
> +	void (*vport_set)(struct sk_buff *, struct vport *);
> +	/* Called when a vport is removed from the datapath */
> +	void (*vport_del)(struct sk_buff *, struct vport *);
> +	/* Called when vport stats are queried */
> +	void (*vport_stats_get)(struct vport *, struct ovs_vport_stats *);
> +	/* Called when vport stats are set */
> +	void (*vport_stats_set)(struct vport *, struct ovs_vport_stats *);
> +
> +	/* Datapath offload functions  */
> +	/* Called when the datapath is created */
> +	void (*dp_new)(struct datapath *);
> +	/* Called when the datapath is modified */
> +	void (*dp_set)(struct datapath *);
> +	/* Called when the datapath is removed */
> +	void (*dp_del)(struct datapath *);
> +	/* Called when the datapath stats are queried */
> +	void (*dp_stats_get)(struct datapath *, struct ovs_dp_stats *);
> +};

What about using netlink and netlink notifiers for event type stuff?
Much easier to extend than all the _ops stuff and you can provide
hook for people that want to do it in user space.

^ permalink raw reply

* Re: [PATCH/RFC] datapath: offload hooks
From: Stephen Hemminger @ 2014-10-08  4:50 UTC (permalink / raw)
  To: Simon Horman
  Cc: dev, netdev, Pravin Shelar, Jesse Gross, Jiri Pirko, Thomas Graf,
	John Fastabend, Scott Feldman, Roopa Prabhu, Alexi Starovoitov,
	Florian Fainelli, Jamal Hadi Salim, Bert van Leeuwen
In-Reply-To: <1412728851-32534-1-git-send-email-simon.horman@netronome.com>

On Wed,  8 Oct 2014 09:40:51 +0900
Simon Horman <simon.horman@netronome.com> wrote:

> +struct ovs_offload_ops {
> +	/* Flow offload functions  */
> +	/* Called when a flow entry is added to the flow table */
> +	void (*flow_new)(struct sw_flow *);
> +	/* Called when a flow entry is modified */
> +	void (*flow_set)(struct sw_flow *);
> +	/* Called when a flow entry is removed from the flow table */
> +	void (*flow_del)(struct sw_flow *);
> +	/* Called when flow stats are queried */
> +	void (*flow_stats_get)(const struct sw_flow *, struct ovs_flow_stats *,
> +			      unsigned long *used, __be16 *tcp_flags);
> +	/* Called when flow stats are removed */
> +	void (*flow_stats_clear)(struct sw_flow *);
> +
> +	/* Port offload functions  */
> +	/* Called when a vport is added to the datapath */
> +	void (*vport_new)(struct sk_buff *, struct vport *,
> +			  struct vport_parms *);
> +	/* Called when a vport is modified */
> +	void (*vport_set)(struct sk_buff *, struct vport *);
> +	/* Called when a vport is removed from the datapath */
> +	void (*vport_del)(struct sk_buff *, struct vport *);
> +	/* Called when vport stats are queried */
> +	void (*vport_stats_get)(struct vport *, struct ovs_vport_stats *);
> +	/* Called when vport stats are set */
> +	void (*vport_stats_set)(struct vport *, struct ovs_vport_stats *);
> +
> +	/* Datapath offload functions  */
> +	/* Called when the datapath is created */
> +	void (*dp_new)(struct datapath *);
> +	/* Called when the datapath is modified */
> +	void (*dp_set)(struct datapath *);
> +	/* Called when the datapath is removed */
> +	void (*dp_del)(struct datapath *);
> +	/* Called when the datapath stats are queried */
> +	void (*dp_stats_get)(struct datapath *, struct ovs_dp_stats *);
> +}

For security, you should mark any ops type table const, so an
attacker can't find a home to poke their favorite routine into.

^ permalink raw reply

* Re: [PATCH] net: fec: fix regression on i.MX28 introduced by rx_copybreak support
From: Lothar Waßmann @ 2014-10-08  4:43 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, rmk+kernel, Frank.Li, fabio.estevam, linux-kernel
In-Reply-To: <20141007.131507.635415594538460008.davem@davemloft.net>

Hi,

David Miller wrote:
> From: Lothar Waßmann <LW@KARO-electronics.de>
> Date: Tue,  7 Oct 2014 15:19:37 +0200
> 
> > commit 1b7bde6d659d ("net: fec: implement rx_copybreak to improve rx performance")
> > introduced a regression for i.MX28. The swap_buffer() function doing
> > the endian conversion of the received data on i.MX28 may access memory
> > beyond the actual packet size in the DMA buffer. fec_enet_copybreak()
> > does not copy those bytes, so that the last bytes of a packet may be
> > filled with invalid data after swapping.
> > This will likely lead to checksum errors on received packets.
> > E.g. when trying to mount an NFS rootfs:
> > UDP: bad checksum. From 192.168.1.225:111 to 192.168.100.73:44662 ulen 36
> > 
> > Do the byte swapping and copying to the new skb in one go if
> > necessary.
> > 
> > Signed-off-by: Lothar Waßmann <LW@KARO-electronics.de>
> 
> Why don't you just round up the length fec_enet_copybreak() uses when
> need_swap is true?  Then you will end up mimicking the original behavior
> and not require this new helper function.
> 
I wanted to eliminate the need to access the buffer twice (once for
copying and once for swapping).

> And in any case I agree with Sergei that if you do retain your approach,
> the new 'swap' argument to fec_enet_copybreak() should be a 'bool'.
> 
Right.

> I'm really surprised there isn't a control register bit to adjust the
> endianness of the data DMA'd to/from the network.
>
This is a "feature" of the i.MX28.


Lothar Waßmann
-- 
___________________________________________________________

Ka-Ro electronics GmbH | Pascalstraße 22 | D - 52076 Aachen
Phone: +49 2408 1402-0 | Fax: +49 2408 1402-10
Geschäftsführer: Matthias Kaussen
Handelsregistereintrag: Amtsgericht Aachen, HRB 4996

www.karo-electronics.de | info@karo-electronics.de
___________________________________________________________

^ permalink raw reply

* Re: [PATCH net-next] i40e: skb->xmit_more support
From: Jeff Kirsher @ 2014-10-08  4:02 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, netdev, dborkman
In-Reply-To: <20141007.163730.2221137596344407031.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 599 bytes --]

On Tue, 2014-10-07 at 16:37 -0400, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Tue, 07 Oct 2014 13:30:23 -0700
> 
> > From: Eric Dumazet <edumazet@google.com>
> > 
> > Support skb->xmit_more in i40e is straightforward : we need to move
> > around i40e_maybe_stop_tx() call to correctly test netif_xmit_stopped()
> > before taking the decision to not kick the NIC.
> > 
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> 
> Intel folks, if I could get a quick review of this that would be great.
> 
> Thanks.

Working on it, let you know tomorrow.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* RE: [PATCH v1 0/4] Enable FEC pps ouput
From: luwei.zhou @ 2014-10-08  3:30 UTC (permalink / raw)
  To: Richard Cochran
  Cc: davem@davemloft.net, netdev@vger.kernel.org, shawn.guo@linaro.org,
	bhutchings@solarflare.com, Fabio.Estevam@freescale.com,
	fugang.duan@freescale.com, Frank.Li@freescale.com,
	stephen@networkplumber.org
In-Reply-To: <20141003082341.GB4249@localhost.localdomain>

On Wed, Oct 3, 2014 at 04:24:00PM, Richard Cochran wrote:
> -----Original Message-----
> From: Richard Cochran [mailto:richardcochran@gmail.com]
> Sent: Friday, October 03, 2014 4:24 PM
> To: Zhou Luwei-B45643
> Cc: davem@davemloft.net; netdev@vger.kernel.org; shawn.guo@linaro.org;
> bhutchings@solarflare.com; Estevam Fabio-R49496; Duan Fugang-B38611; Li
> Frank-B20596; stephen@networkplumber.org
> Subject: Re: [PATCH v1 0/4] Enable FEC pps ouput
> 
> On Thu, Sep 25, 2014 at 04:10:17PM +0800, Luwei Zhou wrote:
> > This patch does:
> > 	- Replace 32-bit free-running PTP timer with 31-bit.
> > 	- Implement hardware PTP timestamp adjustment.
> > 	- Enable PPS output based on hardware adjustment.
> >
> >
> > Luwei Zhou (4):
> >   net: fec: ptp: Use the 31-bit ptp timer.
> >   net: fec: ptp: Use hardware algorithm to adjust PTP counter.
> >   net: fec: ptp: Enalbe PPS ouput based on ptp clock
>                    ^^^^^^     ^^^^^
>                    Enable     output
> 
> Also, it looks like you have implemented the wrong feature.
> 
> The "pps" capability means that the clock provides a PPS event
> (interrupt) to the kernel.
> 
> IOW, if .pps==1, then you must call ptp_clock_event() with event type
> PTP_CLOCK_PPS.
> 
> Your patch set seems to be creating a periodic output and not a PPS
> kernel callback.
> 
> You should clarify what you are trying to do, and then implement the
> appropriate interface in your driver.
> 
> Thanks,
> Richard


Hum..The FEC IP does not support a specified channel for PPS function. So I have to use
a ptp timer output channel to implement the PPS function. That is why the software flow
looks like the periodic output but the function is PPS function. We only want the event to happen
on the  per second. So I think it should be clarified to be PPS event. 

Thanks
Luwei

^ permalink raw reply

* RE: [PATCH v1 4/4] ARM: Documentation: Update fec dts binding doc
From: luwei.zhou @ 2014-10-08  3:15 UTC (permalink / raw)
  To: Richard Cochran
  Cc: davem@davemloft.net, netdev@vger.kernel.org, shawn.guo@linaro.org,
	bhutchings@solarflare.com, Fabio.Estevam@freescale.com,
	fugang.duan@freescale.com, Frank.Li@freescale.com,
	stephen@networkplumber.org
In-Reply-To: <20141001035900.GA4544@netboy>

On Wed, Oct 1, 2014 at 11:59:00AM, Richard Cochran wrote:
> -----Original Message-----
> From: Richard Cochran [mailto:richardcochran@gmail.com]
> Sent: Wednesday, October 01, 2014 11:59 AM
> To: Zhou Luwei-B45643
> Cc: davem@davemloft.net; netdev@vger.kernel.org; shawn.guo@linaro.org;
> bhutchings@solarflare.com; Estevam Fabio-R49496; Duan Fugang-B38611; Li
> Frank-B20596; stephen@networkplumber.org
> Subject: Re: [PATCH v1 4/4] ARM: Documentation: Update fec dts binding
> doc
> 
> On Thu, Sep 25, 2014 at 04:43:50PM +0200, Richard Cochran wrote:
> > On Thu, Sep 25, 2014 at 04:10:21PM +0800, Luwei Zhou wrote:
> > > This patch update fec devicetree binding doc that add Optional
> > > properties "pps-channel".
> >
> > Again, use the PTP pin interface. We don't need a random new FEC DT
> > property for this.
> 
> BTW, if the "channel" only means an internal timer resource, please find
> a proper way to claim that resource. Do not add another random DT
> property. We have enough of those already.
> 
> Thanks,
> Richard

Okay. I will use the PTP pin interface to let user set the channel index as V2 patch does. Sorry for the
late reply caused by vocation.

Thanks,
Luwei

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox