Netdev List
 help / color / mirror / Atom feed
* [PATCH 31/34] efx: Use pci_enable_msix_range()
From: Alexander Gordeev @ 2014-01-31 15:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Shradha Shah, Solarflare maintainers, netdev,
	linux-pci
In-Reply-To: <cover.1391172839.git.agordeev@redhat.com>

As a result of the deprecation of the MSI-X/MSI enablement
functions pci_enable_msix() and pci_enable_msi_block(), all
drivers using these two interfaces need to be updated to use
the new pci_enable_msi_range() and pci_enable_msix_range()
interfaces.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/net/ethernet/sfc/efx.c |   20 +++++++++-----------
 1 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 83d4643..297b97a 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1346,20 +1346,23 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 
 		for (i = 0; i < n_channels; i++)
 			xentries[i].entry = i;
-		rc = pci_enable_msix(efx->pci_dev, xentries, n_channels);
-		if (rc > 0) {
+		rc = pci_enable_msix_range(efx->pci_dev,
+					   xentries, 1, n_channels);
+		if (rc < 0) {
+			/* Fall back to single channel MSI */
+			efx->interrupt_mode = EFX_INT_MODE_MSI;
+			netif_err(efx, drv, efx->net_dev,
+				  "could not enable MSI-X\n");
+		} else if (rc < n_channels) {
 			netif_err(efx, drv, efx->net_dev,
 				  "WARNING: Insufficient MSI-X vectors"
 				  " available (%d < %u).\n", rc, n_channels);
 			netif_err(efx, drv, efx->net_dev,
 				  "WARNING: Performance may be reduced.\n");
-			EFX_BUG_ON_PARANOID(rc >= n_channels);
 			n_channels = rc;
-			rc = pci_enable_msix(efx->pci_dev, xentries,
-					     n_channels);
 		}
 
-		if (rc == 0) {
+		if (rc > 0) {
 			efx->n_channels = n_channels;
 			if (n_channels > extra_channels)
 				n_channels -= extra_channels;
@@ -1375,11 +1378,6 @@ static int efx_probe_interrupts(struct efx_nic *efx)
 			for (i = 0; i < efx->n_channels; i++)
 				efx_get_channel(efx, i)->irq =
 					xentries[i].vector;
-		} else {
-			/* Fall back to single channel MSI */
-			efx->interrupt_mode = EFX_INT_MODE_MSI;
-			netif_err(efx, drv, efx->net_dev,
-				  "could not enable MSI-X\n");
 		}
 	}
 
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH 32/34] niu: Use pci_enable_msix_range()
From: Alexander Gordeev @ 2014-01-31 15:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, David S. Miller, Jingoo Han, netdev, linux-pci
In-Reply-To: <cover.1391172839.git.agordeev@redhat.com>

As a result of the deprecation of the MSI-X/MSI enablement
functions pci_enable_msix() and pci_enable_msi_block(), all
drivers using these two interfaces need to be updated to use
the new pci_enable_msi_range() and pci_enable_msix_range()
interfaces.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/net/ethernet/sun/niu.c |   11 +++--------
 1 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 8e2266e..79606f4 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -9041,7 +9041,7 @@ static void niu_try_msix(struct niu *np, u8 *ldg_num_map)
 	struct msix_entry msi_vec[NIU_NUM_LDG];
 	struct niu_parent *parent = np->parent;
 	struct pci_dev *pdev = np->pdev;
-	int i, num_irqs, err;
+	int i, num_irqs;
 	u8 first_ldg;
 
 	first_ldg = (NIU_NUM_LDG / parent->num_ports) * np->port;
@@ -9053,21 +9053,16 @@ static void niu_try_msix(struct niu *np, u8 *ldg_num_map)
 		    (np->port == 0 ? 3 : 1));
 	BUG_ON(num_irqs > (NIU_NUM_LDG / parent->num_ports));
 
-retry:
 	for (i = 0; i < num_irqs; i++) {
 		msi_vec[i].vector = 0;
 		msi_vec[i].entry = i;
 	}
 
-	err = pci_enable_msix(pdev, msi_vec, num_irqs);
-	if (err < 0) {
+	num_irqs = pci_enable_msix_range(pdev, msi_vec, 1, num_irqs);
+	if (num_irqs < 0) {
 		np->flags &= ~NIU_FLAGS_MSIX;
 		return;
 	}
-	if (err > 0) {
-		num_irqs = err;
-		goto retry;
-	}
 
 	np->flags |= NIU_FLAGS_MSIX;
 	for (i = 0; i < num_irqs; i++)
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH 33/34] vmxnet3: Fix MSI-X/MSI enablement code
From: Alexander Gordeev @ 2014-01-31 15:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Shreyas Bhatewara, VMware, Inc., netdev,
	linux-pci
In-Reply-To: <cover.1391172839.git.agordeev@redhat.com>

This update cleans up the MSI-X/MSI enablement code, fixes the
invalid return values of vmxnet3_acquire_msix_vectors() and
enables previously dead code that handles the case where only
VMXNET3_LINUX_MIN_MSIX_VECT MSI-X vectors were allocated.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/net/vmxnet3/vmxnet3_drv.c |  101 +++++++++++++++++--------------------
 1 files changed, 46 insertions(+), 55 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 3be786f..3a17797 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2729,47 +2729,44 @@ vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
 /*
  * Enable MSIx vectors.
  * Returns :
- *	0 on successful enabling of required vectors,
  *	VMXNET3_LINUX_MIN_MSIX_VECT when only minimum number of vectors required
- *	 could be enabled.
- *	number of vectors which can be enabled otherwise (this number is smaller
+ *	 were enabled.
+ *	number of vectors which were enabled otherwise (this number is greater
  *	 than VMXNET3_LINUX_MIN_MSIX_VECT)
  */
 
 static int
-vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
-			     int vectors)
+vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter, int nvec)
 {
-	int err = 0, vector_threshold;
-	vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
-
-	while (vectors >= vector_threshold) {
-		err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
-				      vectors);
+	do {
+		int err = pci_enable_msix(adapter->pdev,
+					  adapter->intr.msix_entries, nvec);
 		if (!err) {
-			adapter->intr.num_intrs = vectors;
-			return 0;
+			return nvec;
 		} else if (err < 0) {
 			dev_err(&adapter->netdev->dev,
-				   "Failed to enable MSI-X, error: %d\n", err);
-			vectors = 0;
-		} else if (err < vector_threshold) {
-			break;
+				"Failed to enable MSI-X, error: %d\n", err);
+			return err;
+		} else if (err < VMXNET3_LINUX_MIN_MSIX_VECT) {
+			dev_info(&adapter->pdev->dev,
+				 "Number of MSI-X which can be allocated "
+				 "is lower than min threshold required.\n");
+			return -ENOSPC;
 		} else {
 			/* If fails to enable required number of MSI-x vectors
 			 * try enabling minimum number of vectors required.
 			 */
 			dev_err(&adapter->netdev->dev,
-				"Failed to enable %d MSI-X, trying %d instead\n",
-				    vectors, vector_threshold);
-			vectors = vector_threshold;
+				"Failed to enable %d MSI-X, trying %d\n",
+				nvec, VMXNET3_LINUX_MIN_MSIX_VECT);
+			nvec = VMXNET3_LINUX_MIN_MSIX_VECT;
 		}
-	}
+	} while (nvec >= VMXNET3_LINUX_MIN_MSIX_VECT);
 
-	dev_info(&adapter->pdev->dev,
-		 "Number of MSI-X interrupts which can be allocated "
-		 "is lower than min threshold required.\n");
-	return err;
+	/*
+	 * Should never get here
+	 */
+	return -ENOSPC;
 }
 
 
@@ -2796,56 +2793,50 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
 
 #ifdef CONFIG_PCI_MSI
 	if (adapter->intr.type == VMXNET3_IT_MSIX) {
-		int vector, err = 0;
-
-		adapter->intr.num_intrs = (adapter->share_intr ==
-					   VMXNET3_INTR_TXSHARE) ? 1 :
-					   adapter->num_tx_queues;
-		adapter->intr.num_intrs += (adapter->share_intr ==
-					   VMXNET3_INTR_BUDDYSHARE) ? 0 :
-					   adapter->num_rx_queues;
-		adapter->intr.num_intrs += 1;		/* for link event */
-
-		adapter->intr.num_intrs = (adapter->intr.num_intrs >
-					   VMXNET3_LINUX_MIN_MSIX_VECT
-					   ? adapter->intr.num_intrs :
-					   VMXNET3_LINUX_MIN_MSIX_VECT);
-
-		for (vector = 0; vector < adapter->intr.num_intrs; vector++)
-			adapter->intr.msix_entries[vector].entry = vector;
-
-		err = vmxnet3_acquire_msix_vectors(adapter,
-						   adapter->intr.num_intrs);
+		int i, nvec;
+
+		nvec  = adapter->share_intr == VMXNET3_INTR_TXSHARE ?
+			1 : adapter->num_tx_queues;
+		nvec += adapter->share_intr == VMXNET3_INTR_BUDDYSHARE ?
+			0 : adapter->num_rx_queues;
+		nvec += 1;	/* for link event */
+		nvec = nvec > VMXNET3_LINUX_MIN_MSIX_VECT ?
+		       nvec : VMXNET3_LINUX_MIN_MSIX_VECT;
+
+		for (i = 0; i < nvec; i++)
+			adapter->intr.msix_entries[i].entry = i;
+
+		nvec = vmxnet3_acquire_msix_vectors(adapter, nvec);
+		if (nvec < 0)
+			goto msix_err;
+
 		/* If we cannot allocate one MSIx vector per queue
 		 * then limit the number of rx queues to 1
 		 */
-		if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
+		if (nvec == VMXNET3_LINUX_MIN_MSIX_VECT) {
 			if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
 			    || adapter->num_rx_queues != 1) {
 				adapter->share_intr = VMXNET3_INTR_TXSHARE;
 				netdev_err(adapter->netdev,
 					   "Number of rx queues : 1\n");
 				adapter->num_rx_queues = 1;
-				adapter->intr.num_intrs =
-						VMXNET3_LINUX_MIN_MSIX_VECT;
 			}
-			return;
 		}
-		if (!err)
-			return;
 
+		adapter->intr.num_intrs = nvec;
+		return;
+
+msix_err:
 		/* If we cannot allocate MSIx vectors use only one rx queue */
 		dev_info(&adapter->pdev->dev,
 			 "Failed to enable MSI-X, error %d. "
-			 "Limiting #rx queues to 1, try MSI.\n", err);
+			 "Limiting #rx queues to 1, try MSI.\n", nvec);
 
 		adapter->intr.type = VMXNET3_IT_MSI;
 	}
 
 	if (adapter->intr.type == VMXNET3_IT_MSI) {
-		int err;
-		err = pci_enable_msi(adapter->pdev);
-		if (!err) {
+		if (!pci_enable_msi(adapter->pdev)) {
 			adapter->num_rx_queues = 1;
 			adapter->intr.num_intrs = 1;
 			return;
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH 34/34] vmxnet3: Use pci_enable_msix_range()
From: Alexander Gordeev @ 2014-01-31 15:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Shreyas Bhatewara, VMware, Inc., netdev,
	linux-pci
In-Reply-To: <cover.1391172839.git.agordeev@redhat.com>

As a result of the deprecation of the MSI-X/MSI enablement
functions pci_enable_msix() and pci_enable_msi_block(), all
drivers using these two interfaces need to be updated to use
the new pci_enable_msi_range() and pci_enable_msix_range()
interfaces.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/net/vmxnet3/vmxnet3_drv.c |   47 +++++++++++++++----------------------
 1 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 3a17797..9275c8c 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2738,35 +2738,26 @@ vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
 static int
 vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter, int nvec)
 {
-	do {
-		int err = pci_enable_msix(adapter->pdev,
-					  adapter->intr.msix_entries, nvec);
-		if (!err) {
-			return nvec;
-		} else if (err < 0) {
-			dev_err(&adapter->netdev->dev,
-				"Failed to enable MSI-X, error: %d\n", err);
-			return err;
-		} else if (err < VMXNET3_LINUX_MIN_MSIX_VECT) {
-			dev_info(&adapter->pdev->dev,
-				 "Number of MSI-X which can be allocated "
-				 "is lower than min threshold required.\n");
-			return -ENOSPC;
-		} else {
-			/* If fails to enable required number of MSI-x vectors
-			 * try enabling minimum number of vectors required.
-			 */
-			dev_err(&adapter->netdev->dev,
-				"Failed to enable %d MSI-X, trying %d\n",
-				nvec, VMXNET3_LINUX_MIN_MSIX_VECT);
-			nvec = VMXNET3_LINUX_MIN_MSIX_VECT;
-		}
-	} while (nvec >= VMXNET3_LINUX_MIN_MSIX_VECT);
+	int ret = pci_enable_msix_range(adapter->pdev,
+					adapter->intr.msix_entries, nvec, nvec);
 
-	/*
-	 * Should never get here
-	 */
-	return -ENOSPC;
+	if (ret == -ENOSPC && nvec > VMXNET3_LINUX_MIN_MSIX_VECT) {
+		dev_err(&adapter->netdev->dev,
+			"Failed to enable %d MSI-X, trying %d\n",
+			nvec, VMXNET3_LINUX_MIN_MSIX_VECT);
+
+		ret = pci_enable_msix_range(adapter->pdev,
+					    adapter->intr.msix_entries,
+					    VMXNET3_LINUX_MIN_MSIX_VECT,
+					    VMXNET3_LINUX_MIN_MSIX_VECT);
+	}
+
+	if (ret < 0) {
+		dev_err(&adapter->netdev->dev,
+			"Failed to enable MSI-X, error: %d\n", ret);
+	}
+
+	return ret;
 }
 
 
-- 
1.7.7.6

^ permalink raw reply related

* RE: [PATCH RFC 1/1] usb: Tell xhci when usb data might be misaligned
From: David Laight @ 2014-01-31 15:22 UTC (permalink / raw)
  To: 'Sarah Sharp', Bjørn Mork
  Cc: linux-usb@vger.kernel.org, netdev@vger.kernel.org,
	Greg Kroah-Hartman, David Miller, Dan Williams, Nyman, Mathias,
	Mark Lord, Alan Stern, Freddy Xin, Ming Lei
In-Reply-To: <20140130221511.GD14228@xanatos>

From: Sarah Sharp
> On Thu, Jan 30, 2014 at 10:50:21PM +0100, Bjørn Mork wrote:
> > FWIW, the plan looks fine to me.  Just adding a couple of hints to
> > simplify the implementation.
> >
> > Sarah Sharp <sarah.a.sharp@linux.intel.com> writes:
> >
> > > Let's do this fix the right way, instead of wall papering over the
> > > issue.  Here's what we should do:
> > >
> > > 1. Disable scatter-gather for the ax88179_178a driver when it's under an
> > >    xHCI host.
> >
> > No need to make this conditional.  SG is only enabled in the
> > ax88179_178a driver if udev->bus->no_sg_constraint is true, so it
> > applies only to xHCI hosts in the first place.

Leave the usbnet code alone and unset udev->bus->no_sg_constraint.

	David

^ permalink raw reply

* Re: [BUG] at include/linux/page-flags.h:415 (PageTransHuge)
From: Vlastimil Babka @ 2014-01-31 15:25 UTC (permalink / raw)
  To: Thomas Hellstrom
  Cc: Daniel Borkmann, Andrew Morton, linux-kernel, Michel Lespinasse,
	linux-mm, Jared Hulbert, netdev, John David Anglin,
	HATAYAMA Daisuke, Konstantin Khlebnikov, Carsten Otte,
	Peter Zijlstra
In-Reply-To: <52EBBA21.1070009@vmware.com>

On 01/31/2014 03:58 PM, Thomas Hellstrom wrote:
> On 01/31/2014 03:40 PM, Vlastimil Babka wrote:
>> On 01/15/2014 05:06 PM, Daniel Borkmann wrote:
>>> [keeping netdev in loop as well]
>>>
>>> On 01/15/2014 03:27 PM, Vlastimil Babka wrote:
>>>> On 01/13/2014 12:39 PM, Daniel Borkmann wrote:
>>>>> On 01/13/2014 11:16 AM, Vlastimil Babka wrote:
>>>>>> On 01/11/2014 02:32 PM, Daniel Borkmann wrote:
>>>>>>> On 01/11/2014 07:22 AM, Andrew Morton wrote:
>>>>>>>> On Fri, 10 Jan 2014 19:23:26 +0100 Daniel Borkmann <borkmann@iogearbox.net> wrote:
>>>>>>>>
>>>>>>>>> This is being reliably triggered for each mmaped() packet(7)
>>>>>>>>> socket from user space, basically during unmapping resp.
>>>>>>>>> closing the TX socket.
>>>>>>>>>
>>>>>>>>> I believe due to some change in transparent hugepages code ?
>>>>>>>>>
>>>>>>>>> When I disable transparent hugepages, everything works fine,
>>>>>>>>> no BUG triggered.
>>>>>>>>>
>>>>>>>>> I'd be happy to test patches.
>>>>>>>> Did the inclusion of c424be1cbbf852e46acc8 ("mm: munlock: fix a bug
>>>>>>>> where THP tail page is encountered") in current mainline fix this?
>>>>>>> Thanks for your answer Andrew!
>>>>>>>
>>>>>>> Hm, I just cherry-picked that onto current net-next as I have some work
>>>>>>> there, and this time I got ...
>>>>>>>
>>>>>>> (User space uses packet mmap() and mlockall(MCL_CURRENT | MCL_FUTURE)
>>>>>>>         and on shutdown munlockall() ...)
>>>>>>>
>>>>>>> [   63.863672] ------------[ cut here ]------------
>>>>>>> [   63.863702] kernel BUG at mm/mlock.c:507!
>>>>>>> [   63.863721] invalid opcode: 0000 [#1] SMP
>>>>>>> [   63.863743] Modules linked in: fuse ebtable_nat xt_CHECKSUM nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6table_nat nf_nat_ipv6 ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack bridge ebtable_filter ebtables stp llc ip6table_filter ip6_tables rfcomm bnep snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_intel snd_hda_codec iwlwifi cfg80211 snd_hwdep btusb snd_seq bluetooth sdhci_pci snd_seq_device e1000e tpm_tis snd_pcm thinkpad_acpi sdhci ptp tpm uvcvideo pps_core snd_page_alloc snd_timer snd rfkill mmc_core iTCO_wdt iTCO_vendor_support lpc_ich mfd_core soundcore joydev wmi videobuf2_vmalloc videobuf2_memops videobuf2_core i2c_i801 pcspkr videodev media uinput i915
>>>>>>> [   63.864152]  i2c_algo_bit drm_kms_helper drm i2c_core video
>>>>>>> [   63.864181] CPU: 1 PID: 1617 Comm: trafgen Not tainted 3.13.0-rc6+ #15
>>>>>>> [   63.864209] Hardware name: LENOVO 2429BP3/2429BP3, BIOS G4ET37WW (1.12 ) 05/29/2012
>>>>>>> [   63.864242] task: ffff8801ee060000 ti: ffff8800b5954000 task.ti: ffff8800b5954000
>>>>>>> [   63.864274] RIP: 0010:[<ffffffff8116fa9a>]  [<ffffffff8116fa9a>] munlock_vma_pages_range+0x2ea/0x2f0
>>>>>>> [   63.864318] RSP: 0018:ffff8800b5955e08  EFLAGS: 00010202
>>>>>>> [   63.864341] RAX: 00000000000001ff RBX: ffff8800b58f7508 RCX: 0000000000000034
>>>>>>> [   63.864372] RDX: 00000007f0708992 RSI: ffffea0002c3e700 RDI: ffffea0002c3e700
>>>>>>> [   63.864402] RBP: ffff8800b5955ee0 R08: 3800000000000000 R09: a8000b0f9c000000
>>>>>>> [   63.864432] R10: 57ffdef066c3e700 R11: ffffff5cfb00c14a R12: ffffea0002c3e700
>>>>>>> [   63.864462] R13: ffff8800b5955f48 R14: 00007f0708992000 R15: 00007f0708992000
>>>>>>> [   63.864492] FS:  00007f0708b92740(0000) GS:ffff88021e240000(0000) knlGS:0000000000000000
>>>>>>> [   63.864526] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>>>>> [   63.864551] CR2: 00007f33bb373000 CR3: 00000000b2a2c000 CR4: 00000000001407e0
>>>>>>> [   63.864581] Stack:
>>>>>>> [   63.864593]  ffff8800b5955ed0 00007f0708b91fff 00007f0708b92000 ffff8800b5955e48
>>>>>>> [   63.864632]  000001ff810c864b ffff8801ee060000 0000000000000000 0000000000000000
>>>>>>> [   63.864669]  ffff8800b5955e58 ffff8801ee060000 0000000700000086 ffff8801ee060000
>>>>>>> [   63.864708] Call Trace:
>>>>>>> [   63.864724]  [<ffffffff816956bc>] ? _raw_spin_unlock_irq+0x2c/0x30
>>>>>>> [   63.864754]  [<ffffffff81171b52>] ? vma_merge+0xc2/0x330
>>>>>>> [   63.864786]  [<ffffffff8116fb9c>] mlock_fixup+0xfc/0x190
>>>>>>> [   63.864812]  [<ffffffff8116fde7>] do_mlockall+0x87/0xc0
>>>>>>> [   63.864836]  [<ffffffff811702df>] sys_munlockall+0x2f/0x50
>>>>>>> [   63.864873]  [<ffffffff8169e192>] system_call_fastpath+0x16/0x1b
>>>>>>> [   63.864898] Code: d7 48 89 95 28 ff ff ff e8 a4 04 fe ff 84 c0 48 8b 95 28 ff ff ff 0f 85 5a ff ff ff e9 46 ff ff ff e8 3f ac 51 00 e8 34 ac 51 00 <0f> 0b 0f 1f 40 00 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55
>>>>>>> [   63.865114] RIP  [<ffffffff8116fa9a>] munlock_vma_pages_range+0x2ea/0x2f0
>>>>>>> [   63.865148]  RSP <ffff8800b5955e08>
>>>>>>> [   63.874968] ------------[ cut here ]------------
>>>>>>>
>>>>>>> ... when I find some time, I'll try with normal torvalds' tree, maybe some
>>>>>>> other patches are missing as well, not sure right now.
>>>>>> Uh so the triggered assertion is the one added by this very patch, and there are no more changes wrt this in mainline.
>>>>>>
>>>>>> If you can still try debug patches, please try this. Thanks.
>>>>> Yes, thanks, I'll come back to you some time by today.
>>>> Daniel sent me (off-list) instructions to reproduce:
>>>>
>>>>> Then in the kernel source tree, you'll find:
>>>>>
>>>>>       tools/testing/selftests/net/
>>>>>
>>>>> There, just do a 'make' and run ./psock_tpacket
>>>> It reproduces deterministically in mainline since 3.12, i.e. my munlock
>>>> performance series. Based on the initial debug output, I've expanded the
>>>> debug patch below a bit:
>>>>
>>>>>> From: Vlastimil Babka <vbabka@suse.cz>
>>>>>> Date: Mon, 13 Jan 2014 11:13:53 +0100
>>>>>> Subject: [PATCH] debug munlock_vma_pages_range
>>>>>>
>>>>>> ---
>>>>>>       mm/mlock.c | 22 ++++++++++++++++++++--
>>>>>>       1 file changed, 20 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/mm/mlock.c b/mm/mlock.c
>>>>>> index c59c420..7d0e29a 100644
>>>>>> --- a/mm/mlock.c
>>>>>> +++ b/mm/mlock.c
>>>>>> @@ -448,12 +448,14 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
>>>>>>       void munlock_vma_pages_range(struct vm_area_struct *vma,
>>>>>>       			     unsigned long start, unsigned long end)
>>>>>>       {
>>>>>> +	unsigned long orig_start = start;
>>>>>> +	unsigned long page_increm = 0;
>>>>>> +
>>>>>>       	vma->vm_flags &= ~VM_LOCKED;
>>>>>>
>>>>>>       	while (start < end) {
>>>>>>       		struct page *page = NULL;
>>>>>>       		unsigned int page_mask;
>>>>>> -		unsigned long page_increm;
>>>>>>       		struct pagevec pvec;
>>>>>>       		struct zone *zone;
>>>>>>       		int zoneid;
>>>>>> @@ -504,7 +506,23 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
>>>>>>       			}
>>>>>>       		}
>>>>>>       		/* It's a bug to munlock in the middle of a THP page */
>>>>>> -		VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
>>>>>> +		if ((start >> PAGE_SHIFT) & page_mask) {
>>>>>> +			dump_page(page);
>>>>>> +			printk("start=%lu pfn=%lu orig_start=%lu "
>>>>>> +			       "prev_page_increm=%lu page_mask=%u "
>>>>>> +			       "vm_start=%lu vm_end=%lu vm_flags=%lu\n",
>>>>>> +				start, page_to_pfn(page), orig_start,
>>>>>> +				page_increm, page_mask,
>>>>>> +				vma->vm_start, vma->vm_end,
>>>>>> +				vma->vm_flags);
>>>> +                        printk("vm_ops=%pF, open=%pF, fault=%pF, remap_pages=%pF\n", vma->vm_ops,
>>>> +                                vma->vm_ops->open, vma->vm_ops->fault, vma->vm_ops->remap_pages);
>>>> +                        if (PageCompound(page)) {
>>>> +                                printk("page is compound with order=%d\n", compound_order(page));
>>>> +                        }
>>>>>> +			if (PageTail(page)) {
>>>>>> +				struct page *first_page = page->first_page;
>>>>>> +				printk("first_page pfn=%lu\n",
>>>>>> +						page_to_pfn(first_page));
>>>>>> +				dump_page(first_page);
>>>>>> +			}
>>>>>> +			VM_BUG_ON(true);
>>>>>> +		}
>>>>>>       		page_increm = 1 + page_mask;
>>>>>>       		start += page_increm * PAGE_SIZE;
>>>>>>       next:
>>>>>>
>>>> And got output like this:
>>>>
>>>> page:ffffea0002474a40 count:5 mapcount:1 mapping:          (null) index:0x0
>>>> page flags: 0x100000000004004(referenced|head)
>>>> start=140242647736320 pfn=682616 orig_start=140242647736320 prev_page_increm=0 page_mask=511 vm_start=140242647736320 vm_end=140242651930624 vm_flags=268435707
>>>> vm_ops=packet_mmap_ops+0x0/0xfffffffffffff8e0 [af_packet], open=packet_mm_open+0x0/0x30 [af_packet], fault=          (null), remap_pages=          (null)
>>>> page is compound with order=2
>>>>
>>>> Observations:
>>>> - address 140242647736320 is where the vma starts, and is not aligned to 512 pages
>>>>      (so it cannot be a THP head which the munlock expects). Yet there is a head page
>>>>      that triggers the PageTransHuge() and consequently hpage_nr_pages() in munlock_vma_page()
>>>>      That's why page_mask is determined to be 511 and the code thinks it's in the
>>>>      middle of a THP page.
>>>> - in fact, the page is a compound page with order=2
>>>> - the VM flags (except (may)read/write) are VM_SHARED and VM_MIXEDMAP
>>>> - the vma was mmapped by packet_mmap() (net/packet/af_packet.c) which uses
>>>>      vm_insert_page(), which adds the VM_MIXEDMAP flag
>>>> - the buffers that are mapped were allocated by alloc_one_pg_vec_page()
>>>>      where flags indeed include __GFP_COMP
>>>>
>>>> So clearly there is a way to have mlock/munlock operate on a vma that contains
>>>> compound pages and confuse the checks for PageTransHuge().
>>>>
>>>> The checks for THP in munlock came with commit ff6a6da60b89 ("mm: accelerate munlock()
>>>> treatment of THP pages"), i.e. since 3.9, but did not trigger a bug. It however
>>>> makes munlock_vma_pages_range() skip pages until the next 512-pages-aligned page,
>>>> when it encounters a head page. If the head page is of smaller order and is followed
>>>> by normal LRU pages (theoretically, I'm not sure if that's possible, or done anywhere),
>>>> they wouldn't get munlocked.
>>>>
>>>> My commit 7225522bb429 ("mm: munlock: batch non-THP page isolation and
>>>> munlock+putback using pagevec") (since 3.12) has added a new PageTransHuge() check
>>>> that can trigger on tail pages of the compound page here. Commit c424be1cbbf852e46acc8
>>>> ("mm: munlock: fix a bug where THP tail page is encountered") in current rc's removes
>>>> one class of bugs here, but still non-THP compound pages are not expected in mlock/munlock,
>>>> which leads to this assertion failing.
>>>>
>>>> The question is what is the correct fix, and I'm not that familiar with VM_MIXEDMAP
>>>> to decide.
>>>>
>>>> Option 1: mlocking VM_MIXEDMAP vma's makes no sense. They should be treated like VM_PFNMAP
>>>>              and added to VM_SPECIAL, which makes m(un)lock skip them completely.
>>>>
>>>> Option 2: if indeed VM_MIXEDMAP can contain PageLRU pages for which mlocking is useful,
>>>>              VM_NO_THP should be checked in munlock before attempting PageTransHuge() and
>>>>              friends. VM_NO_THP already contains VM_MIXEDMAP, so knowing that there can be
>>>>              no THP means we don't try to optimize for it and no unexpected head pages trip us.
>>>>
>>>> Thoughts?
>> OK, here's a RFC patch to hopefully help get us somewhere. I went for
>> Option1, as I didn't see anyone using VM_MIXEDMAP also for LRU pages,
>> and Option2 was ugly to implement and also seemed quite arbitrary. I'm
>> not sure if making VM_MIXEDMAP also non-mergeable this way is an issue
>> though.
>>
>>
>
> Hi!
>
> Forgive an ignorant question, but are anonymous COW'd pages LRU pages?

I believe so, but I've checked where VM_MIXEDMAP is used in TTM and it 
seems all those vma's are also VM_IO which means they are already 
included in VM_SPECIAL and this change won't affect them.

Vlastimil

> The reason I'm asking is that TTM VM_MIXEDMAP vmas may contain such pages.
>
> /Thomas
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [BUG] at include/linux/page-flags.h:415 (PageTransHuge)
From: Thomas Hellstrom @ 2014-01-31 15:35 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Daniel Borkmann, Andrew Morton, linux-kernel, Michel Lespinasse,
	linux-mm, Jared Hulbert, netdev, John David Anglin,
	HATAYAMA Daisuke, Konstantin Khlebnikov, Carsten Otte,
	Peter Zijlstra
In-Reply-To: <52EBC06F.2050307@suse.cz>

On 01/31/2014 04:25 PM, Vlastimil Babka wrote:
> On 01/31/2014 03:58 PM, Thomas Hellstrom wrote:
>> On 01/31/2014 03:40 PM, Vlastimil Babka wrote:
>>> On 01/15/2014 05:06 PM, Daniel Borkmann wrote:
>>>> [keeping netdev in loop as well]
>>>>
>>>> On 01/15/2014 03:27 PM, Vlastimil Babka wrote:
>>>>> On 01/13/2014 12:39 PM, Daniel Borkmann wrote:
>>>>>> On 01/13/2014 11:16 AM, Vlastimil Babka wrote:
>>>>>>> On 01/11/2014 02:32 PM, Daniel Borkmann wrote:
>>>>>>>> On 01/11/2014 07:22 AM, Andrew Morton wrote:
>>>>>>>>> On Fri, 10 Jan 2014 19:23:26 +0100 Daniel Borkmann
>>>>>>>>> <borkmann@iogearbox.net> wrote:
>>>>>>>>>
>>>>>>>>>> This is being reliably triggered for each mmaped() packet(7)
>>>>>>>>>> socket from user space, basically during unmapping resp.
>>>>>>>>>> closing the TX socket.
>>>>>>>>>>
>>>>>>>>>> I believe due to some change in transparent hugepages code ?
>>>>>>>>>>
>>>>>>>>>> When I disable transparent hugepages, everything works fine,
>>>>>>>>>> no BUG triggered.
>>>>>>>>>>
>>>>>>>>>> I'd be happy to test patches.
>>>>>>>>> Did the inclusion of c424be1cbbf852e46acc8 ("mm: munlock: fix
>>>>>>>>> a bug
>>>>>>>>> where THP tail page is encountered") in current mainline fix
>>>>>>>>> this?
>>>>>>>> Thanks for your answer Andrew!
>>>>>>>>
>>>>>>>> Hm, I just cherry-picked that onto current net-next as I have
>>>>>>>> some work
>>>>>>>> there, and this time I got ...
>>>>>>>>
>>>>>>>> (User space uses packet mmap() and mlockall(MCL_CURRENT |
>>>>>>>> MCL_FUTURE)
>>>>>>>>         and on shutdown munlockall() ...)
>>>>>>>>
>>>>>>>> [   63.863672] ------------[ cut here ]------------
>>>>>>>> [   63.863702] kernel BUG at mm/mlock.c:507!
>>>>>>>> [   63.863721] invalid opcode: 0000 [#1] SMP
>>>>>>>> [   63.863743] Modules linked in: fuse ebtable_nat xt_CHECKSUM
>>>>>>>> nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE
>>>>>>>> ip6table_nat nf_nat_ipv6 ip6table_mangle ip6t_REJECT
>>>>>>>> nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat
>>>>>>>> iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack
>>>>>>>> nf_conntrack bridge ebtable_filter ebtables stp llc
>>>>>>>> ip6table_filter ip6_tables rfcomm bnep snd_hda_codec_hdmi
>>>>>>>> snd_hda_codec_realtek snd_hda_intel snd_hda_codec iwlwifi
>>>>>>>> cfg80211 snd_hwdep btusb snd_seq bluetooth sdhci_pci
>>>>>>>> snd_seq_device e1000e tpm_tis snd_pcm thinkpad_acpi sdhci ptp
>>>>>>>> tpm uvcvideo pps_core snd_page_alloc snd_timer snd rfkill
>>>>>>>> mmc_core iTCO_wdt iTCO_vendor_support lpc_ich mfd_core
>>>>>>>> soundcore joydev wmi videobuf2_vmalloc videobuf2_memops
>>>>>>>> videobuf2_core i2c_i801 pcspkr videodev media uinput i915
>>>>>>>> [   63.864152]  i2c_algo_bit drm_kms_helper drm i2c_core video
>>>>>>>> [   63.864181] CPU: 1 PID: 1617 Comm: trafgen Not tainted
>>>>>>>> 3.13.0-rc6+ #15
>>>>>>>> [   63.864209] Hardware name: LENOVO 2429BP3/2429BP3, BIOS
>>>>>>>> G4ET37WW (1.12 ) 05/29/2012
>>>>>>>> [   63.864242] task: ffff8801ee060000 ti: ffff8800b5954000
>>>>>>>> task.ti: ffff8800b5954000
>>>>>>>> [   63.864274] RIP: 0010:[<ffffffff8116fa9a>] 
>>>>>>>> [<ffffffff8116fa9a>] munlock_vma_pages_range+0x2ea/0x2f0
>>>>>>>> [   63.864318] RSP: 0018:ffff8800b5955e08  EFLAGS: 00010202
>>>>>>>> [   63.864341] RAX: 00000000000001ff RBX: ffff8800b58f7508 RCX:
>>>>>>>> 0000000000000034
>>>>>>>> [   63.864372] RDX: 00000007f0708992 RSI: ffffea0002c3e700 RDI:
>>>>>>>> ffffea0002c3e700
>>>>>>>> [   63.864402] RBP: ffff8800b5955ee0 R08: 3800000000000000 R09:
>>>>>>>> a8000b0f9c000000
>>>>>>>> [   63.864432] R10: 57ffdef066c3e700 R11: ffffff5cfb00c14a R12:
>>>>>>>> ffffea0002c3e700
>>>>>>>> [   63.864462] R13: ffff8800b5955f48 R14: 00007f0708992000 R15:
>>>>>>>> 00007f0708992000
>>>>>>>> [   63.864492] FS:  00007f0708b92740(0000)
>>>>>>>> GS:ffff88021e240000(0000) knlGS:0000000000000000
>>>>>>>> [   63.864526] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>>>>>> [   63.864551] CR2: 00007f33bb373000 CR3: 00000000b2a2c000 CR4:
>>>>>>>> 00000000001407e0
>>>>>>>> [   63.864581] Stack:
>>>>>>>> [   63.864593]  ffff8800b5955ed0 00007f0708b91fff
>>>>>>>> 00007f0708b92000 ffff8800b5955e48
>>>>>>>> [   63.864632]  000001ff810c864b ffff8801ee060000
>>>>>>>> 0000000000000000 0000000000000000
>>>>>>>> [   63.864669]  ffff8800b5955e58 ffff8801ee060000
>>>>>>>> 0000000700000086 ffff8801ee060000
>>>>>>>> [   63.864708] Call Trace:
>>>>>>>> [   63.864724]  [<ffffffff816956bc>] ?
>>>>>>>> _raw_spin_unlock_irq+0x2c/0x30
>>>>>>>> [   63.864754]  [<ffffffff81171b52>] ? vma_merge+0xc2/0x330
>>>>>>>> [   63.864786]  [<ffffffff8116fb9c>] mlock_fixup+0xfc/0x190
>>>>>>>> [   63.864812]  [<ffffffff8116fde7>] do_mlockall+0x87/0xc0
>>>>>>>> [   63.864836]  [<ffffffff811702df>] sys_munlockall+0x2f/0x50
>>>>>>>> [   63.864873]  [<ffffffff8169e192>]
>>>>>>>> system_call_fastpath+0x16/0x1b
>>>>>>>> [   63.864898] Code: d7 48 89 95 28 ff ff ff e8 a4 04 fe ff 84
>>>>>>>> c0 48 8b 95 28 ff ff ff 0f 85 5a ff ff ff e9 46 ff ff ff e8 3f
>>>>>>>> ac 51 00 e8 34 ac 51 00 <0f> 0b 0f 1f 40 00 0f 1f 44 00 00 55
>>>>>>>> 48 89 e5 41 57 41 56 41 55
>>>>>>>> [   63.865114] RIP  [<ffffffff8116fa9a>]
>>>>>>>> munlock_vma_pages_range+0x2ea/0x2f0
>>>>>>>> [   63.865148]  RSP <ffff8800b5955e08>
>>>>>>>> [   63.874968] ------------[ cut here ]------------
>>>>>>>>
>>>>>>>> ... when I find some time, I'll try with normal torvalds' tree,
>>>>>>>> maybe some
>>>>>>>> other patches are missing as well, not sure right now.
>>>>>>> Uh so the triggered assertion is the one added by this very
>>>>>>> patch, and there are no more changes wrt this in mainline.
>>>>>>>
>>>>>>> If you can still try debug patches, please try this. Thanks.
>>>>>> Yes, thanks, I'll come back to you some time by today.
>>>>> Daniel sent me (off-list) instructions to reproduce:
>>>>>
>>>>>> Then in the kernel source tree, you'll find:
>>>>>>
>>>>>>       tools/testing/selftests/net/
>>>>>>
>>>>>> There, just do a 'make' and run ./psock_tpacket
>>>>> It reproduces deterministically in mainline since 3.12, i.e. my
>>>>> munlock
>>>>> performance series. Based on the initial debug output, I've
>>>>> expanded the
>>>>> debug patch below a bit:
>>>>>
>>>>>>> From: Vlastimil Babka <vbabka@suse.cz>
>>>>>>> Date: Mon, 13 Jan 2014 11:13:53 +0100
>>>>>>> Subject: [PATCH] debug munlock_vma_pages_range
>>>>>>>
>>>>>>> ---
>>>>>>>       mm/mlock.c | 22 ++++++++++++++++++++--
>>>>>>>       1 file changed, 20 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/mm/mlock.c b/mm/mlock.c
>>>>>>> index c59c420..7d0e29a 100644
>>>>>>> --- a/mm/mlock.c
>>>>>>> +++ b/mm/mlock.c
>>>>>>> @@ -448,12 +448,14 @@ static unsigned long
>>>>>>> __munlock_pagevec_fill(struct pagevec *pvec,
>>>>>>>       void munlock_vma_pages_range(struct vm_area_struct *vma,
>>>>>>>                        unsigned long start, unsigned long end)
>>>>>>>       {
>>>>>>> +    unsigned long orig_start = start;
>>>>>>> +    unsigned long page_increm = 0;
>>>>>>> +
>>>>>>>           vma->vm_flags &= ~VM_LOCKED;
>>>>>>>
>>>>>>>           while (start < end) {
>>>>>>>               struct page *page = NULL;
>>>>>>>               unsigned int page_mask;
>>>>>>> -        unsigned long page_increm;
>>>>>>>               struct pagevec pvec;
>>>>>>>               struct zone *zone;
>>>>>>>               int zoneid;
>>>>>>> @@ -504,7 +506,23 @@ void munlock_vma_pages_range(struct
>>>>>>> vm_area_struct *vma,
>>>>>>>                   }
>>>>>>>               }
>>>>>>>               /* It's a bug to munlock in the middle of a THP
>>>>>>> page */
>>>>>>> -        VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
>>>>>>> +        if ((start >> PAGE_SHIFT) & page_mask) {
>>>>>>> +            dump_page(page);
>>>>>>> +            printk("start=%lu pfn=%lu orig_start=%lu "
>>>>>>> +                   "prev_page_increm=%lu page_mask=%u "
>>>>>>> +                   "vm_start=%lu vm_end=%lu vm_flags=%lu\n",
>>>>>>> +                start, page_to_pfn(page), orig_start,
>>>>>>> +                page_increm, page_mask,
>>>>>>> +                vma->vm_start, vma->vm_end,
>>>>>>> +                vma->vm_flags);
>>>>> +                        printk("vm_ops=%pF, open=%pF, fault=%pF,
>>>>> remap_pages=%pF\n", vma->vm_ops,
>>>>> +                                vma->vm_ops->open,
>>>>> vma->vm_ops->fault, vma->vm_ops->remap_pages);
>>>>> +                        if (PageCompound(page)) {
>>>>> +                                printk("page is compound with
>>>>> order=%d\n", compound_order(page));
>>>>> +                        }
>>>>>>> +            if (PageTail(page)) {
>>>>>>> +                struct page *first_page = page->first_page;
>>>>>>> +                printk("first_page pfn=%lu\n",
>>>>>>> +                        page_to_pfn(first_page));
>>>>>>> +                dump_page(first_page);
>>>>>>> +            }
>>>>>>> +            VM_BUG_ON(true);
>>>>>>> +        }
>>>>>>>               page_increm = 1 + page_mask;
>>>>>>>               start += page_increm * PAGE_SIZE;
>>>>>>>       next:
>>>>>>>
>>>>> And got output like this:
>>>>>
>>>>> page:ffffea0002474a40 count:5 mapcount:1 mapping:          (null)
>>>>> index:0x0
>>>>> page flags: 0x100000000004004(referenced|head)
>>>>> start=140242647736320 pfn=682616 orig_start=140242647736320
>>>>> prev_page_increm=0 page_mask=511 vm_start=140242647736320
>>>>> vm_end=140242651930624 vm_flags=268435707
>>>>> vm_ops=packet_mmap_ops+0x0/0xfffffffffffff8e0 [af_packet],
>>>>> open=packet_mm_open+0x0/0x30 [af_packet], fault=          (null),
>>>>> remap_pages=          (null)
>>>>> page is compound with order=2
>>>>>
>>>>> Observations:
>>>>> - address 140242647736320 is where the vma starts, and is not
>>>>> aligned to 512 pages
>>>>>      (so it cannot be a THP head which the munlock expects). Yet
>>>>> there is a head page
>>>>>      that triggers the PageTransHuge() and consequently
>>>>> hpage_nr_pages() in munlock_vma_page()
>>>>>      That's why page_mask is determined to be 511 and the code
>>>>> thinks it's in the
>>>>>      middle of a THP page.
>>>>> - in fact, the page is a compound page with order=2
>>>>> - the VM flags (except (may)read/write) are VM_SHARED and VM_MIXEDMAP
>>>>> - the vma was mmapped by packet_mmap() (net/packet/af_packet.c)
>>>>> which uses
>>>>>      vm_insert_page(), which adds the VM_MIXEDMAP flag
>>>>> - the buffers that are mapped were allocated by
>>>>> alloc_one_pg_vec_page()
>>>>>      where flags indeed include __GFP_COMP
>>>>>
>>>>> So clearly there is a way to have mlock/munlock operate on a vma
>>>>> that contains
>>>>> compound pages and confuse the checks for PageTransHuge().
>>>>>
>>>>> The checks for THP in munlock came with commit ff6a6da60b89 ("mm:
>>>>> accelerate munlock()
>>>>> treatment of THP pages"), i.e. since 3.9, but did not trigger a
>>>>> bug. It however
>>>>> makes munlock_vma_pages_range() skip pages until the next
>>>>> 512-pages-aligned page,
>>>>> when it encounters a head page. If the head page is of smaller
>>>>> order and is followed
>>>>> by normal LRU pages (theoretically, I'm not sure if that's
>>>>> possible, or done anywhere),
>>>>> they wouldn't get munlocked.
>>>>>
>>>>> My commit 7225522bb429 ("mm: munlock: batch non-THP page isolation
>>>>> and
>>>>> munlock+putback using pagevec") (since 3.12) has added a new
>>>>> PageTransHuge() check
>>>>> that can trigger on tail pages of the compound page here. Commit
>>>>> c424be1cbbf852e46acc8
>>>>> ("mm: munlock: fix a bug where THP tail page is encountered") in
>>>>> current rc's removes
>>>>> one class of bugs here, but still non-THP compound pages are not
>>>>> expected in mlock/munlock,
>>>>> which leads to this assertion failing.
>>>>>
>>>>> The question is what is the correct fix, and I'm not that familiar
>>>>> with VM_MIXEDMAP
>>>>> to decide.
>>>>>
>>>>> Option 1: mlocking VM_MIXEDMAP vma's makes no sense. They should be
>>>>> treated like VM_PFNMAP
>>>>>              and added to VM_SPECIAL, which makes m(un)lock skip
>>>>> them completely.
>>>>>
>>>>> Option 2: if indeed VM_MIXEDMAP can contain PageLRU pages for
>>>>> which mlocking is useful,
>>>>>              VM_NO_THP should be checked in munlock before
>>>>> attempting PageTransHuge() and
>>>>>              friends. VM_NO_THP already contains VM_MIXEDMAP, so
>>>>> knowing that there can be
>>>>>              no THP means we don't try to optimize for it and no
>>>>> unexpected head pages trip us.
>>>>>
>>>>> Thoughts?
>>> OK, here's a RFC patch to hopefully help get us somewhere. I went for
>>> Option1, as I didn't see anyone using VM_MIXEDMAP also for LRU pages,
>>> and Option2 was ugly to implement and also seemed quite arbitrary. I'm
>>> not sure if making VM_MIXEDMAP also non-mergeable this way is an issue
>>> though.
>>>
>>>
>>
>> Hi!
>>
>> Forgive an ignorant question, but are anonymous COW'd pages LRU pages?
>
> I believe so, but I've checked where VM_MIXEDMAP is used in TTM and it
> seems all those vma's are also VM_IO which means they are already
> included in VM_SPECIAL and this change won't affect them.
>
> Vlastimil
>

OK. Thanks.

/Thomas




>> The reason I'm asking is that TTM VM_MIXEDMAP vmas may contain such
>> pages.
>>
>> /Thomas
>>
>> -- 
>> To unsubscribe, send a message with 'unsubscribe linux-mm' in
>> the body to majordomo@kvack.org.  For more info on Linux MM,
>> see:
>> https://urldefense.proofpoint.com/v1/url?u=http://www.linux-mm.org/&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=l5Ago9ekmVFZ3c4M6eauqrJWGwjf6fTb%2BP3CxbBFkVM%3D%0A&m=Wlb%2FkDdCXWj2m8QNoBUogfTl0sK0cH2%2BOONacP0U1SE%3D%0A&s=84b13d34ca94efa34cd185d7ae467b18377a92330174671ac93dbb2948cda967
>> .
>> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>>

^ permalink raw reply

* [PATCH 00/34] net: Use pci_enable_msix_range()
From: Alexander Gordeev @ 2014-01-31 15:46 UTC (permalink / raw)
  To: linux-kernel
  Cc: VMware, Inc., linux-pci, Dimitris Michailidis, Shreyas Bhatewara,
	Rasesh Mody, Eli Cohen, Sujith Sankar, e1000-devel, Jingoo Han,
	Amir Vadai, Jesse Brandeburg, Manish Chopra, Alexander Gordeev,
	Govindarajulu Varadarajan, Santosh Raspatur, Casey Leedom,
	Shahed Shaikh, Sony Chacko, Ajit Khaparde, Shradha Shah,
	Ron Mercer, Michael Chan, Subbu Seetharaman

As a result of the deprecation of the MSI-X/MSI enablement functions
pci_enable_msix() and pci_enable_msi_block(), all drivers
using these two interfaces need to be updated to use the
new pci_enable_msi_range() and pci_enable_msix_range()
interfaces.

Alexander Gordeev (34):
  bnx2: Use pci_enable_msix_range()
  bnx2x: Use pci_enable_msix_range()
  tg3: Use pci_enable_msix_range()
  bna: Use pci_enable_msix_range()
  cxgb3: Remove superfluous call to pci_disable_msix()
  cxgb3: Use pci_enable_msix_range()
  cxgb4: Use pci_enable_msix_range()
  cxgb4vf: Remove superfluous call to pci_disable_msix()
  cxgb4vf: Use pci_enable_msix_range()
  enic: Use pci_enable_msix_range()
  benet: Use pci_enable_msix_range()
  e1000e: Use pci_enable_msix_range()
  i40e: Use pci_enable_msix_range()
  igb: Use pci_enable_msix_range()
  igbvf: Use pci_enable_msix_range()
  ixgbe: Use pci_enable_msix_range()
  ixgbevf: Use pci_enable_msix_range()
  mlx4: Use pci_enable_msix_range()
  mlx5: Use pci_enable_msix_range()
  myri10ge: Use pci_enable_msix_range()
  s2io: Use pci_enable_msix_range()
  vxge: Use pci_enable_msix_range()
  forcedeth: Fix invalid errno reporting in nv_request_irq()
  forcedeth: Cleanup MSI-X to MSI to INTx fallback code
  forcedeth: Use pci_enable_msix_range()
  netxen: Use pci_enable_msix_range()
  qlcnic: Cleanup qlcnic_enable_msix() return values
  qlcnic: Use pci_enable_msix_range()
  qlge: Get rid of an redundant assignment
  qlge: Use pci_enable_msix_range()
  efx: Use pci_enable_msix_range()
  niu: Use pci_enable_msix_range()
  vmxnet3: Fix MSI-X/MSI enablement code
  vmxnet3: Use pci_enable_msix_range()

 drivers/net/ethernet/broadcom/bnx2.c               |   15 +--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |   48 ++++-----
 drivers/net/ethernet/broadcom/tg3.c                |    6 +-
 drivers/net/ethernet/brocade/bna/bnad.c            |   23 ++---
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c    |   26 ++---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |   50 +++++-----
 .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c    |   35 +++----
 drivers/net/ethernet/cisco/enic/enic_main.c        |    6 +-
 drivers/net/ethernet/emulex/benet/be_main.c        |   31 +++---
 drivers/net/ethernet/intel/e1000e/netdev.c         |    9 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c        |   33 +-----
 drivers/net/ethernet/intel/igb/igb_main.c          |    9 +-
 drivers/net/ethernet/intel/igbvf/netdev.c          |    6 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c       |   16 +--
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c  |   33 ++----
 drivers/net/ethernet/mellanox/mlx4/main.c          |   19 +---
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   16 +--
 drivers/net/ethernet/myricom/myri10ge/myri10ge.c   |   32 +++---
 drivers/net/ethernet/neterion/s2io.c               |    5 +-
 drivers/net/ethernet/neterion/vxge/vxge-main.c     |   15 ++-
 drivers/net/ethernet/nvidia/forcedeth.c            |   41 +++++---
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    5 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |   18 ++--
 drivers/net/ethernet/qlogic/qlge/qlge_main.c       |   16 +--
 drivers/net/ethernet/sfc/efx.c                     |   20 ++--
 drivers/net/ethernet/sun/niu.c                     |   11 +--
 drivers/net/vmxnet3/vmxnet3_drv.c                  |  110 ++++++++-----------
 27 files changed, 274 insertions(+), 380 deletions(-)

-- 
1.7.7.6


------------------------------------------------------------------------------
WatchGuard Dimension instantly turns raw network data into actionable 
security intelligence. It gives you real-time visual feedback on key
security issues and trends.  Skip the complicated setup - simply import
a virtual appliance and go from zero to informed in seconds.
http://pubads.g.doubleclick.net/gampad/clk?id=123612991&iu=/4140/ostg.clktrk
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel® Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply

* Re: OOPS in nf_ct_unlink_expect_report using Polycom RealPresence Mobile
From: astx @ 2014-01-31 16:04 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, netfilter
In-Reply-To: <20140131125014.Horde.G2TRlt-60JJ0Nl_3Y8IniQ2@bigboss.aws-it.at>

Dear Alexey,

seems to help. Thank you for your quick response. Kernel 3.10.28 is  
now stable using h323 / Polycom.

Will test this patch with different kernel versions the next days.

Best Regards,
Toni


Original message from Alexey Dobriyan:
--------------------------------------------------------------------------------------
Date	Fri, 31 Jan 2014 16:29:58 +0300
Subject	Re: OOPS in nf_ct_unlink_expect_report using Polycom  
RealPresence Mobile
From	Alexey Dobriyan <>


> Disabling nf_nat_h323 and nf_conntrack_h323 avoids crash -
> but video conferencing software is no more usable.
>
> BUG: unable to handle kernel paging request at 00100104
> IP: [<f8214f07>] nf_ct_unlink_expect_report

This must be the same bug fixed in SIP module:
commit 3f509c689a07a4aa989b426893d8491a7ffcc410
netfilter: nf_nat_sip: fix incorrect handling of EBUSY for RTCP expectation

Try attached patch (if this is mangled):

--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb,
struct nf_conn *ct,
   ret = nf_ct_expect_related(rtcp_exp);
   if (ret == 0)
   break;
- else if (ret != -EBUSY) {
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
   nf_ct_unexpect_related(rtp_exp);
   nated_port = 0;
   break;--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb,  
struct nf_conn *ct,
  			ret = nf_ct_expect_related(rtcp_exp);
  			if (ret == 0)
  				break;
-			else if (ret != -EBUSY) {
+			else if (ret == -EBUSY) {
+				nf_ct_unexpect_related(rtp_exp);
+				continue;
+			} else if (ret < 0) {
  				nf_ct_unexpect_related(rtp_exp);
  				nated_port = 0;
  				break;

^ permalink raw reply

* [PATCH 0/2] Drivers: net: hyperv: Cleanup the receive path
From: K. Y. Srinivasan @ 2014-01-31 16:24 UTC (permalink / raw)
  To: davem, netdev, linux-kernel, devel, olaf, apw, jasowang

Some minor cleanup of the receive path. Get rid of unnecessary
indirection as well as unnecessary re-establishment of state.

K. Y. Srinivasan (2):
  Drivers: net: hyperv: Get rid of the rndis_filter_packet structure
  Drivers: net: hyperv: Cleanup the receive path

 drivers/net/hyperv/hyperv_net.h   |    6 -----
 drivers/net/hyperv/netvsc.c       |   29 +++++++++++--------------
 drivers/net/hyperv/netvsc_drv.c   |    2 +-
 drivers/net/hyperv/rndis_filter.c |   41 ++----------------------------------
 4 files changed, 17 insertions(+), 61 deletions(-)

-- 
1.7.4.1

^ permalink raw reply

* [PATCH 1/2] Drivers: net: hyperv: Get rid of the rndis_filter_packet structure
From: K. Y. Srinivasan @ 2014-01-31 16:25 UTC (permalink / raw)
  To: davem, netdev, linux-kernel, devel, olaf, apw, jasowang
In-Reply-To: <1391185478-5655-1-git-send-email-kys@microsoft.com>

This structure is redundant; get rid of it to make the code a little more
efficient by removing the unnecessary indirection.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h   |    6 -----
 drivers/net/hyperv/netvsc_drv.c   |    2 +-
 drivers/net/hyperv/rndis_filter.c |   41 ++----------------------------------
 3 files changed, 4 insertions(+), 45 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 7b594ce..7645ba3 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -846,12 +846,6 @@ struct rndis_message {
 };
 
 
-struct rndis_filter_packet {
-	void *completion_ctx;
-	void (*completion)(void *context);
-	struct rndis_message msg;
-};
-
 /* Handy macros */
 
 /* get the size of an RNDIS message. Pass in the message type, */
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 7756118..1eadc13 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -146,7 +146,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 	/* Allocate a netvsc packet based on # of frags. */
 	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
 			 (num_pages * sizeof(struct hv_page_buffer)) +
-			 sizeof(struct rndis_filter_packet) +
+			 sizeof(struct rndis_message) +
 			 NDIS_VLAN_PPI_SIZE, GFP_ATOMIC);
 	if (!packet) {
 		/* out of memory, drop packet */
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 1084e5d..f0cc8ef 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -58,9 +58,6 @@ struct rndis_request {
 	u8 request_ext[RNDIS_EXT_LEN];
 };
 
-static void rndis_filter_send_completion(void *ctx);
-
-
 static struct rndis_device *get_rndis_device(void)
 {
 	struct rndis_device *device;
@@ -277,7 +274,7 @@ static void rndis_filter_receive_response(struct rndis_device *dev,
 				"rndis response buffer overflow "
 				"detected (size %u max %zu)\n",
 				resp->msg_len,
-				sizeof(struct rndis_filter_packet));
+				sizeof(struct rndis_message));
 
 			if (resp->ndis_msg_type ==
 			    RNDIS_MSG_RESET_C) {
@@ -898,17 +895,14 @@ int rndis_filter_close(struct hv_device *dev)
 int rndis_filter_send(struct hv_device *dev,
 			     struct hv_netvsc_packet *pkt)
 {
-	int ret;
-	struct rndis_filter_packet *filter_pkt;
 	struct rndis_message *rndis_msg;
 	struct rndis_packet *rndis_pkt;
 	u32 rndis_msg_size;
 	bool isvlan = pkt->vlan_tci & VLAN_TAG_PRESENT;
 
 	/* Add the rndis header */
-	filter_pkt = (struct rndis_filter_packet *)pkt->extension;
+	rndis_msg = (struct rndis_message *)pkt->extension;
 
-	rndis_msg = &filter_pkt->msg;
 	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
 	if (isvlan)
 		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
@@ -961,34 +955,5 @@ int rndis_filter_send(struct hv_device *dev,
 		pkt->page_buf[1].len = rndis_msg_size - pkt->page_buf[0].len;
 	}
 
-	/* Save the packet send completion and context */
-	filter_pkt->completion = pkt->completion.send.send_completion;
-	filter_pkt->completion_ctx =
-				pkt->completion.send.send_completion_ctx;
-
-	/* Use ours */
-	pkt->completion.send.send_completion = rndis_filter_send_completion;
-	pkt->completion.send.send_completion_ctx = filter_pkt;
-
-	ret = netvsc_send(dev, pkt);
-	if (ret != 0) {
-		/*
-		 * Reset the completion to originals to allow retries from
-		 * above
-		 */
-		pkt->completion.send.send_completion =
-				filter_pkt->completion;
-		pkt->completion.send.send_completion_ctx =
-				filter_pkt->completion_ctx;
-	}
-
-	return ret;
-}
-
-static void rndis_filter_send_completion(void *ctx)
-{
-	struct rndis_filter_packet *filter_pkt = ctx;
-
-	/* Pass it back to the original handler */
-	filter_pkt->completion(filter_pkt->completion_ctx);
+	return netvsc_send(dev, pkt);
 }
-- 
1.7.4.1

^ permalink raw reply related

* [PATCH 2/2] Drivers: net: hyperv: Cleanup the receive path
From: K. Y. Srinivasan @ 2014-01-31 16:25 UTC (permalink / raw)
  To: davem, netdev, linux-kernel, devel, olaf, apw, jasowang
In-Reply-To: <1391185517-5698-1-git-send-email-kys@microsoft.com>

Make the receive path a little more efficient by parameterizing the
required state rather than re-establishing that state.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/net/hyperv/netvsc.c |   29 +++++++++++++----------------
 1 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 03a2c6e..7fa2bba 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -432,17 +432,14 @@ static inline u32 hv_ringbuf_avail_percent(
 	return avail_write * 100 / ring_info->ring_datasize;
 }
 
-static void netvsc_send_completion(struct hv_device *device,
+static void netvsc_send_completion(struct netvsc_device *net_device,
+				   struct hv_device *device,
 				   struct vmpacket_descriptor *packet)
 {
-	struct netvsc_device *net_device;
 	struct nvsp_message *nvsp_packet;
 	struct hv_netvsc_packet *nvsc_packet;
 	struct net_device *ndev;
 
-	net_device = get_inbound_net_device(device);
-	if (!net_device)
-		return;
 	ndev = net_device->ndev;
 
 	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
@@ -561,13 +558,13 @@ int netvsc_send(struct hv_device *device,
 }
 
 static void netvsc_send_recv_completion(struct hv_device *device,
+					struct netvsc_device *net_device,
 					u64 transaction_id, u32 status)
 {
 	struct nvsp_message recvcompMessage;
 	int retries = 0;
 	int ret;
 	struct net_device *ndev;
-	struct netvsc_device *net_device = hv_get_drvdata(device);
 
 	ndev = net_device->ndev;
 
@@ -653,14 +650,15 @@ static void netvsc_receive_completion(void *context)
 
 	/* Send a receive completion for the xfer page packet */
 	if (fsend_receive_comp)
-		netvsc_send_recv_completion(device, transaction_id, status);
+		netvsc_send_recv_completion(device, net_device, transaction_id,
+					status);
 
 }
 
-static void netvsc_receive(struct hv_device *device,
-			    struct vmpacket_descriptor *packet)
+static void netvsc_receive(struct netvsc_device *net_device,
+			struct hv_device *device,
+			struct vmpacket_descriptor *packet)
 {
-	struct netvsc_device *net_device;
 	struct vmtransfer_page_packet_header *vmxferpage_packet;
 	struct nvsp_message *nvsp_packet;
 	struct hv_netvsc_packet *netvsc_packet = NULL;
@@ -673,9 +671,6 @@ static void netvsc_receive(struct hv_device *device,
 
 	LIST_HEAD(listHead);
 
-	net_device = get_inbound_net_device(device);
-	if (!net_device)
-		return;
 	ndev = net_device->ndev;
 
 	/*
@@ -741,7 +736,7 @@ static void netvsc_receive(struct hv_device *device,
 		spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
 				       flags);
 
-		netvsc_send_recv_completion(device,
+		netvsc_send_recv_completion(device, net_device,
 					    vmxferpage_packet->d.trans_id,
 					    NVSP_STAT_FAIL);
 
@@ -825,11 +820,13 @@ static void netvsc_channel_cb(void *context)
 				desc = (struct vmpacket_descriptor *)buffer;
 				switch (desc->type) {
 				case VM_PKT_COMP:
-					netvsc_send_completion(device, desc);
+					netvsc_send_completion(net_device,
+								device, desc);
 					break;
 
 				case VM_PKT_DATA_USING_XFER_PAGES:
-					netvsc_receive(device, desc);
+					netvsc_receive(net_device,
+							device, desc);
 					break;
 
 				default:
-- 
1.7.4.1

^ permalink raw reply related

* Re: [PATCH linux-3.10.y v2 1/3] sit: fix double free of fb_tunnel_dev on exit
From: Steven Rostedt @ 2014-01-31 17:19 UTC (permalink / raw)
  To: Nicolas Dichtel
  Cc: linux-kernel, netdev, stable, williams, lclaudio, jkacur, willemb
In-Reply-To: <1391156646-11981-1-git-send-email-nicolas.dichtel@6wind.com>

Just FYI.

Our full tier tests have completed with no issues due to the sit or
ip6_tunnel modules. These patches appear to have solved our problems.

Nicolas, Thanks again for posting these!

-- Steve


On Fri, 31 Jan 2014 09:24:04 +0100
Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote:

> This problem was fixed upstream by commit 9434266f2c64 ("sit: fix use after free
> of fb_tunnel_dev").
> The upstream patch depends on upstream commit 5e6700b3bf98 ("sit: add support of
> x-netns"), which was not backported into 3.10 branch.
> 
> First, explain the problem: when the sit module is unloaded, sit_cleanup() is
> called.
> rmmod sit
> => sit_cleanup()
>   => rtnl_link_unregister()
>     => __rtnl_kill_links()
>       => for_each_netdev(net, dev) {
>         if (dev->rtnl_link_ops == ops)
>         	ops->dellink(dev, &list_kill);
>         }
> At this point, the FB device is deleted (and all sit tunnels).
>   => unregister_pernet_device()
>     => unregister_pernet_operations()
>       => ops_exit_list()
>         => sit_exit_net()
>           => sit_destroy_tunnels()
>           In this function, no tunnel is found.
>           => unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
> We delete the FB device a second time here!
> 
> Because we cannot simply remove the second deletion (sit_exit_net() must remove
> the FB device when a netns is deleted), we add an rtnl op which deletes all sit
> devices except the FB device, and thus we can keep the explicit deletion in
> sit_exit_net().
> 

^ permalink raw reply

* Re: [PATCH 31/34] efx: Use pci_enable_msix_range()
From: Shradha Shah @ 2014-01-31 17:21 UTC (permalink / raw)
  To: Alexander Gordeev; +Cc: linux-kernel, Solarflare maintainers, netdev, linux-pci
In-Reply-To: <433fe68f834c10679986274f4bb605aa4ed5eaf3.1391172839.git.agordeev@redhat.com>

On 01/31/2014 03:08 PM, Alexander Gordeev wrote:
> As result of deprecation of MSI-X/MSI enablement functions
> pci_enable_msix() and pci_enable_msi_block() all drivers
> using these two interfaces need to be updated to use the
> new pci_enable_msi_range() and pci_enable_msix_range()
> interfaces.
> 
> Signed-off-by: Alexander Gordeev <agordeev@redhat.com>

Acked-by: Shradha Shah <sshah@solarflare.com>

> ---
>  drivers/net/ethernet/sfc/efx.c |   20 +++++++++-----------
>  1 files changed, 9 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
> index 83d4643..297b97a 100644
> --- a/drivers/net/ethernet/sfc/efx.c
> +++ b/drivers/net/ethernet/sfc/efx.c
> @@ -1346,20 +1346,23 @@ static int efx_probe_interrupts(struct efx_nic *efx)
>  
>  		for (i = 0; i < n_channels; i++)
>  			xentries[i].entry = i;
> -		rc = pci_enable_msix(efx->pci_dev, xentries, n_channels);
> -		if (rc > 0) {
> +		rc = pci_enable_msix_range(efx->pci_dev,
> +					   xentries, 1, n_channels);
> +		if (rc < 0) {
> +			/* Fall back to single channel MSI */
> +			efx->interrupt_mode = EFX_INT_MODE_MSI;
> +			netif_err(efx, drv, efx->net_dev,
> +				  "could not enable MSI-X\n");
> +		} else if (rc < n_channels) {
>  			netif_err(efx, drv, efx->net_dev,
>  				  "WARNING: Insufficient MSI-X vectors"
>  				  " available (%d < %u).\n", rc, n_channels);
>  			netif_err(efx, drv, efx->net_dev,
>  				  "WARNING: Performance may be reduced.\n");
> -			EFX_BUG_ON_PARANOID(rc >= n_channels);
>  			n_channels = rc;
> -			rc = pci_enable_msix(efx->pci_dev, xentries,
> -					     n_channels);
>  		}
>  
> -		if (rc == 0) {
> +		if (rc > 0) {
>  			efx->n_channels = n_channels;
>  			if (n_channels > extra_channels)
>  				n_channels -= extra_channels;
> @@ -1375,11 +1378,6 @@ static int efx_probe_interrupts(struct efx_nic *efx)
>  			for (i = 0; i < efx->n_channels; i++)
>  				efx_get_channel(efx, i)->irq =
>  					xentries[i].vector;
> -		} else {
> -			/* Fall back to single channel MSI */
> -			efx->interrupt_mode = EFX_INT_MODE_MSI;
> -			netif_err(efx, drv, efx->net_dev,
> -				  "could not enable MSI-X\n");
>  		}
>  	}
>  
> 

^ permalink raw reply

* Re: [PATCH v2 2/4] net: ethoc: implement ethtool get/set settings
From: Ben Hutchings @ 2014-01-31 18:03 UTC (permalink / raw)
  To: Max Filippov
  Cc: netdev, linux-kernel, David S. Miller, Florian Fainelli,
	Marc Gauthier
In-Reply-To: <1391146867-30508-3-git-send-email-jcmvbkbc@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1503 bytes --]

On Fri, 2014-01-31 at 09:41 +0400, Max Filippov wrote:
> Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>

Reviewed-by: Ben Hutchings <ben@decadent.org.uk>

> ---
> Changes v1->v2:
> - fix {get,set}_settings return code in case there's no PHY.
> 
>  drivers/net/ethernet/ethoc.c | 24 ++++++++++++++++++++++++
>  1 file changed, 24 insertions(+)
> 
> diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
> index 0623c20..779d3c3 100644
> --- a/drivers/net/ethernet/ethoc.c
> +++ b/drivers/net/ethernet/ethoc.c
> @@ -890,7 +890,31 @@ out:
>  	return NETDEV_TX_OK;
>  }
>  
> +static int ethoc_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
> +{
> +	struct ethoc *priv = netdev_priv(dev);
> +	struct phy_device *phydev = priv->phy;
> +
> +	if (!phydev)
> +		return -EOPNOTSUPP;
> +
> +	return phy_ethtool_gset(phydev, cmd);
> +}
> +
> +static int ethoc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
> +{
> +	struct ethoc *priv = netdev_priv(dev);
> +	struct phy_device *phydev = priv->phy;
> +
> +	if (!phydev)
> +		return -EOPNOTSUPP;
> +
> +	return phy_ethtool_sset(phydev, cmd);
> +}
> +
>  const struct ethtool_ops ethoc_ethtool_ops = {
> +	.get_settings = ethoc_get_settings,
> +	.set_settings = ethoc_set_settings,
>  	.get_link = ethtool_op_get_link,
>  	.get_ts_info = ethtool_op_get_ts_info,
>  };

-- 
Ben Hutchings
It is easier to write an incorrect program than to understand a correct one.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

^ permalink raw reply

* Re: [PATCH 17/34] ixgbevf: Use pci_enable_msix_range()
From: Sergei Shtylyov @ 2014-01-31 19:20 UTC (permalink / raw)
  To: Alexander Gordeev, linux-kernel
  Cc: Jeff Kirsher, Jesse Brandeburg, Bruce Allan, e1000-devel, netdev,
	linux-pci
In-Reply-To: <3d63d58082b8cb80de7f2cb434ca5d83781e8c9f.1391172839.git.agordeev@redhat.com>

On 01/31/2014 06:08 PM, Alexander Gordeev wrote:

> As result of deprecation of MSI-X/MSI enablement functions
> pci_enable_msix() and pci_enable_msi_block() all drivers
> using these two interfaces need to be updated to use the
> new pci_enable_msi_range() and pci_enable_msix_range()
> interfaces.

> Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
> ---
>   drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |   33 +++++++-------------
>   1 files changed, 12 insertions(+), 21 deletions(-)

> diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
> index 9df2898..521a9d7 100644
> --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
> +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c

> @@ -1831,33 +1830,25 @@ static int ixgbevf_acquire_msix_vectors(struct ixgbevf_adapter *adapter,
>   	 * Right now, we simply care about how many we'll get; we'll
>   	 * set them up later while requesting irq's.
>   	 */
> -	while (vectors >= vector_threshold) {
> -		err = pci_enable_msix(adapter->pdev, adapter->msix_entries,
> -				      vectors);
> -		if (!err || err < 0) /* Success or a nasty failure. */
> -			break;
> -		else /* err == number of vectors we should try again with */
> -			vectors = err;
> -	}
> +	vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
> +					vector_threshold, vectors);
>
> -	if (vectors < vector_threshold)
> -		err = -ENOMEM;
> -
> -	if (err) {
> +	if (vectors < 0) {
>   		dev_err(&adapter->pdev->dev,
>   			"Unable to allocate MSI-X interrupts\n");
>   		kfree(adapter->msix_entries);
>   		adapter->msix_entries = NULL;
> -	} else {
> -		/*
> -		 * Adjust for only the vectors we'll use, which is minimum
> -		 * of max_msix_q_vectors + NON_Q_VECTORS, or the number of
> -		 * vectors we were allocated.
> -		 */
> -		adapter->num_msix_vectors = vectors;
> +		return vectors;
>   	}
>
> -	return err;
> +	/*
> +	 * Adjust for only the vectors we'll use, which is minimum
> +	 * of max_msix_q_vectors + NON_Q_VECTORS, or the number of
> +	 * vectors we were allocated.
> +	 */

    Networking code formats multi-line comments slightly differently to the 
rest of the kernel:

/* bla
  * bla
  */

    Although, you're only moving what was there before you, maybe it's a good 
time to get this right.

WBR, Sergei

^ permalink raw reply

* Re: Help testing for USB ethernet/xHCI regression
From: Sarah Sharp @ 2014-01-31 18:37 UTC (permalink / raw)
  To: Mark Lord
  Cc: Paul Zimmerman, David Laight, renevant@internode.on.net,
	linux-usb@vger.kernel.org, Greg Kroah-Hartman,
	netdev@vger.kernel.org
In-Reply-To: <52EB0B51.4020407@pobox.com>

On Thu, Jan 30, 2014 at 09:32:49PM -0500, Mark Lord wrote:
> On 14-01-30 06:26 PM, Sarah Sharp wrote:
> > On Thu, Jan 30, 2014 at 05:20:40PM -0500, Mark Lord wrote:
> >> On 14-01-30 04:41 PM, Sarah Sharp wrote:
> >>>
> >>> Mark and David, can you pull the 3.13-td-changes-reverted branch again,
> >>> and see if the latest patch fixes your issue?  It disables scatter
> >>> gather for the ax88179_178a device, but only when it's operating at USB
> >>> 3.0 speeds.
> >>
> >> As expected, this works just fine.
> > 
> > Did it work when plugged into a USB 2.0 hub?
> 
> Curiously, NO.  Dies almost immediately when run at USB 2.0 Hi-Speed.
> With a USB 2.0 hub, with a USB 2.0 port on a USB 3.0 hub,
> and with a USB 2.0 extension cable in place of a hub.
> 
> Near instant hangs.
> 
> Plugged directly to the USB 3.0 port, it works fine.

Ok, that makes sense.  The patch I wrote only limited scatter-gather at
USB 3.0 speeds, to see if scatter-gather could work at USB 2.0 speeds.
Reverting commit 3804fad45411b48233b48003e33a78f290d227c8 "USBNET:
ax88179_178a: enable tso if usb host supports sg dma" is the right way
to go instead.

Sarah Sharp

^ permalink raw reply

* [RFC PATCHv2] net: ipv4: move inetpeer garbage collector work to power efficient workqueue
From: Zoran Markovic @ 2014-01-31 18:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: netdev, Shaibal Dutta, David S. Miller, Alexey Kuznetsov,
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, Zoran Markovic

From: Shaibal Dutta <shaibal.dutta@broadcom.com>

Garbage collector work does not have to be bound to the CPU that scheduled
it. By moving work to the power-efficient workqueue, the selection of
CPU executing the work is left to the scheduler. This extends idle
residency times and conserves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: James Morris <jmorris@namei.org>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Shaibal Dutta <shaibal.dutta@broadcom.com>
[zoran.markovic@linaro.org: Rebased to latest kernel version. Added
commit message. Fixed code alignment.]
Signed-off-by: Zoran Markovic <zoran.markovic@linaro.org>
---
 net/ipv4/inetpeer.c |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 48f4244..7e3da6c6 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -161,7 +161,8 @@ static void inetpeer_gc_worker(struct work_struct *work)
 	list_splice(&list, &gc_list);
 	spin_unlock_bh(&gc_lock);
 
-	schedule_delayed_work(&gc_work, gc_delay);
+	queue_delayed_work(system_power_efficient_wq,
+			   &gc_work, gc_delay);
 }
 
 /* Called from ip_output.c:ip_init  */
@@ -576,7 +577,8 @@ static void inetpeer_inval_rcu(struct rcu_head *head)
 	list_add_tail(&p->gc_list, &gc_list);
 	spin_unlock_bh(&gc_lock);
 
-	schedule_delayed_work(&gc_work, gc_delay);
+	queue_delayed_work(system_power_efficient_wq,
+			   &gc_work, gc_delay);
 }
 
 void inetpeer_invalidate_tree(struct inet_peer_base *base)
-- 
1.7.9.5

^ permalink raw reply related

* [RFC PATCHv2] net: core: move core networking work to power efficient workqueue
From: Zoran Markovic @ 2014-01-31 18:51 UTC (permalink / raw)
  To: linux-kernel
  Cc: netdev, Shaibal Dutta, David S. Miller, Jiri Pirko,
	YOSHIFUJI Hideaki, Eric Dumazet, Julian Anastasov, Flavio Leitner,
	Neil Horman, Patrick McHardy, John Fastabend, Amerigo Wang,
	Joe Perches, Jason Wang, Antonio Quartulli, Simon Horman,
	Nikolay Aleksandrov, Zoran Markovic

From: Shaibal Dutta <shaibal.dutta@broadcom.com>

This patch moves the following work to the power efficient workqueue:
  - Transmit work of netpoll
  - Destination cache garbage collector work
  - Link watch event handler work

In general, assignment of CPUs to pending work could be deferred to
the scheduler in order to extend idle residency time and improve
power efficiency. I would value the community's opinion on the migration
of this work to the power efficient workqueue, with an emphasis on
migration of netpoll's transmit work.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
Cc: Flavio Leitner <fbl@redhat.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Amerigo Wang <amwang@redhat.com>
Cc: Joe Perches <joe@perches.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Antonio Quartulli <antonio@meshcoding.com>
Cc: Simon Horman <horms@verge.net.au>
Cc: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: Shaibal Dutta <shaibal.dutta@broadcom.com>
[zoran.markovic@linaro.org: Rebased to latest kernel version. Edited
calls to mod_delayed_work to reference power efficient workqueue.
Added commit message. Fixed code alignment.]
Signed-off-by: Zoran Markovic <zoran.markovic@linaro.org>
---
 net/core/dst.c        |    5 +++--
 net/core/link_watch.c |    5 +++--
 net/core/netpoll.c    |    6 ++++--
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/core/dst.c b/net/core/dst.c
index ca4231e..57fba10 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -135,7 +135,8 @@ loop:
 		 */
 		if (expires > 4*HZ)
 			expires = round_jiffies_relative(expires);
-		schedule_delayed_work(&dst_gc_work, expires);
+		queue_delayed_work(system_power_efficient_wq,
+				   &dst_gc_work, expires);
 	}
 
 	spin_unlock_bh(&dst_garbage.lock);
@@ -223,7 +224,7 @@ void __dst_free(struct dst_entry *dst)
 	if (dst_garbage.timer_inc > DST_GC_INC) {
 		dst_garbage.timer_inc = DST_GC_INC;
 		dst_garbage.timer_expires = DST_GC_MIN;
-		mod_delayed_work(system_wq, &dst_gc_work,
+		mod_delayed_work(system_power_efficient_wq, &dst_gc_work,
 				 dst_garbage.timer_expires);
 	}
 	spin_unlock_bh(&dst_garbage.lock);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 9c3a839..6899935 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -135,9 +135,10 @@ static void linkwatch_schedule_work(int urgent)
 	 * override the existing timer.
 	 */
 	if (test_bit(LW_URGENT, &linkwatch_flags))
-		mod_delayed_work(system_wq, &linkwatch_work, 0);
+		mod_delayed_work(system_power_efficient_wq, &linkwatch_work, 0);
 	else
-		schedule_delayed_work(&linkwatch_work, delay);
+		queue_delayed_work(system_power_efficient_wq,
+				   &linkwatch_work, delay);
 }
 
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c03f3de..6685938 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -101,7 +101,8 @@ static void queue_process(struct work_struct *work)
 			__netif_tx_unlock(txq);
 			local_irq_restore(flags);
 
-			schedule_delayed_work(&npinfo->tx_work, HZ/10);
+			queue_delayed_work(system_power_efficient_wq,
+					   &npinfo->tx_work, HZ/10);
 			return;
 		}
 		__netif_tx_unlock(txq);
@@ -423,7 +424,8 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 
 	if (status != NETDEV_TX_OK) {
 		skb_queue_tail(&npinfo->txq, skb);
-		schedule_delayed_work(&npinfo->tx_work,0);
+		queue_delayed_work(system_power_efficient_wq,
+				   &npinfo->tx_work, 0);
 	}
 }
 EXPORT_SYMBOL(netpoll_send_skb_on_dev);
-- 
1.7.9.5

^ permalink raw reply related

* Re: igb and bnx2: "NETDEV WATCHDOG: transmit queue timed out" when skb has huge linear buffer
From: Wei Liu @ 2014-01-31 18:56 UTC (permalink / raw)
  To: Zoltan Kiss
  Cc: Jeff Kirsher, Jesse Brandeburg, Bruce Allan, Carolyn Wyborny,
	Don Skidmore, Greg Rose, Peter P Waskiewicz Jr, Alex Duyck,
	John Ronciak, Tushar Dave, Akeem G Abodunrin, David S. Miller,
	e1000-devel, netdev@vger.kernel.org, linux-kernel, Michael Chan,
	xen-devel@lists.xenproject.org, wei.liu2
In-Reply-To: <52EAA31B.1090606@schaman.hu>

On Thu, Jan 30, 2014 at 07:08:11PM +0000, Zoltan Kiss wrote:
> Hi,
> 
> I've experienced some queue timeout problems mentioned in the
> subject with igb and bnx2 cards. I haven't seen them on other cards
> so far. I'm using XenServer with 3.10 Dom0 kernel (however igb were
> already updated to latest version), and there are Windows guests
> sending data through these cards. I noticed these problems in XenRT
> test runs, and I know that they usually mean some lost interrupt
> problem or other hardware error, but in my case they started to
> appear more often, and they are likely connected to my netback grant
> mapping patches. These patches cause skb's with huge (~64KB)
> linear buffers to appear more often.
> The reason for that is an old problem in the ring protocol:
> originally the maximum number of slots was linked to MAX_SKB_FRAGS,
> as every slot ended up as a frag of the skb. When this value was
> packets into fewer frags.
> My patch series takes a different approach: the leftover slots
> (pages) are assigned to a new skb's frags, and that skb is
> stashed on the frag_list of the first one. Then, before sending it
> off to the stack, it calls skb = skb_copy_expand(skb, 0, 0,
> GFP_ATOMIC, __GFP_NOWARN), which basically creates a new skb and
> copies all the data into it. As far as I understand, it puts
> everything into the linear buffer, which can amount to 64KB at most.
> The original skb is then freed, and this new one is sent to the
> stack.

Just my two cents: if that is the case, you can try calling
skb_copy_expand on every SKB netback receives, to manually create SKBs
with ~64KB linear buffers and see how it goes...

Wei.

> I suspect that this is the problem, as it only happens when guests
> send too many slots. Has anyone familiar with these drivers seen
> such an issue before (when this kind of skb gets stuck in the
> queue)?
> 
> Regards,
> 
> Zoltan Kiss
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH RFC 1/1] usb: Tell xhci when usb data might be misaligned
From: Sarah Sharp @ 2014-01-31 19:00 UTC (permalink / raw)
  To: Ming Lei
  Cc: Bjørn Mork, David Laight, linux-usb@vger.kernel.org,
	netdev@vger.kernel.org, Greg Kroah-Hartman, David Miller,
	Dan Williams, Nyman, Mathias, Mark Lord, Alan Stern, Freddy Xin
In-Reply-To: <CACVXFVMVoMAH3DRiv-VYZL8VWq2Rw+fZwreVtUVm1M44HR+X8Q@mail.gmail.com>

On Fri, Jan 31, 2014 at 08:17:58AM +0800, Ming Lei wrote:
> On Fri, Jan 31, 2014 at 6:15 AM, Sarah Sharp
> <sarah.a.sharp@linux.intel.com> wrote:
> > On Thu, Jan 30, 2014 at 10:50:21PM +0100, Bjørn Mork wrote:
> >> FWIW, the plan looks fine to me.  Just adding a couple of hints to
> >> simplify the implementation.
> >>
> >> Sarah Sharp <sarah.a.sharp@linux.intel.com> writes:
> >>
> >> > Let's do this fix the right way, instead of wall papering over the
> >> > issue.  Here's what we should do:
> >> >
> >> > 1. Disable scatter-gather for the ax88179_178a driver when it's under an
> >> >    xHCI host.
> >>
> >> No need to make this conditional.  SG is only enabled in the
> >> ax88179_178a driver if udev->bus->no_sg_constraint is true, so it
> >> applies only to xHCI hosts in the first place.
> >
> > Ah, so you're suggesting just reverting commit
> > 3804fad45411b48233b48003e33a78f290d227c8 "USBNET: ax88179_178a: enable
> > tso if usb host supports sg dma"?
> 
> If I understand the problem correctly, the current issue is that the xhci
> driver doesn't support arbitrary dma lengths well, but per the XHCI spec, it
> should be supported, right?
> 
> If the above is correct, reverting the commit isn't correct since there isn't
> any issue with the commit, so I suggest disabling the flag in xhci
> for the buggy devices, and it may be enabled again once the problem is fixed.

Ok, I like that plan, since it means I don't have to touch any
networking code to fix this. :)

I believe that means we'll have to disable the flag for all 1.0 xHCI
hosts, since those are the ones that need TD fragments.

> >> > 2. Revert the following commits:
> >> >    f2d9b991c549 xhci: Set scatter-gather limit to avoid failed block writes.
> >> >    d6c9ea9069af xhci: Avoid infinite loop when sg urb requires too many trbs
> >> >    35773dac5f86 usb: xhci: Link TRB must not occur within a USB payload burst
> >> >
> >> > 3. Dan and Mathias can work together to come up with an overall plan to
> >> >    change the xHCI driver architecture to be fully compliant with the TD
> >> >    fragment rules.  That can be done over the next few kernel releases.
> >> >
> >> > The end result is that we don't destabilize storage or break userspace
> >> > USB drivers, we don't break people's xHCI host controllers,
> >> > the ax88179_178a USB ethernet devices still work under xHCI (with a bit
> >> > worse performance), and other USB ethernet devices still get the
> >> > performance improvement introduced in 3.12.
> >>
> >> No other usbnet driver has enabled SG...  Which is why you have only
> >> seen this problem with the ax88179_178a devices.  So there is no
> >> performance improvement to keep.
> 
> In my test environment, the patch does improve both throughput and
> cpu utilization, if you search the previous email for the patch, you can
> see the data.

Right, I did see the performance improvement note in that commit.  Do
you know if the ARM A15 dual core board was using a 0.96 xHCI host, or a
1.0 host?  You can find out by reloading the xHCI driver with dynamic
debugging turned on:

# sudo modprobe xhci_hcd dyndbg

and then look for lines like:

[25296.765767] xhci_hcd 0000:00:14.0: HCIVERSION: 0x100

Sarah Sharp

^ permalink raw reply

* I NEED YOUR HELP.
From: FROM MRS GRACE MANDA @ 2014-01-31 18:55 UTC (permalink / raw)

In-Reply-To: <1391189868.90855.YahooMailNeo@web5706.biz.mail.ne1.yahoo.com>

[-- Attachment #1: Type: text/plain, Size: 59 bytes --]




I PRAY THAT THIS MAIL GETS TO YOU IN BETTER HEALTH.

[-- Attachment #2: From Mrs Grace Manda..pdf --]
[-- Type: application/pdf, Size: 41148 bytes --]

^ permalink raw reply

* Re: [PATCH 02/34] bnx2x: Use pci_enable_msix_range()
From: Sergei Shtylyov @ 2014-01-31 19:11 UTC (permalink / raw)
  To: Alexander Gordeev, linux-kernel; +Cc: Ariel Elior, netdev, linux-pci
In-Reply-To: <911884c4e906af8acfa4c06ee3206387449f1d84.1391172839.git.agordeev@redhat.com>

Hello.

On 01/31/2014 06:08 PM, Alexander Gordeev wrote:

> As a result of the deprecation of the MSI-X/MSI enablement functions
> pci_enable_msix() and pci_enable_msi_block(), all drivers
> using these two interfaces need to be updated to use the
> new pci_enable_msi_range() and pci_enable_msix_range()
> interfaces.

> Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
> ---
>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |   48 ++++++++++-------------
>   1 files changed, 21 insertions(+), 27 deletions(-)

> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> index 9d7419e..b396d74 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> @@ -1638,24 +1638,36 @@ int bnx2x_enable_msix(struct bnx2x *bp)
>   	DP(BNX2X_MSG_SP, "about to request enable msix with %d vectors\n",
>   	   msix_vec);
>
> -	rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], msix_vec);
> -
> +	rc = pci_enable_msix_range(bp->pdev, &bp->msix_table[0],
> +				   BNX2X_MIN_MSIX_VEC_CNT(bp), msix_vec);
>   	/*
>   	 * reconfigure number of tx/rx queues according to available
>   	 * MSI-X vectors
>   	 */
> -	if (rc >= BNX2X_MIN_MSIX_VEC_CNT(bp)) {
> +	if (rc < 0) {
> +		BNX2X_DEV_INFO("MSI-X is not attainable rc %d\n", rc);
> +		goto no_msix;
> +	} else if (rc == -ENOSPC) {

    This branch is unreachable now. You should have put this check first.

> +		/* Get by with single vector */
> +		rc = pci_enable_msix_range(bp->pdev, &bp->msix_table[0], 1, 1);
> +		if (rc < 0) {
> +			BNX2X_DEV_INFO("Single MSI-X is not attainable rc %d\n",
> +				       rc);
> +			goto no_msix;
> +		}
> +
> +		BNX2X_DEV_INFO("Using single MSI-X vector\n");
> +		bp->flags |= USING_SINGLE_MSIX_FLAG;
> +
> +		BNX2X_DEV_INFO("set number of queues to 1\n");
> +		bp->num_ethernet_queues = 1;
> +		bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues;
> +	} else if (rc < msix_vec) {

WBR, Sergei

^ permalink raw reply

* Re: [PATCH 12/34] e1000e: Use pci_enable_msix_range()
From: Sergei Shtylyov @ 2014-01-31 19:17 UTC (permalink / raw)
  To: Alexander Gordeev, linux-kernel
  Cc: Jeff Kirsher, Jesse Brandeburg, Bruce Allan, e1000-devel, netdev,
	linux-pci
In-Reply-To: <0ec36309e17031a66d3a6ab489fc60702b6d76b3.1391172839.git.agordeev@redhat.com>

Hello.

On 01/31/2014 06:08 PM, Alexander Gordeev wrote:

> As a result of the deprecation of the MSI-X/MSI enablement functions
> pci_enable_msix() and pci_enable_msi_block(), all drivers
> using these two interfaces need to be updated to use the
> new pci_enable_msi_range() and pci_enable_msix_range()
> interfaces.

> Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
> ---
>   drivers/net/ethernet/intel/e1000e/netdev.c |    9 +++++----
>   1 files changed, 5 insertions(+), 4 deletions(-)

> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> index 6d91933..7735d1a 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -2041,10 +2041,11 @@ void e1000e_set_interrupt_capability(struct e1000_adapter *adapter)
>   				for (i = 0; i < adapter->num_vectors; i++)
>   					adapter->msix_entries[i].entry = i;
>
> -				err = pci_enable_msix(adapter->pdev,
> -						      adapter->msix_entries,
> -						      adapter->num_vectors);
> -				if (err == 0)
> +				err = pci_enable_msix_range(adapter->pdev,
> +							adapter->msix_entries,
> +							adapter->num_vectors,
> +							adapter->num_vectors);

    You should align all 'adapter' references under each other, according to 
networking coding rules.

WBR, Sergei

^ permalink raw reply

* Re: [PATCH 18/34] mlx4: Use pci_enable_msix_range()
From: Sergei Shtylyov @ 2014-01-31 19:22 UTC (permalink / raw)
  To: Alexander Gordeev, linux-kernel
  Cc: David S. Miller, Amir Vadai, netdev, linux-pci
In-Reply-To: <2f6674dabb90d4cbca3ef4039c3b893f2e5e9ec4.1391172839.git.agordeev@redhat.com>

Hello.

On 01/31/2014 06:08 PM, Alexander Gordeev wrote:

> As a result of the deprecation of the MSI-X/MSI enablement functions
> pci_enable_msix() and pci_enable_msi_block(), all drivers
> using these two interfaces need to be updated to use the
> new pci_enable_msi_range() and pci_enable_msix_range()
> interfaces.

> Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
> ---
>   drivers/net/ethernet/mellanox/mlx4/main.c |   19 ++++---------------
>   1 files changed, 4 insertions(+), 15 deletions(-)

> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
> index d711158..a9d1249 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c
[...]
> @@ -1990,22 +1989,12 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
[...]
> +		nreq = pci_enable_msix_range(dev->pdev, entries, 2, nreq);
> +
> +		if (nreq < 0) {
>   			kfree(entries);
>   			goto no_msi;
> -		}
> -
> -		if (nreq <
> +		} else if (nreq <
>   		    MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {

    Please realign this line to start right under 'nreq'.

WBR, Sergei

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox