Netdev List
 help / color / mirror / Atom feed
* [PATCH RFC 21/77] csiostor: Update MSI/MSI-X interrupts enablement code
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

As result of recent re-design of the MSI/MSI-X interrupts enabling
pattern this driver has to be updated to use the new technique to
obtain a optimal number of MSI/MSI-X interrupts required.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/scsi/csiostor/csio_isr.c |   18 ++++++++++++------
 1 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/csiostor/csio_isr.c b/drivers/scsi/csiostor/csio_isr.c
index abf20ab..b8a840e 100644
--- a/drivers/scsi/csiostor/csio_isr.c
+++ b/drivers/scsi/csiostor/csio_isr.c
@@ -512,6 +512,16 @@ csio_enable_msix(struct csio_hw *hw)
 	if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS || !csio_is_hw_master(hw))
 		cnt = min_t(uint8_t, hw->cfg_niq, cnt);
 
+	rv = pci_msix_table_size(hw->pdev);
+	if (rv < 0)
+		return rv;
+
+	cnt = min(cnt, rv);
+	if (cnt < min) {
+		csio_info(hw, "Not using MSI-X, remainder:%d\n", rv);
+		return -ENOSPC;
+	}
+
 	entries = kzalloc(sizeof(struct msix_entry) * cnt, GFP_KERNEL);
 	if (!entries)
 		return -ENOMEM;
@@ -521,19 +531,15 @@ csio_enable_msix(struct csio_hw *hw)
 
 	csio_dbg(hw, "FW supp #niq:%d, trying %d msix's\n", hw->cfg_niq, cnt);
 
-	while ((rv = pci_enable_msix(hw->pdev, entries, cnt)) >= min)
-		cnt = rv;
+	rv = pci_enable_msix(hw->pdev, entries, cnt);
 	if (!rv) {
 		if (cnt < (hw->num_sqsets + extra)) {
 			csio_dbg(hw, "Reducing sqsets to %d\n", cnt - extra);
 			csio_reduce_sqsets(hw, cnt - extra);
 		}
 	} else {
-		if (rv > 0)
-			csio_info(hw, "Not using MSI-X, remainder:%d\n", rv);
-
 		kfree(entries);
-		return -ENOSPC;
+		return rv;
 	}
 
 	/* Save off vectors */
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 20/77] csiostor: Return -ENOSPC when not enough MSI-X vectors available
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	stable, linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390,
	x86, linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-de
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/scsi/csiostor/csio_isr.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/scsi/csiostor/csio_isr.c b/drivers/scsi/csiostor/csio_isr.c
index 91ba91d..abf20ab 100644
--- a/drivers/scsi/csiostor/csio_isr.c
+++ b/drivers/scsi/csiostor/csio_isr.c
@@ -533,7 +533,7 @@ csio_enable_msix(struct csio_hw *hw)
 			csio_info(hw, "Not using MSI-X, remainder:%d\n", rv);
 
 		kfree(entries);
-		return -ENOMEM;
+		return -ENOSPC;
 	}
 
 	/* Save off vectors */
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 19/77] csiostor: Do not call pci_disable_msix() if pci_enable_msix() failed
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	stable, linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390,
	x86, linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-de
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/scsi/csiostor/csio_isr.c |    4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/csiostor/csio_isr.c b/drivers/scsi/csiostor/csio_isr.c
index 7ee9777..91ba91d 100644
--- a/drivers/scsi/csiostor/csio_isr.c
+++ b/drivers/scsi/csiostor/csio_isr.c
@@ -529,10 +529,8 @@ csio_enable_msix(struct csio_hw *hw)
 			csio_reduce_sqsets(hw, cnt - extra);
 		}
 	} else {
-		if (rv > 0) {
-			pci_disable_msix(hw->pdev);
+		if (rv > 0)
 			csio_info(hw, "Not using MSI-X, remainder:%d\n", rv);
-		}
 
 		kfree(entries);
 		return -ENOMEM;
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 18/77] cciss: Fallback to single MSI mode in case MSI-X failed
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	stable, linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390,
	x86, linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-de
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/block/cciss.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index bf11540..0eea035 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -4081,7 +4081,7 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
 		err = pci_msix_table_size(h->pdev);
 		if (err < ARRAY_SIZE(cciss_msix_entries))
-			goto default_int_mode;
+			goto single_msi_mode;
 		err = pci_enable_msix(h->pdev, cciss_msix_entries,
 				      ARRAY_SIZE(cciss_msix_entries));
 		if (!err) {
@@ -4093,8 +4093,8 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 			return;
 		}
 		dev_warn(&h->pdev->dev, "MSI-X init failed %d\n", err);
-		goto default_int_mode;
 	}
+single_msi_mode:
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) {
 		if (!pci_enable_msi(h->pdev))
 			h->msi_vector = 1;
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 17/77] cciss: Update a misleading comment on interrupt usage
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	stable, linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390,
	x86, linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-de
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/block/cciss.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 80068a0..bf11540 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -4103,7 +4103,10 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 	}
 default_int_mode:
 #endif				/* CONFIG_PCI_MSI */
-	/* if we get here we're going to use the default interrupt mode */
+	/*
+	 * If we get here we're going to use either the
+	 * default interrupt mode or single MSI mode
+	 */
 	h->intr[h->intr_mode] = h->pdev->irq;
 	return;
 }
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 16/77] cciss: Update MSI/MSI-X interrupts enablement code
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

As result of recent re-design of the MSI/MSI-X interrupts enabling
pattern this driver has to be updated to use the new technique to
obtain a optimal number of MSI/MSI-X interrupts required.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/block/cciss.c |   17 +++++++----------
 1 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d2d95ff..80068a0 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -4079,7 +4079,11 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 		goto default_int_mode;
 
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
-		err = pci_enable_msix(h->pdev, cciss_msix_entries, 4);
+		err = pci_msix_table_size(h->pdev);
+		if (err < ARRAY_SIZE(cciss_msix_entries))
+			goto default_int_mode;
+		err = pci_enable_msix(h->pdev, cciss_msix_entries,
+				      ARRAY_SIZE(cciss_msix_entries));
 		if (!err) {
 			h->intr[0] = cciss_msix_entries[0].vector;
 			h->intr[1] = cciss_msix_entries[1].vector;
@@ -4088,15 +4092,8 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 			h->msix_vector = 1;
 			return;
 		}
-		if (err > 0) {
-			dev_warn(&h->pdev->dev,
-				"only %d MSI-X vectors available\n", err);
-			goto default_int_mode;
-		} else {
-			dev_warn(&h->pdev->dev,
-				"MSI-X init failed %d\n", err);
-			goto default_int_mode;
-		}
+		dev_warn(&h->pdev->dev, "MSI-X init failed %d\n", err);
+		goto default_int_mode;
 	}
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) {
 		if (!pci_enable_msi(h->pdev))
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 15/77] bnx2: Update MSI/MSI-X interrupts enablement code
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

As result of recent re-design of the MSI/MSI-X interrupts enabling
pattern this driver has to be updated to use the new technique to
obtain a optimal number of MSI/MSI-X interrupts required.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/net/ethernet/broadcom/bnx2.c |   27 ++++++++++++++-------------
 1 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index e838a3f..c902627 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -6202,25 +6202,26 @@ bnx2_enable_msix(struct bnx2 *bp, int msix_vecs)
 	 *  is setup properly */
 	BNX2_RD(bp, BNX2_PCI_MSIX_CONTROL);
 
-	for (i = 0; i < BNX2_MAX_MSIX_VEC; i++) {
-		msix_ent[i].entry = i;
-		msix_ent[i].vector = 0;
-	}
-
 	total_vecs = msix_vecs;
 #ifdef BCM_CNIC
 	total_vecs++;
 #endif
-	rc = -ENOSPC;
-	while (total_vecs >= BNX2_MIN_MSIX_VEC) {
-		rc = pci_enable_msix(bp->pdev, msix_ent, total_vecs);
-		if (rc <= 0)
-			break;
-		if (rc > 0)
-			total_vecs = rc;
+	rc = pci_msix_table_size(bp->pdev);
+	if (rc < 0)
+		return;
+
+	total_vecs = min(total_vecs, rc);
+	if (total_vecs < BNX2_MIN_MSIX_VEC)
+		return;
+
+	BUG_ON(total_vecs > ARRAY_SIZE(msix_ent));
+	for (i = 0; i < total_vecs; i++) {
+		msix_ent[i].entry = i;
+		msix_ent[i].vector = 0;
 	}
 
-	if (rc != 0)
+	rc = pci_enable_msix(bp->pdev, msix_ent, total_vecs);
+	if (rc)
 		return;
 
 	msix_vecs = total_vecs;
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 14/77] bnx2x: Update MSI/MSI-X interrupts enablement code
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

As result of recent re-design of the MSI/MSI-X interrupts enabling
pattern this driver has to be updated to use the new technique to
obtain a optimal number of MSI/MSI-X interrupts required.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |   54 ++++++++++-------------
 1 files changed, 23 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 61726af..edf31d2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1564,7 +1564,7 @@ void bnx2x_free_irq(struct bnx2x *bp)
 
 int bnx2x_enable_msix(struct bnx2x *bp)
 {
-	int msix_vec = 0, i, rc;
+	int msix_vec = 0, nvec, i, rc;
 
 	/* VFs don't have a default status block */
 	if (IS_PF(bp)) {
@@ -1590,60 +1590,52 @@ int bnx2x_enable_msix(struct bnx2x *bp)
 		msix_vec++;
 	}
 
+	rc = pci_msix_table_size(bp->pdev);
+	if (rc < 0)
+		goto no_msix;
+
+	nvec = min(msix_vec, rc);
+	if (nvec < BNX2X_MIN_MSIX_VEC_CNT(bp))
+		nvec = 1;
+
 	DP(BNX2X_MSG_SP, "about to request enable msix with %d vectors\n",
 	   msix_vec);
 
-	rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], msix_vec);
+	rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], nvec);
+	if (rc)
+		goto no_msix;
 
 	/*
 	 * reconfigure number of tx/rx queues according to available
 	 * MSI-X vectors
 	 */
-	if (rc >= BNX2X_MIN_MSIX_VEC_CNT(bp)) {
-		/* how less vectors we will have? */
-		int diff = msix_vec - rc;
-
-		BNX2X_DEV_INFO("Trying to use less MSI-X vectors: %d\n", rc);
+	if (nvec == 1) {
+		bp->flags |= USING_SINGLE_MSIX_FLAG;
 
-		rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], rc);
+		bp->num_ethernet_queues = 1;
+		bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues;
+	} else if (nvec < msix_vec) {
+		/* how less vectors we will have? */
+		int diff = msix_vec - nvec;
 
-		if (rc) {
-			BNX2X_DEV_INFO("MSI-X is not attainable rc %d\n", rc);
-			goto no_msix;
-		}
 		/*
 		 * decrease number of queues by number of unallocated entries
 		 */
 		bp->num_ethernet_queues -= diff;
 		bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues;
+	}
 
+	if (nvec != msix_vec)
 		BNX2X_DEV_INFO("New queue configuration set: %d\n",
 			       bp->num_queues);
-	} else if (rc > 0) {
-		/* Get by with single vector */
-		rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], 1);
-		if (rc) {
-			BNX2X_DEV_INFO("Single MSI-X is not attainable rc %d\n",
-				       rc);
-			goto no_msix;
-		}
-
-		BNX2X_DEV_INFO("Using single MSI-X vector\n");
-		bp->flags |= USING_SINGLE_MSIX_FLAG;
-
-		BNX2X_DEV_INFO("set number of queues to 1\n");
-		bp->num_ethernet_queues = 1;
-		bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues;
-	} else if (rc < 0) {
-		BNX2X_DEV_INFO("MSI-X is not attainable  rc %d\n", rc);
-		goto no_msix;
-	}
 
 	bp->flags |= USING_MSIX_FLAG;
 
 	return 0;
 
 no_msix:
+	BNX2X_DEV_INFO("MSI-X is not attainable rc %d\n", rc);
+
 	/* fall to INTx if not enough memory */
 	if (rc == -ENOMEM)
 		bp->flags |= DISABLE_MSI_FLAG;
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 13/77] bna: Update MSI/MSI-X interrupts enablement code
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

As result of recent re-design of the MSI/MSI-X interrupts enabling
pattern this driver has to be updated to use the new technique to
obtain a optimal number of MSI/MSI-X interrupts required.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/net/ethernet/brocade/bna/bnad.c |   34 ++++++++++++------------------
 1 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index b78e69e..d41257c 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -2469,21 +2469,11 @@ bnad_enable_msix(struct bnad *bnad)
 	if (bnad->msix_table)
 		return;
 
-	bnad->msix_table =
-		kcalloc(bnad->msix_num, sizeof(struct msix_entry), GFP_KERNEL);
-
-	if (!bnad->msix_table)
+	ret = pci_msix_table_size(bnad->pcidev);
+	if (ret < 0)
 		goto intx_mode;
 
-	for (i = 0; i < bnad->msix_num; i++)
-		bnad->msix_table[i].entry = i;
-
-	ret = pci_enable_msix(bnad->pcidev, bnad->msix_table, bnad->msix_num);
-	if (ret > 0) {
-		/* Not enough MSI-X vectors. */
-		pr_warn("BNA: %d MSI-X vectors allocated < %d requested\n",
-			ret, bnad->msix_num);
-
+	if (ret < bnad->msix_num) {
 		spin_lock_irqsave(&bnad->bna_lock, flags);
 		/* ret = #of vectors that we got */
 		bnad_q_num_adjust(bnad, (ret - BNAD_MAILBOX_MSIX_VECTORS) / 2,
@@ -2495,15 +2485,19 @@ bnad_enable_msix(struct bnad *bnad)
 
 		if (bnad->msix_num > ret)
 			goto intx_mode;
+	}
 
-		/* Try once more with adjusted numbers */
-		/* If this fails, fall back to INTx */
-		ret = pci_enable_msix(bnad->pcidev, bnad->msix_table,
-				      bnad->msix_num);
-		if (ret)
-			goto intx_mode;
+	bnad->msix_table =
+		kcalloc(bnad->msix_num, sizeof(struct msix_entry), GFP_KERNEL);
+
+	if (!bnad->msix_table)
+		goto intx_mode;
 
-	} else if (ret < 0)
+	for (i = 0; i < bnad->msix_num; i++)
+		bnad->msix_table[i].entry = i;
+
+	ret = pci_enable_msix(bnad->pcidev, bnad->msix_table, bnad->msix_num);
+	if (ret)
 		goto intx_mode;
 
 	pci_intx(bnad->pcidev, 0);
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 12/77] benet: Update MSI/MSI-X interrupts enablement code
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

As result of recent re-design of the MSI/MSI-X interrupts enabling
pattern this driver has to be updated to use the new technique to
obtain a optimal number of MSI/MSI-X interrupts required.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |   38 ++++++++++++++------------
 1 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 3e2c834..84d560d 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2366,29 +2366,23 @@ static int be_msix_enable(struct be_adapter *adapter)
 	else
 		num_vec = adapter->cfg_num_qs;
 
-	for (i = 0; i < num_vec; i++)
-		adapter->msix_entries[i].entry = i;
+	status = pci_msix_table_size(adapter->pdev);
+	if (status < 0)
+		goto fail;
 
-	status = pci_enable_msix(adapter->pdev, adapter->msix_entries, num_vec);
-	if (status == 0) {
-		goto done;
-	} else if (status >= MIN_MSIX_VECTORS) {
-		num_vec = status;
-		status = pci_enable_msix(adapter->pdev, adapter->msix_entries,
-					 num_vec);
-		if (!status)
-			goto done;
-	} else (status > 0) {
+	num_vec = min(num_vec, status);
+	if (num_vec < MIN_MSIX_VECTORS) {
 		status = -ENOSPC;
+		goto fail;
 	}
 
-	dev_warn(dev, "MSIx enable failed\n");
+	for (i = 0; i < num_vec; i++)
+		adapter->msix_entries[i].entry = i;
+
+	status = pci_enable_msix(adapter->pdev, adapter->msix_entries, num_vec);
+	if (status)
+		goto fail;
 
-	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
-	if (!be_physfn(adapter))
-		return status;
-	return 0;
-done:
 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
 		adapter->num_msix_roce_vec = num_vec / 2;
 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
@@ -2400,6 +2394,14 @@ done:
 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
 		 adapter->num_msix_vec);
 	return 0;
+
+fail:
+	dev_warn(dev, "MSIx enable failed\n");
+
+	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
+	if (!be_physfn(adapter))
+		return status;
+	return 0;
 }
 
 static inline int be_msix_vec_get(struct be_adapter *adapter,
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 11/77] benet: Return -ENOSPC when not enough MSI-Xs available
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 100b528..3e2c834 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2378,6 +2378,8 @@ static int be_msix_enable(struct be_adapter *adapter)
 					 num_vec);
 		if (!status)
 			goto done;
+	} else (status > 0) {
+		status = -ENOSPC;
 	}
 
 	dev_warn(dev, "MSIx enable failed\n");
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 10/77] ahci: Check MRSM bit when multiple MSIs enabled
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Do not trust the hardware and always check if MSI
Revert to Single Message mode was enforced. Fall
back to the single MSI mode in case it did. Not
doing so might screw up the interrupt handling.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/ata/ahci.c |   17 +++++++++++++++++
 drivers/ata/ahci.h |    1 +
 2 files changed, 18 insertions(+), 0 deletions(-)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index f1c8838..3a39cc8 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1091,6 +1091,14 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host)
 {}
 #endif
 
+static int ahci_get_mrsm(struct ahci_host_priv *hpriv)
+{
+	void __iomem *mmio = hpriv->mmio;
+	u32 ctl = readl(mmio + HOST_CTL);
+
+	return ctl & HOST_MRSM;
+}
+
 int ahci_init_interrupts(struct pci_dev *pdev, unsigned int n_ports,
 			 struct ahci_host_priv *hpriv)
 {
@@ -1116,6 +1124,15 @@ int ahci_init_interrupts(struct pci_dev *pdev, unsigned int n_ports,
 	if (rc)
 		goto intx;
 
+	/*
+	 * Fallback to single MSI mode if the controller enforced MRSM mode 
+	 */
+	if (ahci_get_mrsm(hpriv)) {
+		pci_disable_msi(pdev);
+		printk(KERN_INFO "ahci: MRSM is on, fallback to single MSI\n");
+		goto single_msi;
+	}
+
 	return nvec;
 
 single_msi:
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 1145637..19bc846 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -91,6 +91,7 @@ enum {
 	/* HOST_CTL bits */
 	HOST_RESET		= (1 << 0),  /* reset controller; self-clear */
 	HOST_IRQ_EN		= (1 << 1),  /* global IRQ enable */
+	HOST_MRSM		= (1 << 2),  /* MSI Revert to Single Message */
 	HOST_AHCI_EN		= (1 << 31), /* AHCI enabled */
 
 	/* HOST_CAP bits */
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 09/77] ahci: Update MSI/MSI-X interrupts enablement code
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

As result of recent re-design of the MSI/MSI-X interrupts enabling
pattern this driver has to be updated to use the new technique to
obtain a optimal number of MSI/MSI-X interrupts required.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/ata/ahci.c |   56 ++++++++++++++++++++++++++++++++-------------------
 1 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 9d715ae..f1c8838 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1091,26 +1091,40 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host)
 {}
 #endif
 
-int ahci_init_interrupts(struct pci_dev *pdev, struct ahci_host_priv *hpriv)
+int ahci_init_interrupts(struct pci_dev *pdev, unsigned int n_ports,
+			 struct ahci_host_priv *hpriv)
 {
-	int rc;
-	unsigned int maxvec;
+	int rc, nvec;
 
-	if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) {
-		rc = pci_enable_msi_block_auto(pdev, &maxvec);
-		if (rc > 0) {
-			if ((rc == maxvec) || (rc == 1))
-				return rc;
-			/*
-			 * Assume that advantage of multipe MSIs is negated,
-			 * so fallback to single MSI mode to save resources
-			 */
-			pci_disable_msi(pdev);
-			if (!pci_enable_msi(pdev))
-				return 1;
-		}
-	}
+	if (hpriv->flags & AHCI_HFLAG_NO_MSI)
+		goto intx;
+
+	rc = pci_get_msi_cap(pdev);
+	if (rc < 0)
+		goto intx;
+
+	/*
+	 * If number of MSIs is less than number of ports then Sharing Last
+	 * Message mode could be enforced. In this case assume that advantage
+	 * of multipe MSIs is negated and use single MSI mode instead.
+	 */
+	if (rc < n_ports)
+		goto single_msi;
+
+	nvec = rc;
+	rc = pci_enable_msi_block(pdev, nvec);
+	if (rc)
+		goto intx;
 
+	return nvec;
+
+single_msi:
+	rc = pci_enable_msi(pdev);
+	if (rc)
+		goto intx;
+	return 1;
+
+intx:
 	pci_intx(pdev, 1);
 	return 0;
 }
@@ -1277,10 +1291,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
 
-	n_msis = ahci_init_interrupts(pdev, hpriv);
-	if (n_msis > 1)
-		hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
-
 	/* save initial config */
 	ahci_pci_save_initial_config(pdev, hpriv);
 
@@ -1335,6 +1345,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 */
 	n_ports = max(ahci_nr_ports(hpriv->cap), fls(hpriv->port_map));
 
+	n_msis = ahci_init_interrupts(pdev, n_ports, hpriv);
+	if (n_msis > 1)
+		hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
+
 	host = ata_host_alloc_pinfo(&pdev->dev, ppi, n_ports);
 	if (!host)
 		return -ENOMEM;
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 08/77] PCI/MSI: Get rid of pci_enable_msi_block_auto() interface
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

As result of recent re-design of MSI/MSI-X interrupts enabling
pattern pci_enable_msi_block_auto() interface became obsolete.

To enable maximum number of MSI interrupts for a device the
driver will first obtain that number from pci_get_msi_cap()
function and then call pci_enable_msi_block() interface.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 Documentation/PCI/MSI-HOWTO.txt |   30 ++----------------------------
 drivers/pci/msi.c               |   20 --------------------
 include/linux/pci.h             |    7 -------
 3 files changed, 2 insertions(+), 55 deletions(-)

diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 40abcfb..4d0525f 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -133,33 +133,7 @@ static int foo_driver_enable_msi(struct foo_adapter *adapter, int nvec)
 	return rc;
 }
 
-4.2.3 pci_enable_msi_block_auto
-
-int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *count)
-
-This variation on pci_enable_msi() call allows a device driver to request
-the maximum possible number of MSIs.  The MSI specification only allows
-interrupts to be allocated in powers of two, up to a maximum of 2^5 (32).
-
-If this function returns a positive number, it indicates that it has
-succeeded and the returned value is the number of allocated interrupts. In
-this case, the function enables MSI on this device and updates dev->irq to
-be the lowest of the new interrupts assigned to it.  The other interrupts
-assigned to the device are in the range dev->irq to dev->irq + returned
-value - 1.
-
-If this function returns a negative number, it indicates an error and
-the driver should not attempt to request any more MSI interrupts for
-this device.
-
-If the device driver needs to know the number of interrupts the device
-supports it can pass the pointer count where that number is stored. The
-device driver must decide what action to take if pci_enable_msi_block_auto()
-succeeds, but returns a value less than the number of interrupts supported.
-If the device driver does not need to know the number of interrupts
-supported, it can set the pointer count to NULL.
-
-4.2.4 pci_disable_msi
+4.2.3 pci_disable_msi
 
 void pci_disable_msi(struct pci_dev *dev)
 
@@ -175,7 +149,7 @@ on any interrupt for which it previously called request_irq().
 Failure to do so results in a BUG_ON(), leaving the device with
 MSI enabled and thus leaking its vector.
 
-4.2.5 pci_get_msi_cap
+4.2.4 pci_get_msi_cap
 
 int pci_get_msi_cap(struct pci_dev *dev)
 
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 583ace1..1934519 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -849,26 +849,6 @@ int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 }
 EXPORT_SYMBOL(pci_enable_msi_block);
 
-int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
-{
-	int ret, nvec;
-
-	ret = pci_get_msi_cap(dev);
-	if (ret < 0)
-		return ret;
-
-	if (maxvec)
-		*maxvec = ret;
-
-	nvec = ret;
-	ret = pci_enable_msi_block(dev, nvec);
-	if (ret)
-		return ret;
-
-	return nvec;
-}
-EXPORT_SYMBOL(pci_enable_msi_block_auto);
-
 void pci_msi_shutdown(struct pci_dev *dev)
 {
 	struct msi_desc *desc;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2fa92ef..13bf88d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1154,12 +1154,6 @@ static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 	return -1;
 }
 
-static inline int
-pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
-{
-	return -1;
-}
-
 static inline void pci_msi_shutdown(struct pci_dev *dev)
 { }
 static inline void pci_disable_msi(struct pci_dev *dev)
@@ -1192,7 +1186,6 @@ static inline int pci_msi_enabled(void)
 #else
 int pci_get_msi_cap(struct pci_dev *dev);
 int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
-int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec);
 void pci_msi_shutdown(struct pci_dev *dev);
 void pci_disable_msi(struct pci_dev *dev);
 int pci_msix_table_size(struct pci_dev *dev);
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 07/77] PCI/MSI: Re-design MSI/MSI-X interrupts enablement pattern
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci-u79uwXL29TY76Z2rM5mHXA,
	linux-doc-u79uwXL29TY76Z2rM5mHXA,
	linux-mips-6z/3iImG2C8G8FEW9MqTrA,
	linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ,
	linux390-tA70FqPdS9bQT0dZR+AlfA,
	linux-s390-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-ide-u79uwXL29TY76Z2rM5mHXA, iss_storagedev-VXdhtT5mjnY,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	e1000-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-driver-h88ZbnxC6KDQT0dZR+AlfA,
	Solarflare linux maintainers <linux-net-drivers@
In-Reply-To: <cover.1380703262.git.agordeev-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

Currently pci_enable_msi_block() and pci_enable_msix() interfaces
return a error code in case of failure, 0 in case of success and a
positive value which indicates the number of MSI-X/MSI interrupts
that could have been allocated. The latter value should be passed
to a repeated call to the interfaces until a failure or success.

This technique proved to be confusing and error-prone. Vast share
of device drivers simply fail to follow the described guidelines.

This update converts pci_enable_msix() and pci_enable_msi_block()
interfaces to canonical kernel functions and makes them return a
error code in case of failure or 0 in case of success.

As result, device drivers will cease to use the overcomplicated
repeated fallbacks technique and resort to a straightforward
pattern - determine the number of MSI/MSI-X interrupts required
before calling pci_enable_msix() and pci_enable_msi_block()
interfaces.

Device drivers will use their knowledge of underlying hardware
to determine the number of MSI/MSI-X interrupts required.

The simplest case would be requesting all available interrupts -
to obtain that value device drivers will use pci_get_msi_cap()
interface for MSI and pci_msix_table_size() for MSI-X.

More complex cases would entail matching device capabilities
with the system environment, i.e. limiting number of hardware
queues (and hence associated MSI/MSI-X interrupts) to the number
of online CPUs.

Suggested-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Signed-off-by: Alexander Gordeev <agordeev-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
 Documentation/PCI/MSI-HOWTO.txt      |   71 ++++++++++++++++++---------------
 arch/mips/pci/msi-octeon.c           |    2 +-
 arch/powerpc/kernel/msi.c            |    2 +-
 arch/powerpc/platforms/pseries/msi.c |    2 +-
 arch/s390/pci/pci.c                  |    2 +-
 arch/x86/kernel/apic/io_apic.c       |    2 +-
 drivers/pci/msi.c                    |   52 +++++++------------------
 7 files changed, 58 insertions(+), 75 deletions(-)

diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 1f37ce2..40abcfb 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -111,21 +111,27 @@ the device are in the range dev->irq to dev->irq + count - 1.
 
 If this function returns a negative number, it indicates an error and
 the driver should not attempt to request any more MSI interrupts for
-this device.  If this function returns a positive number, it is
-less than 'count' and indicates the number of interrupts that could have
-been allocated.  In neither case is the irq value updated or the device
-switched into MSI mode.
-
-The device driver must decide what action to take if
-pci_enable_msi_block() returns a value less than the number requested.
-For instance, the driver could still make use of fewer interrupts;
-in this case the driver should call pci_enable_msi_block()
-again.  Note that it is not guaranteed to succeed, even when the
-'count' has been reduced to the value returned from a previous call to
-pci_enable_msi_block().  This is because there are multiple constraints
-on the number of vectors that can be allocated; pci_enable_msi_block()
-returns as soon as it finds any constraint that doesn't allow the
-call to succeed.
+this device.
+
+Device drivers should normally call pci_get_msi_cap() function before
+calling this function to determine maximum number of MSI interrupts
+a device can send.
+
+A sequence to achieve that might look like:
+
+static int foo_driver_enable_msi(struct foo_adapter *adapter, int nvec)
+{
+	rc = pci_get_msi_cap(adapter->pdev);
+	if (rc < 0)
+		return rc;
+
+	nvec = min(nvec, rc);
+	if (nvec < FOO_DRIVER_MINIMUM_NVEC) {
+		return -ENOSPC;
+
+	rc = pci_enable_msi_block(adapter->pdev, nvec);
+	return rc;
+}
 
 4.2.3 pci_enable_msi_block_auto
 
@@ -218,9 +224,7 @@ interrupts assigned to the MSI-X vectors so it can free them again later.
 
 If this function returns a negative number, it indicates an error and
 the driver should not attempt to allocate any more MSI-X interrupts for
-this device.  If it returns a positive number, it indicates the maximum
-number of interrupt vectors that could have been allocated. See example
-below.
+this device.
 
 This function, in contrast with pci_enable_msi(), does not adjust
 dev->irq.  The device will not generate interrupts for this interrupt
@@ -229,24 +233,27 @@ number once MSI-X is enabled.
 Device drivers should normally call this function once per device
 during the initialization phase.
 
-It is ideal if drivers can cope with a variable number of MSI-X interrupts;
-there are many reasons why the platform may not be able to provide the
-exact number that a driver asks for.
+Device drivers should normally call pci_msix_table_size() function before
+calling this function to determine maximum number of MSI-X interrupts
+a device can send.
 
-A request loop to achieve that might look like:
+A sequence to achieve that might look like:
 
 static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
 {
-	while (nvec >= FOO_DRIVER_MINIMUM_NVEC) {
-		rc = pci_enable_msix(adapter->pdev,
-				     adapter->msix_entries, nvec);
-		if (rc > 0)
-			nvec = rc;
-		else
-			return rc;
-	}
-
-	return -ENOSPC;
+	rc = pci_msix_table_size(adapter->pdev);
+	if (rc < 0)
+		return rc;
+
+	nvec = min(nvec, rc);
+	if (nvec < FOO_DRIVER_MINIMUM_NVEC) {
+		return -ENOSPC;
+
+	for (i = 0; i < nvec; i++)
+		adapter->msix_entries[i].entry = i;
+
+	rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, nvec);
+	return rc;
 }
 
 4.3.2 pci_disable_msix
diff --git a/arch/mips/pci/msi-octeon.c b/arch/mips/pci/msi-octeon.c
index d37be36..0ee5f4d 100644
--- a/arch/mips/pci/msi-octeon.c
+++ b/arch/mips/pci/msi-octeon.c
@@ -193,7 +193,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	 * override arch_setup_msi_irqs()
 	 */
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
+		return -EINVAL;
 
 	list_for_each_entry(entry, &dev->msi_list, list) {
 		ret = arch_setup_msi_irq(dev, entry);
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
index 8bbc12d..36d70b9 100644
--- a/arch/powerpc/kernel/msi.c
+++ b/arch/powerpc/kernel/msi.c
@@ -22,7 +22,7 @@ int arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
 
 	/* PowerPC doesn't support multiple MSI yet */
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
+		return -EINVAL;
 
 	if (ppc_md.msi_check_device) {
 		pr_debug("msi: Using platform check routine.\n");
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 009ec73..89648c1 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -348,7 +348,7 @@ static int rtas_msi_check_device(struct pci_dev *pdev, int nvec, int type)
 	quota = msi_quota_for_device(pdev, nvec);
 
 	if (quota && quota < nvec)
-		return quota;
+		return -ENOSPC;
 
 	return 0;
 }
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 61a3c2c..45a1875 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -426,7 +426,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 
 	pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
+		return -EINVAL;
 	msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
 	msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e63a5bd..6126eaf 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3145,7 +3145,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 
 	/* Multiple MSI vectors only supported with interrupt remapping */
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
+		return -EINVAL;
 
 	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index ca59bfc..583ace1 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -719,7 +719,7 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
 	if (ret)
-		goto out_avail;
+		goto error;
 
 	/*
 	 * Some devices require MSI-X to be enabled before we can touch the
@@ -733,7 +733,7 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	ret = populate_msi_sysfs(dev);
 	if (ret)
-		goto out_free;
+		goto error;
 
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
@@ -744,24 +744,7 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	return 0;
 
-out_avail:
-	if (ret < 0) {
-		/*
-		 * If we had some success, report the number of irqs
-		 * we succeeded in setting up.
-		 */
-		struct msi_desc *entry;
-		int avail = 0;
-
-		list_for_each_entry(entry, &dev->msi_list, list) {
-			if (entry->irq != 0)
-				avail++;
-		}
-		if (avail != 0)
-			ret = avail;
-	}
-
-out_free:
+error:
 	free_msi_irqs(dev);
 
 	return ret;
@@ -832,13 +815,11 @@ EXPORT_SYMBOL(pci_get_msi_cap);
  * @dev: device to configure
  * @nvec: number of interrupts to configure
  *
- * Allocate IRQs for a device with the MSI capability.
- * This function returns a negative errno if an error occurs.  If it
- * is unable to allocate the number of interrupts requested, it returns
- * the number of interrupts it might be able to allocate.  If it successfully
- * allocates at least the number of interrupts requested, it returns 0 and
- * updates the @dev's irq member to the lowest new interrupt number; the
- * other interrupt numbers allocated to this device are consecutive.
+ * Allocate IRQs for a device with the MSI capability. This function returns
+ * a negative errno if an error occurs. If it successfully allocates at least
+ * the number of interrupts requested, it returns 0 and updates the @dev's
+ * irq member to the lowest new interrupt number; the other interrupt numbers
+ * allocated to this device are consecutive.
  */
 int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 {
@@ -848,7 +829,7 @@ int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 	if (maxvec < 0)
 		return maxvec;
 	if (nvec > maxvec)
-		return maxvec;
+		return -EINVAL;
 
 	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
 	if (status)
@@ -879,13 +860,11 @@ int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
 	if (maxvec)
 		*maxvec = ret;
 
-	do {
-		nvec = ret;
-		ret = pci_enable_msi_block(dev, nvec);
-	} while (ret > 0);
-
-	if (ret < 0)
+	nvec = ret;
+	ret = pci_enable_msi_block(dev, nvec);
+	if (ret)
 		return ret;
+
 	return nvec;
 }
 EXPORT_SYMBOL(pci_enable_msi_block_auto);
@@ -955,9 +934,6 @@ EXPORT_SYMBOL(pci_msix_table_size);
  * MSI-X mode enabled on its hardware device function. A return of zero
  * indicates the successful configuration of MSI-X capability structure
  * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
- * Or a return of > 0 indicates that driver request is exceeding the number
- * of irqs or MSI-X vectors available. Driver should use the returned value to
- * re-send its request.
  **/
 int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 {
@@ -975,7 +951,7 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 	if (nr_entries < 0)
 		return nr_entries;
 	if (nvec > nr_entries)
-		return nr_entries;
+		return -EINVAL;
 
 	/* Check for any invalid entries */
 	for (i = 0; i < nvec; i++) {
-- 
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH RFC 06/77] PCI/MSI: Factor out pci_get_msi_cap() interface
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

This update is needed to facilitate a forthcoming re-design
MSI/MSI-X interrupts enabling pattern.

Device drivers will use this interface to obtain maximum number
of MSI interrupts the device supports and use that value in the
following call to pci_enable_msi_block() interface.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 Documentation/PCI/MSI-HOWTO.txt |   15 +++++++++++++++
 drivers/pci/msi.c               |   33 +++++++++++++++++++++------------
 include/linux/pci.h             |    6 ++++++
 3 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 35b2d64..1f37ce2 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -169,6 +169,21 @@ on any interrupt for which it previously called request_irq().
 Failure to do so results in a BUG_ON(), leaving the device with
 MSI enabled and thus leaking its vector.
 
+4.2.5 pci_get_msi_cap
+
+int pci_get_msi_cap(struct pci_dev *dev)
+
+This function could be used to retrieve the number of MSI vectors the
+device requested (via the Multiple Message Capable register). The MSI
+specification only allows the returned value to be a power of two,
+up to a maximum of 2^5 (32).
+
+If this function returns a negative number, it indicates the device is
+not capable of sending MSIs.
+
+If this function returns a positive number, it indicates the maximum
+number of MSI interrupt vectors that could be allocated.
+
 4.3 Using MSI-X
 
 The MSI-X capability is much more flexible than the MSI capability.
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 875c353..ca59bfc 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -812,6 +812,21 @@ static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
 	return 0;
 }
 
+int pci_get_msi_cap(struct pci_dev *dev)
+{
+	int ret;
+	u16 msgctl;
+
+	if (!dev->msi_cap)
+		return -EINVAL;
+
+	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
+	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
+
+	return ret;
+}
+EXPORT_SYMBOL(pci_get_msi_cap);
+
 /**
  * pci_enable_msi_block - configure device's MSI capability structure
  * @dev: device to configure
@@ -828,13 +843,10 @@ static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
 int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 {
 	int status, maxvec;
-	u16 msgctl;
 
-	if (!dev->msi_cap)
-		return -EINVAL;
-
-	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
-	maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
+	maxvec = pci_get_msi_cap(dev);
+	if (maxvec < 0)
+		return maxvec;
 	if (nvec > maxvec)
 		return maxvec;
 
@@ -859,13 +871,10 @@ EXPORT_SYMBOL(pci_enable_msi_block);
 int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
 {
 	int ret, nvec;
-	u16 msgctl;
 
-	if (!dev->msi_cap)
-		return -EINVAL;
-
-	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
-	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
+	ret = pci_get_msi_cap(dev);
+	if (ret < 0)
+		return ret;
 
 	if (maxvec)
 		*maxvec = ret;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index da172f9..2fa92ef 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1144,6 +1144,11 @@ struct msix_entry {
 
 
 #ifndef CONFIG_PCI_MSI
+static inline int pci_get_msi_cap(struct pci_dev *dev)
+{
+	return -1;
+}
+
 static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 {
 	return -1;
@@ -1185,6 +1190,7 @@ static inline int pci_msi_enabled(void)
 	return 0;
 }
 #else
+int pci_get_msi_cap(struct pci_dev *dev);
 int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
 int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec);
 void pci_msi_shutdown(struct pci_dev *dev);
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 05/77] PCI/MSI: Convert pci_msix_table_size() to a public interface
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Make pci_msix_table_size() to return a error code if the device
does not support MSI-X. This update is needed to facilitate a
forthcoming re-design MSI/MSI-X interrupts enabling pattern.

Device drivers will use this interface to obtain maximum number
of MSI-X interrupts the device supports and use that value in
the following call to pci_enable_msix() interface.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 Documentation/PCI/MSI-HOWTO.txt |   13 +++++++++++++
 drivers/pci/msi.c               |    5 ++++-
 drivers/pci/pcie/portdrv_core.c |    2 ++
 3 files changed, 19 insertions(+), 1 deletions(-)

diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index a091780..35b2d64 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -255,6 +255,19 @@ MSI-X Table.  This address is mapped by the PCI subsystem, and should not
 be accessed directly by the device driver.  If the driver wishes to
 mask or unmask an interrupt, it should call disable_irq() / enable_irq().
 
+4.3.4 pci_msix_table_size
+
+int pci_msix_table_size(struct pci_dev *dev)
+
+This function could be used to retrieve number of entries in the device
+MSI-X table.
+
+If this function returns a negative number, it indicates the device is
+not capable of sending MSI-Xs.
+
+If this function returns a positive number, it indicates the maximum
+number of MSI-X interrupt vectors that could be allocated.
+
 4.4 Handling devices implementing both MSI and MSI-X capabilities
 
 If a device implements both MSI and MSI-X capabilities, it can
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index b43f391..875c353 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -928,11 +928,12 @@ int pci_msix_table_size(struct pci_dev *dev)
 	u16 control;
 
 	if (!dev->msix_cap)
-		return 0;
+		return -EINVAL;
 
 	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
 	return msix_table_size(control);
 }
+EXPORT_SYMBOL(pci_msix_table_size);
 
 /**
  * pci_enable_msix - configure device's MSI-X capability structure
@@ -962,6 +963,8 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 		return status;
 
 	nr_entries = pci_msix_table_size(dev);
+	if (nr_entries < 0)
+		return nr_entries;
 	if (nvec > nr_entries)
 		return nr_entries;
 
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 31063ac..b4d86eb 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -80,6 +80,8 @@ static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask)
 	u32 reg32;
 
 	nr_entries = pci_msix_table_size(dev);
+	if (nr_entries < 0)
+		return nr_entries;
 	if (!nr_entries)
 		return -EINVAL;
 	if (nr_entries > PCIE_PORT_MAX_MSIX_ENTRIES)
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 04/77] PCI/MSI/s390: Remove superfluous check of MSI type
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	stable, linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390,
	x86, linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-de
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

arch_setup_msi_irqs() hook can only be called from the generic
MSI code which ensures correct MSI type parameter.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 arch/s390/pci/pci.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index c79c6e4..61a3c2c 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -425,8 +425,6 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 	int rc;
 
 	pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
-	if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
-		return -EINVAL;
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
 		return 1;
 	msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 03/77] PCI/MSI/s390: Fix single MSI only check
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	stable, linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390,
	x86, linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-de
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Multiple MSIs have never been supported on s390 architecture,
but the platform code fails to report single MSI only.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 arch/s390/pci/pci.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index f17a834..c79c6e4 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -427,6 +427,8 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 	pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
 	if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
 		return -EINVAL;
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
 	msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
 	msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
 
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 02/77] PCI/MSI/PPC: Fix wrong RTAS error code reporting
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	stable, linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390,
	x86, linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-de
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 arch/powerpc/platforms/pseries/msi.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 6d2f0ab..009ec73 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -467,7 +467,7 @@ again:
 	list_for_each_entry(entry, &pdev->msi_list, list) {
 		hwirq = rtas_query_irq_number(pdn, i++);
 		if (hwirq < 0) {
-			pr_debug("rtas_msi: error (%d) getting hwirq\n", rc);
+			pr_debug("rtas_msi: error (%d) getting hwirq\n", hwirq);
 			return hwirq;
 		}
 
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 01/77] PCI/MSI: Fix return value when populate_msi_sysfs() failed
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	stable, linux-pci, linux-mips, linuxppc-dev, linux390, linux-s390,
	x86, linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-de
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 drivers/pci/msi.c |   11 +++++------
 1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index d5f90d6..b43f391 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -719,7 +719,7 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
 	if (ret)
-		goto error;
+		goto out_avail;
 
 	/*
 	 * Some devices require MSI-X to be enabled before we can touch the
@@ -732,10 +732,8 @@ static int msix_capability_init(struct pci_dev *dev,
 	msix_program_entries(dev, entries);
 
 	ret = populate_msi_sysfs(dev);
-	if (ret) {
-		ret = 0;
-		goto error;
-	}
+	if (ret)
+		goto out_free;
 
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
@@ -746,7 +744,7 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	return 0;
 
-error:
+out_avail:
 	if (ret < 0) {
 		/*
 		 * If we had some success, report the number of irqs
@@ -763,6 +761,7 @@ error:
 			ret = avail;
 	}
 
+out_free:
 	free_msi_irqs(dev);
 
 	return ret;
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Alexander Gordeev @ 2013-10-02 10:48 UTC (permalink / raw)
  To: linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: Alexander Gordeev, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Tejun Heo, Dan Williams, Andy King, Jon Mason, Matt Porter,
	linux-pci-u79uwXL29TY76Z2rM5mHXA,
	linux-mips-6z/3iImG2C8G8FEW9MqTrA,
	linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ,
	linux390-tA70FqPdS9bQT0dZR+AlfA,
	linux-s390-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-ide-u79uwXL29TY76Z2rM5mHXA, iss_storagedev-VXdhtT5mjnY,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	e1000-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-driver-h88ZbnxC6KDQT0dZR+AlfA, Solarflare linux maintainers,
	"VMware,

This series is against "next" branch in Bjorn's repo:
git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git

Currently pci_enable_msi_block() and pci_enable_msix() interfaces
return a error code in case of failure, 0 in case of success and a
positive value which indicates the number of MSI-X/MSI interrupts
that could have been allocated. The latter value should be passed
to a repeated call to the interfaces until a failure or success:


	for (i = 0; i < FOO_DRIVER_MAXIMUM_NVEC; i++)
		adapter->msix_entries[i].entry = i;

	while (nvec >= FOO_DRIVER_MINIMUM_NVEC) {
		rc = pci_enable_msix(adapter->pdev,
				     adapter->msix_entries, nvec);
		if (rc > 0)
			nvec = rc;
		else
			return rc;
	}

	return -ENOSPC;


This technique proved to be confusing and error-prone. Vast share
of device drivers simply fail to follow the described guidelines.

This update converts pci_enable_msix() and pci_enable_msi_block()
interfaces to canonical kernel functions and makes them return a
error code in case of failure or 0 in case of success.

As result, device drivers will cease to use the overcomplicated
repeated fallbacks technique and resort to a straightforward
pattern - determine the number of MSI/MSI-X interrupts required
before calling pci_enable_msi_block() and pci_enable_msix()
interfaces:


	rc = pci_msix_table_size(adapter->pdev);
	if (rc < 0)
		return rc;

	nvec = min(nvec, rc);
	if (nvec < FOO_DRIVER_MINIMUM_NVEC) {
		return -ENOSPC;

	for (i = 0; i < nvec; i++)
		adapter->msix_entries[i].entry = i;

	rc = pci_enable_msix(adapter->pdev,
			     adapter->msix_entries, nvec);
	return rc;


Device drivers will use their knowledge of underlying hardware
to determine the number of MSI/MSI-X interrupts required.

The simplest case would be requesting all available interrupts -
to obtain that value device drivers will use pci_get_msi_cap()
interface for MSI and pci_msix_table_size() for MSI-X.

More complex cases would entail matching device capabilities
to the system environment, i.e. limiting number of hardware
queues (and hence associated MSI/MSI-X interrupts) to the number
of online CPUs.

Device drivers using MSI/MSI-X could be divided in three groups:
- drivers that request a hardcoded number of interrupts;
- drivers that request a number of interrupts using one call to
  pci_enable_msix() and then enable MSI/MSI-X using a follow-up
  to pci_enable_msix();
- drivers that fully follow the guidelines and repeatedly call
  pci_enable_msix() in a loop;
This series converts to the new technique second and third groups.

To simplify device drivers code review I tried to make as little
changes as possible - the scope of this series is an introduction
of the new technique rather than clean-up effort for all drivers
affected.

The testing was very limited - I ensured successful booting on
all affected architectures except MIPS and operation of few
devices with and without pci=nomsi kernel parameter.

There is a ongoing discussion about impact of this update on
PowerPC pSeries platform. I am going to incorporate the outcome
of this discussion into the next version. Yet, the rest of the
platforms and the vast majority of device drivers already can
start getting initial reviews.

Patches 5,6,8	- update of the generic MSI code
Patch 7		- update of architectures affected
Patches 9-77	- bugfixes and updates of device drivers affected

The tree could be found in "pci-next-msi-v1" branch in repo:
https://github.com/a-gordeev/linux.git

Alexander Gordeev (77):
  PCI/MSI: Fix return value when populate_msi_sysfs() failed
  PCI/MSI/PPC: Fix wrong RTAS error code reporting
  PCI/MSI/s390: Fix single MSI only check
  PCI/MSI/s390: Remove superfluous check of MSI type
  PCI/MSI: Convert pci_msix_table_size() to a public interface
  PCI/MSI: Factor out pci_get_msi_cap() interface
  PCI/MSI: Re-design MSI/MSI-X interrupts enablement pattern
  PCI/MSI: Get rid of pci_enable_msi_block_auto() interface
  ahci: Update MSI/MSI-X interrupts enablement code
  ahci: Check MRSM bit when multiple MSIs enabled
  benet: Return -ENOSPC when not enough MSI-Xs available
  benet: Update MSI/MSI-X interrupts enablement code
  bna: Update MSI/MSI-X interrupts enablement code
  bnx2x: Update MSI/MSI-X interrupts enablement code
  bnx2: Update MSI/MSI-X interrupts enablement code
  cciss: Update MSI/MSI-X interrupts enablement code
  cciss: Update a misleading comment on interrupt usage
  cciss: Fallback to single MSI mode in case MSI-X failed
  csiostor: Do not call pci_disable_msix() if pci_enable_msix() failed
  csiostor: Return -ENOSPC when not enough MSI-X vectors available
  csiostor: Update MSI/MSI-X interrupts enablement code
  cxgb3: Do not call pci_disable_msix() if pci_enable_msix() failed
  cxgb3: Return -ENOSPC when not enough MSI-X vectors available
  cxgb3: Update MSI/MSI-X interrupts enablement code
  cxgb4: Return -ENOSPC when not enough MSI-X vectors available
  cxgb4: Update MSI/MSI-X interrupts enablement code
  cxgb4vf: Do not call pci_disable_msix() if pci_enable_msix() failed
  cxgb4vf: Return -ENOSPC when not enough MSI-X vectors available
  cxgb4vf: Update MSI/MSI-X interrupts enablement code
  hpsa: Update a misleading comment on interrupt usage
  hpsa: Update MSI/MSI-X interrupts enablement code
  hpsa: Fallback to single MSI mode in case MSI-X failed
  ioat: Disable MSI-X in case request of IRQ failed
  ioat: Update MSI/MSI-X interrupts enablement code
  ipr: Do not call pci_disable_msi/msix() if pci_enable_msi/msix()
    failed
  ipr: Enable MSI-X when IPR_USE_MSIX type is set, not IPR_USE_MSI
  ipr: Update MSI/MSI-X interrupts enablement code
  ixgbe: Update MSI/MSI-X interrupts enablement code
  ixgbevf: Return -ENOSPC when not enough MSI-X vectors available
  ixgbevf: Update MSI/MSI-X interrupts enablement code
  lpfc: Do not call pci_disable_msix() if pci_enable_msix() failed
  lpfc: Update MSI/MSI-X interrupts enablement code
  lpfc: Return -ENOSPC when not enough MSI-X vectors available
  lpfc: Make MSI-X initialization routine more readable
  megaraid: Update MSI/MSI-X interrupts enablement code
  mlx4: Update MSI/MSI-X interrupts enablement code
  mlx5: Fix memory leak in case not enough MSI-X vectors available
  mlx5: Return -ENOSPC when not enough MSI-X vectors available
  mlx5: Fix minimum number of MSI-Xs
  mlx5: Update MSI/MSI-X interrupts enablement code
  mthca: Update MSI/MSI-X interrupts enablement code
  niu: Update MSI/MSI-X interrupts enablement code
  ntb: Fix missed call to pci_enable_msix()
  ntb: Ensure number of MSIs on SNB is enough for the link interrupt
  ntb: Update MSI/MSI-X interrupts enablement code
  nvme: Update MSI/MSI-X interrupts enablement code
  pmcraid: Update MSI/MSI-X interrupts enablement code
  qib: Update MSI/MSI-X interrupts enablement code
  qla2xxx: Update MSI/MSI-X interrupts enablement code
  qlcnic: Return -ENOSPC when not enough MSI-X vectors available
  qlogic: Return -EINVAL in case MSI-X is not supported
  qlcnic: Remove redundant return operator
  qlcnic: Update MSI/MSI-X interrupts enablement code
  qlcnic: Make MSI-X initialization routine bit more readable
  qlge: Remove a redundant assignment
  qlge: Update MSI/MSI-X interrupts enablement code
  rapidio: Update MSI/MSI-X interrupts enablement code
  sfc: Update MSI/MSI-X interrupts enablement code
  tg3: Update MSI/MSI-X interrupts enablement code
  vmci: Update MSI/MSI-X interrupts enablement code
  vmxnet3: Return -EINVAL if number of requested MSI-Xs is not enough
  vmxnet3: Fixup a weird loop exit
  vmxnet3: Return -ENOSPC when not enough MSI-X vectors available
  vmxnet3: Limit number of rx queues to 1 if per-queue MSI-Xs failed
  vmxnet3: Update MSI/MSI-X interrupts enablement code
  vxge: Sanitize MSI-X allocation routine error codes
  vxge: Update MSI/MSI-X interrupts enablement code

 Documentation/PCI/MSI-HOWTO.txt                    |  123 +++++++++++---------
 arch/mips/pci/msi-octeon.c                         |    2 +-
 arch/powerpc/kernel/msi.c                          |    2 +-
 arch/powerpc/platforms/pseries/msi.c               |    4 +-
 arch/s390/pci/pci.c                                |    2 +-
 arch/x86/kernel/apic/io_apic.c                     |    2 +-
 drivers/ata/ahci.c                                 |   71 ++++++++---
 drivers/ata/ahci.h                                 |    1 +
 drivers/block/cciss.c                              |   22 ++--
 drivers/block/nvme-core.c                          |   48 ++++----
 drivers/dma/ioat/dma.c                             |   11 ++-
 drivers/infiniband/hw/mthca/mthca_main.c           |   16 ++-
 drivers/infiniband/hw/qib/qib_pcie.c               |    4 -
 drivers/misc/vmw_vmci/vmci_guest.c                 |   22 +++-
 drivers/net/ethernet/broadcom/bnx2.c               |   27 +++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |   54 ++++-----
 drivers/net/ethernet/broadcom/tg3.c                |   24 ++--
 drivers/net/ethernet/brocade/bna/bnad.c            |   34 +++---
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c    |   32 +++---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |   62 ++++++----
 .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c    |   49 +++++---
 drivers/net/ethernet/emulex/benet/be_main.c        |   36 +++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c       |   62 +++++-----
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c  |   18 +--
 drivers/net/ethernet/mellanox/mlx4/main.c          |   17 ++--
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   17 ++--
 drivers/net/ethernet/neterion/vxge/vxge-main.c     |   38 +++----
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |  108 +++++++++--------
 drivers/net/ethernet/qlogic/qlge/qlge_main.c       |   40 +++----
 drivers/net/ethernet/sfc/efx.c                     |   18 ++-
 drivers/net/ethernet/sun/niu.c                     |   20 ++--
 drivers/net/vmxnet3/vmxnet3_drv.c                  |   84 +++++++-------
 drivers/ntb/ntb_hw.c                               |   37 ++----
 drivers/ntb/ntb_hw.h                               |    2 -
 drivers/pci/msi.c                                  |   93 +++++----------
 drivers/pci/pcie/portdrv_core.c                    |    2 +
 drivers/rapidio/devices/tsi721.c                   |   27 +++--
 drivers/scsi/csiostor/csio_isr.c                   |   20 ++--
 drivers/scsi/hpsa.c                                |   35 +++---
 drivers/scsi/ipr.c                                 |   52 ++++-----
 drivers/scsi/lpfc/lpfc_init.c                      |   40 ++++---
 drivers/scsi/megaraid/megaraid_sas_base.c          |   20 ++--
 drivers/scsi/pmcraid.c                             |   23 ++--
 drivers/scsi/qla2xxx/qla_isr.c                     |   18 ++-
 include/linux/pci.h                                |    7 +-
 45 files changed, 744 insertions(+), 702 deletions(-)

-- 
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] ipv6: udp packets following an UFO enqueued packet need also be handled by UFO
From: Jiri Pirko @ 2013-10-02 10:33 UTC (permalink / raw)
  To: netdev, yoshfuji, davem, kuznet, jmorris, kaber, herbert,
	eric.dumazet
In-Reply-To: <20131001232534.GM10771@order.stressinduktion.org>

Wed, Oct 02, 2013 at 01:25:34AM CEST, hannes@stressinduktion.org wrote:
>On Tue, Oct 01, 2013 at 11:47:21PM +0200, Hannes Frederic Sowa wrote:
>> The strange thing is that if I don't do the IPV6_MTU setsockopt I don't
>> get an oops.
>
>This is incorrect, it just depends on the size of the writes and on the
>interface mtu.
>
>> IPv4 seems to work without problems, too.
>
>I also get kernel oopses from IPv4 now, too.

I'm not able to trigger this with ipv4. Can you please send strace
output for this as well?

Thanks

^ permalink raw reply

* Re: [PATCH] ipv6: udp packets following an UFO enqueued packet need also be handled by UFO
From: Jiri Pirko @ 2013-10-02  8:58 UTC (permalink / raw)
  To: netdev, yoshfuji, davem, kuznet, jmorris, kaber, herbert,
	eric.dumazet
In-Reply-To: <20131001232534.GM10771@order.stressinduktion.org>

Wed, Oct 02, 2013 at 01:25:34AM CEST, hannes@stressinduktion.org wrote:
>On Tue, Oct 01, 2013 at 11:47:21PM +0200, Hannes Frederic Sowa wrote:
>> The strange thing is that if I don't do the IPV6_MTU setsockopt I don't
>> get an oops.
>
>This is incorrect, it just depends on the size of the writes and on the
>interface mtu.
>
>> IPv4 seems to work without problems, too.
>
>I also get kernel oopses from IPv4 now, too.
>
>
>So, skb_is_gso is not accurate enough in the output and we have to check if we
>already started to append to skb frags. The following diff does resolve this
>issue in both the IPv4 and IPv6 non-page-append output path but I am not
>confident if it is correct:
>
>diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>index 6d56840..3565450 100644
>--- a/include/linux/skbuff.h
>+++ b/include/linux/skbuff.h
>@@ -1308,6 +1308,11 @@ static inline int skb_pagelen(const struct sk_buff *skb)
> 	return len + skb_headlen(skb);
> }
> 
>+static inline bool skb_has_frags(const struct sk_buff *skb)
>+{
>+	return skb_shinfo(skb)->nr_frags;
>+}
>+
> /**
>  * __skb_fill_page_desc - initialise a paged fragment in an skb
>  * @skb: buffer containing fragment to be initialised
>diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
>index 7d8357b..8dc3d8d 100644
>--- a/net/ipv4/ip_output.c
>+++ b/net/ipv4/ip_output.c
>@@ -836,7 +836,7 @@ static int __ip_append_data(struct sock *sk,
> 		csummode = CHECKSUM_PARTIAL;
> 
> 	cork->length += length;
>-	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
>+	if (((length > mtu) || (skb && skb_has_frags(skb))) &&
> 	    (sk->sk_protocol == IPPROTO_UDP) &&
> 	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) {
> 		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
>diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
>index a54c45c..ded4f6f 100644
>--- a/net/ipv6/ip6_output.c
>+++ b/net/ipv6/ip6_output.c
>@@ -1227,7 +1227,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
> 	skb = skb_peek_tail(&sk->sk_write_queue);
> 	cork->length += length;
> 	if (((length > mtu) ||
>-	     (skb && skb_is_gso(skb))) &&
>+	     (skb && skb_has_frags(skb))) &&
> 	    (sk->sk_protocol == IPPROTO_UDP) &&
> 	    (rt->dst.dev->features & NETIF_F_UFO)) {
> 		err = ip6_ufo_append_data(sk, getfrag, from, length,
>
>Greetings,
>
>  Hannes
>


This seems correct to me. sk_is_gso would work as well is you apply my
patch "[patch net] ip6_output: do skb ufo init for peeked non ufo skb as
well" which does the setting of gso_size.

Jiri

^ permalink raw reply

* [PATCH v3] l2tp: fix kernel panic when using IPv4-mapped IPv6 addresses
From: François Cachereul @ 2013-10-02  8:16 UTC (permalink / raw)
  To: David S. Miller, James Chapman; +Cc: netdev

IPv4 mapped addresses cause kernel panic.
The patch juste check whether the IPv6 address is an IPv4 mapped
address. If so, use IPv4 API instead of IPv6.

[  940.026915] general protection fault: 0000 [#1]
[  940.026915] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppox ppp_generic slhc loop psmouse
[  940.026915] CPU: 0 PID: 3184 Comm: memcheck-amd64- Not tainted 3.11.0+ #1
[  940.026915] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[  940.026915] task: ffff880007130e20 ti: ffff88000737e000 task.ti: ffff88000737e000
[  940.026915] RIP: 0010:[<ffffffff81333780>]  [<ffffffff81333780>] ip6_xmit+0x276/0x326
[  940.026915] RSP: 0018:ffff88000737fd28  EFLAGS: 00010286
[  940.026915] RAX: c748521a75ceff48 RBX: ffff880000c30800 RCX: 0000000000000000
[  940.026915] RDX: ffff88000075cc4e RSI: 0000000000000028 RDI: ffff8800060e5a40
[  940.026915] RBP: ffff8800060e5a40 R08: 0000000000000000 R09: ffff88000075cc90
[  940.026915] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88000737fda0
[  940.026915] R13: 0000000000000000 R14: 0000000000002000 R15: ffff880005d3b580
[  940.026915] FS:  00007f163dc5e800(0000) GS:ffffffff81623000(0000) knlGS:0000000000000000
[  940.026915] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  940.026915] CR2: 00000004032dc940 CR3: 0000000005c25000 CR4: 00000000000006f0
[  940.026915] Stack:
[  940.026915]  ffff88000075cc4e ffffffff81694e90 ffff880000c30b38 0000000000000020
[  940.026915]  11000000523c4bac ffff88000737fdb4 0000000000000000 ffff880000c30800
[  940.026915]  ffff880005d3b580 ffff880000c30b38 ffff8800060e5a40 0000000000000020
[  940.026915] Call Trace:
[  940.026915]  [<ffffffff81356cc3>] ? inet6_csk_xmit+0xa4/0xc4
[  940.026915]  [<ffffffffa0038535>] ? l2tp_xmit_skb+0x503/0x55a [l2tp_core]
[  940.026915]  [<ffffffff812b8d3b>] ? pskb_expand_head+0x161/0x214
[  940.026915]  [<ffffffffa003e91d>] ? pppol2tp_xmit+0xf2/0x143 [l2tp_ppp]
[  940.026915]  [<ffffffffa00292e0>] ? ppp_channel_push+0x36/0x8b [ppp_generic]
[  940.026915]  [<ffffffffa00293fe>] ? ppp_write+0xaf/0xc5 [ppp_generic]
[  940.026915]  [<ffffffff8110ead4>] ? vfs_write+0xa2/0x106
[  940.026915]  [<ffffffff8110edd6>] ? SyS_write+0x56/0x8a
[  940.026915]  [<ffffffff81378ac0>] ? system_call_fastpath+0x16/0x1b
[  940.026915] Code: 00 49 8b 8f d8 00 00 00 66 83 7c 11 02 00 74 60 49
8b 47 58 48 83 e0 fe 48 8b 80 18 01 00 00 48 85 c0 74 13 48 8b 80 78 02
00 00 <48> ff 40 28 41 8b 57 68 48 01 50 30 48 8b 54 24 08 49 c7 c1 51
[  940.026915] RIP  [<ffffffff81333780>] ip6_xmit+0x276/0x326
[  940.026915]  RSP <ffff88000737fd28>
[  940.057945] ---[ end trace be8aba9a61c8b7f3 ]---
[  940.058583] Kernel panic - not syncing: Fatal exception in interrupt

Signed-off-by: François CACHEREUL <f.cachereul@alphalink.fr>
---
v3: add panic message
Sorry for the dumb oversight

 net/l2tp/l2tp_core.c |   27 +++++++++++++++++++++++----
 net/l2tp/l2tp_core.h |    3 +++
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index feae495..aedaa2c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -496,6 +496,7 @@ out:
 static inline int l2tp_verify_udp_checksum(struct sock *sk,
 					   struct sk_buff *skb)
 {
+	struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
 	struct udphdr *uh = udp_hdr(skb);
 	u16 ulen = ntohs(uh->len);
 	__wsum psum;
@@ -504,7 +505,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk,
 		return 0;
 
 #if IS_ENABLED(CONFIG_IPV6)
-	if (sk->sk_family == PF_INET6) {
+	if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) {
 		if (!uh->check) {
 			LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n");
 			return 1;
@@ -1128,7 +1129,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 	/* Queue the packet to IP for output */
 	skb->local_df = 1;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (skb->sk->sk_family == PF_INET6)
+	if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped)
 		error = inet6_csk_xmit(skb, NULL);
 	else
 #endif
@@ -1255,7 +1256,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 
 		/* Calculate UDP checksum if configured to do so */
 #if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == PF_INET6)
+		if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
 			l2tp_xmit_ipv6_csum(sk, skb, udp_len);
 		else
 #endif
@@ -1704,6 +1705,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	if (cfg != NULL)
 		tunnel->debug = cfg->debug;
 
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == PF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		if (ipv6_addr_v4mapped(&np->saddr) &&
+		    ipv6_addr_v4mapped(&np->daddr)) {
+			struct inet_sock *inet = inet_sk(sk);
+
+			tunnel->v4mapped = true;
+			inet->inet_saddr = np->saddr.s6_addr32[3];
+			inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3];
+			inet->inet_daddr = np->daddr.s6_addr32[3];
+		} else {
+			tunnel->v4mapped = false;
+		}
+	}
+#endif
+
 	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
 	tunnel->encap = encap;
 	if (encap == L2TP_ENCAPTYPE_UDP) {
@@ -1712,7 +1731,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 		udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
 		udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
 #if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == PF_INET6)
+		if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
 			udpv6_encap_enable();
 		else
 #endif
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 66a559b..6f251cb 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -194,6 +194,9 @@ struct l2tp_tunnel {
 	struct sock		*sock;		/* Parent socket */
 	int			fd;		/* Parent fd, if tunnel socket
 						 * was created by userspace */
+#if IS_ENABLED(CONFIG_IPV6)
+	bool			v4mapped;
+#endif
 
 	struct work_struct	del_work;
 
-- 
1.7.10.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox