Netdev List
 help / color / mirror / Atom feed
* [net-next PATCH v3 1/1] atm: remove deprecated use of pci api
From: chas williams - CONTRACTOR @ 2015-01-16 13:57 UTC (permalink / raw)
  To: David Laight
  Cc: 'David Miller', lambert.quentin@gmail.com,
	linux-atm-general@lists.sourceforge.net, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6D1CAC79B3@AcuExch.aculab.com>



Signed-off-by: Chas Williams - CONTRACTOR <chas@cmf.nrl.navy.mil>
---
 drivers/atm/eni.c       |  33 +++++++------
 drivers/atm/fore200e.c  |  22 +++++----
 drivers/atm/he.c        | 125 +++++++++++++++++++++++++-----------------------
 drivers/atm/he.h        |   4 +-
 drivers/atm/idt77252.c  | 107 ++++++++++++++++++++++-------------------
 drivers/atm/iphase.c    |  54 +++++++++++----------
 drivers/atm/lanai.c     |  14 ++----
 drivers/atm/nicstar.c   |  60 +++++++++++------------
 drivers/atm/solos-pci.c |  26 +++++-----
 drivers/atm/zatm.c      |  17 ++++---
 10 files changed, 243 insertions(+), 219 deletions(-)

diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index c7fab3e..6339efd 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -354,9 +354,9 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb,
 	eni_vcc = ENI_VCC(vcc);
 	paddr = 0; /* GCC, shut up */
 	if (skb) {
-		paddr = pci_map_single(eni_dev->pci_dev,skb->data,skb->len,
-		    PCI_DMA_FROMDEVICE);
-		if (pci_dma_mapping_error(eni_dev->pci_dev, paddr))
+		paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len,
+				       DMA_FROM_DEVICE);
+		if (dma_mapping_error(&eni_dev->pci_dev->dev, paddr))
 			goto dma_map_error;
 		ENI_PRV_PADDR(skb) = paddr;
 		if (paddr & 3)
@@ -481,8 +481,8 @@ rx_enqueued++;
 
 trouble:
 	if (paddr)
-		pci_unmap_single(eni_dev->pci_dev,paddr,skb->len,
-		    PCI_DMA_FROMDEVICE);
+		dma_unmap_single(&eni_dev->pci_dev->dev,paddr,skb->len,
+				 DMA_FROM_DEVICE);
 dma_map_error:
 	if (skb) dev_kfree_skb_irq(skb);
 	return -1;
@@ -758,8 +758,8 @@ rx_dequeued++;
 		}
 		eni_vcc->rxing--;
 		eni_vcc->rx_pos = ENI_PRV_POS(skb) & (eni_vcc->words-1);
-		pci_unmap_single(eni_dev->pci_dev,ENI_PRV_PADDR(skb),skb->len,
-		    PCI_DMA_TODEVICE);
+		dma_unmap_single(&eni_dev->pci_dev->dev,ENI_PRV_PADDR(skb),skb->len,
+			         DMA_TO_DEVICE);
 		if (!skb->len) dev_kfree_skb_irq(skb);
 		else {
 			EVENT("pushing (len=%ld)\n",skb->len,0);
@@ -1112,8 +1112,8 @@ DPRINTK("iovcnt = %d\n",skb_shinfo(skb)->nr_frags);
 		    vcc->dev->number);
 		return enq_jam;
 	}
-	paddr = pci_map_single(eni_dev->pci_dev,skb->data,skb->len,
-	    PCI_DMA_TODEVICE);
+	paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len,
+			       DMA_TO_DEVICE);
 	ENI_PRV_PADDR(skb) = paddr;
 	/* prepare DMA queue entries */
 	j = 0;
@@ -1226,8 +1226,8 @@ static void dequeue_tx(struct atm_dev *dev)
 			break;
 		}
 		ENI_VCC(vcc)->txing -= ENI_PRV_SIZE(skb);
-		pci_unmap_single(eni_dev->pci_dev,ENI_PRV_PADDR(skb),skb->len,
-		    PCI_DMA_TODEVICE);
+		dma_unmap_single(&eni_dev->pci_dev->dev,ENI_PRV_PADDR(skb),skb->len,
+				 DMA_TO_DEVICE);
 		if (vcc->pop) vcc->pop(vcc,skb);
 		else dev_kfree_skb_irq(skb);
 		atomic_inc(&vcc->stats->tx);
@@ -2240,13 +2240,18 @@ static int eni_init_one(struct pci_dev *pci_dev,
 	if (rc < 0)
 		goto out;
 
+	rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
+	if (rc < 0)
+		goto out;
+
 	rc = -ENOMEM;
 	eni_dev = kmalloc(sizeof(struct eni_dev), GFP_KERNEL);
 	if (!eni_dev)
 		goto err_disable;
 
 	zero = &eni_dev->zero;
-	zero->addr = pci_alloc_consistent(pci_dev, ENI_ZEROES_SIZE, &zero->dma);
+	zero->addr = dma_alloc_coherent(&pci_dev->dev,
+					ENI_ZEROES_SIZE, &zero->dma, GFP_KERNEL);
 	if (!zero->addr)
 		goto err_kfree;
 
@@ -2277,7 +2282,7 @@ err_eni_release:
 err_unregister:
 	atm_dev_deregister(dev);
 err_free_consistent:
-	pci_free_consistent(pci_dev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
+	dma_free_coherent(&pci_dev->dev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
 err_kfree:
 	kfree(eni_dev);
 err_disable:
@@ -2302,7 +2307,7 @@ static void eni_remove_one(struct pci_dev *pdev)
 
 	eni_do_release(dev);
 	atm_dev_deregister(dev);
-	pci_free_consistent(pdev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
+	dma_free_coherent(&pdev->dev, ENI_ZEROES_SIZE, zero->addr, zero->dma);
 	kfree(ed);
 	pci_disable_device(pdev);
 }
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index d5d9eaf..75dde90 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -425,7 +425,7 @@ static void fore200e_pca_write(u32 val, volatile u32 __iomem *addr)
 static u32
 fore200e_pca_dma_map(struct fore200e* fore200e, void* virt_addr, int size, int direction)
 {
-    u32 dma_addr = pci_map_single((struct pci_dev*)fore200e->bus_dev, virt_addr, size, direction);
+    u32 dma_addr = dma_map_single(&((struct pci_dev *) fore200e->bus_dev)->dev, virt_addr, size, direction);
 
     DPRINTK(3, "PCI DVMA mapping: virt_addr = 0x%p, size = %d, direction = %d,  --> dma_addr = 0x%08x\n",
 	    virt_addr, size, direction, dma_addr);
@@ -440,7 +440,7 @@ fore200e_pca_dma_unmap(struct fore200e* fore200e, u32 dma_addr, int size, int di
     DPRINTK(3, "PCI DVMA unmapping: dma_addr = 0x%08x, size = %d, direction = %d\n",
 	    dma_addr, size, direction);
 
-    pci_unmap_single((struct pci_dev*)fore200e->bus_dev, dma_addr, size, direction);
+    dma_unmap_single(&((struct pci_dev *) fore200e->bus_dev)->dev, dma_addr, size, direction);
 }
 
 
@@ -449,7 +449,7 @@ fore200e_pca_dma_sync_for_cpu(struct fore200e* fore200e, u32 dma_addr, int size,
 {
     DPRINTK(3, "PCI DVMA sync: dma_addr = 0x%08x, size = %d, direction = %d\n", dma_addr, size, direction);
 
-    pci_dma_sync_single_for_cpu((struct pci_dev*)fore200e->bus_dev, dma_addr, size, direction);
+    dma_sync_single_for_cpu(&((struct pci_dev *) fore200e->bus_dev)->dev, dma_addr, size, direction);
 }
 
 static void
@@ -457,7 +457,7 @@ fore200e_pca_dma_sync_for_device(struct fore200e* fore200e, u32 dma_addr, int si
 {
     DPRINTK(3, "PCI DVMA sync: dma_addr = 0x%08x, size = %d, direction = %d\n", dma_addr, size, direction);
 
-    pci_dma_sync_single_for_device((struct pci_dev*)fore200e->bus_dev, dma_addr, size, direction);
+    dma_sync_single_for_device(&((struct pci_dev *) fore200e->bus_dev)->dev, dma_addr, size, direction);
 }
 
 
@@ -470,9 +470,10 @@ fore200e_pca_dma_chunk_alloc(struct fore200e* fore200e, struct chunk* chunk,
 {
     /* returned chunks are page-aligned */
     chunk->alloc_size = size * nbr;
-    chunk->alloc_addr = pci_alloc_consistent((struct pci_dev*)fore200e->bus_dev,
-					     chunk->alloc_size,
-					     &chunk->dma_addr);
+    chunk->alloc_addr = dma_alloc_coherent(&((struct pci_dev *) fore200e->bus_dev)->dev,
+					   chunk->alloc_size,
+					   &chunk->dma_addr,
+					   GFP_KERNEL);
     
     if ((chunk->alloc_addr == NULL) || (chunk->dma_addr == 0))
 	return -ENOMEM;
@@ -488,7 +489,7 @@ fore200e_pca_dma_chunk_alloc(struct fore200e* fore200e, struct chunk* chunk,
 static void
 fore200e_pca_dma_chunk_free(struct fore200e* fore200e, struct chunk* chunk)
 {
-    pci_free_consistent((struct pci_dev*)fore200e->bus_dev,
+    dma_free_coherent(&((struct pci_dev *) fore200e->bus_dev)->dev,
 			chunk->alloc_size,
 			chunk->alloc_addr,
 			chunk->dma_addr);
@@ -2707,6 +2708,11 @@ static int fore200e_pca_detect(struct pci_dev *pci_dev,
 	err = -EINVAL;
 	goto out;
     }
+
+    if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32))) {
+	err = -EINVAL;
+	goto out;
+    }
     
     fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL);
     if (fore200e == NULL) {
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index c39702b..93dca2e 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -359,7 +359,7 @@ static int he_init_one(struct pci_dev *pci_dev,
 
 	if (pci_enable_device(pci_dev))
 		return -EIO;
-	if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32)) != 0) {
+	if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)) != 0) {
 		printk(KERN_WARNING "he: no suitable dma available\n");
 		err = -EIO;
 		goto init_one_failure;
@@ -533,9 +533,9 @@ static void he_init_tx_lbfp(struct he_dev *he_dev)
 
 static int he_init_tpdrq(struct he_dev *he_dev)
 {
-	he_dev->tpdrq_base = pci_zalloc_consistent(he_dev->pci_dev,
-						   CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq),
-						   &he_dev->tpdrq_phys);
+	he_dev->tpdrq_base = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+						 CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq),
+						 &he_dev->tpdrq_phys, GFP_KERNEL);
 	if (he_dev->tpdrq_base == NULL) {
 		hprintk("failed to alloc tpdrq\n");
 		return -ENOMEM;
@@ -796,16 +796,16 @@ static int he_init_group(struct he_dev *he_dev, int group)
 	}
 
 	/* large buffer pool */
-	he_dev->rbpl_pool = pci_pool_create("rbpl", he_dev->pci_dev,
+	he_dev->rbpl_pool = dma_pool_create("rbpl", &he_dev->pci_dev->dev,
 					    CONFIG_RBPL_BUFSIZE, 64, 0);
 	if (he_dev->rbpl_pool == NULL) {
 		hprintk("unable to create rbpl pool\n");
 		goto out_free_rbpl_virt;
 	}
 
-	he_dev->rbpl_base = pci_zalloc_consistent(he_dev->pci_dev,
-						  CONFIG_RBPL_SIZE * sizeof(struct he_rbp),
-						  &he_dev->rbpl_phys);
+	he_dev->rbpl_base = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+						CONFIG_RBPL_SIZE * sizeof(struct he_rbp),
+						&he_dev->rbpl_phys, GFP_KERNEL);
 	if (he_dev->rbpl_base == NULL) {
 		hprintk("failed to alloc rbpl_base\n");
 		goto out_destroy_rbpl_pool;
@@ -815,7 +815,7 @@ static int he_init_group(struct he_dev *he_dev, int group)
 
 	for (i = 0; i < CONFIG_RBPL_SIZE; ++i) {
 
-		heb = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|GFP_DMA, &mapping);
+		heb = dma_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL, &mapping);
 		if (!heb)
 			goto out_free_rbpl;
 		heb->mapping = mapping;
@@ -842,9 +842,9 @@ static int he_init_group(struct he_dev *he_dev, int group)
 
 	/* rx buffer ready queue */
 
-	he_dev->rbrq_base = pci_zalloc_consistent(he_dev->pci_dev,
-						  CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
-						  &he_dev->rbrq_phys);
+	he_dev->rbrq_base = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+						CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
+						&he_dev->rbrq_phys, GFP_KERNEL);
 	if (he_dev->rbrq_base == NULL) {
 		hprintk("failed to allocate rbrq\n");
 		goto out_free_rbpl;
@@ -866,9 +866,9 @@ static int he_init_group(struct he_dev *he_dev, int group)
 
 	/* tx buffer ready queue */
 
-	he_dev->tbrq_base = pci_zalloc_consistent(he_dev->pci_dev,
-						  CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
-						  &he_dev->tbrq_phys);
+	he_dev->tbrq_base = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+						CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
+						&he_dev->tbrq_phys, GFP_KERNEL);
 	if (he_dev->tbrq_base == NULL) {
 		hprintk("failed to allocate tbrq\n");
 		goto out_free_rbpq_base;
@@ -884,18 +884,18 @@ static int he_init_group(struct he_dev *he_dev, int group)
 	return 0;
 
 out_free_rbpq_base:
-	pci_free_consistent(he_dev->pci_dev, CONFIG_RBRQ_SIZE *
-			sizeof(struct he_rbrq), he_dev->rbrq_base,
-			he_dev->rbrq_phys);
+	dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_RBRQ_SIZE *
+			  sizeof(struct he_rbrq), he_dev->rbrq_base,
+			  he_dev->rbrq_phys);
 out_free_rbpl:
 	list_for_each_entry_safe(heb, next, &he_dev->rbpl_outstanding, entry)
-		pci_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
+		dma_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
 
-	pci_free_consistent(he_dev->pci_dev, CONFIG_RBPL_SIZE *
-			sizeof(struct he_rbp), he_dev->rbpl_base,
-			he_dev->rbpl_phys);
+	dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_RBPL_SIZE *
+			  sizeof(struct he_rbp), he_dev->rbpl_base,
+			  he_dev->rbpl_phys);
 out_destroy_rbpl_pool:
-	pci_pool_destroy(he_dev->rbpl_pool);
+	dma_pool_destroy(he_dev->rbpl_pool);
 out_free_rbpl_virt:
 	kfree(he_dev->rbpl_virt);
 out_free_rbpl_table:
@@ -911,8 +911,11 @@ static int he_init_irq(struct he_dev *he_dev)
 	/* 2.9.3.5  tail offset for each interrupt queue is located after the
 		    end of the interrupt queue */
 
-	he_dev->irq_base = pci_alloc_consistent(he_dev->pci_dev,
-			(CONFIG_IRQ_SIZE+1) * sizeof(struct he_irq), &he_dev->irq_phys);
+	he_dev->irq_base = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+					       (CONFIG_IRQ_SIZE + 1)
+					       * sizeof(struct he_irq),
+					       &he_dev->irq_phys,
+					       GFP_KERNEL);
 	if (he_dev->irq_base == NULL) {
 		hprintk("failed to allocate irq\n");
 		return -ENOMEM;
@@ -1419,10 +1422,10 @@ static int he_start(struct atm_dev *dev)
 
 	he_init_tpdrq(he_dev);
 
-	he_dev->tpd_pool = pci_pool_create("tpd", he_dev->pci_dev,
-		sizeof(struct he_tpd), TPD_ALIGNMENT, 0);
+	he_dev->tpd_pool = dma_pool_create("tpd", &he_dev->pci_dev->dev,
+					   sizeof(struct he_tpd), TPD_ALIGNMENT, 0);
 	if (he_dev->tpd_pool == NULL) {
-		hprintk("unable to create tpd pci_pool\n");
+		hprintk("unable to create tpd dma_pool\n");
 		return -ENOMEM;         
 	}
 
@@ -1459,9 +1462,9 @@ static int he_start(struct atm_dev *dev)
 
 	/* host status page */
 
-	he_dev->hsp = pci_zalloc_consistent(he_dev->pci_dev,
-					    sizeof(struct he_hsp),
-					    &he_dev->hsp_phys);
+	he_dev->hsp = dma_zalloc_coherent(&he_dev->pci_dev->dev,
+					  sizeof(struct he_hsp),
+					  &he_dev->hsp_phys, GFP_KERNEL);
 	if (he_dev->hsp == NULL) {
 		hprintk("failed to allocate host status page\n");
 		return -ENOMEM;
@@ -1558,41 +1561,41 @@ he_stop(struct he_dev *he_dev)
 		free_irq(he_dev->irq, he_dev);
 
 	if (he_dev->irq_base)
-		pci_free_consistent(he_dev->pci_dev, (CONFIG_IRQ_SIZE+1)
-			* sizeof(struct he_irq), he_dev->irq_base, he_dev->irq_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, (CONFIG_IRQ_SIZE + 1)
+				  * sizeof(struct he_irq), he_dev->irq_base, he_dev->irq_phys);
 
 	if (he_dev->hsp)
-		pci_free_consistent(he_dev->pci_dev, sizeof(struct he_hsp),
-						he_dev->hsp, he_dev->hsp_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, sizeof(struct he_hsp),
+				  he_dev->hsp, he_dev->hsp_phys);
 
 	if (he_dev->rbpl_base) {
 		list_for_each_entry_safe(heb, next, &he_dev->rbpl_outstanding, entry)
-			pci_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
+			dma_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
 
-		pci_free_consistent(he_dev->pci_dev, CONFIG_RBPL_SIZE
-			* sizeof(struct he_rbp), he_dev->rbpl_base, he_dev->rbpl_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_RBPL_SIZE
+				  * sizeof(struct he_rbp), he_dev->rbpl_base, he_dev->rbpl_phys);
 	}
 
 	kfree(he_dev->rbpl_virt);
 	kfree(he_dev->rbpl_table);
 
 	if (he_dev->rbpl_pool)
-		pci_pool_destroy(he_dev->rbpl_pool);
+		dma_pool_destroy(he_dev->rbpl_pool);
 
 	if (he_dev->rbrq_base)
-		pci_free_consistent(he_dev->pci_dev, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
-							he_dev->rbrq_base, he_dev->rbrq_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
+				  he_dev->rbrq_base, he_dev->rbrq_phys);
 
 	if (he_dev->tbrq_base)
-		pci_free_consistent(he_dev->pci_dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
-							he_dev->tbrq_base, he_dev->tbrq_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
+				  he_dev->tbrq_base, he_dev->tbrq_phys);
 
 	if (he_dev->tpdrq_base)
-		pci_free_consistent(he_dev->pci_dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
-							he_dev->tpdrq_base, he_dev->tpdrq_phys);
+		dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
+				  he_dev->tpdrq_base, he_dev->tpdrq_phys);
 
 	if (he_dev->tpd_pool)
-		pci_pool_destroy(he_dev->tpd_pool);
+		dma_pool_destroy(he_dev->tpd_pool);
 
 	if (he_dev->pci_dev) {
 		pci_read_config_word(he_dev->pci_dev, PCI_COMMAND, &command);
@@ -1610,7 +1613,7 @@ __alloc_tpd(struct he_dev *he_dev)
 	struct he_tpd *tpd;
 	dma_addr_t mapping;
 
-	tpd = pci_pool_alloc(he_dev->tpd_pool, GFP_ATOMIC|GFP_DMA, &mapping);
+	tpd = dma_pool_alloc(he_dev->tpd_pool, GFP_ATOMIC, &mapping);
 	if (tpd == NULL)
 		return NULL;
 			
@@ -1681,7 +1684,7 @@ he_service_rbrq(struct he_dev *he_dev, int group)
 			if (!RBRQ_HBUF_ERR(he_dev->rbrq_head)) {
 				clear_bit(i, he_dev->rbpl_table);
 				list_del(&heb->entry);
-				pci_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
+				dma_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
 			}
 					
 			goto next_rbrq_entry;
@@ -1774,7 +1777,7 @@ return_host_buffers:
 		++pdus_assembled;
 
 		list_for_each_entry_safe(heb, next, &he_vcc->buffers, entry)
-			pci_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
+			dma_pool_free(he_dev->rbpl_pool, heb, heb->mapping);
 		INIT_LIST_HEAD(&he_vcc->buffers);
 		he_vcc->pdu_len = 0;
 
@@ -1843,10 +1846,10 @@ he_service_tbrq(struct he_dev *he_dev, int group)
 
 		for (slot = 0; slot < TPD_MAXIOV; ++slot) {
 			if (tpd->iovec[slot].addr)
-				pci_unmap_single(he_dev->pci_dev,
+				dma_unmap_single(&he_dev->pci_dev->dev,
 					tpd->iovec[slot].addr,
 					tpd->iovec[slot].len & TPD_LEN_MASK,
-							PCI_DMA_TODEVICE);
+							DMA_TO_DEVICE);
 			if (tpd->iovec[slot].len & TPD_LST)
 				break;
 				
@@ -1861,7 +1864,7 @@ he_service_tbrq(struct he_dev *he_dev, int group)
 
 next_tbrq_entry:
 		if (tpd)
-			pci_pool_free(he_dev->tpd_pool, tpd, TPD_ADDR(tpd->status));
+			dma_pool_free(he_dev->tpd_pool, tpd, TPD_ADDR(tpd->status));
 		he_dev->tbrq_head = (struct he_tbrq *)
 				((unsigned long) he_dev->tbrq_base |
 					TBRQ_MASK(he_dev->tbrq_head + 1));
@@ -1905,7 +1908,7 @@ he_service_rbpl(struct he_dev *he_dev, int group)
 		}
 		he_dev->rbpl_hint = i + 1;
 
-		heb = pci_pool_alloc(he_dev->rbpl_pool, GFP_ATOMIC|GFP_DMA, &mapping);
+		heb = dma_pool_alloc(he_dev->rbpl_pool, GFP_ATOMIC, &mapping);
 		if (!heb)
 			break;
 		heb->mapping = mapping;
@@ -2084,10 +2087,10 @@ __enqueue_tpd(struct he_dev *he_dev, struct he_tpd *tpd, unsigned cid)
 			 */
 			for (slot = 0; slot < TPD_MAXIOV; ++slot) {
 				if (tpd->iovec[slot].addr)
-					pci_unmap_single(he_dev->pci_dev,
+					dma_unmap_single(&he_dev->pci_dev->dev,
 						tpd->iovec[slot].addr,
 						tpd->iovec[slot].len & TPD_LEN_MASK,
-								PCI_DMA_TODEVICE);
+								DMA_TO_DEVICE);
 			}
 			if (tpd->skb) {
 				if (tpd->vcc->pop)
@@ -2096,7 +2099,7 @@ __enqueue_tpd(struct he_dev *he_dev, struct he_tpd *tpd, unsigned cid)
 					dev_kfree_skb_any(tpd->skb);
 				atomic_inc(&tpd->vcc->stats->tx_err);
 			}
-			pci_pool_free(he_dev->tpd_pool, tpd, TPD_ADDR(tpd->status));
+			dma_pool_free(he_dev->tpd_pool, tpd, TPD_ADDR(tpd->status));
 			return;
 		}
 	}
@@ -2550,8 +2553,8 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb)
 	}
 
 #ifdef USE_SCATTERGATHER
-	tpd->iovec[slot].addr = pci_map_single(he_dev->pci_dev, skb->data,
-				skb_headlen(skb), PCI_DMA_TODEVICE);
+	tpd->iovec[slot].addr = dma_map_single(&he_dev->pci_dev->dev, skb->data,
+				skb_headlen(skb), DMA_TO_DEVICE);
 	tpd->iovec[slot].len = skb_headlen(skb);
 	++slot;
 
@@ -2579,9 +2582,9 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb)
 			slot = 0;
 		}
 
-		tpd->iovec[slot].addr = pci_map_single(he_dev->pci_dev,
+		tpd->iovec[slot].addr = dma_map_single(&he_dev->pci_dev->dev,
 			(void *) page_address(frag->page) + frag->page_offset,
-				frag->size, PCI_DMA_TODEVICE);
+				frag->size, DMA_TO_DEVICE);
 		tpd->iovec[slot].len = frag->size;
 		++slot;
 
@@ -2589,7 +2592,7 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	tpd->iovec[slot - 1].len |= TPD_LST;
 #else
-	tpd->address0 = pci_map_single(he_dev->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE);
+	tpd->address0 = dma_map_single(&he_dev->pci_dev->dev, skb->data, skb->len, DMA_TO_DEVICE);
 	tpd->length0 = skb->len | TPD_LST;
 #endif
 	tpd->status |= TPD_INT;
diff --git a/drivers/atm/he.h b/drivers/atm/he.h
index 110a27d..f3f5367 100644
--- a/drivers/atm/he.h
+++ b/drivers/atm/he.h
@@ -281,7 +281,7 @@ struct he_dev {
 	int irq_peak;
 
 	struct tasklet_struct tasklet;
-	struct pci_pool *tpd_pool;
+	struct dma_pool *tpd_pool;
 	struct list_head outstanding_tpds;
 
 	dma_addr_t tpdrq_phys;
@@ -296,7 +296,7 @@ struct he_dev {
 	struct he_buff **rbpl_virt;
 	unsigned long *rbpl_table;
 	unsigned long rbpl_hint;
-	struct pci_pool *rbpl_pool;
+	struct dma_pool *rbpl_pool;
 	dma_addr_t rbpl_phys;
 	struct he_rbp *rbpl_base, *rbpl_tail;
 	struct list_head rbpl_outstanding;
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 2b24ed0..074616b 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -641,7 +641,8 @@ alloc_scq(struct idt77252_dev *card, int class)
 	scq = kzalloc(sizeof(struct scq_info), GFP_KERNEL);
 	if (!scq)
 		return NULL;
-	scq->base = pci_zalloc_consistent(card->pcidev, SCQ_SIZE, &scq->paddr);
+	scq->base = dma_zalloc_coherent(&card->pcidev->dev, SCQ_SIZE,
+					&scq->paddr, GFP_KERNEL);
 	if (scq->base == NULL) {
 		kfree(scq);
 		return NULL;
@@ -669,12 +670,12 @@ free_scq(struct idt77252_dev *card, struct scq_info *scq)
 	struct sk_buff *skb;
 	struct atm_vcc *vcc;
 
-	pci_free_consistent(card->pcidev, SCQ_SIZE,
-			    scq->base, scq->paddr);
+	dma_free_coherent(&card->pcidev->dev, SCQ_SIZE,
+			  scq->base, scq->paddr);
 
 	while ((skb = skb_dequeue(&scq->transmit))) {
-		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->len, PCI_DMA_TODEVICE);
+		dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+				 skb->len, DMA_TO_DEVICE);
 
 		vcc = ATM_SKB(skb)->vcc;
 		if (vcc->pop)
@@ -684,8 +685,8 @@ free_scq(struct idt77252_dev *card, struct scq_info *scq)
 	}
 
 	while ((skb = skb_dequeue(&scq->pending))) {
-		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->len, PCI_DMA_TODEVICE);
+		dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+				 skb->len, DMA_TO_DEVICE);
 
 		vcc = ATM_SKB(skb)->vcc;
 		if (vcc->pop)
@@ -800,8 +801,8 @@ drain_scq(struct idt77252_dev *card, struct vc_map *vc)
 	if (skb) {
 		TXPRINTK("%s: freeing skb at %p.\n", card->name, skb);
 
-		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->len, PCI_DMA_TODEVICE);
+		dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+				 skb->len, DMA_TO_DEVICE);
 
 		vcc = ATM_SKB(skb)->vcc;
 
@@ -846,8 +847,8 @@ queue_skb(struct idt77252_dev *card, struct vc_map *vc,
 	tbd = &IDT77252_PRV_TBD(skb);
 	vcc = ATM_SKB(skb)->vcc;
 
-	IDT77252_PRV_PADDR(skb) = pci_map_single(card->pcidev, skb->data,
-						 skb->len, PCI_DMA_TODEVICE);
+	IDT77252_PRV_PADDR(skb) = dma_map_single(&card->pcidev->dev, skb->data,
+						 skb->len, DMA_TO_DEVICE);
 
 	error = -EINVAL;
 
@@ -924,8 +925,8 @@ done:
 	return 0;
 
 errout:
-	pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-			 skb->len, PCI_DMA_TODEVICE);
+	dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+			 skb->len, DMA_TO_DEVICE);
 	return error;
 }
 
@@ -970,8 +971,8 @@ init_rsq(struct idt77252_dev *card)
 {
 	struct rsq_entry *rsqe;
 
-	card->rsq.base = pci_zalloc_consistent(card->pcidev, RSQSIZE,
-					       &card->rsq.paddr);
+	card->rsq.base = dma_zalloc_coherent(&card->pcidev->dev, RSQSIZE,
+					     &card->rsq.paddr, GFP_KERNEL);
 	if (card->rsq.base == NULL) {
 		printk("%s: can't allocate RSQ.\n", card->name);
 		return -1;
@@ -1001,8 +1002,8 @@ init_rsq(struct idt77252_dev *card)
 static void
 deinit_rsq(struct idt77252_dev *card)
 {
-	pci_free_consistent(card->pcidev, RSQSIZE,
-			    card->rsq.base, card->rsq.paddr);
+	dma_free_coherent(&card->pcidev->dev, RSQSIZE,
+			  card->rsq.base, card->rsq.paddr);
 }
 
 static void
@@ -1057,9 +1058,9 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 
 	vcc = vc->rx_vcc;
 
-	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(skb),
-				    skb_end_pointer(skb) - skb->data,
-				    PCI_DMA_FROMDEVICE);
+	dma_sync_single_for_cpu(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+				skb_end_pointer(skb) - skb->data,
+				DMA_FROM_DEVICE);
 
 	if ((vcc->qos.aal == ATM_AAL0) ||
 	    (vcc->qos.aal == ATM_AAL34)) {
@@ -1180,9 +1181,9 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 			return;
 		}
 
-		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
+		dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
 				 skb_end_pointer(skb) - skb->data,
-				 PCI_DMA_FROMDEVICE);
+				 DMA_FROM_DEVICE);
 		sb_pool_remove(card, skb);
 
 		skb_trim(skb, len);
@@ -1254,9 +1255,9 @@ idt77252_rx_raw(struct idt77252_dev *card)
 	head = IDT77252_PRV_PADDR(queue) + (queue->data - queue->head - 16);
 	tail = readl(SAR_REG_RAWCT);
 
-	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(queue),
-				    skb_end_offset(queue) - 16,
-				    PCI_DMA_FROMDEVICE);
+	dma_sync_single_for_cpu(&card->pcidev->dev, IDT77252_PRV_PADDR(queue),
+				skb_end_offset(queue) - 16,
+				DMA_FROM_DEVICE);
 
 	while (head != tail) {
 		unsigned int vpi, vci;
@@ -1348,11 +1349,11 @@ drop:
 			if (next) {
 				card->raw_cell_head = next;
 				queue = card->raw_cell_head;
-				pci_dma_sync_single_for_cpu(card->pcidev,
-							    IDT77252_PRV_PADDR(queue),
-							    (skb_end_pointer(queue) -
-							     queue->data),
-							    PCI_DMA_FROMDEVICE);
+				dma_sync_single_for_cpu(&card->pcidev->dev,
+							IDT77252_PRV_PADDR(queue),
+							(skb_end_pointer(queue) -
+							 queue->data),
+							DMA_FROM_DEVICE);
 			} else {
 				card->raw_cell_head = NULL;
 				printk("%s: raw cell queue overrun\n",
@@ -1375,8 +1376,8 @@ init_tsq(struct idt77252_dev *card)
 {
 	struct tsq_entry *tsqe;
 
-	card->tsq.base = pci_alloc_consistent(card->pcidev, RSQSIZE,
-					      &card->tsq.paddr);
+	card->tsq.base = dma_alloc_coherent(&card->pcidev->dev, RSQSIZE,
+					    &card->tsq.paddr, GFP_KERNEL);
 	if (card->tsq.base == NULL) {
 		printk("%s: can't allocate TSQ.\n", card->name);
 		return -1;
@@ -1398,8 +1399,8 @@ init_tsq(struct idt77252_dev *card)
 static void
 deinit_tsq(struct idt77252_dev *card)
 {
-	pci_free_consistent(card->pcidev, TSQSIZE,
-			    card->tsq.base, card->tsq.paddr);
+	dma_free_coherent(&card->pcidev->dev, TSQSIZE,
+			  card->tsq.base, card->tsq.paddr);
 }
 
 static void
@@ -1861,9 +1862,9 @@ add_rx_skb(struct idt77252_dev *card, int queue,
 			goto outfree;
 		}
 
-		paddr = pci_map_single(card->pcidev, skb->data,
+		paddr = dma_map_single(&card->pcidev->dev, skb->data,
 				       skb_end_pointer(skb) - skb->data,
-				       PCI_DMA_FROMDEVICE);
+				       DMA_FROM_DEVICE);
 		IDT77252_PRV_PADDR(skb) = paddr;
 
 		if (push_rx_skb(card, skb, queue)) {
@@ -1875,8 +1876,8 @@ add_rx_skb(struct idt77252_dev *card, int queue,
 	return;
 
 outunmap:
-	pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-			 skb_end_pointer(skb) - skb->data, PCI_DMA_FROMDEVICE);
+	dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+			 skb_end_pointer(skb) - skb->data, DMA_FROM_DEVICE);
 
 	handle = IDT77252_PRV_POOL(skb);
 	card->sbpool[POOL_QUEUE(handle)].skb[POOL_INDEX(handle)] = NULL;
@@ -1892,15 +1893,15 @@ recycle_rx_skb(struct idt77252_dev *card, struct sk_buff *skb)
 	u32 handle = IDT77252_PRV_POOL(skb);
 	int err;
 
-	pci_dma_sync_single_for_device(card->pcidev, IDT77252_PRV_PADDR(skb),
-				       skb_end_pointer(skb) - skb->data,
-				       PCI_DMA_FROMDEVICE);
+	dma_sync_single_for_device(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
+				   skb_end_pointer(skb) - skb->data,
+				   DMA_FROM_DEVICE);
 
 	err = push_rx_skb(card, skb, POOL_QUEUE(handle));
 	if (err) {
-		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
+		dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
 				 skb_end_pointer(skb) - skb->data,
-				 PCI_DMA_FROMDEVICE);
+				 DMA_FROM_DEVICE);
 		sb_pool_remove(card, skb);
 		dev_kfree_skb(skb);
 	}
@@ -3058,11 +3059,11 @@ deinit_card(struct idt77252_dev *card)
 		for (j = 0; j < FBQ_SIZE; j++) {
 			skb = card->sbpool[i].skb[j];
 			if (skb) {
-				pci_unmap_single(card->pcidev,
+				dma_unmap_single(&card->pcidev->dev,
 						 IDT77252_PRV_PADDR(skb),
 						 (skb_end_pointer(skb) -
 						  skb->data),
-						 PCI_DMA_FROMDEVICE);
+						 DMA_FROM_DEVICE);
 				card->sbpool[i].skb[j] = NULL;
 				dev_kfree_skb(skb);
 			}
@@ -3076,8 +3077,8 @@ deinit_card(struct idt77252_dev *card)
 	vfree(card->vcs);
 
 	if (card->raw_cell_hnd) {
-		pci_free_consistent(card->pcidev, 2 * sizeof(u32),
-				    card->raw_cell_hnd, card->raw_cell_paddr);
+		dma_free_coherent(&card->pcidev->dev, 2 * sizeof(u32),
+				  card->raw_cell_hnd, card->raw_cell_paddr);
 	}
 
 	if (card->rsq.base) {
@@ -3397,9 +3398,10 @@ static int init_card(struct atm_dev *dev)
 	writel(0, SAR_REG_GP);
 
 	/* Initialize RAW Cell Handle Register  */
-	card->raw_cell_hnd = pci_zalloc_consistent(card->pcidev,
-						   2 * sizeof(u32),
-						   &card->raw_cell_paddr);
+	card->raw_cell_hnd = dma_zalloc_coherent(&card->pcidev->dev,
+						 2 * sizeof(u32),
+						 &card->raw_cell_paddr,
+						 GFP_KERNEL);
 	if (!card->raw_cell_hnd) {
 		printk("%s: memory allocation failure.\n", card->name);
 		deinit_card(card);
@@ -3611,6 +3613,11 @@ static int idt77252_init_one(struct pci_dev *pcidev,
 		return err;
 	}
 
+	if ((err = dma_set_mask_and_coherent(&pcidev->dev, DMA_BIT_MASK(32)))) {
+		printk("idt77252: can't enable DMA for PCI device at %s\n", pci_name(pcidev));
+		return err;
+	}
+
 	card = kzalloc(sizeof(struct idt77252_dev), GFP_KERNEL);
 	if (!card) {
 		printk("idt77252-%d: can't allocate private data\n", index);
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index 4217f29..924f8e2 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -1185,8 +1185,8 @@ static int rx_pkt(struct atm_dev *dev)
 
 	/* Build the DLE structure */  
 	wr_ptr = iadev->rx_dle_q.write;  
-	wr_ptr->sys_pkt_addr = pci_map_single(iadev->pci, skb->data,
-		len, PCI_DMA_FROMDEVICE);
+	wr_ptr->sys_pkt_addr = dma_map_single(&iadev->pci->dev, skb->data,
+					      len, DMA_FROM_DEVICE);
 	wr_ptr->local_pkt_addr = buf_addr;  
 	wr_ptr->bytes = len;	/* We don't know this do we ?? */  
 	wr_ptr->mode = DMA_INT_ENABLE;  
@@ -1306,8 +1306,8 @@ static void rx_dle_intr(struct atm_dev *dev)
           u_short length;
           struct ia_vcc *ia_vcc;
 
-	  pci_unmap_single(iadev->pci, iadev->rx_dle_q.write->sys_pkt_addr,
-	  	len, PCI_DMA_FROMDEVICE);
+	  dma_unmap_single(&iadev->pci->dev, iadev->rx_dle_q.write->sys_pkt_addr,
+			   len, DMA_FROM_DEVICE);
           /* no VCC related housekeeping done as yet. lets see */  
           vcc = ATM_SKB(skb)->vcc;
 	  if (!vcc) {
@@ -1430,8 +1430,8 @@ static int rx_init(struct atm_dev *dev)
   //    spin_lock_init(&iadev->rx_lock); 
   
 	/* Allocate 4k bytes - more aligned than needed (4k boundary) */
-	dle_addr = pci_alloc_consistent(iadev->pci, DLE_TOTAL_SIZE,
-					&iadev->rx_dle_dma);  
+	dle_addr = dma_alloc_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE,
+				      &iadev->rx_dle_dma, GFP_KERNEL);
 	if (!dle_addr)  {  
 		printk(KERN_ERR DEV_LABEL "can't allocate DLEs\n");
 		goto err_out;
@@ -1631,8 +1631,8 @@ static int rx_init(struct atm_dev *dev)
 	return 0;  
 
 err_free_dle:
-	pci_free_consistent(iadev->pci, DLE_TOTAL_SIZE, iadev->rx_dle_q.start,
-			    iadev->rx_dle_dma);  
+	dma_free_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE, iadev->rx_dle_q.start,
+			  iadev->rx_dle_dma);
 err_out:
 	return -ENOMEM;
 }  
@@ -1702,8 +1702,8 @@ static void tx_dle_intr(struct atm_dev *dev)
 
 	    /* Revenge of the 2 dle (skb + trailer) used in ia_pkt_tx() */
 	    if (!((dle - iadev->tx_dle_q.start)%(2*sizeof(struct dle)))) {
-		pci_unmap_single(iadev->pci, dle->sys_pkt_addr, skb->len,
-				 PCI_DMA_TODEVICE);
+		dma_unmap_single(&iadev->pci->dev, dle->sys_pkt_addr, skb->len,
+				 DMA_TO_DEVICE);
 	    }
             vcc = ATM_SKB(skb)->vcc;
             if (!vcc) {
@@ -1917,8 +1917,8 @@ static int tx_init(struct atm_dev *dev)
                                 readw(iadev->seg_reg+SEG_MASK_REG));)  
 
 	/* Allocate 4k (boundary aligned) bytes */
-	dle_addr = pci_alloc_consistent(iadev->pci, DLE_TOTAL_SIZE,
-					&iadev->tx_dle_dma);  
+	dle_addr = dma_alloc_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE,
+				      &iadev->tx_dle_dma, GFP_KERNEL);
 	if (!dle_addr)  {
 		printk(KERN_ERR DEV_LABEL "can't allocate DLEs\n");
 		goto err_out;
@@ -1989,8 +1989,10 @@ static int tx_init(struct atm_dev *dev)
 		goto err_free_tx_bufs;
             }
 	    iadev->tx_buf[i].cpcs = cpcs;
-	    iadev->tx_buf[i].dma_addr = pci_map_single(iadev->pci,
-		cpcs, sizeof(*cpcs), PCI_DMA_TODEVICE);
+	    iadev->tx_buf[i].dma_addr = dma_map_single(&iadev->pci->dev,
+						       cpcs,
+						       sizeof(*cpcs),
+						       DMA_TO_DEVICE);
         }
         iadev->desc_tbl = kmalloc(iadev->num_tx_desc *
                                    sizeof(struct desc_tbl_t), GFP_KERNEL);
@@ -2198,14 +2200,14 @@ err_free_tx_bufs:
 	while (--i >= 0) {
 		struct cpcs_trailer_desc *desc = iadev->tx_buf + i;
 
-		pci_unmap_single(iadev->pci, desc->dma_addr,
-			sizeof(*desc->cpcs), PCI_DMA_TODEVICE);
+		dma_unmap_single(&iadev->pci->dev, desc->dma_addr,
+				 sizeof(*desc->cpcs), DMA_TO_DEVICE);
 		kfree(desc->cpcs);
 	}
 	kfree(iadev->tx_buf);
 err_free_dle:
-	pci_free_consistent(iadev->pci, DLE_TOTAL_SIZE, iadev->tx_dle_q.start,
-			    iadev->tx_dle_dma);  
+	dma_free_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE, iadev->tx_dle_q.start,
+			  iadev->tx_dle_dma);
 err_out:
 	return -ENOMEM;
 }   
@@ -2476,20 +2478,20 @@ static void ia_free_tx(IADEV *iadev)
 	for (i = 0; i < iadev->num_tx_desc; i++) {
 		struct cpcs_trailer_desc *desc = iadev->tx_buf + i;
 
-		pci_unmap_single(iadev->pci, desc->dma_addr,
-			sizeof(*desc->cpcs), PCI_DMA_TODEVICE);
+		dma_unmap_single(&iadev->pci->dev, desc->dma_addr,
+				 sizeof(*desc->cpcs), DMA_TO_DEVICE);
 		kfree(desc->cpcs);
 	}
 	kfree(iadev->tx_buf);
-	pci_free_consistent(iadev->pci, DLE_TOTAL_SIZE, iadev->tx_dle_q.start,
-			    iadev->tx_dle_dma);  
+	dma_free_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE, iadev->tx_dle_q.start,
+			  iadev->tx_dle_dma);
 }
 
 static void ia_free_rx(IADEV *iadev)
 {
 	kfree(iadev->rx_open);
-	pci_free_consistent(iadev->pci, DLE_TOTAL_SIZE, iadev->rx_dle_q.start,
-			  iadev->rx_dle_dma);  
+	dma_free_coherent(&iadev->pci->dev, DLE_TOTAL_SIZE, iadev->rx_dle_q.start,
+			  iadev->rx_dle_dma);
 }
 
 static int ia_start(struct atm_dev *dev)
@@ -3009,8 +3011,8 @@ static int ia_pkt_tx (struct atm_vcc *vcc, struct sk_buff *skb) {
 	/* Build the DLE structure */  
 	wr_ptr = iadev->tx_dle_q.write;  
 	memset((caddr_t)wr_ptr, 0, sizeof(*wr_ptr));  
-	wr_ptr->sys_pkt_addr = pci_map_single(iadev->pci, skb->data,
-		skb->len, PCI_DMA_TODEVICE);
+	wr_ptr->sys_pkt_addr = dma_map_single(&iadev->pci->dev, skb->data,
+					      skb->len, DMA_TO_DEVICE);
 	wr_ptr->local_pkt_addr = (buf_desc_ptr->buf_start_hi << 16) | 
                                                   buf_desc_ptr->buf_start_lo;  
 	/* wr_ptr->bytes = swap_byte_order(total_len); didn't seem to affect?? */
diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c
index 93eaf8d..02fae18 100644
--- a/drivers/atm/lanai.c
+++ b/drivers/atm/lanai.c
@@ -346,7 +346,8 @@ static void lanai_buf_allocate(struct lanai_buffer *buf,
 		 * everything, but the way the lanai uses DMA memory would
 		 * make that a terrific pain.  This is much simpler.
 		 */
-		buf->start = pci_alloc_consistent(pci, size, &buf->dmaaddr);
+		buf->start = dma_alloc_coherent(&pci->dev,
+						size, &buf->dmaaddr, GFP_KERNEL);
 		if (buf->start != NULL) {	/* Success */
 			/* Lanai requires 256-byte alignment of DMA bufs */
 			APRINTK((buf->dmaaddr & ~0xFFFFFF00) == 0,
@@ -372,8 +373,8 @@ static void lanai_buf_deallocate(struct lanai_buffer *buf,
 	struct pci_dev *pci)
 {
 	if (buf->start != NULL) {
-		pci_free_consistent(pci, lanai_buf_size(buf),
-		    buf->start, buf->dmaaddr);
+		dma_free_coherent(&pci->dev, lanai_buf_size(buf),
+				  buf->start, buf->dmaaddr);
 		buf->start = buf->end = buf->ptr = NULL;
 	}
 }
@@ -1953,12 +1954,7 @@ static int lanai_pci_start(struct lanai_dev *lanai)
 		return -ENXIO;
 	}
 	pci_set_master(pci);
-	if (pci_set_dma_mask(pci, DMA_BIT_MASK(32)) != 0) {
-		printk(KERN_WARNING DEV_LABEL
-		    "(itf %d): No suitable DMA available.\n", lanai->number);
-		return -EBUSY;
-	}
-	if (pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(32)) != 0) {
+	if (dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32)) != 0) {
 		printk(KERN_WARNING DEV_LABEL
 		    "(itf %d): No suitable DMA available.\n", lanai->number);
 		return -EBUSY;
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 9988ac9..b7e1cc0 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -252,10 +252,10 @@ static void nicstar_remove_one(struct pci_dev *pcidev)
 			free_scq(card, card->scd2vc[j]->scq, card->scd2vc[j]->tx_vcc);
 	}
 	idr_destroy(&card->idr);
-	pci_free_consistent(card->pcidev, NS_RSQSIZE + NS_RSQ_ALIGNMENT,
-			    card->rsq.org, card->rsq.dma);
-	pci_free_consistent(card->pcidev, NS_TSQSIZE + NS_TSQ_ALIGNMENT,
-			    card->tsq.org, card->tsq.dma);
+	dma_free_coherent(&card->pcidev->dev, NS_RSQSIZE + NS_RSQ_ALIGNMENT,
+			  card->rsq.org, card->rsq.dma);
+	dma_free_coherent(&card->pcidev->dev, NS_TSQSIZE + NS_TSQ_ALIGNMENT,
+			  card->tsq.org, card->tsq.dma);
 	free_irq(card->pcidev->irq, card);
 	iounmap(card->membase);
 	kfree(card);
@@ -370,8 +370,7 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
 		ns_init_card_error(card, error);
 		return error;
 	}
-        if ((pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)) != 0) ||
-	    (pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(32)) != 0)) {
+        if (dma_set_mask_and_coherent(&pcidev->dev, DMA_BIT_MASK(32)) != 0) {
                 printk(KERN_WARNING
 		       "nicstar%d: No suitable DMA available.\n", i);
 		error = 2;
@@ -535,9 +534,9 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
 	writel(0x00000000, card->membase + VPM);
 
 	/* Initialize TSQ */
-	card->tsq.org = pci_alloc_consistent(card->pcidev,
-					     NS_TSQSIZE + NS_TSQ_ALIGNMENT,
-					     &card->tsq.dma);
+	card->tsq.org = dma_alloc_coherent(&card->pcidev->dev,
+					   NS_TSQSIZE + NS_TSQ_ALIGNMENT,
+					   &card->tsq.dma, GFP_KERNEL);
 	if (card->tsq.org == NULL) {
 		printk("nicstar%d: can't allocate TSQ.\n", i);
 		error = 10;
@@ -554,9 +553,9 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
 	PRINTK("nicstar%d: TSQ base at 0x%p.\n", i, card->tsq.base);
 
 	/* Initialize RSQ */
-	card->rsq.org = pci_alloc_consistent(card->pcidev,
-					     NS_RSQSIZE + NS_RSQ_ALIGNMENT,
-					     &card->rsq.dma);
+	card->rsq.org = dma_alloc_coherent(&card->pcidev->dev,
+					   NS_RSQSIZE + NS_RSQ_ALIGNMENT,
+					   &card->rsq.dma, GFP_KERNEL);
 	if (card->rsq.org == NULL) {
 		printk("nicstar%d: can't allocate RSQ.\n", i);
 		error = 11;
@@ -874,7 +873,8 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd)
 	scq = kmalloc(sizeof(scq_info), GFP_KERNEL);
 	if (!scq)
 		return NULL;
-        scq->org = pci_alloc_consistent(card->pcidev, 2 * size, &scq->dma);
+        scq->org = dma_alloc_coherent(&card->pcidev->dev,
+				      2 * size,  &scq->dma, GFP_KERNEL);
 	if (!scq->org) {
 		kfree(scq);
 		return NULL;
@@ -936,10 +936,10 @@ static void free_scq(ns_dev *card, scq_info *scq, struct atm_vcc *vcc)
 			}
 	}
 	kfree(scq->skb);
-	pci_free_consistent(card->pcidev,
-			    2 * (scq->num_entries == VBR_SCQ_NUM_ENTRIES ?
-				 VBR_SCQSIZE : CBR_SCQSIZE),
-			    scq->org, scq->dma);
+	dma_free_coherent(&card->pcidev->dev,
+			  2 * (scq->num_entries == VBR_SCQ_NUM_ENTRIES ?
+			       VBR_SCQSIZE : CBR_SCQSIZE),
+			  scq->org, scq->dma);
 	kfree(scq);
 }
 
@@ -957,11 +957,11 @@ static void push_rxbufs(ns_dev * card, struct sk_buff *skb)
 	handle2 = NULL;
 	addr2 = 0;
 	handle1 = skb;
-	addr1 = pci_map_single(card->pcidev,
+	addr1 = dma_map_single(&card->pcidev->dev,
 			       skb->data,
 			       (NS_PRV_BUFTYPE(skb) == BUF_SM
 				? NS_SMSKBSIZE : NS_LGSKBSIZE),
-			       PCI_DMA_TODEVICE);
+			       DMA_TO_DEVICE);
 	NS_PRV_DMA(skb) = addr1; /* save so we can unmap later */
 
 #ifdef GENERAL_DEBUG
@@ -1670,8 +1670,8 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	ATM_SKB(skb)->vcc = vcc;
 
-	NS_PRV_DMA(skb) = pci_map_single(card->pcidev, skb->data,
-					 skb->len, PCI_DMA_TODEVICE);
+	NS_PRV_DMA(skb) = dma_map_single(&card->pcidev->dev, skb->data,
+					 skb->len, DMA_TO_DEVICE);
 
 	if (vcc->qos.aal == ATM_AAL5) {
 		buflen = (skb->len + 47 + 8) / 48 * 48;	/* Multiple of 48 */
@@ -1930,10 +1930,10 @@ static void drain_scq(ns_dev * card, scq_info * scq, int pos)
 		XPRINTK("nicstar%d: freeing skb at 0x%p (index %d).\n",
 			card->index, skb, i);
 		if (skb != NULL) {
-			pci_unmap_single(card->pcidev,
+			dma_unmap_single(&card->pcidev->dev,
 					 NS_PRV_DMA(skb),
 					 skb->len,
-					 PCI_DMA_TODEVICE);
+					 DMA_TO_DEVICE);
 			vcc = ATM_SKB(skb)->vcc;
 			if (vcc && vcc->pop != NULL) {
 				vcc->pop(vcc, skb);
@@ -1992,16 +1992,16 @@ static void dequeue_rx(ns_dev * card, ns_rsqe * rsqe)
 		return;
 	}
 	idr_remove(&card->idr, id);
-        pci_dma_sync_single_for_cpu(card->pcidev,
-				    NS_PRV_DMA(skb),
-				    (NS_PRV_BUFTYPE(skb) == BUF_SM
-				     ? NS_SMSKBSIZE : NS_LGSKBSIZE),
-				    PCI_DMA_FROMDEVICE);
-	pci_unmap_single(card->pcidev,
+	dma_sync_single_for_cpu(&card->pcidev->dev,
+				NS_PRV_DMA(skb),
+				(NS_PRV_BUFTYPE(skb) == BUF_SM
+				 ? NS_SMSKBSIZE : NS_LGSKBSIZE),
+				DMA_FROM_DEVICE);
+	dma_unmap_single(&card->pcidev->dev,
 			 NS_PRV_DMA(skb),
 			 (NS_PRV_BUFTYPE(skb) == BUF_SM
 			  ? NS_SMSKBSIZE : NS_LGSKBSIZE),
-			 PCI_DMA_FROMDEVICE);
+			 DMA_FROM_DEVICE);
 	vpi = ns_rsqe_vpi(rsqe);
 	vci = ns_rsqe_vci(rsqe);
 	if (vpi >= 1UL << card->vpibits || vci >= 1UL << card->vcibits) {
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 21b0bc6..74e18b0 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -785,8 +785,8 @@ static void solos_bh(unsigned long card_arg)
 				skb = card->rx_skb[port];
 				card->rx_skb[port] = NULL;
 
-				pci_unmap_single(card->dev, SKB_CB(skb)->dma_addr,
-						 RX_DMA_SIZE, PCI_DMA_FROMDEVICE);
+				dma_unmap_single(&card->dev->dev, SKB_CB(skb)->dma_addr,
+						 RX_DMA_SIZE, DMA_FROM_DEVICE);
 
 				header = (void *)skb->data;
 				size = le16_to_cpu(header->size);
@@ -872,8 +872,8 @@ static void solos_bh(unsigned long card_arg)
 			struct sk_buff *skb = alloc_skb(RX_DMA_SIZE, GFP_ATOMIC);
 			if (skb) {
 				SKB_CB(skb)->dma_addr =
-					pci_map_single(card->dev, skb->data,
-						       RX_DMA_SIZE, PCI_DMA_FROMDEVICE);
+					dma_map_single(&card->dev->dev, skb->data,
+						       RX_DMA_SIZE, DMA_FROM_DEVICE);
 				iowrite32(SKB_CB(skb)->dma_addr,
 					  card->config_regs + RX_DMA_ADDR(port));
 				card->rx_skb[port] = skb;
@@ -1069,8 +1069,8 @@ static uint32_t fpga_tx(struct solos_card *card)
 		if (tx_pending & 1) {
 			struct sk_buff *oldskb = card->tx_skb[port];
 			if (oldskb) {
-				pci_unmap_single(card->dev, SKB_CB(oldskb)->dma_addr,
-						 oldskb->len, PCI_DMA_TODEVICE);
+				dma_unmap_single(&card->dev->dev, SKB_CB(oldskb)->dma_addr,
+						 oldskb->len, DMA_TO_DEVICE);
 				card->tx_skb[port] = NULL;
 			}
 			spin_lock(&card->tx_queue_lock);
@@ -1089,8 +1089,8 @@ static uint32_t fpga_tx(struct solos_card *card)
 					data = card->dma_bounce + (BUF_SIZE * port);
 					memcpy(data, skb->data, skb->len);
 				}
-				SKB_CB(skb)->dma_addr = pci_map_single(card->dev, data,
-								       skb->len, PCI_DMA_TODEVICE);
+				SKB_CB(skb)->dma_addr = dma_map_single(&card->dev->dev, data,
+								       skb->len, DMA_TO_DEVICE);
 				card->tx_skb[port] = skb;
 				iowrite32(SKB_CB(skb)->dma_addr,
 					  card->config_regs + TX_DMA_ADDR(port));
@@ -1210,7 +1210,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		goto out;
 	}
 
-	err = pci_set_dma_mask(dev, DMA_BIT_MASK(32));
+	err = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32));
 	if (err) {
 		dev_warn(&dev->dev, "Failed to set 32-bit DMA mask\n");
 		goto out;
@@ -1411,14 +1411,14 @@ static void atm_remove(struct solos_card *card)
 
 			skb = card->rx_skb[i];
 			if (skb) {
-				pci_unmap_single(card->dev, SKB_CB(skb)->dma_addr,
-						 RX_DMA_SIZE, PCI_DMA_FROMDEVICE);
+				dma_unmap_single(&card->dev->dev, SKB_CB(skb)->dma_addr,
+						 RX_DMA_SIZE, DMA_FROM_DEVICE);
 				dev_kfree_skb(skb);
 			}
 			skb = card->tx_skb[i];
 			if (skb) {
-				pci_unmap_single(card->dev, SKB_CB(skb)->dma_addr,
-						 skb->len, PCI_DMA_TODEVICE);
+				dma_unmap_single(&card->dev->dev, SKB_CB(skb)->dma_addr,
+						 skb->len, DMA_TO_DEVICE);
 				dev_kfree_skb(skb);
 			}
 			while ((skb = skb_dequeue(&card->tx_queue[i])))
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index 969c3c2..cecfb94 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -1306,19 +1306,20 @@ static int zatm_start(struct atm_dev *dev)
 
 		if (!mbx_entries[i])
 			continue;
-		mbx = pci_alloc_consistent(pdev, 2*MBX_SIZE(i), &mbx_dma);
+		mbx = dma_alloc_coherent(&pdev->dev,
+					 2 * MBX_SIZE(i), &mbx_dma, GFP_KERNEL);
 		if (!mbx) {
 			error = -ENOMEM;
 			goto out;
 		}
 		/*
-		 * Alignment provided by pci_alloc_consistent() isn't enough
+		 * Alignment provided by dma_alloc_coherent() isn't enough
 		 * for this device.
 		 */
 		if (((unsigned long)mbx ^ mbx_dma) & 0xffff) {
 			printk(KERN_ERR DEV_LABEL "(itf %d): system "
 			       "bus incompatible with driver\n", dev->number);
-			pci_free_consistent(pdev, 2*MBX_SIZE(i), mbx, mbx_dma);
+			dma_free_coherent(&pdev->dev, 2*MBX_SIZE(i), mbx, mbx_dma);
 			error = -ENODEV;
 			goto out;
 		}
@@ -1354,9 +1355,9 @@ out_tx:
 	kfree(zatm_dev->tx_map);
 out:
 	while (i-- > 0) {
-		pci_free_consistent(pdev, 2*MBX_SIZE(i), 
-				    (void *)zatm_dev->mbx_start[i],
-				    zatm_dev->mbx_dma[i]);
+		dma_free_coherent(&pdev->dev, 2 * MBX_SIZE(i),
+				  (void *)zatm_dev->mbx_start[i],
+				  zatm_dev->mbx_dma[i]);
 	}
 	free_irq(zatm_dev->irq, dev);
 	goto done;
@@ -1608,6 +1609,10 @@ static int zatm_init_one(struct pci_dev *pci_dev,
 	if (ret < 0)
 		goto out_disable;
 
+	ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
+	if (ret < 0)
+		goto out_disable;
+
 	zatm_dev->pci_dev = pci_dev;
 	dev->dev_data = zatm_dev;
 	zatm_dev->copper = (int)ent->driver_data;
-- 
1.9.3

^ permalink raw reply related

* Re: [net-next PATCH v3 1/1] atm: remove deprecated use of pci api
From: Quentin Lambert @ 2015-01-16 14:10 UTC (permalink / raw)
  To: chas williams - CONTRACTOR, David Laight
  Cc: 'David Miller', linux-atm-general@lists.sourceforge.net,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <20150116085721.56aa8075@thirdoffive.cmf.nrl.navy.mil>


On 16/01/2015 14:57, chas williams - CONTRACTOR wrote:
>
> Signed-off-by: Chas Williams - CONTRACTOR <chas@cmf.nrl.navy.mil>
> ---
>   drivers/atm/eni.c       |  33 +++++++------
>   drivers/atm/fore200e.c  |  22 +++++----
>   drivers/atm/he.c        | 125 +++++++++++++++++++++++++-----------------------
>   drivers/atm/he.h        |   4 +-
>   drivers/atm/idt77252.c  | 107 ++++++++++++++++++++++-------------------
>   drivers/atm/iphase.c    |  54 +++++++++++----------
>   drivers/atm/lanai.c     |  14 ++----
>   drivers/atm/nicstar.c   |  60 +++++++++++------------
>   drivers/atm/solos-pci.c |  26 +++++-----
>   drivers/atm/zatm.c      |  17 ++++---
>   10 files changed, 243 insertions(+), 219 deletions(-)
>
[]
> diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
> index d5d9eaf..75dde90 100644
> --- a/drivers/atm/fore200e.c
> +++ b/drivers/atm/fore200e.c
> @@ -425,7 +425,7 @@ static void fore200e_pca_write(u32 val, volatile u32 __iomem *addr)
>   static u32
>   fore200e_pca_dma_map(struct fore200e* fore200e, void* virt_addr, int size, int direction)
>   {
> -    u32 dma_addr = pci_map_single((struct pci_dev*)fore200e->bus_dev, virt_addr, size, direction);
> +    u32 dma_addr = dma_map_single(&((struct pci_dev *) fore200e->bus_dev)->dev, virt_addr, size, direction);
>   
>       DPRINTK(3, "PCI DVMA mapping: virt_addr = 0x%p, size = %d, direction = %d,  --> dma_addr = 0x%08x\n",
>   	    virt_addr, size, direction, dma_addr);
>
[]

I am going try to make similar changes in some other part of the kernel and
I was wondering if you could explain how you decided it wasn't necessary to
check for "((struct pci_dev *) fore200e->bus_dev" nullity for instance.

Thanks,

Quentin

^ permalink raw reply

* Re: [PATCH net] net: sctp: fix race for one-to-many sockets in sendmsg's auto associate
From: Vlad Yasevich @ 2015-01-16 14:31 UTC (permalink / raw)
  To: Daniel Borkmann, davem; +Cc: netdev, linux-sctp
In-Reply-To: <1421336075-25061-1-git-send-email-dborkman@redhat.com>

On 01/15/2015 10:34 AM, Daniel Borkmann wrote:
> I.e. one-to-many sockets in SCTP are not required to explicitly
> call into connect(2) or sctp_connectx(2) prior to data exchange.
> Instead, they can directly invoke sendmsg(2) and the SCTP stack
> will automatically trigger connection establishment through 4WHS
> via sctp_primitive_ASSOCIATE(). However, this in its current
> implementation is racy: INIT is being sent out immediately (as
> it cannot be bundled anyway) and the rest of the DATA chunks are
> queued up for later xmit when connection is established, meaning
> sendmsg(2) will return successfully. This behaviour can result
> in an undesired side-effect that the kernel made the application
> think the data has already been transmitted, although none of it
> has actually left the machine, worst case even after close(2)'ing
> the socket.
> 
> Instead, when the association from client side has been shut down
> e.g. first gracefully through SCTP_EOF and then close(2), the
> client could afterwards still receive the server's INIT_ACK due
> to a connection with higher latency. This INIT_ACK is then considered
> out of the blue and hence responded with ABORT as there was no
> alive assoc found anymore. This can be easily reproduced f.e.
> with sctp_test application from lksctp. One way to fix this race
> is to wait for the handshake to actually complete.
> 
> The fix defers waiting after sctp_primitive_ASSOCIATE() and
> sctp_primitive_SEND() succeeded, so that DATA chunks cooked up
> from sctp_sendmsg() have already been placed into the output
> queue through the side-effect interpreter, and therefore can then
> be bundeled together with COOKIE_ECHO control chunks.
> 
> strace from example application (shortened):
> 
> socket(PF_INET, SOCK_SEQPACKET, IPPROTO_SCTP) = 3
> sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")},
>            msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5
> sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")},
>            msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5
> sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")},
>            msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5
> sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")},
>            msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5
> sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")},
>            msg_iov(0)=[], msg_controllen=48, {cmsg_len=48, cmsg_level=0x84 /* SOL_??? */, cmsg_type=, ...},
>            msg_flags=0}, 0) = 0 // graceful shutdown for SOCK_SEQPACKET via SCTP_EOF
> close(3) = 0
> 
> tcpdump before patch (fooling the application):
> 
> 22:33:36.306142 IP 192.168.1.114.41462 > 192.168.1.115.8888: sctp (1) [INIT] [init tag: 3879023686] [rwnd: 106496] [OS: 10] [MIS: 65535] [init TSN: 3139201684]
> 22:33:36.316619 IP 192.168.1.115.8888 > 192.168.1.114.41462: sctp (1) [INIT ACK] [init tag: 3345394793] [rwnd: 106496] [OS: 10] [MIS: 10] [init TSN: 3380109591]
> 22:33:36.317600 IP 192.168.1.114.41462 > 192.168.1.115.8888: sctp (1) [ABORT]
> 
> tcpdump after patch:
> 
> 14:28:58.884116 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [INIT] [init tag: 438593213] [rwnd: 106496] [OS: 10] [MIS: 65535] [init TSN: 3092969729]
> 14:28:58.888414 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [INIT ACK] [init tag: 381429855] [rwnd: 106496] [OS: 10] [MIS: 10] [init TSN: 2141904492]
> 14:28:58.888638 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [COOKIE ECHO] , (2) [DATA] (B)(E) [TSN: 3092969729] [...]
> 14:28:58.893278 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [COOKIE ACK] , (2) [SACK] [cum ack 3092969729] [a_rwnd 106491] [#gap acks 0] [#dup tsns 0]
> 14:28:58.893591 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [DATA] (B)(E) [TSN: 3092969730] [...]
> 14:28:59.096963 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [SACK] [cum ack 3092969730] [a_rwnd 106496] [#gap acks 0] [#dup tsns 0]
> 14:28:59.097086 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [DATA] (B)(E) [TSN: 3092969731] [...] , (2) [DATA] (B)(E) [TSN: 3092969732] [...]
> 14:28:59.103218 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [SACK] [cum ack 3092969732] [a_rwnd 106486] [#gap acks 0] [#dup tsns 0]
> 14:28:59.103330 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [SHUTDOWN]
> 14:28:59.107793 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [SHUTDOWN ACK]
> 14:28:59.107890 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [SHUTDOWN COMPLETE]
> 
> Looks like this bug is from the pre-git history museum. ;)
> 
> Fixes: 08707d5482df ("lksctp-2_5_31-0_5_1.patch")
> Signed-off-by: Daniel Borkmann <dborkman@redhat.com>

Acked-by: Vlad Yasevich <vyasevich@gmail.com>

We also need to be remember that the same scenario can be reproduced without an
implicit connect by simply using non-blocking socket on a high rtt link.  I've made this
comment privately to Daniel, but want to mention this on the list.  Daniel and I will be
working on additional patches to address that issue.

-vlad

> ---
>  net/sctp/socket.c | 8 +++++++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index 2625ecc..aafe94b 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -1603,7 +1603,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
>  	sctp_assoc_t associd = 0;
>  	sctp_cmsgs_t cmsgs = { NULL };
>  	sctp_scope_t scope;
> -	bool fill_sinfo_ttl = false;
> +	bool fill_sinfo_ttl = false, wait_connect = false;
>  	struct sctp_datamsg *datamsg;
>  	int msg_flags = msg->msg_flags;
>  	__u16 sinfo_flags = 0;
> @@ -1943,6 +1943,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
>  		if (err < 0)
>  			goto out_free;
>  
> +		wait_connect = true;
>  		pr_debug("%s: we associated primitively\n", __func__);
>  	}
>  
> @@ -1980,6 +1981,11 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
>  	sctp_datamsg_put(datamsg);
>  	err = msg_len;
>  
> +	if (unlikely(wait_connect)) {
> +		timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT);
> +		sctp_wait_for_connect(asoc, &timeo);
> +	}
> +
>  	/* If we are already past ASSOCIATE, the lower
>  	 * layers are responsible for association cleanup.
>  	 */
> 

^ permalink raw reply

* Re: [PATCH v4 2/4] can: kvaser_usb: Update error counters before exiting on OOM
From: Marc Kleine-Budde @ 2015-01-16 14:39 UTC (permalink / raw)
  To: Ahmed S. Darwish
  Cc: Olivier Sobrie, Oliver Hartkopp, Wolfgang Grandegger,
	David S. Miller, Paul Gortmaker, Linux-CAN, netdev, LKML,
	andri.yngvason
In-Reply-To: <20150112203650.GA11355@linux>

[-- Attachment #1: Type: text/plain, Size: 1626 bytes --]

On 01/12/2015 09:36 PM, Ahmed S. Darwish wrote:
> On Mon, Jan 12, 2015 at 12:09:32PM +0100, Marc Kleine-Budde wrote:
>> On 01/11/2015 09:15 PM, Ahmed S. Darwish wrote:
>>> From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
>>>
>>> Let the error counters be more accurate in case of Out of
>>> Memory conditions.
>>
>> Please have a look at kvaser_usb_rx_error(), the whole state handling is
>> omitted in case of OOM.
>>
> 
> I see. Regarding kvaser_usb_rx_error(), would something like
> below patch be acceptable? 
> 
> Kindly note that separating recording interface state from
> error frame packet building leads to duplication of a good
> number of if-conditions. Meanwhile, it truly saves _all_
> of the possible state before any ENOMEM -- the correct thing
> to do.
> 
> Another solution was to allocate the can frame on the stack,
> and thus avoiding any code duplication. But this only leads
> to calls of "kvaser_usb_simple_msg_async", which can fail
> with -ENOMEM by itself, returning to the very same problem
> again. 
> 
> If the patch is acceptable, I'll rebase my USBCAN-II driver
> above it and re-submit the series (minus the merged patch).

Looks good from my point of view, stats and state are handled
independent of the error skb.

Andri can you have a look at the state handling itself?

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCHv2] ixgbe: Re-enable relaxed ordering as part of init/restart sequence for non-DCA config
From: Jeff Kirsher @ 2015-01-16 14:45 UTC (permalink / raw)
  To: Sowmini Varadhan
  Cc: jesse.brandeburg, bruce.w.allan, carolyn.wyborny,
	donald.c.skidmore, gregory.v.rose, matthew.vick, john.ronciak,
	mitch.a.williams, linux.nics, e1000-devel, netdev, linux-kernel,
	sparclinux, emil.s.tantilov
In-Reply-To: <20150115010352.GK24238@oracle.com>

[-- Attachment #1: Type: text/plain, Size: 1393 bytes --]

On Wed, 2015-01-14 at 20:03 -0500, Sowmini Varadhan wrote:
> Relaxed ordering is disabled by default at driver initialization
> and re-enabled when DCA is used. The reason it is disabled  was
> due to an issue on some chipsets (see comments in
> ixgbe_update_tx_dca()).
> But when DCA is not used, RO needs to be re-enabled, else we have
> a serialization bottleneck on platforms like SPARC.
> 
> This patch eliminates the bottleneck for ixgbe when DCA is not
> configured.
> 
> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
> Cc: Emil Tantilov <emil.s.tantilov@intel.com>
> 
> ---
> v2: incoroporate comments from Emil Tantilov
> 
>  drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  |   23
> +++++++++++++++++++++++
>  drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |   20
> ++++++++++++++++++++
>  drivers/net/ethernet/intel/ixgbe/ixgbe_common.h |    1 +
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   |   11 +++++++++++
>  drivers/net/ethernet/intel/ixgbe/ixgbe_type.h   |    1 +
>  drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c   |    1 +
>  6 files changed, 57 insertions(+), 0 deletions(-)

Thanks Sowmini, I have added your patch to my queue.

I know that Emil and others are looking into whether we can do this for
all ixgbe silicon, so we may have a follow-on patch to the work you have
done already to enable this for all devices.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH] i40e: don't enable and init FCOE by default when do PF reset
From: Jeff Kirsher @ 2015-01-16 14:47 UTC (permalink / raw)
  To: ethan zhao
  Cc: Dev, Vasu, Ethan Zhao, Ronciak, John, Brandeburg, Jesse,
	Allan, Bruce W, Wyborny, Carolyn, Skidmore, Donald C,
	Rose, Gregory V, Vick, Matthew, Williams, Mitch A, Parikh, Neerav,
	Linux NICS, e1000-devel@lists.sourceforge.net,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	brian.maly@oracle.com
In-Reply-To: <54B86DF8.3040206@oracle.com>

[-- Attachment #1: Type: text/plain, Size: 9970 bytes --]

On Fri, 2015-01-16 at 09:48 +0800, ethan zhao wrote:
> Vasu,
> 
>     OK, disable FCOE as default configuration as a temporary step to 
> make it  work.

Sounds like I should expect a v2 coming, correct?

> 
> 
> Thanks,
> Ethan
> 
> On 2015/1/16 7:45, Dev, Vasu wrote:
> >> -----Original Message-----
> >> From: ethan zhao [mailto:ethan.zhao@oracle.com]
> >> Sent: Tuesday, January 13, 2015 6:41 PM
> >> To: Dev, Vasu
> >> Cc: Ethan Zhao; Ronciak, John; Kirsher, Jeffrey T; Brandeburg, Jesse; Allan,
> >> Bruce W; Wyborny, Carolyn; Skidmore, Donald C; Rose, Gregory V; Vick,
> >> Matthew; Williams, Mitch A; Parikh, Neerav; Linux NICS; e1000-
> >> devel@lists.sourceforge.net; netdev@vger.kernel.org; linux-
> >> kernel@vger.kernel.org; brian.maly@oracle.com
> >> Subject: Re: [PATCH] i40e: don't enable and init FCOE by default when do PF
> >> reset
> >>
> >> Vasu,
> >>
> >> On 2015/1/14 3:38, Dev, Vasu wrote:
> >>>> -----Original Message-----
> >>>>>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c
> >>>>>>> b/drivers/net/ethernet/intel/i40e/i40e_main.c
> >>>>>>> index a5f2660..a2572cc 100644
> >>>>>>> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> >>>>>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> >>>>>>> @@ -6180,9 +6180,12 @@ static void i40e_reset_and_rebuild(struct
> >>>>>>> i40e_pf *pf, bool reinit)
> >>>>>>>       }
> >>>>>>>    #endif /* CONFIG_I40E_DCB */
> >>>>>>>    #ifdef I40E_FCOE
> >>>>>>> -   ret = i40e_init_pf_fcoe(pf);
> >>>>>>> -   if (ret)
> >>>>>>> -           dev_info(&pf->pdev->dev, "init_pf_fcoe failed: %d\n", ret);
> >>>>>>> +   if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
> >>>>>>> +           ret = i40e_init_pf_fcoe(pf);
> >>>>> Calling i40e_init_pf_fcoe() here conflicts with its
> >>>> I40E_FLAG_FCOE_ENABLED pre-condition since
> >> I40E_FLAG_FCOE_ENABLED is
> >>>> set by very same i40e_init_pf_fcoe(), in turn i40e_init_pf_fcoe()
> >>>> will never get called.
> >>>>
> >>>> I don't think so,  here ,i40e_reset_and_rebuild()  is not the only
> >>>> and the first place that  i40e_init_pf_fcoe() is called, see
> >>>> i40e_probe(), that is the first chance.
> >>>>
> >>>> i40e_probe()
> >>>> -->i40e_sw_init()
> >>>>        -->i40e_init_pf_fcoe()
> >>>>
> >>>> And the I40E_FLAG_FCOE_ENABLED is possible be set by
> >>>> i40e_fcoe_enable() or i40e_fcoe_disable() interface before the reset
> >>>> action is to be done.
> >>>>
> >>> It is set by i40e_init_pf_fcoe() and you are right that the modified call flow
> >> by your patch won't impact setting of I40E_FLAG_FCOE_ENABLED anyway
> >> which could have prevented calling i40e_init_pf_fcoe() as I described above,
> >> so this is not an issue with the patch.
> >>>> BTW, the reason I post this patch is that we hit a bug, after setup
> >>>> vlan, the PF is enabled to FCOE.
> >>>>
> >>> Then that BUG would still remain un-fixed and calling i40e_init_pf_fcoe()
> >> under I40E_FLAG_FCOE_ENABLED  flag really won't affect call flow to fix
> >> anything. I mean I40E_FLAG_FCOE_ENABLED  condition will be true with "pf-
> >>> hw.func_caps.fcoe == true" and otherwise calling i40e_init_pf_fcoe() simply
> >> returns back early on after checking "pf->hw.func_caps.fcoe == false", so
> >> how that bug is fixed here by added I40E_FLAG_FCOE_ENABLED  condition ?
> >> What is the bug ?
> >>    The func_caps.fcoe is assigned by following call path, under our test
> >> environment,
> >>
> >>    i40e_probe()
> >>     ->i40e_get_capabilities()
> >>        ->i40e_aq_discover_capabilities()
> >>           ->i40e_parse_discover_capabilities()
> >>
> >>    Or
> >>
> >>    i40e_reset_and_rebuild()
> >>     ->i40e_get_capabilities()
> >>       ->i40e_aq_discover_capabilities()
> >>         ->i40e_parse_discover_capabilities()
> >>
> >>    Under our test environment, the "pf->hw.func_caps.fcoe" is true. so if
> >> i40e_reset_and_rebuild() is called for VLAN setup, ethtool diagnostic test.
> >>    And then i40e_init_pf_fcoe() is to be called,
> >>
> >>    While if (!pf->hw.func_caps.fcoe) wouldn't return,
> >>
> > I said it would return with "pf->hw.func_caps.fcoe == false" in my last response, more details below.
> >
> >>    So  pf->flags is set to I40E_FLAG_FCOE_ENABLED.
> >>
> >>    With my patch,  i40e_init_pf_fcoe() is only called after
> >> I40E_FLAG_FCOE_ENABLED is set before reset.
> >>
> >> Enable FCOE in i40e_probe() or not is another issue.
> >>
> > Nope since both cases we should do i40e_init_pf_fcoe() or don't based on fcoe cap true or false.
> >
> > I don't have much to add as I described before with the your patch that "calling i40e_init_pf_fcoe() under I40E_FLAG_FCOE_ENABLED  flag really won't affect call flow to fix anything. I mean I40E_FLAG_FCOE_ENABLED  condition will be true with "pf->hw.func_caps.fcoe == true" and otherwise calling i40e_init_pf_fcoe() simply returns back early on after checking "pf->hw.func_caps.fcoe == false".
> >
> > May be I'm missing something, I guess next either go with CONFIG_I40E_FCOE disable as I suggested before and now it in upstream kernel or we can have further off list discussion to fix the issue you are trying to fix with the patch.
> >
> > Thanks,
> > Vasu
> >
> >> Thanks,
> >> Ethan
> >>
> >>
> >>>>> Jeff Kirsher should be getting out a patch queued by me which adds
> >>>> I40E_FCoE Kbuild option, in that FCoE is disabled by default and
> >>>> user could enable FCoE only if needed, that patch would do same of
> >>>> skipping
> >>>> i40e_init_pf_fcoe() whether FCoE capability in device enabled or not
> >>>> in default config.
> >>>> The following patch will not fix the above issue -- configuration of
> >>>> PF will be changed via reset.
> >>>> How about the FCOE is configured and disabled by  i40e_fcoe_disable()
> >>>> , then reset happens ?
> >>>>
> >>> May be but if the BUG is due to FCoE being enabled then having it disabled
> >> in config will avoid the bug for non FCoE config option and once bug is
> >> understood then that has to be fixed for FCoE enabled config also as I asked
> >> above.
> >>> Thanks Ethan for detailed response.
> >>> Vasu
> >>>
> >>>>>   From patchwork Wed Oct  2 23:26:08 2013
> >>>>> Content-Type: text/plain; charset="utf-8"
> >>>>> MIME-Version: 1.0
> >>>>> Content-Transfer-Encoding: 7bit
> >>>>> Subject: [net] i40e: adds FCoE configure option
> >>>>> Date: Thu, 03 Oct 2013 07:26:08 -0000
> >>>>> From: Vasu Dev <vasu.dev@intel.com>
> >>>>> X-Patchwork-Id: 11797
> >>>>>
> >>>>> Adds FCoE config option I40E_FCOE, so that FCoE can be enabled as
> >>>>> needed but otherwise have it disabled by default.
> >>>>>
> >>>>> This also eliminate multiple FCoE config checks, instead now just
> >>>>> one config check for CONFIG_I40E_FCOE.
> >>>>>
> >>>>> The I40E FCoE was added with 3.17 kernel and therefore this patch
> >>>>> shall be applied to stable 3.17 kernel also.
> >>>>>
> >>>>> CC: <stable@vger.kernel.org>
> >>>>> Signed-off-by: Vasu Dev <vasu.dev@intel.com>
> >>>>> Tested-by: Jim Young <jamesx.m.young@intel.com>
> >>>>>
> >>>>> ---
> >>>>> drivers/net/ethernet/intel/Kconfig           |   11 +++++++++++
> >>>>>    drivers/net/ethernet/intel/i40e/Makefile     |    2 +-
> >>>>>    drivers/net/ethernet/intel/i40e/i40e_osdep.h |    4 ++--
> >>>>>    3 files changed, 14 insertions(+), 3 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/net/ethernet/intel/Kconfig
> >>>>> b/drivers/net/ethernet/intel/Kconfig
> >>>>> index 5b8300a..4d61ef5 100644
> >>>>> --- a/drivers/net/ethernet/intel/Kconfig
> >>>>> +++ b/drivers/net/ethernet/intel/Kconfig
> >>>>> @@ -281,6 +281,17 @@ config I40E_DCB
> >>>>>
> >>>>>             If unsure, say N.
> >>>>>
> >>>>> +config I40E_FCOE
> >>>>> +       bool "Fibre Channel over Ethernet (FCoE)"
> >>>>> +       default n
> >>>>> +       depends on I40E && DCB && FCOE
> >>>>> +       ---help---
> >>>>> +         Say Y here if you want to use Fibre Channel over Ethernet (FCoE)
> >>>>> +         in the driver. This will create new netdev for exclusive FCoE
> >>>>> +         use with XL710 FCoE offloads enabled.
> >>>>> +
> >>>>> +         If unsure, say N.
> >>>>> +
> >>>>>    config I40EVF
> >>>>>           tristate "Intel(R) XL710 X710 Virtual Function Ethernet support"
> >>>>>           depends on PCI_MSI
> >>>>> diff --git a/drivers/net/ethernet/intel/i40e/Makefile
> >>>>> b/drivers/net/ethernet/intel/i40e/Makefile
> >>>>> index 4b94ddb..c405819 100644
> >>>>> --- a/drivers/net/ethernet/intel/i40e/Makefile
> >>>>> +++ b/drivers/net/ethernet/intel/i40e/Makefile
> >>>>> @@ -44,4 +44,4 @@ i40e-objs := i40e_main.o \
> >>>>>           i40e_virtchnl_pf.o
> >>>>>
> >>>>>    i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
> >>>>> -i40e-$(CONFIG_FCOE:m=y) += i40e_fcoe.o
> >>>>> +i40e-$(CONFIG_I40E_FCOE) += i40e_fcoe.o
> >>>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
> >>>>> b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
> >>>>> index 045b5c4..ad802dd 100644
> >>>>> --- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
> >>>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
> >>>>> @@ -78,7 +78,7 @@ do {                                                            \
> >>>>>    } while (0)
> >>>>>
> >>>>>    typedef enum i40e_status_code i40e_status; -#if
> >>>>> defined(CONFIG_FCOE)
> >>>>> || defined(CONFIG_FCOE_MODULE)
> >>>>> +#ifdef CONFIG_I40E_FCOE
> >>>>>    #define I40E_FCOE
> >>>>> -#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
> >>>>> +#endif
> >>>>>    #endif /* _I40E_OSDEP_H_ */
> >>>>>
> >>>>>>> +           if (ret)
> >>>>>>> +                   dev_info(&pf->pdev->dev,
> >>>>>>> +                            "init_pf_fcoe failed: %d\n", ret);
> >>>>>>> +   }
> >>>>>>>
> >>>>>>>    #endif
> >>>>>>>       /* do basic switch setup */
> >>>>>>> --
> >>>>>>> 1.8.3.1
> >>>> Thanks,
> >>>> Ethan
> 



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [net-next PATCH v3 1/1] atm: remove deprecated use of pci api
From: chas williams - CONTRACTOR @ 2015-01-16 14:52 UTC (permalink / raw)
  To: Quentin Lambert; +Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <54B91BD1.70909@gmail.com>

On Fri, 16 Jan 2015 15:10:25 +0100
Quentin Lambert <lambert.quentin@gmail.com> wrote:


> > -    u32 dma_addr = pci_map_single((struct pci_dev*)fore200e->bus_dev, virt_addr, size, direction);
> > +    u32 dma_addr = dma_map_single(&((struct pci_dev *) fore200e->bus_dev)->dev, virt_addr, size, direction);
> >   
> >       DPRINTK(3, "PCI DVMA mapping: virt_addr = 0x%p, size = %d, direction = %d,  --> dma_addr = 0x%08x\n",
> >   	    virt_addr, size, direction, dma_addr);
> >
> []
> 
> I am going try to make similar changes in some other part of the kernel and
> I was wondering if you could explain how you decided it wasn't necessary to
> check for "((struct pci_dev *) fore200e->bus_dev" nullity for instance.

This gets set up in fore200e_pca_detect() which is pretty early in the
intialization process.  We don't get as far as using any of the "DVMA"
stubs unless pci_enable_device() succeeds, meaning pci_dev is good, and
fore200e->bus_dev is assigned to pci_dev (around line 2724).

fore200e->bus_dev is never cleared back to NULL, but obviously you
shouldn't be using any of the DMA routines after disabling the pci
device.  Hopefully the driver shuts down in an orderly fashion such
that all DMA is over by the time the driver disables the pci device.

^ permalink raw reply

* Re: [net-next PATCH v3 1/1] atm: remove deprecated use of pci api
From: Quentin Lambert @ 2015-01-16 14:54 UTC (permalink / raw)
  To: chas williams - CONTRACTOR
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <20150116095244.40249284@thirdoffive.cmf.nrl.navy.mil>


On 16/01/2015 15:52, chas williams - CONTRACTOR wrote:
> On Fri, 16 Jan 2015 15:10:25 +0100
> Quentin Lambert <lambert.quentin@gmail.com> wrote:
>
>
>>> -    u32 dma_addr = pci_map_single((struct pci_dev*)fore200e->bus_dev, virt_addr, size, direction);
>>> +    u32 dma_addr = dma_map_single(&((struct pci_dev *) fore200e->bus_dev)->dev, virt_addr, size, direction);
>>>    
>>>        DPRINTK(3, "PCI DVMA mapping: virt_addr = 0x%p, size = %d, direction = %d,  --> dma_addr = 0x%08x\n",
>>>    	    virt_addr, size, direction, dma_addr);
>>>
>> []
>>
>> I am going try to make similar changes in some other part of the kernel and
>> I was wondering if you could explain how you decided it wasn't necessary to
>> check for "((struct pci_dev *) fore200e->bus_dev" nullity for instance.
> This gets set up in fore200e_pca_detect() which is pretty early in the
> intialization process.  We don't get as far as using any of the "DVMA"
> stubs unless pci_enable_device() succeeds, meaning pci_dev is good, and
> fore200e->bus_dev is assigned to pci_dev (around line 2724).
>
> fore200e->bus_dev is never cleared back to NULL, but obviously you
> shouldn't be using any of the DMA routines after disabling the pci
> device.  Hopefully the driver shuts down in an orderly fashion such
> that all DMA is over by the time the driver disables the pci device.
Thank you

^ permalink raw reply

* Re: [PATCH tip 0/9] tracing: attach eBPF programs to tracepoints/syscalls/kprobe
From: Steven Rostedt @ 2015-01-16 15:02 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Ingo Molnar, Namhyung Kim, Arnaldo Carvalho de Melo, Jiri Olsa,
	David S. Miller, Daniel Borkmann, Hannes Frederic Sowa,
	Brendan Gregg, linux-api-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1421381770-4866-1-git-send-email-ast-uqk4Ao+rVK5Wk0Htik3J/w@public.gmane.org>

On Thu, 15 Jan 2015 20:16:01 -0800
Alexei Starovoitov <ast-uqk4Ao+rVK5Wk0Htik3J/w@public.gmane.org> wrote:

> Hi Ingo, Steven,
> 
> This patch set is based on tip/master.

Note, the tracing code isn't maintained in tip/master, but perf code is.

Using the latest 3.19-rc is probably sufficient for now.

Do you have a git repo somewhere that I can look at? It makes it easier
than loading in 9 patches ;-)

> It adds ability to attach eBPF programs to tracepoints, syscalls and kprobes.
> 
> Mechanism of attaching:
> - load program via bpf() syscall and receive program_fd
> - event_fd = open("/sys/kernel/debug/tracing/events/.../filter")
> - write 'bpf-123' to event_fd where 123 is program_fd
> - program will be attached to particular event and event automatically enabled
> - close(event_fd) will detach bpf program from event and event disabled
> 
> Program attach point and input arguments:
> - programs attached to kprobes receive 'struct pt_regs *' as an input.
>   See tracex4_kern.c that demonstrates how users can write a C program like:
>   SEC("events/kprobes/sys_write")
>   int bpf_prog4(struct pt_regs *regs)
>   {
>      long write_size = regs->dx; 
>      // here user need to know the proto of sys_write() from kernel
>      // sources and x64 calling convention to know that register $rdx
>      // contains 3rd argument to sys_write() which is 'size_t count'
> 
>   it's obviously architecture dependent, but allows building sophisticated
>   user tools on top, that can see from debug info of vmlinux which variables
>   are in which registers or stack locations and fetch it from there.
>   'perf probe' can potentialy use this hook to generate programs in user space
>   and insert them instead of letting kernel parse string during kprobe creation.
> 
> - programs attached to tracepoints and syscalls receive 'struct bpf_context *':
>   u64 arg1, arg2, ..., arg6;
>   for syscalls they match syscall arguments.
>   for tracepoints these args match arguments passed to tracepoint.
>   For example:
>   trace_sched_migrate_task(p, new_cpu); from sched/core.c
>   arg1 <- p        which is 'struct task_struct *'
>   arg2 <- new_cpu  which is 'unsigned int'
>   arg3..arg6 = 0
>   the program can use bpf_fetch_u8/16/32/64/ptr() helpers to walk 'task_struct'
>   or any other kernel data structures.
>   These helpers are using probe_kernel_read() similar to 'perf probe' which is
>   not 100% safe in both cases, but good enough.
>   To access task_struct's pid inside 'sched_migrate_task' tracepoint
>   the program can do:
>   struct task_struct *task = (struct task_struct *)ctx->arg1;
>   u32 pid = bpf_fetch_u32(&task->pid);
>   Since struct layout is kernel configuration specific such programs are not
>   portable and require access to kernel headers to be compiled,
>   but in this case we don't need debug info.
>   llvm with bpf backend will statically compute task->pid offset as a constant
>   based on kernel headers only.
>   The example of this arbitrary pointer walking is tracex1_kern.c
>   which does skb->dev->name == "lo" filtering.
> 
> In all cases the programs are called before trace buffer is allocated to
> minimize the overhead, since we want to filter huge number of events, but
> buffer alloc/free and argument copy for every event is too costly.

For syscalls this is fine as the parameters are usually set. But
there's a lot of tracepoints that we need to know the result of the
copied data to decide to filter or not, where the result happens at the
TP_fast_assign() part which requires allocating the buffers.

Maybe we should have a way to do the program before and/or after the
buffering depending on what to filter on. There's no way to know what
the parameters of the tracepoint are without looking at the source.



> Theoretically we can invoke programs after buffer is allocated, but it
> doesn't seem needed, since above approach is faster and achieves the same.

Again, for syscalls it may not be a problem, but for other tracepoints,
I'm not sure we can do that. How do you handle sched_switch for
example? The tracepoint only gets two pointers to task structs, you
need to then dereference them to get the pid, prio, state and other
data.

> 
> Note, tracepoint/syscall and kprobe programs are two different types:
> BPF_PROG_TYPE_TRACING_FILTER and BPF_PROG_TYPE_KPROBE_FILTER,
> since they expect different input.
> Both use the same set of helper functions:
> - map access (lookup/update/delete)
> - fetch (probe_kernel_read wrappers)
> - memcmp (probe_kernel_read + memcmp)
> - dump_stack
> - trace_printk
> The last two are mainly to debug the programs and to print data for user
> space consumptions.

I have to look at the code, but currently trace_printk() isn't made to
be used in production systems.

> 
> Portability:
> - kprobe programs are architecture dependent and need user scripting
>   language like ktap/stap/dtrace/perf that will dynamically generate
>   them based on debug info in vmlinux
> - tracepoint programs are architecture independent, but if arbitrary pointer
>   walking (with fetch() helpers) is used, they need data struct layout to match.
>   Debug info is not necessary

If the program runs after the buffers are allocated, it could still be
architecture independent because ftrace gives the information on how to
retrieve the fields.

One last thing. If the ebpf is used for anything but filtering, it
should go into the trigger file. The filtering is only a way to say if
the event should be recorded or not. But the trigger could do something
else (a printk, a stacktrace, etc).

-- Steve


> - for networking use case we need to access 'struct sk_buff' fields in portable
>   way (user space needs to fetch packet length without knowing skb->len offset),
>   so for some frequently used data structures we will add helper functions
>   or pseudo instructions to access them. I've hacked few ways specifically
>   for skb, but abandoned them in favor of more generic type/field infra.
>   That work is still wip. Not part of this set.
>   Once it's ready tracepoint programs that access common data structs
>   will be kernel independent.
> 
> Program return value:
> - programs return 0 to discard an event
> - and return non-zero to proceed with event (allocate trace buffer, copy
>   arguments there and print it eventually in trace_pipe in traditional way)
> 
> Examples:
> - dropmon.c - simple kfree_skb() accounting in eBPF assembler, similar
>   to dropmon tool
> - tracex1_kern.c - does net/netif_receive_skb event filtering
>   for dev->skb->name == "lo" condition
> - tracex2_kern.c - same kfree_skb() accounting like dropmon, but now in C
>   plus computes histogram of all write sizes from sys_write syscall
>   and prints the histogram in userspace
> - tracex3_kern.c - most sophisticated example that computes IO latency
>   between block/block_rq_issue and block/block_rq_complete events
>   and prints 'heatmap' using gray shades of text terminal.
>   Useful to analyze disk performance.
> - tracex4_kern.c - computes histogram of write sizes from sys_write syscall
>   using kprobe mechanism instead of syscall. Since kprobe is optimized into
>   ftrace the overhead of instrumentation is smaller than in example 2.
> 
> The user space tools like ktap/dtrace/systemptap/perf that has access
> to debug info would probably want to use kprobe attachment point, since kprobe
> can be inserted anywhere and all registers are avaiable in the program.
> tracepoint attachments are useful without debug info, so standalone tools
> like iosnoop will use them.
> 
> The main difference vs existing perf_probe/ftrace infra is in kernel aggregation
> and conditional walking of arbitrary data structures.
> 
> Thanks!
> 
> Alexei Starovoitov (9):
>   tracing: attach eBPF programs to tracepoints and syscalls
>   tracing: allow eBPF programs to call bpf_printk()
>   tracing: allow eBPF programs to call ktime_get_ns()
>   samples: bpf: simple tracing example in eBPF assembler
>   samples: bpf: simple tracing example in C
>   samples: bpf: counting example for kfree_skb tracepoint and write
>     syscall
>   samples: bpf: IO latency analysis (iosnoop/heatmap)
>   tracing: attach eBPF programs to kprobe/kretprobe
>   samples: bpf: simple kprobe example
> 
>  include/linux/ftrace_event.h       |    6 +
>  include/trace/bpf_trace.h          |   25 ++++
>  include/trace/ftrace.h             |   30 +++++
>  include/uapi/linux/bpf.h           |   11 ++
>  kernel/trace/Kconfig               |    1 +
>  kernel/trace/Makefile              |    1 +
>  kernel/trace/bpf_trace.c           |  250 ++++++++++++++++++++++++++++++++++++
>  kernel/trace/trace.h               |    3 +
>  kernel/trace/trace_events.c        |   41 +++++-
>  kernel/trace/trace_events_filter.c |   80 +++++++++++-
>  kernel/trace/trace_kprobe.c        |   11 +-
>  kernel/trace/trace_syscalls.c      |   31 +++++
>  samples/bpf/Makefile               |   18 +++
>  samples/bpf/bpf_helpers.h          |   18 +++
>  samples/bpf/bpf_load.c             |   62 ++++++++-
>  samples/bpf/bpf_load.h             |    3 +
>  samples/bpf/dropmon.c              |  129 +++++++++++++++++++
>  samples/bpf/tracex1_kern.c         |   28 ++++
>  samples/bpf/tracex1_user.c         |   24 ++++
>  samples/bpf/tracex2_kern.c         |   71 ++++++++++
>  samples/bpf/tracex2_user.c         |   95 ++++++++++++++
>  samples/bpf/tracex3_kern.c         |   96 ++++++++++++++
>  samples/bpf/tracex3_user.c         |  146 +++++++++++++++++++++
>  samples/bpf/tracex4_kern.c         |   36 ++++++
>  samples/bpf/tracex4_user.c         |   83 ++++++++++++
>  25 files changed, 1290 insertions(+), 9 deletions(-)
>  create mode 100644 include/trace/bpf_trace.h
>  create mode 100644 kernel/trace/bpf_trace.c
>  create mode 100644 samples/bpf/dropmon.c
>  create mode 100644 samples/bpf/tracex1_kern.c
>  create mode 100644 samples/bpf/tracex1_user.c
>  create mode 100644 samples/bpf/tracex2_kern.c
>  create mode 100644 samples/bpf/tracex2_user.c
>  create mode 100644 samples/bpf/tracex3_kern.c
>  create mode 100644 samples/bpf/tracex3_user.c
>  create mode 100644 samples/bpf/tracex4_kern.c
>  create mode 100644 samples/bpf/tracex4_user.c
> 

^ permalink raw reply

* Re: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: Patrick McHardy @ 2015-01-16 15:34 UTC (permalink / raw)
  To: Thomas Graf
  Cc: davem, netdev, kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <75db38bc9313a55cf02a8c36a3376c32b691e5d9.1418647641.git.tgraf@suug.ch>

On 15.12, Thomas Graf wrote:
> The patch also defers expansion and shrinking to a worker queue which
> allows insertion and removal from atomic context. Insertions and
> deletions may occur in parallel to it and are only held up briefly
> while the particular bucket is linked or unzipped.
>
> Mutations of the bucket table pointer is protected by a new mutex, read
> access is RCU protected.
> 
> In the event of an expansion or shrinking, the new bucket table allocated
> is exposed as a so called future table as soon as the resize process
> starts.  Lookups, deletions, and insertions will briefly use both tables.
> The future table becomes the main table after an RCU grace period and
> initial linking of the old to the new table was performed. Optimization
> of the chains to make use of the new number of buckets follows only the
> new table is in use.

AFAICT nft_hash_walk() will miss new entries during this period.
Am I missing anything here?

^ permalink raw reply

* Re: [PATCH 1/9] rhashtable: Do hashing inside of rhashtable_lookup_compare()
From: Patrick McHardy @ 2015-01-16 15:37 UTC (permalink / raw)
  To: Thomas Graf
  Cc: davem, netdev, linux-kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <d579f486c3d4a88744dffc23a794601b025b41f4.1420230585.git.tgraf@suug.ch>

On 02.01, Thomas Graf wrote:
> Hash the key inside of rhashtable_lookup_compare() like
> rhashtable_lookup() does. This allows to simplify the hashing
> functions and keep them private.

One more question:

> diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
> index 1e316ce..614ee09 100644
> --- a/net/netfilter/nft_hash.c
> +++ b/net/netfilter/nft_hash.c
> @@ -94,28 +94,40 @@ static void nft_hash_remove(const struct nft_set *set,
>  	kfree(he);
>  }
>  
> +struct nft_compare_arg {
> +	const struct nft_set *set;
> +	struct nft_set_elem *elem;
> +};
> +
> +static bool nft_hash_compare(void *ptr, void *arg)
> +{
> +	struct nft_hash_elem *he = ptr;
> +	struct nft_compare_arg *x = arg;
> +
> +	if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) {
> +		x->elem->cookie = &he->node;
> +		x->elem->flags = 0;
> +		if (x->set->flags & NFT_SET_MAP)
> +			nft_data_copy(&x->elem->data, he->data);

Is there any reason why we need to perform the assignments in the
compare function? The reason why I'm asking is because to add
timeout support, I need another compare function for nft_hash_lookup()
and I'd prefer to use a single one for both cases.

> +
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
>  static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
>  {
>  	const struct rhashtable *priv = nft_set_priv(set);
> -	const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv);
> -	struct rhash_head __rcu * const *pprev;
> -	struct nft_hash_elem *he;
> -	u32 h;
> -
> -	h = rhashtable_hashfn(priv, &elem->key, set->klen);
> -	pprev = &tbl->buckets[h];
> -	rht_for_each_entry_rcu(he, tbl->buckets[h], node) {
> -		if (nft_data_cmp(&he->key, &elem->key, set->klen)) {
> -			pprev = &he->node.next;
> -			continue;
> -		}
> +	struct nft_compare_arg arg = {
> +		.set = set,
> +		.elem = elem,
> +	};
>  
> -		elem->cookie = (void *)pprev;
> -		elem->flags = 0;
> -		if (set->flags & NFT_SET_MAP)
> -			nft_data_copy(&elem->data, he->data);
> +	if (rhashtable_lookup_compare(priv, &elem->key,
> +				      &nft_hash_compare, &arg))
>  		return 0;
> -	}
> +
>  	return -ENOENT;
>  }
>  

^ permalink raw reply

* [PATCH net-next 0/5] bonding: various 802.3ad fixes
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev

This patch series is a forward porting of patches we (Cumulus) are shipping
in our 3.2 series kernels. These fixes attempt to make 802.3ad bonding mode
more predictable in certian state machine transtions in addition to enhancing
802.3ad bond carrier determination based on acutal number of peered ports plus
if the bond has an active aggregrator. Specific notes are contained within each
patch.

For this patch series there are no userspace facing changes, a diff between
the modinfo output showed no difference. However, there are behavioral
facing changes, primarily in the bond carrier state. Please make sure to
review carefully.

Jonathan Toppins (1):
  bonding: cleanup and remove dead code

Satish Ashok (1):
  bonding: fix LACP PDU not sent on slave port sometimes

Scott Feldman (1):
  bonding: keep bond interface carrier off until at least one active
    member

Wilson Kok (2):
  bonding: fix bond_open() don't always set slave active flag
  bonding: fix incorrect lacp mux state when agg not active

 drivers/net/bonding/bond_3ad.c     |   73 ++++++++++++++++++++++++++++--------
 drivers/net/bonding/bond_main.c    |    6 +--
 drivers/net/bonding/bond_options.c |    1 +
 include/net/bond_3ad.h             |    1 -
 include/net/bonding.h              |    1 +
 5 files changed, 62 insertions(+), 20 deletions(-)

-- 
1.7.10.4

^ permalink raw reply

* [PATCH net-next 1/5] bonding: keep bond interface carrier off until at least one active member
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev; +Cc: Scott Feldman, Andy Gospodarek, Jonathan Toppins
In-Reply-To: <1421423848-414-1-git-send-email-jtoppins@cumulusnetworks.com>

From: Scott Feldman <sfeldma@cumulusnetworks.com>

Bonding driver parameter min_links is now used to signal upper-level
protocols of bond status. The way it works is if the total number of
active members in slaves drops below min_links, the bond link carrier
will go down, signaling upper levels that bond is inactive.  When active
members returns to >= min_links, bond link carrier will go up (RUNNING),
and protocols can resume.  When bond is carrier down, member ports are
in stp fwd state blocked (rather than normal disabled state), so
low-level ctrl protocols (LACP) can still get in and be processed by
bonding driver.

LACP will still do it's job while bond is carrier off, and if bond members
become active, bond carrier will be turned back on, signaling higher-level
protocols that bond is viable.

Suggested setting of min_links is 1, rather than the default of zero.
Using min_links=1 says that at least 1 slave must be active within bond
for bond to be carrier on.

Finally, when min_links bonding option is changed update carrier status.

Cc: Scott Feldman <sfeldma@gmail.com>
Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
---
 drivers/net/bonding/bond_3ad.c     |   18 ++++++++++++++----
 drivers/net/bonding/bond_main.c    |    2 +-
 drivers/net/bonding/bond_options.c |    1 +
 include/net/bonding.h              |    1 +
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 8baa87d..e9b706f 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -189,6 +189,7 @@ static inline int __agg_has_partner(struct aggregator *agg)
 static inline void __disable_port(struct port *port)
 {
 	bond_set_slave_inactive_flags(port->slave, BOND_SLAVE_NOTIFY_LATER);
+	bond_3ad_set_carrier(port->slave->bond);
 }
 
 /**
@@ -199,8 +200,10 @@ static inline void __enable_port(struct port *port)
 {
 	struct slave *slave = port->slave;
 
-	if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave))
+	if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave)) {
 		bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER);
+		bond_3ad_set_carrier(slave->bond);
+	}
 }
 
 /**
@@ -2372,8 +2375,10 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
 int bond_3ad_set_carrier(struct bonding *bond)
 {
 	struct aggregator *active;
-	struct slave *first_slave;
+	struct slave *first_slave, *slave;
+	struct list_head *iter;
 	int ret = 1;
+	int active_slaves = 0;
 
 	rcu_read_lock();
 	first_slave = bond_first_slave_rcu(bond);
@@ -2381,10 +2386,15 @@ int bond_3ad_set_carrier(struct bonding *bond)
 		ret = 0;
 		goto out;
 	}
+
+	bond_for_each_slave_rcu(bond, slave, iter)
+		if (SLAVE_AD_INFO(slave)->aggregator.is_active)
+			active_slaves++;
+
 	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator));
-	if (active) {
+	if (active && __agg_has_partner(active)) {
 		/* are enough slaves available to consider link up? */
-		if (active->num_of_ports < bond->params.min_links) {
+		if (active_slaves < bond->params.min_links) {
 			if (netif_carrier_ok(bond->dev)) {
 				netif_carrier_off(bond->dev);
 				goto out;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 0dceba1..02ffedb 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -334,7 +334,7 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
  *
  * Returns zero if carrier state does not change, nonzero if it does.
  */
-static int bond_set_carrier(struct bonding *bond)
+int bond_set_carrier(struct bonding *bond)
 {
 	struct list_head *iter;
 	struct slave *slave;
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 9bd538d4..4df2894 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1181,6 +1181,7 @@ static int bond_option_min_links_set(struct bonding *bond,
 	netdev_info(bond->dev, "Setting min links value to %llu\n",
 		    newval->value);
 	bond->params.min_links = newval->value;
+	bond_set_carrier(bond);
 
 	return 0;
 }
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 983a94b..29f53ea 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -525,6 +525,7 @@ void bond_sysfs_slave_del(struct slave *slave);
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev);
 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
 u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb);
+int bond_set_carrier(struct bonding *bond);
 void bond_select_active_slave(struct bonding *bond);
 void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
 void bond_create_debugfs(void);
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net-next 2/5] bonding: fix bond_open() don't always set slave active flag
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev; +Cc: Andy Gospodarek, Wilson Kok, Jonathan Toppins
In-Reply-To: <1421423848-414-1-git-send-email-jtoppins@cumulusnetworks.com>

From: Wilson Kok <wkok@cumulusnetworks.com>

Mode 802.3ad, fix incorrect bond slave active state when slave is not in
active aggregator. During bond_open(), the bonding driver always sets
the slave active flag to true if the bond is not in active-backup, alb,
or tlb modes. Bonding should let the aggregator selection logic set the
active flag when in 802.3ad mode.

Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Wilson Kok <wkok@cumulusnetworks.com>
Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
---
 drivers/net/bonding/bond_main.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 02ffedb..c475d90 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3066,7 +3066,7 @@ static int bond_open(struct net_device *bond_dev)
 			    slave != rcu_access_pointer(bond->curr_active_slave)) {
 				bond_set_slave_inactive_flags(slave,
 							      BOND_SLAVE_NOTIFY_NOW);
-			} else {
+			} else if (BOND_MODE(bond) != BOND_MODE_8023AD) {
 				bond_set_slave_active_flags(slave,
 							    BOND_SLAVE_NOTIFY_NOW);
 			}
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net-next 3/5] bonding: fix incorrect lacp mux state when agg not active
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev; +Cc: Andy Gospodarek, Wilson Kok, Jonathan Toppins
In-Reply-To: <1421423848-414-1-git-send-email-jtoppins@cumulusnetworks.com>

From: Wilson Kok <wkok@cumulusnetworks.com>

This patch attempts to fix the following problems when an actor or
partner's aggregator is not active:
    1. a slave's lacp port state is marked as AD_STATE_SYNCHRONIZATION
       even if it is attached to an inactive aggregator. LACP advertises
       this state to the partner, making the partner think he can move
       into COLLECTING_DISTRIBUTING state even though this link will not
       pass traffic on the local side

    2. a slave goes into COLLECTING_DISTRIBUTING state without checking
       if the aggregator is actually active

    3. when in COLLECTING_DISTRIBUTING state, the partner parameters may
       change, e.g. the partner_oper_port_state.SYNCHRONIZATION. The
       local mux machine is not reacting to the change and continue to
       keep the slave and bond up

    4. When bond slave leaves an inactive aggregator and joins an active
       aggregator, the actor oper port state need to update to SYNC state.

Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Wilson Kok <wkok@cumulusnetworks.com>
Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
---
 drivers/net/bonding/bond_3ad.c |   44 ++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index e9b706f..52a8772 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -471,10 +471,13 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port)
 		 * and the port is matched
 		 */
 		if ((port->sm_vars & AD_PORT_MATCHED)
-		    && (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION))
+			&& (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION)) {
 			partner->port_state |= AD_STATE_SYNCHRONIZATION;
-		else
+			pr_debug("%s partner sync=1\n", port->slave->dev->name);
+		} else {
 			partner->port_state &= ~AD_STATE_SYNCHRONIZATION;
+			pr_debug("%s partner sync=0\n", port->slave->dev->name);
+		}
 	}
 }
 
@@ -729,6 +732,8 @@ static inline void __update_lacpdu_from_port(struct port *port)
 	lacpdu->actor_port_priority = htons(port->actor_port_priority);
 	lacpdu->actor_port = htons(port->actor_port_number);
 	lacpdu->actor_state = port->actor_oper_port_state;
+	pr_debug("update lacpdu: %s, actor port state %x\n",
+		 port->slave->dev->name, port->actor_oper_port_state);
 
 	/* lacpdu->reserved_3_1              initialized
 	 * lacpdu->tlv_type_partner_info     initialized
@@ -901,7 +906,9 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 			if ((port->sm_vars & AD_PORT_SELECTED) &&
 			    (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) &&
 			    !__check_agg_selection_timer(port)) {
-				port->sm_mux_state = AD_MUX_COLLECTING_DISTRIBUTING;
+				if (port->aggregator->is_active)
+					port->sm_mux_state =
+					    AD_MUX_COLLECTING_DISTRIBUTING;
 			} else if (!(port->sm_vars & AD_PORT_SELECTED) ||
 				   (port->sm_vars & AD_PORT_STANDBY)) {
 				/* if UNSELECTED or STANDBY */
@@ -913,12 +920,18 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 				 */
 				__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
 				port->sm_mux_state = AD_MUX_DETACHED;
+			} else if (port->aggregator->is_active) {
+				port->actor_oper_port_state |=
+				    AD_STATE_SYNCHRONIZATION;
 			}
 			break;
 		case AD_MUX_COLLECTING_DISTRIBUTING:
 			if (!(port->sm_vars & AD_PORT_SELECTED) ||
 			    (port->sm_vars & AD_PORT_STANDBY) ||
-			    !(port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION)) {
+			    !(port->partner_oper.port_state &
+				AD_STATE_SYNCHRONIZATION) ||
+			    !(port->actor_oper_port_state &
+				AD_STATE_SYNCHRONIZATION)) {
 				port->sm_mux_state = AD_MUX_ATTACHED;
 			} else {
 				/* if port state hasn't changed make
@@ -940,8 +953,10 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 
 	/* check if the state machine was changed */
 	if (port->sm_mux_state != last_state) {
-		pr_debug("Mux Machine: Port=%d, Last State=%d, Curr State=%d\n",
-			 port->actor_port_number, last_state,
+		pr_debug("Mux Machine: Port=%d (%s), Last State=%d, Curr State=%d\n",
+			 port->actor_port_number,
+			 port->slave->dev->name,
+			 last_state,
 			 port->sm_mux_state);
 		switch (port->sm_mux_state) {
 		case AD_MUX_DETACHED:
@@ -956,7 +971,12 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 			port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0);
 			break;
 		case AD_MUX_ATTACHED:
-			port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
+			if (port->aggregator->is_active)
+				port->actor_oper_port_state |=
+				    AD_STATE_SYNCHRONIZATION;
+			else
+				port->actor_oper_port_state &=
+				    ~AD_STATE_SYNCHRONIZATION;
 			port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
 			port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
 			ad_disable_collecting_distributing(port,
@@ -966,6 +986,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 		case AD_MUX_COLLECTING_DISTRIBUTING:
 			port->actor_oper_port_state |= AD_STATE_COLLECTING;
 			port->actor_oper_port_state |= AD_STATE_DISTRIBUTING;
+			port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
 			ad_enable_collecting_distributing(port,
 							  update_slave_arr);
 			port->ntt = true;
@@ -1047,8 +1068,10 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
 
 	/* check if the State machine was changed or new lacpdu arrived */
 	if ((port->sm_rx_state != last_state) || (lacpdu)) {
-		pr_debug("Rx Machine: Port=%d, Last State=%d, Curr State=%d\n",
-			 port->actor_port_number, last_state,
+		pr_debug("Rx Machine: Port=%d (%s), Last State=%d, Curr State=%d\n",
+			 port->actor_port_number,
+			 port->slave->dev->name,
+			 last_state,
 			 port->sm_rx_state);
 		switch (port->sm_rx_state) {
 		case AD_RX_INITIALIZE:
@@ -1397,6 +1420,9 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
 
 	aggregator = __get_first_agg(port);
 	ad_agg_selection_logic(aggregator, update_slave_arr);
+
+	if (!port->aggregator->is_active)
+		port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
 }
 
 /* Decide if "agg" is a better choice for the new active aggregator that
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net-next 4/5] bonding: fix LACP PDU not sent on slave port sometimes
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev; +Cc: Andy Gospodarek, Satish Ashok, Jonathan Toppins
In-Reply-To: <1421423848-414-1-git-send-email-jtoppins@cumulusnetworks.com>

From: Satish Ashok <sashok@cumulusnetworks.com>

When a slave is added to a bond and it is not in full duplex mode,
AD_PORT_LACP_ENABLED flag is cleared, due to this LACP PDU is not sent
on slave. When the duplex is changed to full, the flag needs to be set
to send LACP PDU.

Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Satish Ashok <sashok@cumulusnetworks.com>
Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
---
 drivers/net/bonding/bond_3ad.c |   11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 52a8772..43bb0b0 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2224,8 +2224,10 @@ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave,
 		switch (lacpdu->subtype) {
 		case AD_TYPE_LACPDU:
 			ret = RX_HANDLER_CONSUMED;
-			netdev_dbg(slave->bond->dev, "Received LACPDU on port %d\n",
-				   port->actor_port_number);
+			netdev_dbg(slave->bond->dev,
+				   "Received LACPDU on port %d slave %s\n",
+				   port->actor_port_number,
+				   slave->dev->name);
 			/* Protect against concurrent state machines */
 			spin_lock(&slave->bond->mode_lock);
 			ad_rx_machine(lacpdu, port);
@@ -2317,7 +2319,10 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave)
 	port->actor_admin_port_key &= ~AD_DUPLEX_KEY_MASKS;
 	port->actor_oper_port_key = port->actor_admin_port_key |=
 		__get_duplex(port);
-	netdev_dbg(slave->bond->dev, "Port %d changed duplex\n", port->actor_port_number);
+	netdev_dbg(slave->bond->dev, "Port %d slave %s changed duplex\n",
+		   port->actor_port_number, slave->dev->name);
+	if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)
+		port->sm_vars |= AD_PORT_LACP_ENABLED;
 	/* there is no need to reselect a new aggregator, just signal the
 	 * state machines to reinitialize
 	 */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net-next 5/5] bonding: cleanup and remove dead code
From: Jonathan Toppins @ 2015-01-16 15:57 UTC (permalink / raw)
  To: netdev; +Cc: Jonathan Toppins
In-Reply-To: <1421423848-414-1-git-send-email-jtoppins@cumulusnetworks.com>

fix sparse warning about non-static function

drivers/net/bonding/bond_main.c:3737:5: warning: symbol
'bond_3ad_xor_xmit' was not declared. Should it be static?

Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
---
 drivers/net/bonding/bond_main.c |    2 +-
 include/net/bond_3ad.h          |    1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c475d90..67437f3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3734,7 +3734,7 @@ out:
  * usable slave array is formed in the control path. The xmit function
  * just calculates hash and sends the packet out.
  */
-int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
+static int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct bonding *bond = netdev_priv(dev);
 	struct slave *slave;
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index e01d903..f04cdbb 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -274,7 +274,6 @@ void bond_3ad_handle_link_change(struct slave *slave, char link);
 int  bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info);
 int  __bond_3ad_get_active_agg_info(struct bonding *bond,
 				    struct ad_info *ad_info);
-int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev);
 int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
 			 struct slave *slave);
 int bond_3ad_set_carrier(struct bonding *bond);
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: Thomas Graf @ 2015-01-16 15:58 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: davem, netdev, kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <20150116153415.GF30132@acer.localdomain>

On 01/16/15 at 03:34pm, Patrick McHardy wrote:
> On 15.12, Thomas Graf wrote:
> > In the event of an expansion or shrinking, the new bucket table allocated
> > is exposed as a so called future table as soon as the resize process
> > starts.  Lookups, deletions, and insertions will briefly use both tables.
> > The future table becomes the main table after an RCU grace period and
> > initial linking of the old to the new table was performed. Optimization
> > of the chains to make use of the new number of buckets follows only the
> > new table is in use.
> 
> AFAICT nft_hash_walk() will miss new entries during this period.
> Am I missing anything here?

A walker may not see insertions that occur after the walker was started
if resizing is enabled. Is that a problem for nftables?

^ permalink raw reply

* [PATCH v2 0/2] csiostor:Remove T4 FCoE support
From: Praveen Madhavan @ 2015-01-16 16:00 UTC (permalink / raw)
  To: netdev, linux-scsi; +Cc: davem, JBottomley, hch, hariprasad, praveenm, varun

We found a subtle issue with FCoE on T4 very late in the game
and decided not to productize FCoE on T4 and therefore there
are no customers that will be impacted by this change. FCoE is
supported on T5 cards.

Please apply on net-next since depends on previous commits.

Changes in v2:
  - Make the commit message more clearer.

Praveen Madhavan (2):
  csiostor:Remove T4 FCoE Support.
  csiostor:Removed file csio_hw_t4.c

 drivers/scsi/csiostor/Makefile       |   2 +-
 drivers/scsi/csiostor/csio_hw.c      |  61 ++----
 drivers/scsi/csiostor/csio_hw_chip.h |  43 ----
 drivers/scsi/csiostor/csio_hw_t4.c   | 404 -----------------------------------
 drivers/scsi/csiostor/csio_init.c    |   6 +-
 drivers/scsi/csiostor/csio_wr.c      |  15 +-
 6 files changed, 25 insertions(+), 506 deletions(-)
 delete mode 100644 drivers/scsi/csiostor/csio_hw_t4.c

-- 
2.0.2


^ permalink raw reply

* Re: [PATCH 1/9] rhashtable: Do hashing inside of rhashtable_lookup_compare()
From: Thomas Graf @ 2015-01-16 16:00 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: davem, netdev, linux-kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <20150116153740.GG30132@acer.localdomain>

On 01/16/15 at 03:37pm, Patrick McHardy wrote:
> On 02.01, Thomas Graf wrote:
> > +{
> > +	struct nft_hash_elem *he = ptr;
> > +	struct nft_compare_arg *x = arg;
> > +
> > +	if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) {
> > +		x->elem->cookie = &he->node;
> > +		x->elem->flags = 0;
> > +		if (x->set->flags & NFT_SET_MAP)
> > +			nft_data_copy(&x->elem->data, he->data);
> 
> Is there any reason why we need to perform the assignments in the
> compare function? The reason why I'm asking is because to add
> timeout support, I need another compare function for nft_hash_lookup()
> and I'd prefer to use a single one for both cases.

No. I kept the nft_hash code as intact as possible without changing
any semantics (aside from the lost optimiztion of keeping the pprev
pointer from the lookup when doing removals). No reason speaks against
moving it out.

^ permalink raw reply

* [PATCH v2 2/2] csiostor:Removed file csio_hw_t4.c
From: Praveen Madhavan @ 2015-01-16 16:00 UTC (permalink / raw)
  To: netdev, linux-scsi; +Cc: davem, JBottomley, hch, hariprasad, praveenm, varun
In-Reply-To: <cover.1421423434.git.praveenm@chelsio.com>

We have decided not to productize FCoE on T4.
Hence file is removed.

Signed-off-by: Praveen Madhavan <praveenm@chelsio.com>
---
 drivers/scsi/csiostor/Makefile     |   2 +-
 drivers/scsi/csiostor/csio_hw_t4.c | 404 -------------------------------------
 2 files changed, 1 insertion(+), 405 deletions(-)
 delete mode 100644 drivers/scsi/csiostor/csio_hw_t4.c

diff --git a/drivers/scsi/csiostor/Makefile b/drivers/scsi/csiostor/Makefile
index 913b9a9..3681a3f 100644
--- a/drivers/scsi/csiostor/Makefile
+++ b/drivers/scsi/csiostor/Makefile
@@ -8,5 +8,5 @@ ccflags-y += -I$(srctree)/drivers/net/ethernet/chelsio/cxgb4
 obj-$(CONFIG_SCSI_CHELSIO_FCOE) += csiostor.o
 
 csiostor-objs := csio_attr.o csio_init.o csio_lnode.o csio_scsi.o \
-		csio_hw.o csio_hw_t4.o csio_hw_t5.o csio_isr.o \
+		csio_hw.o csio_hw_t5.o csio_isr.o \
 		csio_mb.o csio_rnode.o csio_wr.o
diff --git a/drivers/scsi/csiostor/csio_hw_t4.c b/drivers/scsi/csiostor/csio_hw_t4.c
deleted file mode 100644
index 14884e4..0000000
--- a/drivers/scsi/csiostor/csio_hw_t4.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * This file is part of the Chelsio FCoE driver for Linux.
- *
- * Copyright (c) 2008-2013 Chelsio Communications, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "csio_hw.h"
-#include "csio_init.h"
-
-/*
- * Return the specified PCI-E Configuration Space register from our Physical
- * Function.  We try first via a Firmware LDST Command since we prefer to let
- * the firmware own all of these registers, but if that fails we go for it
- * directly ourselves.
- */
-static uint32_t
-csio_t4_read_pcie_cfg4(struct csio_hw *hw, int reg)
-{
-	u32 val = 0;
-	struct csio_mb *mbp;
-	int rv;
-	struct fw_ldst_cmd *ldst_cmd;
-
-	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
-	if (!mbp) {
-		CSIO_INC_STATS(hw, n_err_nomem);
-		pci_read_config_dword(hw->pdev, reg, &val);
-		return val;
-	}
-
-	csio_mb_ldst(hw, mbp, CSIO_MB_DEFAULT_TMO, reg);
-	rv = csio_mb_issue(hw, mbp);
-
-	/*
-	 * If the LDST Command suucceeded, exctract the returned register
-	 * value.  Otherwise read it directly ourself.
-	 */
-	if (rv == 0) {
-		ldst_cmd = (struct fw_ldst_cmd *)(mbp->mb);
-		val = ntohl(ldst_cmd->u.pcie.data[0]);
-	} else
-		pci_read_config_dword(hw->pdev, reg, &val);
-
-	mempool_free(mbp, hw->mb_mempool);
-
-	return val;
-}
-
-static int
-csio_t4_set_mem_win(struct csio_hw *hw, uint32_t win)
-{
-	u32 bar0;
-	u32 mem_win_base;
-
-	/*
-	 * Truncation intentional: we only read the bottom 32-bits of the
-	 * 64-bit BAR0/BAR1 ...  We use the hardware backdoor mechanism to
-	 * read BAR0 instead of using pci_resource_start() because we could be
-	 * operating from within a Virtual Machine which is trapping our
-	 * accesses to our Configuration Space and we need to set up the PCI-E
-	 * Memory Window decoders with the actual addresses which will be
-	 * coming across the PCI-E link.
-	 */
-	bar0 = csio_t4_read_pcie_cfg4(hw, PCI_BASE_ADDRESS_0);
-	bar0 &= PCI_BASE_ADDRESS_MEM_MASK;
-
-	mem_win_base = bar0 + MEMWIN_BASE;
-
-	/*
-	 * Set up memory window for accessing adapter memory ranges.  (Read
-	 * back MA register to ensure that changes propagate before we attempt
-	 * to use the new values.)
-	 */
-	csio_wr_reg32(hw, mem_win_base | BIR_V(0) |
-			  WINDOW_V(ilog2(MEMWIN_APERTURE) - 10),
-			  PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, win));
-	csio_rd_reg32(hw,
-		      PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, win));
-	return 0;
-}
-
-/*
- * Interrupt handler for the PCIE module.
- */
-static void
-csio_t4_pcie_intr_handler(struct csio_hw *hw)
-{
-	static struct intr_info sysbus_intr_info[] = {
-		{ RNPP_F, "RXNP array parity error", -1, 1 },
-		{ RPCP_F, "RXPC array parity error", -1, 1 },
-		{ RCIP_F, "RXCIF array parity error", -1, 1 },
-		{ RCCP_F, "Rx completions control array parity error", -1, 1 },
-		{ RFTP_F, "RXFT array parity error", -1, 1 },
-		{ 0, NULL, 0, 0 }
-	};
-	static struct intr_info pcie_port_intr_info[] = {
-		{ TPCP_F, "TXPC array parity error", -1, 1 },
-		{ TNPP_F, "TXNP array parity error", -1, 1 },
-		{ TFTP_F, "TXFT array parity error", -1, 1 },
-		{ TCAP_F, "TXCA array parity error", -1, 1 },
-		{ TCIP_F, "TXCIF array parity error", -1, 1 },
-		{ RCAP_F, "RXCA array parity error", -1, 1 },
-		{ OTDD_F, "outbound request TLP discarded", -1, 1 },
-		{ RDPE_F, "Rx data parity error", -1, 1 },
-		{ TDUE_F, "Tx uncorrectable data error", -1, 1 },
-		{ 0, NULL, 0, 0 }
-	};
-
-	static struct intr_info pcie_intr_info[] = {
-		{ MSIADDRLPERR_F, "MSI AddrL parity error", -1, 1 },
-		{ MSIADDRHPERR_F, "MSI AddrH parity error", -1, 1 },
-		{ MSIDATAPERR_F, "MSI data parity error", -1, 1 },
-		{ MSIXADDRLPERR_F, "MSI-X AddrL parity error", -1, 1 },
-		{ MSIXADDRHPERR_F, "MSI-X AddrH parity error", -1, 1 },
-		{ MSIXDATAPERR_F, "MSI-X data parity error", -1, 1 },
-		{ MSIXDIPERR_F, "MSI-X DI parity error", -1, 1 },
-		{ PIOCPLPERR_F, "PCI PIO completion FIFO parity error", -1, 1 },
-		{ PIOREQPERR_F, "PCI PIO request FIFO parity error", -1, 1 },
-		{ TARTAGPERR_F, "PCI PCI target tag FIFO parity error", -1, 1 },
-		{ CCNTPERR_F, "PCI CMD channel count parity error", -1, 1 },
-		{ CREQPERR_F, "PCI CMD channel request parity error", -1, 1 },
-		{ CRSPPERR_F, "PCI CMD channel response parity error", -1, 1 },
-		{ DCNTPERR_F, "PCI DMA channel count parity error", -1, 1 },
-		{ DREQPERR_F, "PCI DMA channel request parity error", -1, 1 },
-		{ DRSPPERR_F, "PCI DMA channel response parity error", -1, 1 },
-		{ HCNTPERR_F, "PCI HMA channel count parity error", -1, 1 },
-		{ HREQPERR_F, "PCI HMA channel request parity error", -1, 1 },
-		{ HRSPPERR_F, "PCI HMA channel response parity error", -1, 1 },
-		{ CFGSNPPERR_F, "PCI config snoop FIFO parity error", -1, 1 },
-		{ FIDPERR_F, "PCI FID parity error", -1, 1 },
-		{ INTXCLRPERR_F, "PCI INTx clear parity error", -1, 1 },
-		{ MATAGPERR_F, "PCI MA tag parity error", -1, 1 },
-		{ PIOTAGPERR_F, "PCI PIO tag parity error", -1, 1 },
-		{ RXCPLPERR_F, "PCI Rx completion parity error", -1, 1 },
-		{ RXWRPERR_F, "PCI Rx write parity error", -1, 1 },
-		{ RPLPERR_F, "PCI replay buffer parity error", -1, 1 },
-		{ PCIESINT_F, "PCI core secondary fault", -1, 1 },
-		{ PCIEPINT_F, "PCI core primary fault", -1, 1 },
-		{ UNXSPLCPLERR_F, "PCI unexpected split completion error", -1,
-		  0 },
-		{ 0, NULL, 0, 0 }
-	};
-
-	int fat;
-	fat = csio_handle_intr_status(hw,
-				      PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS_A,
-				      sysbus_intr_info) +
-	      csio_handle_intr_status(hw,
-				      PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS_A,
-				      pcie_port_intr_info) +
-	      csio_handle_intr_status(hw, PCIE_INT_CAUSE_A, pcie_intr_info);
-	if (fat)
-		csio_hw_fatal_err(hw);
-}
-
-/*
- * csio_t4_flash_cfg_addr - return the address of the flash configuration file
- * @hw: the HW module
- *
- * Return the address within the flash where the Firmware Configuration
- * File is stored.
- */
-static unsigned int
-csio_t4_flash_cfg_addr(struct csio_hw *hw)
-{
-	return FLASH_CFG_OFFSET;
-}
-
-/*
- *      csio_t4_mc_read - read from MC through backdoor accesses
- *      @hw: the hw module
- *      @idx: not used for T4 adapter
- *      @addr: address of first byte requested
- *      @data: 64 bytes of data containing the requested address
- *      @ecc: where to store the corresponding 64-bit ECC word
- *
- *      Read 64 bytes of data from MC starting at a 64-byte-aligned address
- *      that covers the requested address @addr.  If @parity is not %NULL it
- *      is assigned the 64-bit ECC word for the read data.
- */
-static int
-csio_t4_mc_read(struct csio_hw *hw, int idx, uint32_t addr, __be32 *data,
-		uint64_t *ecc)
-{
-	int i;
-
-	if (csio_rd_reg32(hw, MC_BIST_CMD_A) & START_BIST_F)
-		return -EBUSY;
-	csio_wr_reg32(hw, addr & ~0x3fU, MC_BIST_CMD_ADDR_A);
-	csio_wr_reg32(hw, 64, MC_BIST_CMD_LEN_A);
-	csio_wr_reg32(hw, 0xc, MC_BIST_DATA_PATTERN_A);
-	csio_wr_reg32(hw, BIST_OPCODE_V(1) | START_BIST_F | BIST_CMD_GAP_V(1),
-		      MC_BIST_CMD_A);
-	i = csio_hw_wait_op_done_val(hw, MC_BIST_CMD_A, START_BIST_F,
-				     0, 10, 1, NULL);
-	if (i)
-		return i;
-
-#define MC_DATA(i) MC_BIST_STATUS_REG(MC_BIST_STATUS_RDATA_A, i)
-
-	for (i = 15; i >= 0; i--)
-		*data++ = htonl(csio_rd_reg32(hw, MC_DATA(i)));
-	if (ecc)
-		*ecc = csio_rd_reg64(hw, MC_DATA(16));
-#undef MC_DATA
-	return 0;
-}
-
-/*
- *      csio_t4_edc_read - read from EDC through backdoor accesses
- *      @hw: the hw module
- *      @idx: which EDC to access
- *      @addr: address of first byte requested
- *      @data: 64 bytes of data containing the requested address
- *      @ecc: where to store the corresponding 64-bit ECC word
- *
- *      Read 64 bytes of data from EDC starting at a 64-byte-aligned address
- *      that covers the requested address @addr.  If @parity is not %NULL it
- *      is assigned the 64-bit ECC word for the read data.
- */
-static int
-csio_t4_edc_read(struct csio_hw *hw, int idx, uint32_t addr, __be32 *data,
-		uint64_t *ecc)
-{
-	int i;
-
-	idx *= EDC_STRIDE;
-	if (csio_rd_reg32(hw, EDC_BIST_CMD_A + idx) & START_BIST_F)
-		return -EBUSY;
-	csio_wr_reg32(hw, addr & ~0x3fU, EDC_BIST_CMD_ADDR_A + idx);
-	csio_wr_reg32(hw, 64, EDC_BIST_CMD_LEN_A + idx);
-	csio_wr_reg32(hw, 0xc, EDC_BIST_DATA_PATTERN_A + idx);
-	csio_wr_reg32(hw, BIST_OPCODE_V(1) | BIST_CMD_GAP_V(1) | START_BIST_F,
-		      EDC_BIST_CMD_A + idx);
-	i = csio_hw_wait_op_done_val(hw, EDC_BIST_CMD_A + idx, START_BIST_F,
-				     0, 10, 1, NULL);
-	if (i)
-		return i;
-
-#define EDC_DATA(i) (EDC_BIST_STATUS_REG(EDC_BIST_STATUS_RDATA_A, i) + idx)
-
-	for (i = 15; i >= 0; i--)
-		*data++ = htonl(csio_rd_reg32(hw, EDC_DATA(i)));
-	if (ecc)
-		*ecc = csio_rd_reg64(hw, EDC_DATA(16));
-#undef EDC_DATA
-	return 0;
-}
-
-/*
- * csio_t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
- * @hw: the csio_hw
- * @win: PCI-E memory Window to use
- * @mtype: memory type: MEM_EDC0, MEM_EDC1, MEM_MC0 (or MEM_MC) or MEM_MC1
- * @addr: address within indicated memory type
- * @len: amount of memory to transfer
- * @buf: host memory buffer
- * @dir: direction of transfer 1 => read, 0 => write
- *
- * Reads/writes an [almost] arbitrary memory region in the firmware: the
- * firmware memory address, length and host buffer must be aligned on
- * 32-bit boudaries.  The memory is transferred as a raw byte sequence
- * from/to the firmware's memory.  If this memory contains data
- * structures which contain multi-byte integers, it's the callers
- * responsibility to perform appropriate byte order conversions.
- */
-static int
-csio_t4_memory_rw(struct csio_hw *hw, u32 win, int mtype, u32 addr,
-		u32 len, uint32_t *buf, int dir)
-{
-	u32 pos, start, offset, memoffset, bar0;
-	u32 edc_size, mc_size, mem_reg, mem_aperture, mem_base;
-
-	/*
-	 * Argument sanity checks ...
-	 */
-	if ((addr & 0x3) || (len & 0x3))
-		return -EINVAL;
-
-	/* Offset into the region of memory which is being accessed
-	 * MEM_EDC0 = 0
-	 * MEM_EDC1 = 1
-	 * MEM_MC   = 2 -- T4
-	 */
-	edc_size  = EDRAM0_SIZE_G(csio_rd_reg32(hw, MA_EDRAM0_BAR_A));
-	if (mtype != MEM_MC1)
-		memoffset = (mtype * (edc_size * 1024 * 1024));
-	else {
-		mc_size = EXT_MEM_SIZE_G(csio_rd_reg32(hw,
-						       MA_EXT_MEMORY_BAR_A));
-		memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
-	}
-
-	/* Determine the PCIE_MEM_ACCESS_OFFSET */
-	addr = addr + memoffset;
-
-	/*
-	 * Each PCI-E Memory Window is programmed with a window size -- or
-	 * "aperture" -- which controls the granularity of its mapping onto
-	 * adapter memory.  We need to grab that aperture in order to know
-	 * how to use the specified window.  The window is also programmed
-	 * with the base address of the Memory Window in BAR0's address
-	 * space.  For T4 this is an absolute PCI-E Bus Address.  For T5
-	 * the address is relative to BAR0.
-	 */
-	mem_reg = csio_rd_reg32(hw,
-			PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, win));
-	mem_aperture = 1 << (WINDOW_V(mem_reg) + 10);
-	mem_base = PCIEOFST_G(mem_reg) << 10;
-
-	bar0 = csio_t4_read_pcie_cfg4(hw, PCI_BASE_ADDRESS_0);
-	bar0 &= PCI_BASE_ADDRESS_MEM_MASK;
-	mem_base -= bar0;
-
-	start = addr & ~(mem_aperture-1);
-	offset = addr - start;
-
-	csio_dbg(hw, "csio_t4_memory_rw: mem_reg: 0x%x, mem_aperture: 0x%x\n",
-		 mem_reg, mem_aperture);
-	csio_dbg(hw, "csio_t4_memory_rw: mem_base: 0x%x, mem_offset: 0x%x\n",
-		 mem_base, memoffset);
-	csio_dbg(hw, "csio_t4_memory_rw: bar0: 0x%x, start:0x%x, offset:0x%x\n",
-		 bar0, start, offset);
-	csio_dbg(hw, "csio_t4_memory_rw: mtype: %d, addr: 0x%x, len: %d\n",
-		 mtype, addr, len);
-
-	for (pos = start; len > 0; pos += mem_aperture, offset = 0) {
-		/*
-		 * Move PCI-E Memory Window to our current transfer
-		 * position.  Read it back to ensure that changes propagate
-		 * before we attempt to use the new value.
-		 */
-		csio_wr_reg32(hw, pos,
-			PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
-		csio_rd_reg32(hw,
-			PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
-
-		while (offset < mem_aperture && len > 0) {
-			if (dir)
-				*buf++ = csio_rd_reg32(hw, mem_base + offset);
-			else
-				csio_wr_reg32(hw, *buf++, mem_base + offset);
-
-			offset += sizeof(__be32);
-			len -= sizeof(__be32);
-		}
-	}
-	return 0;
-}
-
-/*
- * csio_t4_dfs_create_ext_mem - setup debugfs for MC to read the values
- * @hw: the csio_hw
- *
- * This function creates files in the debugfs with external memory region MC.
- */
-static void
-csio_t4_dfs_create_ext_mem(struct csio_hw *hw)
-{
-	u32 size;
-	int i = csio_rd_reg32(hw, MA_TARGET_MEM_ENABLE_A);
-
-	if (i & EXT_MEM_ENABLE_F) {
-		size = csio_rd_reg32(hw, MA_EXT_MEMORY_BAR_A);
-		csio_add_debugfs_mem(hw, "mc", MEM_MC,
-				     EXT_MEM_SIZE_G(size));
-	}
-}
-
-/* T4 adapter specific function */
-struct csio_hw_chip_ops t4_ops = {
-	.chip_set_mem_win		= csio_t4_set_mem_win,
-	.chip_pcie_intr_handler		= csio_t4_pcie_intr_handler,
-	.chip_flash_cfg_addr		= csio_t4_flash_cfg_addr,
-	.chip_mc_read			= csio_t4_mc_read,
-	.chip_edc_read			= csio_t4_edc_read,
-	.chip_memory_rw			= csio_t4_memory_rw,
-	.chip_dfs_create_ext_mem	= csio_t4_dfs_create_ext_mem,
-};
-- 
2.0.2

^ permalink raw reply related

* [PATCH v2 1/2] csiostor:Remove T4 FCoE Support.
From: Praveen Madhavan @ 2015-01-16 16:00 UTC (permalink / raw)
  To: netdev, linux-scsi; +Cc: davem, JBottomley, hch, hariprasad, praveenm, varun
In-Reply-To: <cover.1421423434.git.praveenm@chelsio.com>

We found a subtle issue with FCoE on T4 very late in the game
and decided not to productize FCoE on T4 and therefore there
are no customers that will be impacted by this change. Hence
T4 FCoE support is removed. FCoE supported only on T5 cards.

changes in v2:
  - Make the commit message more clearer.

Signed-off-by: Praveen Madhavan <praveenm@chelsio.com>
---
 drivers/scsi/csiostor/csio_hw.c      | 61 +++++++++---------------------------
 drivers/scsi/csiostor/csio_hw_chip.h | 43 -------------------------
 drivers/scsi/csiostor/csio_init.c    |  6 ++--
 drivers/scsi/csiostor/csio_wr.c      | 15 +++++----
 4 files changed, 24 insertions(+), 101 deletions(-)

diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c
index 5c31fa6..35c5f83 100644
--- a/drivers/scsi/csiostor/csio_hw.c
+++ b/drivers/scsi/csiostor/csio_hw.c
@@ -60,37 +60,10 @@ int csio_msi = 2;
 static int dev_num;
 
 /* FCoE Adapter types & its description */
-static const struct csio_adap_desc csio_t4_fcoe_adapters[] = {
-	{"T440-Dbg 10G", "Chelsio T440-Dbg 10G [FCoE]"},
-	{"T420-CR 10G", "Chelsio T420-CR 10G [FCoE]"},
-	{"T422-CR 10G/1G", "Chelsio T422-CR 10G/1G [FCoE]"},
-	{"T440-CR 10G", "Chelsio T440-CR 10G [FCoE]"},
-	{"T420-BCH 10G", "Chelsio T420-BCH 10G [FCoE]"},
-	{"T440-BCH 10G", "Chelsio T440-BCH 10G [FCoE]"},
-	{"T440-CH 10G", "Chelsio T440-CH 10G [FCoE]"},
-	{"T420-SO 10G", "Chelsio T420-SO 10G [FCoE]"},
-	{"T420-CX4 10G", "Chelsio T420-CX4 10G [FCoE]"},
-	{"T420-BT 10G", "Chelsio T420-BT 10G [FCoE]"},
-	{"T404-BT 1G", "Chelsio T404-BT 1G [FCoE]"},
-	{"B420-SR 10G", "Chelsio B420-SR 10G [FCoE]"},
-	{"B404-BT 1G", "Chelsio B404-BT 1G [FCoE]"},
-	{"T480-CR 10G", "Chelsio T480-CR 10G [FCoE]"},
-	{"T440-LP-CR 10G", "Chelsio T440-LP-CR 10G [FCoE]"},
-	{"AMSTERDAM 10G", "Chelsio AMSTERDAM 10G [FCoE]"},
-	{"HUAWEI T480 10G", "Chelsio HUAWEI T480 10G [FCoE]"},
-	{"HUAWEI T440 10G", "Chelsio HUAWEI T440 10G [FCoE]"},
-	{"HUAWEI STG 10G", "Chelsio HUAWEI STG 10G [FCoE]"},
-	{"ACROMAG XAUI 10G", "Chelsio ACROMAG XAUI 10G [FCoE]"},
-	{"ACROMAG SFP+ 10G", "Chelsio ACROMAG SFP+ 10G [FCoE]"},
-	{"QUANTA SFP+ 10G", "Chelsio QUANTA SFP+ 10G [FCoE]"},
-	{"HUAWEI 10Gbase-T", "Chelsio HUAWEI 10Gbase-T [FCoE]"},
-	{"HUAWEI T4TOE 10G", "Chelsio HUAWEI T4TOE 10G [FCoE]"}
-};
-
 static const struct csio_adap_desc csio_t5_fcoe_adapters[] = {
 	{"T580-Dbg 10G", "Chelsio T580-Dbg 10G [FCoE]"},
 	{"T520-CR 10G", "Chelsio T520-CR 10G [FCoE]"},
-	{"T522-CR 10G/1G", "Chelsio T452-CR 10G/1G [FCoE]"},
+	{"T522-CR 10G/1G", "Chelsio T522-CR 10G/1G [FCoE]"},
 	{"T540-CR 10G", "Chelsio T540-CR 10G [FCoE]"},
 	{"T520-BCH 10G", "Chelsio T520-BCH 10G [FCoE]"},
 	{"T540-BCH 10G", "Chelsio T540-BCH 10G [FCoE]"},
@@ -107,7 +80,9 @@ static const struct csio_adap_desc csio_t5_fcoe_adapters[] = {
 	{"T580-LP-CR 40G", "Chelsio T580-LP-CR 40G [FCoE]"},
 	{"T520-LL-CR 10G", "Chelsio T520-LL-CR 10G [FCoE]"},
 	{"T560-CR 40G", "Chelsio T560-CR 40G [FCoE]"},
-	{"T580-CR 40G", "Chelsio T580-CR 40G [FCoE]"}
+	{"T580-CR 40G", "Chelsio T580-CR 40G [FCoE]"},
+	{"T580-SO 40G", "Chelsio T580-SO 40G [FCoE]"},
+	{"T502-BT 1G", "Chelsio T502-BT 1G [FCoE]"}
 };
 
 static void csio_mgmtm_cleanup(struct csio_mgmtm *);
@@ -1716,9 +1691,9 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path)
 	uint32_t *cfg_data;
 	int value_to_add = 0;
 
-	if (request_firmware(&cf, CSIO_CF_FNAME(hw), dev) < 0) {
+	if (request_firmware(&cf, FW_CFG_NAME_T5, dev) < 0) {
 		csio_err(hw, "could not find config file %s, err: %d\n",
-			 CSIO_CF_FNAME(hw), ret);
+			 FW_CFG_NAME_T5, ret);
 		return -ENOENT;
 	}
 
@@ -1758,8 +1733,8 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path)
 	}
 	if (ret == 0) {
 		csio_info(hw, "config file upgraded to %s\n",
-			  CSIO_CF_FNAME(hw));
-		snprintf(path, 64, "%s%s", "/lib/firmware/", CSIO_CF_FNAME(hw));
+			  FW_CFG_NAME_T5);
+		snprintf(path, 64, "%s%s", "/lib/firmware/", FW_CFG_NAME_T5);
 	}
 
 leave:
@@ -2123,9 +2098,9 @@ csio_hw_flash_fw(struct csio_hw *hw, int *reset)
 		return -EINVAL;
 	}
 
-	if (request_firmware(&fw, CSIO_FW_FNAME(hw), dev) < 0) {
+	if (request_firmware(&fw, FW_FNAME_T5, dev) < 0) {
 		csio_err(hw, "could not find firmware image %s, err: %d\n",
-			 CSIO_FW_FNAME(hw), ret);
+			 FW_FNAME_T5, ret);
 		return -EINVAL;
 	}
 
@@ -3207,7 +3182,7 @@ static void csio_ncsi_intr_handler(struct csio_hw *hw)
  */
 static void csio_xgmac_intr_handler(struct csio_hw *hw, int port)
 {
-	uint32_t v = csio_rd_reg32(hw, CSIO_MAC_INT_CAUSE_REG(hw, port));
+	uint32_t v = csio_rd_reg32(hw, T5_PORT_REG(port, MAC_PORT_INT_CAUSE_A));
 
 	v &= TXFIFO_PRTY_ERR_F | RXFIFO_PRTY_ERR_F;
 	if (!v)
@@ -3217,7 +3192,7 @@ static void csio_xgmac_intr_handler(struct csio_hw *hw, int port)
 		csio_fatal(hw, "XGMAC %d Tx FIFO parity error\n", port);
 	if (v & RXFIFO_PRTY_ERR_F)
 		csio_fatal(hw, "XGMAC %d Rx FIFO parity error\n", port);
-	csio_wr_reg32(hw, v, CSIO_MAC_INT_CAUSE_REG(hw, port));
+	csio_wr_reg32(hw, v, T5_PORT_REG(port, MAC_PORT_INT_CAUSE_A));
 	csio_hw_fatal_err(hw);
 }
 
@@ -3966,13 +3941,7 @@ csio_hw_set_description(struct csio_hw *hw, uint16_t ven_id, uint16_t dev_id)
 		prot_type = (dev_id & CSIO_ASIC_DEVID_PROTO_MASK);
 		adap_type = (dev_id & CSIO_ASIC_DEVID_TYPE_MASK);
 
-		if (prot_type == CSIO_T4_FCOE_ASIC) {
-			memcpy(hw->hw_ver,
-			       csio_t4_fcoe_adapters[adap_type].model_no, 16);
-			memcpy(hw->model_desc,
-			       csio_t4_fcoe_adapters[adap_type].description,
-			       32);
-		} else if (prot_type == CSIO_T5_FCOE_ASIC) {
+		if (prot_type == CSIO_T5_FCOE_ASIC) {
 			memcpy(hw->hw_ver,
 			       csio_t5_fcoe_adapters[adap_type].model_no, 16);
 			memcpy(hw->model_desc,
@@ -4009,8 +3978,8 @@ csio_hw_init(struct csio_hw *hw)
 
 	strcpy(hw->name, CSIO_HW_NAME);
 
-	/* Initialize the HW chip ops with T4/T5 specific ops */
-	hw->chip_ops = csio_is_t4(hw->chip_id) ? &t4_ops : &t5_ops;
+	/* Initialize the HW chip ops T5 specific ops */
+	hw->chip_ops = &t5_ops;
 
 	/* Set the model & its description */
 
diff --git a/drivers/scsi/csiostor/csio_hw_chip.h b/drivers/scsi/csiostor/csio_hw_chip.h
index eec98f5..e962d3d 100644
--- a/drivers/scsi/csiostor/csio_hw_chip.h
+++ b/drivers/scsi/csiostor/csio_hw_chip.h
@@ -37,24 +37,14 @@
 #include "csio_defs.h"
 
 /* Define MACRO values */
-#define CSIO_HW_T4				0x4000
-#define CSIO_T4_FCOE_ASIC			0x4600
 #define CSIO_HW_T5				0x5000
 #define CSIO_T5_FCOE_ASIC			0x5600
 #define CSIO_HW_CHIP_MASK			0xF000
 
-#define T4_REGMAP_SIZE				(160 * 1024)
 #define T5_REGMAP_SIZE				(332 * 1024)
-#define FW_FNAME_T4				"cxgb4/t4fw.bin"
 #define FW_FNAME_T5				"cxgb4/t5fw.bin"
-#define FW_CFG_NAME_T4				"cxgb4/t4-config.txt"
 #define FW_CFG_NAME_T5				"cxgb4/t5-config.txt"
 
-#define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x0B
-#define T4FW_VERSION_MICRO 0x1B
-#define T4FW_VERSION_BUILD 0x00
-
 #define T5FW_VERSION_MAJOR 0x01
 #define T5FW_VERSION_MINOR 0x0B
 #define T5FW_VERSION_MICRO 0x1B
@@ -65,27 +55,15 @@
 #define CHELSIO_CHIP_VERSION(code) (((code) >> 12) & 0xf)
 #define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf)
 
-#define CHELSIO_T4		0x4
 #define CHELSIO_T5		0x5
 
 enum chip_type {
-	T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1),
-	T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2),
-	T4_FIRST_REV	= T4_A1,
-	T4_LAST_REV	= T4_A2,
-
 	T5_A0 = CHELSIO_CHIP_CODE(CHELSIO_T5, 0),
 	T5_A1 = CHELSIO_CHIP_CODE(CHELSIO_T5, 1),
 	T5_FIRST_REV	= T5_A0,
 	T5_LAST_REV	= T5_A1,
 };
 
-/* Define static functions */
-static inline int csio_is_t4(uint16_t chip)
-{
-	return (chip == CSIO_HW_T4);
-}
-
 static inline int csio_is_t5(uint16_t chip)
 {
 	return (chip == CSIO_HW_T5);
@@ -95,21 +73,6 @@ static inline int csio_is_t5(uint16_t chip)
 #define CSIO_DEVICE(devid, idx)						\
 	{ PCI_VENDOR_ID_CHELSIO, (devid), PCI_ANY_ID, PCI_ANY_ID, 0, 0, (idx) }
 
-#define CSIO_HW_PIDX(hw, index)						\
-	(csio_is_t4(hw->chip_id) ? (PIDX_V(index)) :			\
-					(PIDX_T5_G(index) | DBTYPE_F))
-
-#define CSIO_HW_LP_INT_THRESH(hw, val)					\
-	(csio_is_t4(hw->chip_id) ? (LP_INT_THRESH_V(val)) :		\
-					(LP_INT_THRESH_T5_V(val)))
-
-#define CSIO_HW_M_LP_INT_THRESH(hw)					\
-	(csio_is_t4(hw->chip_id) ? (LP_INT_THRESH_M) : (LP_INT_THRESH_T5_M))
-
-#define CSIO_MAC_INT_CAUSE_REG(hw, port)				\
-	(csio_is_t4(hw->chip_id) ? (PORT_REG(port, XGMAC_PORT_INT_CAUSE_A)) : \
-				(T5_PORT_REG(port, MAC_PORT_INT_CAUSE_A)))
-
 #include "t4fw_api.h"
 
 #define FW_VERSION(chip) ( \
@@ -125,11 +88,6 @@ struct fw_info {
 	char *fw_mod_name;
 	struct fw_hdr fw_hdr;
 };
-#define CSIO_FW_FNAME(hw)						\
-	(csio_is_t4(hw->chip_id) ? FW_FNAME_T4 : FW_FNAME_T5)
-
-#define CSIO_CF_FNAME(hw)						\
-	(csio_is_t4(hw->chip_id) ? FW_CFG_NAME_T4 : FW_CFG_NAME_T5)
 
 /* Declare ENUMS */
 enum { MEM_EDC0, MEM_EDC1, MEM_MC, MEM_MC0 = MEM_MC, MEM_MC1 };
@@ -163,7 +121,6 @@ struct csio_hw_chip_ops {
 	void (*chip_dfs_create_ext_mem)(struct csio_hw *);
 };
 
-extern struct csio_hw_chip_ops t4_ops;
 extern struct csio_hw_chip_ops t5_ops;
 
 #endif /* #ifndef __CSIO_HW_CHIP_H__ */
diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c
index 34d20cc..9b9794d 100644
--- a/drivers/scsi/csiostor/csio_init.c
+++ b/drivers/scsi/csiostor/csio_init.c
@@ -1176,9 +1176,8 @@ static struct pci_error_handlers csio_err_handler = {
  */
 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
 	static struct pci_device_id csio_pci_tbl[] = {
-/* Define for iSCSI uses PF5, FCoE uses PF6 */
-#define CH_PCI_DEVICE_ID_FUNCTION	0x5
-#define CH_PCI_DEVICE_ID_FUNCTION2	0x6
+/* Define for FCoE uses PF6 */
+#define CH_PCI_DEVICE_ID_FUNCTION	0x6
 
 #define CH_PCI_ID_TABLE_ENTRY(devid) \
 		{ PCI_VDEVICE(CHELSIO, (devid)), 0 }
@@ -1256,5 +1255,4 @@ MODULE_DESCRIPTION(CSIO_DRV_DESC);
 MODULE_LICENSE(CSIO_DRV_LICENSE);
 MODULE_DEVICE_TABLE(pci, csio_pci_tbl);
 MODULE_VERSION(CSIO_DRV_VERSION);
-MODULE_FIRMWARE(FW_FNAME_T4);
 MODULE_FIRMWARE(FW_FNAME_T5);
diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c
index b47ea33..e8f1817 100644
--- a/drivers/scsi/csiostor/csio_wr.c
+++ b/drivers/scsi/csiostor/csio_wr.c
@@ -85,7 +85,7 @@ csio_wr_ring_fldb(struct csio_hw *hw, struct csio_q *flq)
 	 */
 	if (flq->inc_idx >= 8) {
 		csio_wr_reg32(hw, DBPRIO_F | QID_V(flq->un.fl.flid) |
-				  CSIO_HW_PIDX(hw, flq->inc_idx / 8),
+				  PIDX_T5_V(flq->inc_idx / 8) | DBTYPE_F,
 				  MYPF_REG(SGE_PF_KDOORBELL_A));
 		flq->inc_idx &= 7;
 	}
@@ -983,7 +983,7 @@ csio_wr_issue(struct csio_hw *hw, int qidx, bool prio)
 	wmb();
 	/* Ring SGE Doorbell writing q->pidx into it */
 	csio_wr_reg32(hw, DBPRIO_V(prio) | QID_V(q->un.eq.physeqid) |
-			  CSIO_HW_PIDX(hw, q->inc_idx),
+			  PIDX_T5_V(q->inc_idx) | DBTYPE_F,
 			  MYPF_REG(SGE_PF_KDOORBELL_A));
 	q->inc_idx = 0;
 
@@ -1467,12 +1467,11 @@ csio_wr_set_sge(struct csio_hw *hw)
 	 * and generate an interrupt when this occurs so we can recover.
 	 */
 	csio_set_reg_field(hw, SGE_DBFIFO_STATUS_A,
-			   HP_INT_THRESH_V(HP_INT_THRESH_M) |
-			   CSIO_HW_LP_INT_THRESH(hw,
-						 CSIO_HW_M_LP_INT_THRESH(hw)),
-			   HP_INT_THRESH_V(CSIO_SGE_DBFIFO_INT_THRESH) |
-			   CSIO_HW_LP_INT_THRESH(hw,
-						 CSIO_SGE_DBFIFO_INT_THRESH));
+			   LP_INT_THRESH_T5_V(LP_INT_THRESH_T5_M),
+			   LP_INT_THRESH_T5_V(CSIO_SGE_DBFIFO_INT_THRESH));
+	csio_set_reg_field(hw, SGE_DBFIFO_STATUS2_A,
+			   HP_INT_THRESH_T5_V(LP_INT_THRESH_T5_M),
+			   HP_INT_THRESH_T5_V(CSIO_SGE_DBFIFO_INT_THRESH));
 
 	csio_set_reg_field(hw, SGE_DOORBELL_CONTROL_A, ENABLE_DROP_F,
 			   ENABLE_DROP_F);
-- 
2.0.2

^ permalink raw reply related

* Re: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: Patrick McHardy @ 2015-01-16 16:03 UTC (permalink / raw)
  To: Thomas Graf
  Cc: davem, netdev, kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <20150116155835.GA15052@casper.infradead.org>

On 16.01, Thomas Graf wrote:
> On 01/16/15 at 03:34pm, Patrick McHardy wrote:
> > On 15.12, Thomas Graf wrote:
> > > In the event of an expansion or shrinking, the new bucket table allocated
> > > is exposed as a so called future table as soon as the resize process
> > > starts.  Lookups, deletions, and insertions will briefly use both tables.
> > > The future table becomes the main table after an RCU grace period and
> > > initial linking of the old to the new table was performed. Optimization
> > > of the chains to make use of the new number of buckets follows only the
> > > new table is in use.
> > 
> > AFAICT nft_hash_walk() will miss new entries during this period.
> > Am I missing anything here?
> 
> A walker may not see insertions that occur after the walker was started
> if resizing is enabled. Is that a problem for nftables?

No, that would be Ok. The case I'm wondering about is:

- insertion
- resize starts
- another insertion
- walker, resize not finished yet

^ permalink raw reply

* [patch-net-next v3 1/2] net: ethernet: cpsw: unroll IRQ request loop
From: Felipe Balbi @ 2015-01-16 16:11 UTC (permalink / raw)
  To: davem
  Cc: Tony Lindgren, Linux OMAP Mailing List, mugunthanvnm, netdev,
	Felipe Balbi

This patch is in preparation for a nicer IRQ
handling scheme where we use different IRQ
handlers for each IRQ line (as it should be).

Later, we will also drop IRQs offset 0 and 3
because they are always disabled in this driver.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---

no changes

 drivers/net/ethernet/ti/cpsw.c | 62 ++++++++++++++++++++++++++++++++----------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index e61ee8351272..8e1af51e4b76 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2156,7 +2156,8 @@ static int cpsw_probe(struct platform_device *pdev)
 	void __iomem			*ss_regs;
 	struct resource			*res, *ss_res;
 	u32 slave_offset, sliver_offset, slave_size;
-	int ret = 0, i, k = 0;
+	int ret = 0, i;
+	int irq;
 
 	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
 	if (!ndev) {
@@ -2345,24 +2346,55 @@ static int cpsw_probe(struct platform_device *pdev)
 		goto clean_ale_ret;
 	}
 
-	while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k))) {
-		if (k >= ARRAY_SIZE(priv->irqs_table)) {
-			ret = -EINVAL;
-			goto clean_ale_ret;
-		}
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		goto clean_ale_ret;
 
-		ret = devm_request_irq(&pdev->dev, res->start, cpsw_interrupt,
-				       0, dev_name(&pdev->dev), priv);
-		if (ret < 0) {
-			dev_err(priv->dev, "error attaching irq (%d)\n", ret);
-			goto clean_ale_ret;
-		}
+	priv->irqs_table[0] = irq;
+	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
+			       0, dev_name(&pdev->dev), priv);
+	if (ret < 0) {
+		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
+		goto clean_ale_ret;
+	}
 
-		priv->irqs_table[k] = res->start;
-		k++;
+	irq = platform_get_irq(pdev, 1);
+	if (irq < 0)
+		goto clean_ale_ret;
+
+	priv->irqs_table[1] = irq;
+	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
+			       0, dev_name(&pdev->dev), priv);
+	if (ret < 0) {
+		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
+		goto clean_ale_ret;
+	}
+
+	irq = platform_get_irq(pdev, 2);
+	if (irq < 0)
+		goto clean_ale_ret;
+
+	priv->irqs_table[2] = irq;
+	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
+			       0, dev_name(&pdev->dev), priv);
+	if (ret < 0) {
+		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
+		goto clean_ale_ret;
+	}
+
+	irq = platform_get_irq(pdev, 3);
+	if (irq < 0)
+		goto clean_ale_ret;
+
+	priv->irqs_table[3] = irq;
+	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
+			       0, dev_name(&pdev->dev), priv);
+	if (ret < 0) {
+		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
+		goto clean_ale_ret;
 	}
 
-	priv->num_irqs = k;
+	priv->num_irqs = 4;
 
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
-- 
2.2.0


^ permalink raw reply related

* [patch-net-next v3 2/2] net: ethernet: cpsw: don't requests IRQs we don't use
From: Felipe Balbi @ 2015-01-16 16:11 UTC (permalink / raw)
  To: davem
  Cc: Tony Lindgren, Linux OMAP Mailing List, mugunthanvnm, netdev,
	Felipe Balbi
In-Reply-To: <1421424672-19323-1-git-send-email-balbi@ti.com>

CPSW never uses RX_THRESHOLD or MISC interrupts. In
fact, they are always kept masked in their appropriate
IRQ Enable register.

Instead of allocating an IRQ that never fires, it's best
to remove that code altogether and let future patches
implement it if anybody needs those.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---

combined patches 2 and 3.

 drivers/net/ethernet/ti/cpsw.c | 72 +++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 8e1af51e4b76..ba09ff3c1695 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -754,17 +754,25 @@ requeue:
 		dev_kfree_skb_any(new_skb);
 }
 
-static irqreturn_t cpsw_interrupt(int irq, void *dev_id)
+static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id)
 {
 	struct cpsw_priv *priv = dev_id;
-	int value = irq - priv->irqs_table[0];
 
-	/* NOTICE: Ending IRQ here. The trick with the 'value' variable above
-	 * is to make sure we will always write the correct value to the EOI
-	 * register. Namely 0 for RX_THRESH Interrupt, 1 for RX Interrupt, 2
-	 * for TX Interrupt and 3 for MISC Interrupt.
-	 */
-	cpdma_ctlr_eoi(priv->dma, value);
+	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
+	cpdma_chan_process(priv->txch, 128);
+
+	priv = cpsw_get_slave_priv(priv, 1);
+	if (priv)
+		cpdma_chan_process(priv->txch, 128);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
+{
+	struct cpsw_priv *priv = dev_id;
+
+	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
 
 	cpsw_intr_disable(priv);
 	if (priv->irq_enabled == true) {
@@ -1617,7 +1625,8 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev)
 
 	cpsw_intr_disable(priv);
 	cpdma_ctlr_int_ctrl(priv->dma, false);
-	cpsw_interrupt(ndev->irq, priv);
+	cpsw_rx_interrupt(priv->irq[0], priv);
+	cpsw_tx_interrupt(priv->irq[1], priv);
 	cpdma_ctlr_int_ctrl(priv->dma, true);
 	cpsw_intr_enable(priv);
 }
@@ -2339,62 +2348,47 @@ static int cpsw_probe(struct platform_device *pdev)
 		goto clean_dma_ret;
 	}
 
-	ndev->irq = platform_get_irq(pdev, 0);
+	ndev->irq = platform_get_irq(pdev, 1);
 	if (ndev->irq < 0) {
 		dev_err(priv->dev, "error getting irq resource\n");
 		ret = -ENOENT;
 		goto clean_ale_ret;
 	}
 
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		goto clean_ale_ret;
-
-	priv->irqs_table[0] = irq;
-	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
-			       0, dev_name(&pdev->dev), priv);
-	if (ret < 0) {
-		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
-		goto clean_ale_ret;
-	}
+	/* Grab RX and TX IRQs. Note that we also have RX_THRESHOLD and
+	 * MISC IRQs which are always kept disabled with this driver so
+	 * we will not request them.
+	 *
+	 * If anyone wants to implement support for those, make sure to
+	 * first request and append them to irqs_table array.
+	 */
 
+	/* RX IRQ */
 	irq = platform_get_irq(pdev, 1);
 	if (irq < 0)
 		goto clean_ale_ret;
 
-	priv->irqs_table[1] = irq;
-	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
+	priv->irqs_table[0] = irq;
+	ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt,
 			       0, dev_name(&pdev->dev), priv);
 	if (ret < 0) {
 		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
 		goto clean_ale_ret;
 	}
 
+	/* TX IRQ */
 	irq = platform_get_irq(pdev, 2);
 	if (irq < 0)
 		goto clean_ale_ret;
 
-	priv->irqs_table[2] = irq;
-	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
-			       0, dev_name(&pdev->dev), priv);
-	if (ret < 0) {
-		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
-		goto clean_ale_ret;
-	}
-
-	irq = platform_get_irq(pdev, 3);
-	if (irq < 0)
-		goto clean_ale_ret;
-
-	priv->irqs_table[3] = irq;
-	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
+	priv->irqs_table[1] = irq;
+	ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt,
 			       0, dev_name(&pdev->dev), priv);
 	if (ret < 0) {
 		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
 		goto clean_ale_ret;
 	}
-
-	priv->num_irqs = 4;
+	priv->num_irqs = 2;
 
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
-- 
2.2.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox