From: Divy Le Ray <divy@chelsio.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
swise@opengridcomputing.com
Subject: [PATCH 2.6.30 5/5] cxgb3: map entire Rx page, feed map+offset to Rx ring.
Date: Thu, 26 Mar 2009 19:39:29 -0700 [thread overview]
Message-ID: <20090327023929.32137.65704.stgit@speedy5> (raw)
In-Reply-To: <20090327023909.32137.93884.stgit@speedy5>
From: Divy Le Ray <divy@chelsio.com>
DMA mapping can be expensive in the presence of IOMMUs.
Reduce Rx IOMMU activity by mapping an entire page once, and providing the
H/W with the mapped address plus the offset of the current page chunk.
Reserve space at the end of each page for a count of outstanding chunk
references, so the page can be unmapped once its last chunk is released.
Signed-off-by: Divy Le Ray <divy@chelsio.com>
---
drivers/net/cxgb3/adapter.h | 3 +
drivers/net/cxgb3/sge.c | 138 ++++++++++++++++++++++++++++++++-----------
2 files changed, 106 insertions(+), 35 deletions(-)
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 2cf6c92..714df2b 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -85,6 +85,8 @@ struct fl_pg_chunk {
struct page *page;
void *va;
unsigned int offset;
+ u64 *p_cnt;
+ DECLARE_PCI_UNMAP_ADDR(mapping);
};
struct rx_desc;
@@ -101,6 +103,7 @@ struct sge_fl { /* SGE per free-buffer list state */
struct fl_pg_chunk pg_chunk;/* page chunk cache */
unsigned int use_pages; /* whether FL uses pages or sk_buffs */
unsigned int order; /* order of page allocations */
+ unsigned int alloc_size; /* size of allocated buffer */
struct rx_desc *desc; /* address of HW Rx descriptor ring */
struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */
dma_addr_t phys_addr; /* physical address of HW ring start */
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
old mode 100755
new mode 100644
index 54667f0..26d3587
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -50,6 +50,7 @@
#define SGE_RX_COPY_THRES 256
#define SGE_RX_PULL_LEN 128
+#define SGE_PG_RSVD SMP_CACHE_BYTES
/*
* Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
* It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
@@ -57,8 +58,10 @@
*/
#define FL0_PG_CHUNK_SIZE 2048
#define FL0_PG_ORDER 0
+#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
+#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
#define SGE_RX_DROP_THRES 16
#define RX_RECLAIM_PERIOD (HZ/4)
@@ -345,13 +348,21 @@ static inline int should_restart_tx(const struct sge_txq *q)
return q->in_use - r < (q->size >> 1);
}
-static void clear_rx_desc(const struct sge_fl *q, struct rx_sw_desc *d)
+static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
+ struct rx_sw_desc *d)
{
- if (q->use_pages) {
- if (d->pg_chunk.page)
- put_page(d->pg_chunk.page);
+ if (q->use_pages && d->pg_chunk.page) {
+ (*d->pg_chunk.p_cnt)--;
+ if (!*d->pg_chunk.p_cnt)
+ pci_unmap_page(pdev,
+ pci_unmap_addr(&d->pg_chunk, mapping),
+ q->alloc_size, PCI_DMA_FROMDEVICE);
+
+ put_page(d->pg_chunk.page);
d->pg_chunk.page = NULL;
} else {
+ pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
+ q->buf_size, PCI_DMA_FROMDEVICE);
kfree_skb(d->skb);
d->skb = NULL;
}
@@ -372,9 +383,8 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
while (q->credits--) {
struct rx_sw_desc *d = &q->sdesc[cidx];
- pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
- q->buf_size, PCI_DMA_FROMDEVICE);
- clear_rx_desc(q, d);
+
+ clear_rx_desc(pdev, q, d);
if (++cidx == q->size)
cidx = 0;
}
@@ -417,18 +427,39 @@ static inline int add_one_rx_buf(void *va, unsigned int len,
return 0;
}
-static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
+static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
+ unsigned int gen)
+{
+ d->addr_lo = cpu_to_be32(mapping);
+ d->addr_hi = cpu_to_be32((u64) mapping >> 32);
+ wmb();
+ d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
+ d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
+ return 0;
+}
+
+static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
+ struct rx_sw_desc *sd, gfp_t gfp,
unsigned int order)
{
if (!q->pg_chunk.page) {
+ dma_addr_t mapping;
+
q->pg_chunk.page = alloc_pages(gfp, order);
if (unlikely(!q->pg_chunk.page))
return -ENOMEM;
q->pg_chunk.va = page_address(q->pg_chunk.page);
+ q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
+ SGE_PG_RSVD;
q->pg_chunk.offset = 0;
+ mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
+ 0, q->alloc_size, PCI_DMA_FROMDEVICE);
+ pci_unmap_addr_set(&q->pg_chunk, mapping, mapping);
}
sd->pg_chunk = q->pg_chunk;
+ prefetch(sd->pg_chunk.p_cnt);
+
q->pg_chunk.offset += q->buf_size;
if (q->pg_chunk.offset == (PAGE_SIZE << order))
q->pg_chunk.page = NULL;
@@ -436,6 +467,12 @@ static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
q->pg_chunk.va += q->buf_size;
get_page(q->pg_chunk.page);
}
+
+ if (sd->pg_chunk.offset == 0)
+ *sd->pg_chunk.p_cnt = 1;
+ else
+ *sd->pg_chunk.p_cnt += 1;
+
return 0;
}
@@ -460,35 +497,43 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
*/
static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
{
- void *buf_start;
struct rx_sw_desc *sd = &q->sdesc[q->pidx];
struct rx_desc *d = &q->desc[q->pidx];
unsigned int count = 0;
while (n--) {
+ dma_addr_t mapping;
int err;
if (q->use_pages) {
- if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) {
+ if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
+ q->order))) {
nomem: q->alloc_failed++;
break;
}
- buf_start = sd->pg_chunk.va;
+ mapping = pci_unmap_addr(&sd->pg_chunk, mapping) +
+ sd->pg_chunk.offset;
+ pci_unmap_addr_set(sd, dma_addr, mapping);
+
+ add_one_rx_chunk(mapping, d, q->gen);
+ pci_dma_sync_single_for_device(adap->pdev, mapping,
+ q->buf_size - SGE_PG_RSVD,
+ PCI_DMA_FROMDEVICE);
} else {
- struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
+ void *buf_start;
+ struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
if (!skb)
goto nomem;
sd->skb = skb;
buf_start = skb->data;
- }
-
- err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
- adap->pdev);
- if (unlikely(err)) {
- clear_rx_desc(q, sd);
- break;
+ err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
+ q->gen, adap->pdev);
+ if (unlikely(err)) {
+ clear_rx_desc(adap->pdev, q, sd);
+ break;
+ }
}
d++;
@@ -795,19 +840,19 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
struct sk_buff *newskb, *skb;
struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
- newskb = skb = q->pg_skb;
+ dma_addr_t dma_addr = pci_unmap_addr(sd, dma_addr);
+ newskb = skb = q->pg_skb;
if (!skb && (len <= SGE_RX_COPY_THRES)) {
newskb = alloc_skb(len, GFP_ATOMIC);
if (likely(newskb != NULL)) {
__skb_put(newskb, len);
- pci_dma_sync_single_for_cpu(adap->pdev,
- pci_unmap_addr(sd, dma_addr), len,
+ pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
PCI_DMA_FROMDEVICE);
memcpy(newskb->data, sd->pg_chunk.va, len);
- pci_dma_sync_single_for_device(adap->pdev,
- pci_unmap_addr(sd, dma_addr), len,
- PCI_DMA_FROMDEVICE);
+ pci_dma_sync_single_for_device(adap->pdev, dma_addr,
+ len,
+ PCI_DMA_FROMDEVICE);
} else if (!drop_thres)
return NULL;
recycle:
@@ -820,16 +865,25 @@ recycle:
if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
goto recycle;
+ prefetch(sd->pg_chunk.p_cnt);
+
if (!skb)
newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
+
if (unlikely(!newskb)) {
if (!drop_thres)
return NULL;
goto recycle;
}
- pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
- fl->buf_size, PCI_DMA_FROMDEVICE);
+ pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
+ PCI_DMA_FROMDEVICE);
+ (*sd->pg_chunk.p_cnt)--;
+ if (!*sd->pg_chunk.p_cnt)
+ pci_unmap_page(adap->pdev,
+ pci_unmap_addr(&sd->pg_chunk, mapping),
+ fl->alloc_size,
+ PCI_DMA_FROMDEVICE);
if (!skb) {
__skb_put(newskb, SGE_RX_PULL_LEN);
memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
@@ -1958,8 +2012,8 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
skb_pull(skb, sizeof(*p) + pad);
skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
pi = netdev_priv(skb->dev);
- if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) &&
- !p->fragment) {
+ if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid &&
+ p->csum == htons(0xffff) && !p->fragment) {
qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else
@@ -2034,10 +2088,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
fl->credits--;
len -= offset;
- pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
- fl->buf_size, PCI_DMA_FROMDEVICE);
+ pci_dma_sync_single_for_cpu(adap->pdev,
+ pci_unmap_addr(sd, dma_addr),
+ fl->buf_size - SGE_PG_RSVD,
+ PCI_DMA_FROMDEVICE);
+
+ (*sd->pg_chunk.p_cnt)--;
+ if (!*sd->pg_chunk.p_cnt)
+ pci_unmap_page(adap->pdev,
+ pci_unmap_addr(&sd->pg_chunk, mapping),
+ fl->alloc_size,
+ PCI_DMA_FROMDEVICE);
- prefetch(&qs->lro_frag_tbl);
+ prefetch(qs->lro_va);
rx_frag += nr_frags;
rx_frag->page = sd->pg_chunk.page;
@@ -2047,6 +2110,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
qs->lro_frag_tbl.nr_frags++;
qs->lro_frag_tbl.len = frag_len;
+
if (!complete)
return;
@@ -2236,6 +2300,8 @@ no_mem:
if (fl->use_pages) {
void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
+ prefetch(&qs->lro_frag_tbl);
+
prefetch(addr);
#if L1_CACHE_BYTES < 128
prefetch(addr + L1_CACHE_BYTES);
@@ -2972,21 +3038,23 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
q->fl[0].order = FL0_PG_ORDER;
q->fl[1].order = FL1_PG_ORDER;
+ q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
+ q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
spin_lock_irq(&adapter->sge.reg_lock);
/* FL threshold comparison uses < */
ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
q->rspq.phys_addr, q->rspq.size,
- q->fl[0].buf_size, 1, 0);
+ q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
if (ret)
goto err_unlock;
for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
q->fl[i].phys_addr, q->fl[i].size,
- q->fl[i].buf_size, p->cong_thres, 1,
- 0);
+ q->fl[i].buf_size - SGE_PG_RSVD,
+ p->cong_thres, 1, 0);
if (ret)
goto err_unlock;
}
next prev parent reply other threads:[~2009-03-27 2:41 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-03-27 2:39 [PATCH 2.6.30 1/5] cxgb3: start qset timers when setup succeeded Divy Le Ray
2009-03-27 2:39 ` [PATCH 2.6.30 2/5] cxgb3: sge setup fixes Divy Le Ray
2009-03-27 7:53 ` David Miller
2009-03-27 2:39 ` [PATCH 2.6.30 3/5] cxgb3: use resource_size_t for mmio declarations Divy Le Ray
2009-03-27 7:53 ` David Miller
2009-03-27 2:39 ` [PATCH 2.6.30 4/5] cxgb3: differentiate portx and Tx channels Divy Le Ray
2009-03-27 7:53 ` David Miller
2009-03-27 2:39 ` Divy Le Ray [this message]
2009-03-27 7:53 ` [PATCH 2.6.30 5/5] cxgb3: map entire Rx page, feed map+offset to Rx ring David Miller
2009-03-27 7:52 ` [PATCH 2.6.30 1/5] cxgb3: start qset timers when setup succeeded David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090327023929.32137.65704.stgit@speedy5 \
--to=divy@chelsio.com \
--cc=davem@davemloft.net \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=swise@opengridcomputing.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.