From: "Jakub Kiciński" <moorray3@wp.pl>
To: Johannes Berg <johannes@sipsolutions.net>
Cc: Kalle Valo <kvalo@codeaurora.org>,
Felix Fietkau <nbd@openwrt.org>,
linux-wireless <linux-wireless@vger.kernel.org>,
Jakub Kicinski <kubakici@wp.pl>
Subject: Re: [PATCH 1/2] add mt7601u driver
Date: Tue, 5 May 2015 02:49:49 +0200
Message-ID: <20150505024949.0a672dd1@north>
In-Reply-To: <1430734511.2013.18.camel@sipsolutions.net>
On Mon, 04 May 2015 12:15:11 +0200, Johannes Berg wrote:
> On Mon, 2015-05-04 at 12:04 +0200, Jakub Kiciński wrote:
>
> > > Don't know how your buffers are set up, but if the DMA engine consumes
> > > pages you could consider using paged RX instead of the memcpy().
> >
> > The DMA engine can concatenate multiple frames into a single USB
> > bulk transfer to a large contiguous buffer. There is no way to
> > request any alignment of the frames within that large buffer, so
> > I think paged RX is not an option.
>
> You could probably still do it because the networking stack only
> requires the *headers* to be aligned. But if the headers aren't in
> skb->data (skb->head) then they'll be pulled into that by the
> network stack, where they'll be aligned.
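Right - so as long as enough bytes make it into the linear area the
stack will pull the headers in by itself, something like this (just my
understanding, a sketch with a hypothetical hdr_len):

	/* pull the first hdr_len bytes into skb->head if they
	 * currently live in a page fragment
	 */
	if (!pskb_may_pull(skb, hdr_len))
		return;		/* runt frame, drop it */
	hdr = (struct ieee80211_hdr *)skb->data; /* now linear, aligned */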
I *think* I got it. Would you care to take a look? I basically
allocate a compound page with dev_alloc_pages() and run get_page() for
every fragment.
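To spell out the refcounting I'm going for (a lifecycle sketch only,
the real thing is in the patch below):

	struct page *p = dev_alloc_pages(MT_RX_ORDER);	/* refcount == 1 */

	/* for each frame carved out of the big buffer: */
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, p + p_off,
			offset, frame_len, truesize);
	get_page(p + p_off);	/* each frag holds its own reference */

	/* once the whole buffer has been walked: */
	put_page(p);		/* drop the allocator's original reference */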
------>8-----------------------------------
diff --git a/dma.c b/dma.c
index e865cf8384a5..a92993a64aeb 100644
--- a/dma.c
+++ b/dma.c
@@ -16,6 +16,9 @@
#include "usb.h"
#include "trace.h"
+static int mt7601u_submit_rx_buf(struct mt7601u_dev *dev,
+ struct mt7601u_dma_buf_rx *e, gfp_t gfp);
+
static unsigned int ieee80211_get_hdrlen_from_buf(const u8 *data, unsigned len)
{
const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *)data;
@@ -60,7 +63,7 @@ mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, u32 seg_len)
{
struct sk_buff *skb;
struct mt7601u_rxwi *rxwi;
- u32 fce_info;
+ u32 true_len, fce_info;
/* DMA_INFO field at the beginning of the segment contains only some of
* the information, we need to read the FCE descriptor from the end.
@@ -86,11 +89,81 @@ mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, u32 seg_len)
if (!skb)
return;
- if (mt76_mac_process_rx(dev, skb, rxwi)) {
- dev_kfree_skb(skb);
- return;
+ true_len = mt76_mac_process_rx(dev, skb, skb->data, rxwi);
+ skb_trim(skb, true_len);
+
+ ieee80211_rx_ni(dev->hw, skb);
+}
+
+static void mt7601u_rx_add_frags(struct mt7601u_dev *dev, struct sk_buff *skb,
+ void *data, u32 true_len, u32 truesize,
+ struct page *p)
+{
+ unsigned long addr = (unsigned long) data;
+ u32 overpage = 0;
+ u32 p_off = (data - page_address(p)) >> PAGE_SHIFT;
+
+ if ((addr & ~PAGE_MASK) + true_len > PAGE_SIZE)
+ overpage = (addr & ~PAGE_MASK) + true_len - PAGE_SIZE;
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, p + p_off,
+ addr & ~PAGE_MASK, true_len - overpage, truesize);
+ get_page(p + p_off);
+
+ /* Do we really need to split the buffer if it crosses a page boundary?
+ * Does networking code care?
+ */
+ if (overpage) {
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, p + p_off + 1,
+ 0, overpage, 0);
+ get_page(p + p_off + 1);
+ }
+}
+
+static void mt7601u_rx_process_seg_paged(struct mt7601u_dev *dev, u8 *data,
+ u32 seg_len, struct page *p)
+{
+ struct sk_buff *skb;
+ struct mt7601u_rxwi *rxwi;
+ u32 true_len, fce_info, truesize = seg_len;
+
+ /* DMA_INFO field at the beginning of the segment contains only some of
+ * the information, we need to read the FCE descriptor from the end.
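+ *
+ * Segment layout within the bulk buffer, for reference:
+ * | DMA hdr | RXWI | 802.11 frame (+ pad) | FCE info |
+ * |<------------------- seg_len ------------------->|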
+ */
+ fce_info = get_unaligned_le32(data + seg_len - MT_FCE_INFO_LEN);
+ seg_len -= MT_FCE_INFO_LEN;
+
+ data += MT_DMA_HDR_LEN;
+ seg_len -= MT_DMA_HDR_LEN;
+
+ rxwi = (struct mt7601u_rxwi *) data;
+ data += sizeof(struct mt7601u_rxwi);
+ seg_len -= sizeof(struct mt7601u_rxwi);
+
+ if (unlikely(rxwi->zero[0] || rxwi->zero[1] || rxwi->zero[2]))
+ dev_err_once(dev->dev, "Error: RXWI zero fields are set\n");
+ if (unlikely(MT76_GET(MT_RXD_INFO_TYPE, fce_info)))
+ dev_err_once(dev->dev, "Error: RX path seen a non-pkt urb\n");
+
+ trace_mt_rx(dev, rxwi, fce_info);
+
+ if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_L2PAD)) {
+ int hdr_len = ieee80211_get_hdrlen_from_buf(data, seg_len);
+
+ memmove(data + 2, data, hdr_len);
+ seg_len -= 2;
+ data += 2;
}
+ skb = __netdev_alloc_skb(NULL, 256, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ true_len = mt76_mac_process_rx(dev, skb, data, rxwi);
+ WARN_ON(true_len > seg_len || true_len < seg_len - 3);
+
+ mt7601u_rx_add_frags(dev, skb, data, true_len, truesize, p);
+
ieee80211_rx_ni(dev->hw, skb);
}
@@ -110,17 +183,31 @@ static u16 mt7601u_rx_next_seg_len(u8 *data, u32 data_len)
}
static void
-mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf *e)
+mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf_rx *e)
{
u32 seg_len, data_len = e->urb->actual_length;
- u8 *data = e->buf;
+ u8 *data = page_address(e->p);
+ struct page *new_p = NULL;
+ bool paged = true;
int cnt = 0;
if (!test_bit(MT7601U_STATE_INITIALIZED, &dev->state))
return;
+ /* Copy if there is very little data in the buffer. */
+ if (data_len < 512) {
+ paged = false;
+ } else {
+ new_p = dev_alloc_pages(MT_RX_ORDER);
+ if (!new_p)
+ paged = false;
+ }
+
while ((seg_len = mt7601u_rx_next_seg_len(data, data_len))) {
- mt7601u_rx_process_seg(dev, data, seg_len);
+ if (paged)
+ mt7601u_rx_process_seg_paged(dev, data, seg_len, e->p);
+ else
+ mt7601u_rx_process_seg(dev, data, seg_len);
data_len -= seg_len;
data += seg_len;
@@ -128,14 +215,21 @@ mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf *e)
}
if (cnt > 1)
- trace_mt_rx_dma_aggr(dev, cnt);
+ trace_mt_rx_dma_aggr(dev, cnt, paged);
+
+ if (paged) {
+ /* we have one extra ref from the allocator */
+ put_page(e->p);
+
+ e->p = new_p;
+ }
}
-static struct mt7601u_dma_buf *
+static struct mt7601u_dma_buf_rx *
mt7601u_rx_get_pending_entry(struct mt7601u_dev *dev)
{
struct mt7601u_rx_queue *q = &dev->rx_q;
- struct mt7601u_dma_buf *buf = NULL;
+ struct mt7601u_dma_buf_rx *buf = NULL;
unsigned long flags;
spin_lock_irqsave(&dev->rx_lock, flags);
@@ -175,15 +269,14 @@ out:
static void mt7601u_rx_tasklet(unsigned long data)
{
struct mt7601u_dev *dev = (struct mt7601u_dev *) data;
- struct mt7601u_dma_buf *e;
+ struct mt7601u_dma_buf_rx *e;
while ((e = mt7601u_rx_get_pending_entry(dev))) {
if (e->urb->status)
continue;
mt7601u_rx_process_entry(dev, e);
- mt7601u_usb_submit_buf(dev, USB_DIR_IN, MT_EP_IN_PKT_RX, e,
- GFP_ATOMIC, mt7601u_complete_rx, dev);
+ mt7601u_submit_rx_buf(dev, e, GFP_ATOMIC);
}
}
@@ -325,14 +418,33 @@ static void mt7601u_kill_rx(struct mt7601u_dev *dev)
spin_unlock_irqrestore(&dev->rx_lock, flags);
}
+static int mt7601u_submit_rx_buf(struct mt7601u_dev *dev,
+ struct mt7601u_dma_buf_rx *e, gfp_t gfp)
+{
+ struct usb_device *usb_dev = mt7601u_to_usb_dev(dev);
+ u8 *buf = page_address(e->p);
+ unsigned pipe;
+ int ret;
+
+ pipe = usb_rcvbulkpipe(usb_dev, dev->in_eps[MT_EP_IN_PKT_RX]);
+
+ usb_fill_bulk_urb(e->urb, usb_dev, pipe, buf, MT_RX_URB_SIZE,
+ mt7601u_complete_rx, dev);
+
+ trace_mt_submit_urb(dev, e->urb);
+ ret = usb_submit_urb(e->urb, gfp);
+ if (ret)
+ dev_err(dev->dev, "Error: submit RX URB failed:%d\n", ret);
+
+ return ret;
+}
+
static int mt7601u_submit_rx(struct mt7601u_dev *dev)
{
int i, ret;
for (i = 0; i < dev->rx_q.entries; i++) {
- ret = mt7601u_usb_submit_buf(dev, USB_DIR_IN, MT_EP_IN_PKT_RX,
- &dev->rx_q.e[i], GFP_KERNEL,
- mt7601u_complete_rx, dev);
+ ret = mt7601u_submit_rx_buf(dev, &dev->rx_q.e[i], GFP_KERNEL);
if (ret)
return ret;
}
@@ -344,8 +456,10 @@ static void mt7601u_free_rx(struct mt7601u_dev *dev)
{
int i;
- for (i = 0; i < dev->rx_q.entries; i++)
- mt7601u_usb_free_buf(dev, &dev->rx_q.e[i]);
+ for (i = 0; i < dev->rx_q.entries; i++) {
+ __free_pages(dev->rx_q.e[i].p, MT_RX_ORDER);
+ usb_free_urb(dev->rx_q.e[i].urb);
+ }
}
static int mt7601u_alloc_rx(struct mt7601u_dev *dev)
@@ -356,9 +470,13 @@ static int mt7601u_alloc_rx(struct mt7601u_dev *dev)
dev->rx_q.dev = dev;
dev->rx_q.entries = N_RX_ENTRIES;
- for (i = 0; i < N_RX_ENTRIES; i++)
- if (mt7601u_usb_alloc_buf(dev, MT_RX_URB_SIZE, &dev->rx_q.e[i]))
+ for (i = 0; i < N_RX_ENTRIES; i++) {
+ dev->rx_q.e[i].urb = usb_alloc_urb(0, GFP_KERNEL);
+ dev->rx_q.e[i].p = dev_alloc_pages(MT_RX_ORDER);
+
+ if (!dev->rx_q.e[i].urb || !dev->rx_q.e[i].p)
return -ENOMEM;
+ }
return 0;
}
diff --git a/mac.c b/mac.c
index 539751362b41..8802366f7181 100644
--- a/mac.c
+++ b/mac.c
@@ -441,30 +441,28 @@ mt7601u_rx_monitor_beacon(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi,
}
static int
-mt7601u_rx_is_our_beacon(struct mt7601u_dev *dev, struct sk_buff *skb)
+mt7601u_rx_is_our_beacon(struct mt7601u_dev *dev, u8 *data)
{
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)data;
return ieee80211_is_beacon(hdr->frame_control) &&
ether_addr_equal(hdr->addr2, dev->ap_bssid);
}
-int mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxi)
+u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
+ u8 *data, void *rxi)
{
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct mt7601u_rxwi *rxwi = rxi;
u32 ctl = le32_to_cpu(rxwi->ctl);
u16 rate = le16_to_cpu(rxwi->rate);
- int len, rssi;
+ int rssi;
if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_DECRYPT)) {
status->flag |= RX_FLAG_DECRYPTED;
status->flag |= RX_FLAG_IV_STRIPPED | RX_FLAG_MMIC_STRIPPED;
}
- len = MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
- skb_trim(skb, len);
-
status->chains = BIT(0);
rssi = mt7601u_phy_get_rssi(dev, rxwi, rate);
status->chain_signal[0] = status->signal = rssi;
@@ -474,13 +472,13 @@ int mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxi)
mt76_mac_process_rate(status, rate);
spin_lock_bh(&dev->con_mon_lock);
- if (mt7601u_rx_is_our_beacon(dev, skb))
+ if (mt7601u_rx_is_our_beacon(dev, data))
mt7601u_rx_monitor_beacon(dev, rxwi, rate, rssi);
else if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_U2M))
dev->avg_rssi = (dev->avg_rssi * 15) / 16 + (rssi << 8);
spin_unlock_bh(&dev->con_mon_lock);
- return 0;
+ return MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
}
static enum mt76_cipher_type
diff --git a/mac.h b/mac.h
index 90cf147f37e7..2c22d63c63a2 100644
--- a/mac.h
+++ b/mac.h
@@ -160,8 +160,8 @@ struct mt76_txwi {
#define MT_TXWI_CTL_CHAN_CHECK_PKT BIT(4)
#define MT_TXWI_CTL_PIFS_REV BIT(6)
-int
-mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxwi);
+u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
+ u8 *data, void *rxi);
int mt76_mac_wcid_set_key(struct mt7601u_dev *dev, u8 idx,
struct ieee80211_key_conf *key);
void mt76_mac_wcid_set_rate(struct mt7601u_dev *dev, struct mt76_wcid *wcid,
diff --git a/mt7601u.h b/mt7601u.h
index f9957350a187..d6f83e03cb58 100644
--- a/mt7601u.h
+++ b/mt7601u.h
@@ -37,6 +37,7 @@
#define MT_USB_AGGR_SIZE_LIMIT 21 /* * 1024B */
#define MT_USB_AGGR_TIMEOUT 0x80 /* * 33ns */
#define MT_RX_URB_SIZE (24 * 1024)
+#define MT_RX_ORDER 3
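+/* order 3 = 32K with 4K pages, enough to back MT_RX_URB_SIZE (24K) */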
struct mt7601u_dma_buf {
struct urb *urb;
@@ -73,7 +74,10 @@ struct mac_stats {
struct mt7601u_rx_queue {
struct mt7601u_dev *dev;
- struct mt7601u_dma_buf e[N_RX_ENTRIES];
+ struct mt7601u_dma_buf_rx {
+ struct urb *urb;
+ struct page *p;
+ } e[N_RX_ENTRIES];
unsigned int start;
unsigned int end;
diff --git a/trace.h b/trace.h
index db86272401ff..289897300ef0 100644
--- a/trace.h
+++ b/trace.h
@@ -352,17 +352,20 @@ TRACE_EVENT(mt_tx_status,
);
TRACE_EVENT(mt_rx_dma_aggr,
- TP_PROTO(struct mt7601u_dev *dev, int cnt),
- TP_ARGS(dev, cnt),
+ TP_PROTO(struct mt7601u_dev *dev, int cnt, bool paged),
+ TP_ARGS(dev, cnt, paged),
TP_STRUCT__entry(
DEV_ENTRY
__field(u8, cnt)
+ __field(bool, paged)
),
TP_fast_assign(
DEV_ASSIGN;
__entry->cnt = cnt;
+ __entry->paged = paged;
),
- TP_printk(DEV_PR_FMT "%d", DEV_PR_ARG, __entry->cnt)
+ TP_printk(DEV_PR_FMT "cnt:%d paged:%d",
+ DEV_PR_ARG, __entry->cnt, __entry->paged)
);
DEFINE_EVENT(dev_simple_evt, set_key,
Thread overview: 21+ messages
2015-05-02 13:01 [PATCH 0/2] add mt7601u driver moorray3
2015-05-02 13:01 ` [PATCH 1/2] " moorray3
2015-05-04 9:37 ` Johannes Berg
2015-05-04 10:04 ` Jakub Kiciński
2015-05-04 10:15 ` Johannes Berg
2015-05-05 0:49 ` Jakub Kiciński [this message]
2015-05-05 6:45 ` Johannes Berg
2015-05-05 10:44 ` Jakub Kiciński
2015-05-02 13:01 ` [PATCH 2/2] add mt7601u build infrastructure and co moorray3
2015-05-05 20:22 ` [PATCHv2 0/2] add mt7601u driver Jakub Kicinski
2015-05-05 20:22 ` [PATCHv2 1/2] " Jakub Kicinski
2015-05-05 20:22 ` [PATCHv2 2/2] add mt7601u kbuild and others Jakub Kicinski
2015-05-25 8:13 ` [PATCHv2 0/2] add mt7601u driver Kalle Valo
2015-05-25 9:35 ` Jakub Kiciński
2015-05-26 7:16 ` Johannes Berg
2015-05-25 9:34 ` [PATCHv3 " Jakub Kicinski
2015-05-25 9:34 ` [PATCHv3 1/2] " Jakub Kicinski
2015-05-25 9:34 ` [PATCHv3 2/2] add mt7601u kbuild and others Jakub Kicinski
2015-05-26 10:26 ` [PATCHv3 0/2] add mt7601u driver Kalle Valo
2015-05-26 9:16 ` [PATCHv4] " Jakub Kicinski
2015-05-28 8:36 ` Kalle Valo