linux-wireless.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Jakub Kiciński" <moorray3@wp.pl>
To: Johannes Berg <johannes@sipsolutions.net>
Cc: Kalle Valo <kvalo@codeaurora.org>,
	Felix Fietkau <nbd@openwrt.org>,
	linux-wireless <linux-wireless@vger.kernel.org>,
	Jakub Kicinski <kubakici@wp.pl>
Subject: Re: [PATCH 1/2] add mt7601u driver
Date: Tue, 5 May 2015 02:49:49 +0200	[thread overview]
Message-ID: <20150505024949.0a672dd1@north> (raw)
In-Reply-To: <1430734511.2013.18.camel@sipsolutions.net>

On Mon, 04 May 2015 12:15:11 +0200, Johannes Berg wrote:
> On Mon, 2015-05-04 at 12:04 +0200, Jakub Kiciński wrote:
> 
> > > Don't know how your buffers are set up, but if the DMA engine consumes
> > > pages you could consider using paged RX instead of the memcpy().
> > 
> > DMA engine can concatenate multiple frames into a single USB bulk
> > transfer to a large continuous buffer.  There is no way to request 
> > any alignment of the frames within that large buffer so I think paged 
> > RX is not an option.
> 
> You could probably still do it because the networking stack only
> requires the *headers* to be aligned. But if they headers aren't in the
> skb->data (skb->head) then they'll be pulled into that by the network
> stack, where they'll be aligned.

I *think* I got it.  Would you care to take a look?  I basically
allocate a compound page with dev_alloc_pages() and run get_page() for
every fragment.

------>8-----------------------------------

diff --git a/dma.c b/dma.c
index e865cf8384a5..a92993a64aeb 100644
--- a/dma.c
+++ b/dma.c
@@ -16,6 +16,9 @@
 #include "usb.h"
 #include "trace.h"
 
+static int mt7601u_submit_rx_buf(struct mt7601u_dev *dev,
+				 struct mt7601u_dma_buf_rx *e, gfp_t gfp);
+
 static unsigned int ieee80211_get_hdrlen_from_buf(const u8 *data, unsigned len)
 {
 	const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *)data;
@@ -60,7 +63,7 @@ mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, u32 seg_len)
 {
 	struct sk_buff *skb;
 	struct mt7601u_rxwi *rxwi;
-	u32 fce_info;
+	u32 true_len, fce_info;
 
 	/* DMA_INFO field at the beginning of the segment contains only some of
 	 * the information, we need to read the FCE descriptor from the end.
@@ -86,11 +89,81 @@ mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, u32 seg_len)
 	if (!skb)
 		return;
 
-	if (mt76_mac_process_rx(dev, skb, rxwi)) {
-		dev_kfree_skb(skb);
-		return;
+	true_len = mt76_mac_process_rx(dev, skb, skb->data, rxwi);
+	skb_trim(skb, true_len);
+
+	ieee80211_rx_ni(dev->hw, skb);
+}
+
+static void mt7601u_rx_add_frags(struct mt7601u_dev *dev, struct sk_buff *skb,
+				 void *data, u32 true_len, u32 truesize,
+				 struct page *p)
+{
+	unsigned long addr = (unsigned long) data;
+	u32 overpage = 0;
+	u32 p_off = (data - page_address(p)) >> PAGE_SHIFT;
+
+	if ((addr & ~PAGE_MASK) + true_len > PAGE_SIZE)
+		overpage = (addr & ~PAGE_MASK) + true_len - PAGE_SIZE;
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, p + p_off,
+			addr & ~PAGE_MASK, true_len - overpage, truesize);
+	get_page(p + p_off);
+
+	/* Do we really need to split the buffer if it crosses a page boundary?
+	 * Does networking code care?
+	 */
+	if (overpage) {
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, p + p_off + 1,
+				0, overpage, 0);
+		get_page(p + p_off + 1);
+	}
+}
+
+static void mt7601u_rx_process_seg_paged(struct mt7601u_dev *dev, u8 *data,
+					 u32 seg_len, struct page *p)
+{
+	struct sk_buff *skb;
+	struct mt7601u_rxwi *rxwi;
+	u32 true_len, fce_info, truesize = seg_len;
+
+	/* DMA_INFO field at the beginning of the segment contains only some of
+	 * the information, we need to read the FCE descriptor from the end.
+	 */
+	fce_info = get_unaligned_le32(data + seg_len - MT_FCE_INFO_LEN);
+	seg_len -= MT_FCE_INFO_LEN;
+
+	data += MT_DMA_HDR_LEN;
+	seg_len -= MT_DMA_HDR_LEN;
+
+	rxwi = (struct mt7601u_rxwi *) data;
+	data += sizeof(struct mt7601u_rxwi);
+	seg_len -= sizeof(struct mt7601u_rxwi);
+
+	if (unlikely(rxwi->zero[0] || rxwi->zero[1] || rxwi->zero[2]))
+		dev_err_once(dev->dev, "Error: RXWI zero fields are set\n");
+	if (unlikely(MT76_GET(MT_RXD_INFO_TYPE, fce_info)))
+		dev_err_once(dev->dev, "Error: RX path seen a non-pkt urb\n");
+
+	trace_mt_rx(dev, rxwi, fce_info);
+
+	if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_L2PAD)) {
+		int hdr_len = ieee80211_get_hdrlen_from_buf(data, seg_len);
+
+		memmove(data + 2, data, hdr_len);
+		seg_len -= 2;
+		data += 2;
 	}
 
+	skb = __netdev_alloc_skb(NULL, 256, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	true_len = mt76_mac_process_rx(dev, skb, data, rxwi);
+	WARN_ON(true_len > seg_len || true_len < seg_len - 3);
+
+	mt7601u_rx_add_frags(dev, skb, data, true_len, truesize, p);
+
 	ieee80211_rx_ni(dev->hw, skb);
 }
 
@@ -110,17 +183,31 @@ static u16 mt7601u_rx_next_seg_len(u8 *data, u32 data_len)
 }
 
 static void
-mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf *e)
+mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf_rx *e)
 {
 	u32 seg_len, data_len = e->urb->actual_length;
-	u8 *data = e->buf;
+	u8 *data = page_address(e->p);
+	struct page *new_p = NULL;
+	bool paged = true;
 	int cnt = 0;
 
 	if (!test_bit(MT7601U_STATE_INITIALIZED, &dev->state))
 		return;
 
+	/* Copy if there is very little data in the buffer. */
+	if (data_len < 512) {
+		paged = false;
+	} else {
+		new_p = dev_alloc_pages(MT_RX_ORDER);
+		if (!new_p)
+			paged = false;
+	}
+
 	while ((seg_len = mt7601u_rx_next_seg_len(data, data_len))) {
-		mt7601u_rx_process_seg(dev, data, seg_len);
+		if (paged)
+			mt7601u_rx_process_seg_paged(dev, data, seg_len, e->p);
+		else
+			mt7601u_rx_process_seg(dev, data, seg_len);
 
 		data_len -= seg_len;
 		data += seg_len;
@@ -128,14 +215,21 @@ mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf *e)
 	}
 
 	if (cnt > 1)
-		trace_mt_rx_dma_aggr(dev, cnt);
+		trace_mt_rx_dma_aggr(dev, cnt, paged);
+
+	if (paged) {
+		/* we have one extra ref from the allcator */
+		put_page(e->p);
+
+		e->p = new_p;
+	}
 }
 
-static struct mt7601u_dma_buf *
+static struct mt7601u_dma_buf_rx *
 mt7601u_rx_get_pending_entry(struct mt7601u_dev *dev)
 {
 	struct mt7601u_rx_queue *q = &dev->rx_q;
-	struct mt7601u_dma_buf *buf = NULL;
+	struct mt7601u_dma_buf_rx *buf = NULL;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->rx_lock, flags);
@@ -175,15 +269,14 @@ out:
 static void mt7601u_rx_tasklet(unsigned long data)
 {
 	struct mt7601u_dev *dev = (struct mt7601u_dev *) data;
-	struct mt7601u_dma_buf *e;
+	struct mt7601u_dma_buf_rx *e;
 
 	while ((e = mt7601u_rx_get_pending_entry(dev))) {
 		if (e->urb->status)
 			continue;
 
 		mt7601u_rx_process_entry(dev, e);
-		mt7601u_usb_submit_buf(dev, USB_DIR_IN, MT_EP_IN_PKT_RX, e,
-				       GFP_ATOMIC, mt7601u_complete_rx, dev);
+		mt7601u_submit_rx_buf(dev, e, GFP_ATOMIC);
 	}
 }
 
@@ -325,14 +418,33 @@ static void mt7601u_kill_rx(struct mt7601u_dev *dev)
 	spin_unlock_irqrestore(&dev->rx_lock, flags);
 }
 
+static int mt7601u_submit_rx_buf(struct mt7601u_dev *dev,
+				 struct mt7601u_dma_buf_rx *e, gfp_t gfp)
+{
+	struct usb_device *usb_dev = mt7601u_to_usb_dev(dev);
+	u8 *buf = page_address(e->p);
+	unsigned pipe;
+	int ret;
+
+	pipe = usb_rcvbulkpipe(usb_dev, dev->in_eps[MT_EP_IN_PKT_RX]);
+
+	usb_fill_bulk_urb(e->urb, usb_dev, pipe, buf, MT_RX_URB_SIZE,
+			  mt7601u_complete_rx, dev);
+
+	trace_mt_submit_urb(dev, e->urb);
+	ret = usb_submit_urb(e->urb, gfp);
+	if (ret)
+		dev_err(dev->dev, "Error: submit RX URB failed:%d\n", ret);
+
+	return ret;
+}
+
 static int mt7601u_submit_rx(struct mt7601u_dev *dev)
 {
 	int i, ret;
 
 	for (i = 0; i < dev->rx_q.entries; i++) {
-		ret = mt7601u_usb_submit_buf(dev, USB_DIR_IN, MT_EP_IN_PKT_RX,
-					     &dev->rx_q.e[i], GFP_KERNEL,
-					     mt7601u_complete_rx, dev);
+		ret = mt7601u_submit_rx_buf(dev, &dev->rx_q.e[i], GFP_KERNEL);
 		if (ret)
 			return ret;
 	}
@@ -344,8 +456,10 @@ static void mt7601u_free_rx(struct mt7601u_dev *dev)
 {
 	int i;
 
-	for (i = 0; i < dev->rx_q.entries; i++)
-		mt7601u_usb_free_buf(dev, &dev->rx_q.e[i]);
+	for (i = 0; i < dev->rx_q.entries; i++) {
+		__free_pages(dev->rx_q.e[i].p, MT_RX_ORDER);
+		usb_free_urb(dev->rx_q.e[i].urb);
+	}
 }
 
 static int mt7601u_alloc_rx(struct mt7601u_dev *dev)
@@ -356,9 +470,13 @@ static int mt7601u_alloc_rx(struct mt7601u_dev *dev)
 	dev->rx_q.dev = dev;
 	dev->rx_q.entries = N_RX_ENTRIES;
 
-	for (i = 0; i < N_RX_ENTRIES; i++)
-		if (mt7601u_usb_alloc_buf(dev, MT_RX_URB_SIZE, &dev->rx_q.e[i]))
+	for (i = 0; i < N_RX_ENTRIES; i++) {
+		dev->rx_q.e[i].urb = usb_alloc_urb(0, GFP_KERNEL);
+		dev->rx_q.e[i].p = dev_alloc_pages(MT_RX_ORDER);
+
+		if (!dev->rx_q.e[i].urb || !dev->rx_q.e[i].p)
 			return -ENOMEM;
+	}
 
 	return 0;
 }
diff --git a/mac.c b/mac.c
index 539751362b41..8802366f7181 100644
--- a/mac.c
+++ b/mac.c
@@ -441,30 +441,28 @@ mt7601u_rx_monitor_beacon(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi,
 }
 
 static int
-mt7601u_rx_is_our_beacon(struct mt7601u_dev *dev, struct sk_buff *skb)
+mt7601u_rx_is_our_beacon(struct mt7601u_dev *dev, u8 *data)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)data;
 
 	return ieee80211_is_beacon(hdr->frame_control) &&
 		ether_addr_equal(hdr->addr2, dev->ap_bssid);
 }
 
-int mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxi)
+u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
+			u8 *data, void *rxi)
 {
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct mt7601u_rxwi *rxwi = rxi;
 	u32 ctl = le32_to_cpu(rxwi->ctl);
 	u16 rate = le16_to_cpu(rxwi->rate);
-	int len, rssi;
+	int rssi;
 
 	if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_DECRYPT)) {
 		status->flag |= RX_FLAG_DECRYPTED;
 		status->flag |= RX_FLAG_IV_STRIPPED | RX_FLAG_MMIC_STRIPPED;
 	}
 
-	len = MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
-	skb_trim(skb, len);
-
 	status->chains = BIT(0);
 	rssi = mt7601u_phy_get_rssi(dev, rxwi, rate);
 	status->chain_signal[0] = status->signal = rssi;
@@ -474,13 +472,13 @@ int mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxi)
 	mt76_mac_process_rate(status, rate);
 
 	spin_lock_bh(&dev->con_mon_lock);
-	if (mt7601u_rx_is_our_beacon(dev, skb))
+	if (mt7601u_rx_is_our_beacon(dev, data))
 		mt7601u_rx_monitor_beacon(dev, rxwi, rate, rssi);
 	else if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_U2M))
 		dev->avg_rssi = (dev->avg_rssi * 15) / 16 + (rssi << 8);
 	spin_unlock_bh(&dev->con_mon_lock);
 
-	return 0;
+	return MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
 }
 
 static enum mt76_cipher_type
diff --git a/mac.h b/mac.h
index 90cf147f37e7..2c22d63c63a2 100644
--- a/mac.h
+++ b/mac.h
@@ -160,8 +160,8 @@ struct mt76_txwi {
 #define MT_TXWI_CTL_CHAN_CHECK_PKT	BIT(4)
 #define MT_TXWI_CTL_PIFS_REV		BIT(6)
 
-int
-mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxwi);
+u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
+			u8 *data, void *rxi);
 int mt76_mac_wcid_set_key(struct mt7601u_dev *dev, u8 idx,
 			  struct ieee80211_key_conf *key);
 void mt76_mac_wcid_set_rate(struct mt7601u_dev *dev, struct mt76_wcid *wcid,
diff --git a/mt7601u.h b/mt7601u.h
index f9957350a187..d6f83e03cb58 100644
--- a/mt7601u.h
+++ b/mt7601u.h
@@ -37,6 +37,7 @@
 #define MT_USB_AGGR_SIZE_LIMIT		21 /* * 1024B */
 #define MT_USB_AGGR_TIMEOUT		0x80 /* * 33ns */
 #define MT_RX_URB_SIZE			(24 * 1024)
+#define MT_RX_ORDER			3
 
 struct mt7601u_dma_buf {
 	struct urb *urb;
@@ -73,7 +74,10 @@ struct mac_stats {
 struct mt7601u_rx_queue {
 	struct mt7601u_dev *dev;
 
-	struct mt7601u_dma_buf e[N_RX_ENTRIES];
+	struct mt7601u_dma_buf_rx {
+		struct urb *urb;
+		struct page *p;
+	} e[N_RX_ENTRIES];
 
 	unsigned int start;
 	unsigned int end;
diff --git a/trace.h b/trace.h
index db86272401ff..289897300ef0 100644
--- a/trace.h
+++ b/trace.h
@@ -352,17 +352,20 @@ TRACE_EVENT(mt_tx_status,
 );
 
 TRACE_EVENT(mt_rx_dma_aggr,
-	TP_PROTO(struct mt7601u_dev *dev, int cnt),
-	TP_ARGS(dev, cnt),
+	TP_PROTO(struct mt7601u_dev *dev, int cnt, bool paged),
+	TP_ARGS(dev, cnt, paged),
 	TP_STRUCT__entry(
 		DEV_ENTRY
 		__field(u8, cnt)
+		__field(bool, paged)
 	),
 	TP_fast_assign(
 		DEV_ASSIGN;
 		__entry->cnt = cnt;
+		__entry->paged = paged;
 	),
-	TP_printk(DEV_PR_FMT "%d", DEV_PR_ARG, __entry->cnt)
+	TP_printk(DEV_PR_FMT "cnt:%d paged:%d",
+		  DEV_PR_ARG, __entry->cnt, __entry->paged)
 );
 
 DEFINE_EVENT(dev_simple_evt, set_key,


  reply	other threads:[~2015-05-05  0:49 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-02 13:01 [PATCH 0/2] add mt7601u driver moorray3
2015-05-02 13:01 ` [PATCH 1/2] " moorray3
2015-05-04  9:37   ` Johannes Berg
2015-05-04 10:04     ` Jakub Kiciński
2015-05-04 10:15       ` Johannes Berg
2015-05-05  0:49         ` Jakub Kiciński [this message]
2015-05-05  6:45           ` Johannes Berg
2015-05-05 10:44             ` Jakub Kiciński
2015-05-02 13:01 ` [PATCH 2/2] add mt7601u build infrastructure and co moorray3
2015-05-05 20:22 ` [PATCHv2 0/2] add mt7601u driver Jakub Kicinski
2015-05-05 20:22   ` [PATCHv2 1/2] " Jakub Kicinski
2015-05-05 20:22   ` [PATCHv2 2/2] add mt7601u kbuild and others Jakub Kicinski
2015-05-25  8:13   ` [PATCHv2 0/2] add mt7601u driver Kalle Valo
2015-05-25  9:35     ` Jakub Kiciński
2015-05-26  7:16       ` Johannes Berg
2015-05-25  9:34 ` [PATCHv3 " Jakub Kicinski
2015-05-25  9:34   ` [PATCHv3 1/2] " Jakub Kicinski
2015-05-25  9:34   ` [PATCHv3 2/2] add mt7601u kbuild and others Jakub Kicinski
2015-05-26 10:26   ` [PATCHv3 0/2] add mt7601u driver Kalle Valo
2015-05-26  9:16 ` [PATCHv4] " Jakub Kicinski
2015-05-28  8:36   ` Kalle Valo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150505024949.0a672dd1@north \
    --to=moorray3@wp.pl \
    --cc=johannes@sipsolutions.net \
    --cc=kubakici@wp.pl \
    --cc=kvalo@codeaurora.org \
    --cc=linux-wireless@vger.kernel.org \
    --cc=nbd@openwrt.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).