netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 00/15] netvsc: misc patches
@ 2017-05-03 23:01 Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 01/15] vmbus: simplify hv_ringbuffer_read Stephen Hemminger
                   ` (15 more replies)
  0 siblings, 16 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

Most of these patches are performance related. There is also one bug fix
for incorrect handling of NAPI on device removal.

Stephen Hemminger (15):
  vmbus: simplify hv_ringbuffer_read
  vmbus: fix unnecessary signal events as result of NAPI
  netvsc: make sure napi enabled before vmbus_open
  netvsc: don't reacquire rtnl on device removal
  netvsc: optimize avail percent calculation
  netvsc: prefetch the first incoming ring element
  netvsc: convert ring_size to unsigned
  netvsc: allow overriding send/recv buffer size
  netvsc: optimize netvsc_send_pkt
  netvsc: replace modulus with mask for alignment
  netvsc: reduce unnecessary memset
  netvsc: size receive completion ring based on receive area
  netvsc: convert open count from atomic to refcount
  netvsc: optimize receive completions
  netvsc: use vzalloc_node for receive completion data

 drivers/hv/ring_buffer.c          | 179 +++++++++---------------
 drivers/net/hyperv/hyperv_net.h   |  32 +++--
 drivers/net/hyperv/netvsc.c       | 282 +++++++++++++++++---------------------
 drivers/net/hyperv/netvsc_drv.c   |  56 ++++++--
 drivers/net/hyperv/rndis_filter.c |  20 +--
 include/linux/hyperv.h            |  70 +---------
 6 files changed, 266 insertions(+), 373 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH net-next 01/15] vmbus: simplify hv_ringbuffer_read
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 02/15] vmbus: fix unnecessary signal events as result of NAPI Stephen Hemminger
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

With new iterator functions (and the double mapping) the ring buffer
read function can be greatly simplified.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/hv/ring_buffer.c | 122 +++++++----------------------------------------
 1 file changed, 18 insertions(+), 104 deletions(-)

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index c3f1a9e33cef..4bffeae6990b 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -96,30 +96,6 @@ hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
 	ring_info->ring_buffer->write_index = next_write_location;
 }
 
-/* Get the next read location for the specified ring buffer. */
-static inline u32
-hv_get_next_read_location(const struct hv_ring_buffer_info *ring_info)
-{
-	return ring_info->ring_buffer->read_index;
-}
-
-/*
- * Get the next read location + offset for the specified ring buffer.
- * This allows the caller to skip.
- */
-static inline u32
-hv_get_next_readlocation_withoffset(const struct hv_ring_buffer_info *ring_info,
-				    u32 offset)
-{
-	u32 next = ring_info->ring_buffer->read_index;
-
-	next += offset;
-	if (next >= ring_info->ring_datasize)
-		next -= ring_info->ring_datasize;
-
-	return next;
-}
-
 /* Set the next read location for the specified ring buffer. */
 static inline void
 hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
@@ -144,29 +120,6 @@ hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
 }
 
 /*
- * Helper routine to copy to source from ring buffer.
- * Assume there is enough room. Handles wrap-around in src case only!!
- */
-static u32 hv_copyfrom_ringbuffer(
-	const struct hv_ring_buffer_info *ring_info,
-	void				*dest,
-	u32				destlen,
-	u32				start_read_offset)
-{
-	void *ring_buffer = hv_get_ring_buffer(ring_info);
-	u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
-
-	memcpy(dest, ring_buffer + start_read_offset, destlen);
-
-	start_read_offset += destlen;
-	if (start_read_offset >= ring_buffer_size)
-		start_read_offset -= ring_buffer_size;
-
-	return start_read_offset;
-}
-
-
-/*
  * Helper routine to copy from source to ring buffer.
  * Assume there is enough room. Handles wrap-around in dest case only!!
  */
@@ -338,86 +291,47 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
 	return 0;
 }
 
-static inline void
-init_cached_read_index(struct hv_ring_buffer_info *rbi)
-{
-	rbi->cached_read_index = rbi->ring_buffer->read_index;
-}
-
 int hv_ringbuffer_read(struct vmbus_channel *channel,
 		       void *buffer, u32 buflen, u32 *buffer_actual_len,
 		       u64 *requestid, bool raw)
 {
-	u32 bytes_avail_toread;
-	u32 next_read_location = 0;
-	u64 prev_indices = 0;
-	struct vmpacket_descriptor desc;
-	u32 offset;
-	u32 packetlen;
-	int ret = 0;
-	struct hv_ring_buffer_info *inring_info = &channel->inbound;
+	struct vmpacket_descriptor *desc;
+	u32 packetlen, offset;
 
-	if (buflen <= 0)
+	if (unlikely(buflen == 0))
 		return -EINVAL;
 
-
 	*buffer_actual_len = 0;
 	*requestid = 0;
 
-	bytes_avail_toread = hv_get_bytes_to_read(inring_info);
 	/* Make sure there is something to read */
-	if (bytes_avail_toread < sizeof(desc)) {
+	desc = hv_pkt_iter_first(channel);
+	if (desc == NULL) {
 		/*
 		 * No error is set when there is even no header, drivers are
 		 * supposed to analyze buffer_actual_len.
 		 */
-		return ret;
+		return 0;
 	}
 
-	init_cached_read_index(inring_info);
-
-	next_read_location = hv_get_next_read_location(inring_info);
-	next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc,
-						    sizeof(desc),
-						    next_read_location);
-
-	offset = raw ? 0 : (desc.offset8 << 3);
-	packetlen = (desc.len8 << 3) - offset;
+	offset = raw ? 0 : (desc->offset8 << 3);
+	packetlen = (desc->len8 << 3) - offset;
 	*buffer_actual_len = packetlen;
-	*requestid = desc.trans_id;
-
-	if (bytes_avail_toread < packetlen + offset)
-		return -EAGAIN;
+	*requestid = desc->trans_id;
 
-	if (packetlen > buflen)
+	if (unlikely(packetlen > buflen))
 		return -ENOBUFS;
 
-	next_read_location =
-		hv_get_next_readlocation_withoffset(inring_info, offset);
+	/* since ring is double mapped, only one copy is necessary */
+	memcpy(buffer, (const char *)desc + offset, packetlen);
 
-	next_read_location = hv_copyfrom_ringbuffer(inring_info,
-						buffer,
-						packetlen,
-						next_read_location);
+	/* Advance ring index to next packet descriptor */
+	__hv_pkt_iter_next(channel, desc);
 
-	next_read_location = hv_copyfrom_ringbuffer(inring_info,
-						&prev_indices,
-						sizeof(u64),
-						next_read_location);
+	/* Notify host of update */
+	hv_pkt_iter_close(channel);
 
-	/*
-	 * Make sure all reads are done before we update the read index since
-	 * the writer may start writing to the read area once the read index
-	 * is updated.
-	 */
-	virt_mb();
-
-	/* Update the read index */
-	hv_set_next_read_location(inring_info, next_read_location);
-
-	hv_signal_on_read(channel);
-
-	return ret;
+	return 0;
 }
 
 /*
@@ -448,7 +362,7 @@ struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
 	struct hv_ring_buffer_info *rbi = &channel->inbound;
 
 	/* set state for later hv_signal_on_read() */
-	init_cached_read_index(rbi);
+	rbi->cached_read_index = rbi->ring_buffer->read_index;
 
 	if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
 		return NULL;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 02/15] vmbus: fix unnecessary signal events as result of NAPI
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 01/15] vmbus: simplify hv_ringbuffer_read Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 03/15] netvsc: make sure napi enabled before vmbus_open Stephen Hemminger
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

With NAPI, the ring buffer is processed in incremental steps so
the read index needs to be updated after each section. But this can
lead to lots of bogus vmbus signal events which hurts performance.

This patch rearranges the host incoming signalling logic to be
more complete and to eliminate unnecessary ring buffer bookkeeping.
The new code also checks the interrupt mask flag set by the host to
avoid even more unnecessary signaling.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/hv/ring_buffer.c    | 59 ++++++++++++++++++++++++++++++++------
 drivers/net/hyperv/netvsc.c | 13 ++++++---
 include/linux/hyperv.h      | 70 +--------------------------------------------
 3 files changed, 60 insertions(+), 82 deletions(-)

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 4bffeae6990b..3fba9490a1bb 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -361,9 +361,6 @@ struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
 {
 	struct hv_ring_buffer_info *rbi = &channel->inbound;
 
-	/* set state for later hv_signal_on_read() */
-	rbi->cached_read_index = rbi->ring_buffer->read_index;
-
 	if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
 		return NULL;
 
@@ -390,20 +387,27 @@ __hv_pkt_iter_next(struct vmbus_channel *channel,
 	if (rbi->priv_read_index >= dsize)
 		rbi->priv_read_index -= dsize;
 
-	/* more data? */
-	if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
-		return NULL;
-	else
-		return hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+	return hv_pkt_iter_first(channel);
 }
 EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
 
 /*
  * Update host ring buffer after iterating over packets.
+ *
+ * Avoid unnecessary signaling of the host by making sure that all
+ * data is read, and the host has not masked off the interrupt.
+ *
+ * In addition, in Windows 8 or later there is an extension for the
+ * host to indicate how much space needs to be available before
+ * signaling. The host sets pending_send_sz to the number of bytes
+ * that it is waiting to send.
  */
 void hv_pkt_iter_close(struct vmbus_channel *channel)
 {
 	struct hv_ring_buffer_info *rbi = &channel->inbound;
+	u32 orig_write_sz;
+
+	orig_write_sz = hv_get_bytes_to_write(rbi);
 
 	/*
 	 * Make sure all reads are done before we update the read index since
@@ -411,8 +415,45 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
 	 * is updated.
 	 */
 	virt_rmb();
+
+	/* Update the position where ring buffer has been read from */
 	rbi->ring_buffer->read_index = rbi->priv_read_index;
 
-	hv_signal_on_read(channel);
+	/* If more data is available then no need to signal */
+	if (hv_get_bytes_to_read(rbi))
+		return;
+
+	/*
+	 * If the read of pending_send_sz were reordered to happen
+	 * before the new read index is committed, the host could set
+	 * pending_send_sz after we have already sampled it, and the
+	 * signal to the host would be missed.
+	 */
+	virt_wmb();
+
+	/* If host has disabled notifications then skip */
+	if (rbi->ring_buffer->interrupt_mask)
+		return;
+
+	/* If host supports pending send size feature
+	 * then don't signal until that amount of space is available.
+	 */
+	if (rbi->ring_buffer->feature_bits.feat_pending_send_sz) {
+		u32 pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
+
+		/*
+		 * If there was space before we began iteration, then
+		 * host was not blocked. If pending_sz is zero then
+		 * host has nothing pending.
+		 */
+		if (orig_write_sz > pending_sz)
+			return;
+
+		/* If pending write will not fit, don't give false hope. */
+		if (hv_get_bytes_to_write(rbi) < pending_sz)
+			return;
+	}
+
+	vmbus_setevent(channel);
 }
 EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 15749d359e60..8108e119d8a5 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1251,9 +1251,13 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 	while (nvchan->desc && work_done < budget) {
 		work_done += netvsc_process_raw_pkt(device, channel, net_device,
 						    ndev, nvchan->desc, budget);
-		nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
+		nvchan->desc = __hv_pkt_iter_next(channel, nvchan->desc);
 	}
 
+	hv_pkt_iter_close(channel);
+
+	netvsc_chk_recv_comp(net_device, channel, q_idx);
+
 	/* If receive ring was exhausted
 	 * and not doing busy poll
 	 * then re-enable host interrupts
@@ -1261,10 +1265,11 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 	 */
 	if (work_done < budget &&
 	    napi_complete_done(napi, work_done) &&
-	    hv_end_read(&channel->inbound) != 0)
+	    hv_end_read(&channel->inbound) != 0) {
+		/* special case if new messages are available */
+		hv_begin_read(&channel->inbound);
 		napi_reschedule(napi);
-
-	netvsc_chk_recv_comp(net_device, channel, q_idx);
+	}
 
 	/* Driver may overshoot since multiple packets per descriptor */
 	return min(work_done, budget);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 0c170a3f0d8b..545feabfe70b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -124,10 +124,7 @@ struct hv_ring_buffer_info {
 	spinlock_t ring_lock;
 
 	u32 ring_datasize;		/* < ring_size */
-	u32 ring_data_startoffset;
-	u32 priv_write_index;
-	u32 priv_read_index;
-	u32 cached_read_index;
+	u32 priv_read_index;		/* read cursor */
 };
 
 /*
@@ -180,19 +177,6 @@ static inline u32 hv_get_bytes_to_write(const struct hv_ring_buffer_info *rbi)
 	return write;
 }
 
-static inline u32 hv_get_cached_bytes_to_write(
-	const struct hv_ring_buffer_info *rbi)
-{
-	u32 read_loc, write_loc, dsize, write;
-
-	dsize = rbi->ring_datasize;
-	read_loc = rbi->cached_read_index;
-	write_loc = rbi->ring_buffer->write_index;
-
-	write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
-		read_loc - write_loc;
-	return write;
-}
 /*
  * VMBUS version is 32 bit entity broken up into
  * two 16 bit quantities: major_number. minor_number.
@@ -1456,58 +1440,6 @@ hv_get_ring_buffer(const struct hv_ring_buffer_info *ring_info)
 {
 	return ring_info->ring_buffer->buffer;
 }
-
-/*
- * To optimize the flow management on the send-side,
- * when the sender is blocked because of lack of
- * sufficient space in the ring buffer, potential the
- * consumer of the ring buffer can signal the producer.
- * This is controlled by the following parameters:
- *
- * 1. pending_send_sz: This is the size in bytes that the
- *    producer is trying to send.
- * 2. The feature bit feat_pending_send_sz set to indicate if
- *    the consumer of the ring will signal when the ring
- *    state transitions from being full to a state where
- *    there is room for the producer to send the pending packet.
- */
-
-static inline  void hv_signal_on_read(struct vmbus_channel *channel)
-{
-	u32 cur_write_sz, cached_write_sz;
-	u32 pending_sz;
-	struct hv_ring_buffer_info *rbi = &channel->inbound;
-
-	/*
-	 * Issue a full memory barrier before making the signaling decision.
-	 * Here is the reason for having this barrier:
-	 * If the reading of the pend_sz (in this function)
-	 * were to be reordered and read before we commit the new read
-	 * index (in the calling function)  we could
-	 * have a problem. If the host were to set the pending_sz after we
-	 * have sampled pending_sz and go to sleep before we commit the
-	 * read index, we could miss sending the interrupt. Issue a full
-	 * memory barrier to address this.
-	 */
-	virt_mb();
-
-	pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
-	/* If the other end is not blocked on write don't bother. */
-	if (pending_sz == 0)
-		return;
-
-	cur_write_sz = hv_get_bytes_to_write(rbi);
-
-	if (cur_write_sz < pending_sz)
-		return;
-
-	cached_write_sz = hv_get_cached_bytes_to_write(rbi);
-	if (cached_write_sz < pending_sz)
-		vmbus_setevent(channel);
-
-	return;
-}
-
 /*
  * Mask off host interrupt callback notifications
  */
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 03/15] netvsc: make sure napi enabled before vmbus_open
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 01/15] vmbus: simplify hv_ringbuffer_read Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 02/15] vmbus: fix unnecessary signal events as result of NAPI Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 04/15] netvsc: don't reacquire rtnl on device removal Stephen Hemminger
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

This fixes a race where vmbus callback for new packet arriving
could occur before NAPI is initialized.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/netvsc.c       | 8 +++++---
 drivers/net/hyperv/rndis_filter.c | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 8108e119d8a5..385809512c9f 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1327,6 +1327,10 @@ int netvsc_device_add(struct hv_device *device,
 		nvchan->channel = device->channel;
 	}
 
+	/* Enable NAPI handler before init callbacks */
+	netif_napi_add(ndev, &net_device->chan_table[0].napi,
+		       netvsc_poll, NAPI_POLL_WEIGHT);
+
 	/* Open the channel */
 	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
 			 ring_size * PAGE_SIZE, NULL, 0,
@@ -1334,6 +1338,7 @@ int netvsc_device_add(struct hv_device *device,
 			 net_device->chan_table);
 
 	if (ret != 0) {
+		netif_napi_del(&net_device->chan_table[0].napi);
 		netdev_err(ndev, "unable to open channel: %d\n", ret);
 		goto cleanup;
 	}
@@ -1341,9 +1346,6 @@ int netvsc_device_add(struct hv_device *device,
 	/* Channel is opened */
 	netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
 
-	/* Enable NAPI handler for init callbacks */
-	netif_napi_add(ndev, &net_device->chan_table[0].napi,
-		       netvsc_poll, NAPI_POLL_WEIGHT);
 	napi_enable(&net_device->chan_table[0].napi);
 
 	/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index ab92c3c95951..f9d5b0b8209a 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1018,7 +1018,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 	if (ret == 0)
 		napi_enable(&nvchan->napi);
 	else
-		netdev_err(ndev, "sub channel open failed (%d)\n", ret);
+		netif_napi_del(&nvchan->napi);
 
 	if (refcount_dec_and_test(&nvscdev->sc_offered))
 		complete(&nvscdev->channel_init_wait);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 04/15] netvsc: don't reacquire rtnl on device removal
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (2 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 03/15] netvsc: make sure napi enabled before vmbus_open Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 05/15] netvsc: optimize avail percent calculation Stephen Hemminger
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

Since rtnl_lock is already held in netvsc_remove, it is easier
to just call unregister_netdevice which expects RTNL.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/netvsc_drv.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 4421a6d00375..3fef45421634 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1630,10 +1630,9 @@ static int netvsc_remove(struct hv_device *dev)
 	 */
 	rtnl_lock();
 	rndis_filter_device_remove(dev, ndev_ctx->nvdev);
+	unregister_netdevice(net);
 	rtnl_unlock();
 
-	unregister_netdev(net);
-
 	hv_set_drvdata(dev, NULL);
 
 	free_netdev(net);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 05/15] netvsc: optimize avail percent calculation
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (3 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 04/15] netvsc: don't reacquire rtnl on device removal Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 06/15] netvsc: prefetch the first incoming ring element Stephen Hemminger
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

Only need to look at write space (not read space) when computing
percent available.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/netvsc.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 385809512c9f..ee5f8c520977 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -598,14 +598,13 @@ void netvsc_device_remove(struct hv_device *device)
  * Get the percentage of available bytes to write in the ring.
  * The return value is in range from 0 to 100.
  */
-static inline u32 hv_ringbuf_avail_percent(
-		struct hv_ring_buffer_info *ring_info)
+static inline
+u32 hv_ringbuf_avail_percent(const struct vmbus_channel *channel)
 {
-	u32 avail_read, avail_write;
+	const struct hv_ring_buffer_info *ring_info = &channel->outbound;
+	u32 avail_write = hv_get_bytes_to_write(ring_info);
 
-	hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);
-
-	return avail_write * 100 / ring_info->ring_datasize;
+	return (avail_write * 100) / ring_info->ring_datasize;
 }
 
 static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
@@ -655,7 +654,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
 		wake_up(&net_device->wait_drain);
 
 	if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
-	    (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
+	    (hv_ringbuf_avail_percent(channel) > RING_AVAIL_PERCENT_HIWATER ||
 	     queue_sends < 1))
 		netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
 }
@@ -764,7 +763,7 @@ static inline int netvsc_send_pkt(
 	u64 req_id;
 	int ret;
 	struct hv_page_buffer *pgbuf;
-	u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
+	u32 ring_avail = hv_ringbuf_avail_percent(out_channel);
 
 	nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
 	if (skb != NULL) {
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 06/15] netvsc: prefetch the first incoming ring element
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (4 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 05/15] netvsc: optimize avail percent calculation Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 07/15] netvsc: convert ring_size to unsigned Stephen Hemminger
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

In the interrupt handler, prefetch the first incoming ring element
so that it is in cache by the time NAPI poll gets to it.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/netvsc.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index ee5f8c520977..cee8e020ac56 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -29,6 +29,8 @@
 #include <linux/netdevice.h>
 #include <linux/if_ether.h>
 #include <linux/vmalloc.h>
+#include <linux/prefetch.h>
+
 #include <asm/sync_bitops.h>
 
 #include "hyperv_net.h"
@@ -1280,10 +1282,15 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 void netvsc_channel_cb(void *context)
 {
 	struct netvsc_channel *nvchan = context;
+	struct vmbus_channel *channel = nvchan->channel;
+	struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+	/* preload first vmpacket descriptor */
+	prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
 
 	if (napi_schedule_prep(&nvchan->napi)) {
 		/* disable interupts from host */
-		hv_begin_read(&nvchan->channel->inbound);
+		hv_begin_read(rbi);
 
 		__napi_schedule(&nvchan->napi);
 	}
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 07/15] netvsc: convert ring_size to unsigned
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (5 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 06/15] netvsc: prefetch the first incoming ring element Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 08/15] netvsc: allow overriding send/recv buffer size Stephen Hemminger
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

The ring size module parameter is an unsigned value, not a signed integer.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h | 5 ++---
 drivers/net/hyperv/netvsc.c     | 2 +-
 drivers/net/hyperv/netvsc_drv.c | 8 ++++----
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 262b2ea576a3..2b4a9b058f6d 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -147,7 +147,7 @@ struct hv_netvsc_packet {
 struct netvsc_device_info {
 	unsigned char mac_adr[ETH_ALEN];
 	bool link_state;	/* 0 - link up, 1 - link down */
-	int  ring_size;
+	u32  ring_size;
 	u32  max_num_vrss_chns;
 	u32  num_chn;
 };
@@ -766,8 +766,7 @@ struct netvsc_device {
 	/* Holds rndis device info */
 	void *extension;
 
-	int ring_size;
-
+	u32 ring_size;
 	u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
 	u32 pkt_align; /* alignment bytes, e.g. 8 */
 
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index cee8e020ac56..480bd7704b68 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1304,7 +1304,7 @@ int netvsc_device_add(struct hv_device *device,
 		      const struct netvsc_device_info *device_info)
 {
 	int i, ret = 0;
-	int ring_size = device_info->ring_size;
+	u32 ring_size = device_info->ring_size;
 	struct netvsc_device *net_device;
 	struct net_device *ndev = hv_get_drvdata(device);
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 3fef45421634..fb743c78f3dc 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -43,8 +43,8 @@
 #define RING_SIZE_MIN 64
 #define LINKCHANGE_INT (2 * HZ)
 
-static int ring_size = 128;
-module_param(ring_size, int, S_IRUGO);
+static unsigned int ring_size = 128;
+module_param(ring_size, uint, 0444);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
 
 static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
@@ -1713,8 +1713,8 @@ static int __init netvsc_drv_init(void)
 
 	if (ring_size < RING_SIZE_MIN) {
 		ring_size = RING_SIZE_MIN;
-		pr_info("Increased ring_size to %d (min allowed)\n",
-			ring_size);
+		pr_notice("Increased ring_size to %u (min allowed)\n",
+			  ring_size);
 	}
 	ret = vmbus_driver_register(&netvsc_drv);
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 08/15] netvsc: allow overriding send/recv buffer size
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (6 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 07/15] netvsc: convert ring_size to unsigned Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 09/15] netvsc: optimize netvsc_send_pkt Stephen Hemminger
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

The default size of the send and receive buffer areas for host DMA
is much larger than it needs to be. Experimentation shows that
a much smaller buffer still keeps the same performance; change
from 16M buffer to 4M receive and 1M send.

Make the size a module parameter so that it can be adjusted
as needed for testing or special needs. It would have been
better to use ethtool to control this but ethtool rx/tx parameters
are in number of descriptors not bytes.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h |  2 ++
 drivers/net/hyperv/netvsc.c     | 18 ++++++++++--------
 drivers/net/hyperv/netvsc_drv.c | 40 +++++++++++++++++++++++++++++++++++-----
 3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 2b4a9b058f6d..21666df4cd35 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -148,6 +148,8 @@ struct netvsc_device_info {
 	unsigned char mac_adr[ETH_ALEN];
 	bool link_state;	/* 0 - link up, 1 - link down */
 	u32  ring_size;
+	u32  recv_buf_size;
+	u32  send_buf_size;
 	u32  max_num_vrss_chns;
 	u32  num_chn;
 };
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 480bd7704b68..59ca5fd6797d 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -488,16 +488,16 @@ static int negotiate_nvsp_ver(struct hv_device *device,
 	return ret;
 }
 
-static int netvsc_connect_vsp(struct hv_device *device)
+static int netvsc_connect_vsp(struct hv_device *device,
+			      const struct netvsc_device_info *device_info)
 {
-	int ret;
 	struct netvsc_device *net_device;
 	struct nvsp_message *init_packet;
-	int ndis_version;
 	const u32 ver_list[] = {
 		NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
 		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
-	int i;
+	u32 max_recv_buf_size, ndis_version;
+	int i, ret;
 
 	net_device = get_outbound_net_device(device);
 	if (!net_device)
@@ -546,10 +546,12 @@ static int netvsc_connect_vsp(struct hv_device *device)
 
 	/* Post the big receive buffer to NetVSP */
 	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
-		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
+		max_recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
 	else
-		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
-	net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
+		max_recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+
+	net_device->recv_buf_size = min(max_recv_buf_size, device_info->recv_buf_size);
+	net_device->send_buf_size = min_t(u32, NETVSC_SEND_BUFFER_SIZE, device_info->send_buf_size);
 
 	ret = netvsc_init_buf(device);
 
@@ -1360,7 +1362,7 @@ int netvsc_device_add(struct hv_device *device,
 	rcu_assign_pointer(net_device_ctx->nvdev, net_device);
 
 	/* Connect with the NetVsp */
-	ret = netvsc_connect_vsp(device);
+	ret = netvsc_connect_vsp(device, device_info);
 	if (ret != 0) {
 		netdev_err(ndev,
 			"unable to connect to NetVSP - %d\n", ret);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index fb743c78f3dc..ef3a3a46790f 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -40,13 +40,23 @@
 
 #include "hyperv_net.h"
 
-#define RING_SIZE_MIN 64
+#define RING_SIZE_MIN	 64
+#define RECV_BUFFER_MIN	 16
+#define SEND_BUFFER_MIN	 4
 #define LINKCHANGE_INT (2 * HZ)
 
 static unsigned int ring_size = 128;
 module_param(ring_size, uint, 0444);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
 
+static unsigned int recv_buffer_size = (4 * 1024 * 1024) / PAGE_SIZE;
+module_param(recv_buffer_size, uint, 0444);
+MODULE_PARM_DESC(recv_buffer_size, "Receive buffer size (# of pages)");
+
+static unsigned int send_buffer_size = (1024 * 1024) / PAGE_SIZE;
+module_param(send_buffer_size, uint, 0444);
+MODULE_PARM_DESC(send_buffer_size, "Send buffer size (# of pages)");
+
 static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
 				NETIF_MSG_LINK | NETIF_MSG_IFUP |
 				NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR |
@@ -764,8 +774,8 @@ static void netvsc_get_channels(struct net_device *net,
 	}
 }
 
-static int netvsc_set_queues(struct net_device *net, struct hv_device *dev,
-			     u32 num_chn)
+static int netvsc_set_queues(struct net_device *net, struct netvsc_device *nvdev,
+			     struct hv_device *dev, u32 num_chn)
 {
 	struct netvsc_device_info device_info;
 	int ret;
@@ -774,6 +784,8 @@ static int netvsc_set_queues(struct net_device *net, struct hv_device *dev,
 	device_info.num_chn = num_chn;
 	device_info.ring_size = ring_size;
 	device_info.max_num_vrss_chns = num_chn;
+	device_info.send_buf_size = nvdev->send_buf_size;
+	device_info.recv_buf_size = nvdev->recv_buf_size;
 
 	ret = rndis_filter_device_add(dev, &device_info);
 	if (ret)
@@ -824,11 +836,11 @@ static int netvsc_set_channels(struct net_device *net,
 
 	rndis_filter_device_remove(dev, nvdev);
 
-	ret = netvsc_set_queues(net, dev, count);
+	ret = netvsc_set_queues(net, nvdev, dev, count);
 	if (ret == 0)
 		nvdev->num_chn = count;
 	else
-		netvsc_set_queues(net, dev, nvdev->num_chn);
+		netvsc_set_queues(net, nvdev, dev, nvdev->num_chn);
 
 	if (was_running)
 		ret = netvsc_open(net);
@@ -917,6 +929,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 	device_info.ring_size = ring_size;
 	device_info.num_chn = nvdev->num_chn;
 	device_info.max_num_vrss_chns = nvdev->num_chn;
+	device_info.send_buf_size = nvdev->send_buf_size;
+	device_info.recv_buf_size = nvdev->recv_buf_size;
 
 	rndis_filter_device_remove(hdev, nvdev);
 
@@ -1568,6 +1582,9 @@ static int netvsc_probe(struct hv_device *dev,
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.ring_size = ring_size;
 	device_info.num_chn = VRSS_CHANNEL_DEFAULT;
+	device_info.send_buf_size = send_buffer_size * PAGE_SIZE;
+	device_info.recv_buf_size = recv_buffer_size * PAGE_SIZE;
+
 	ret = rndis_filter_device_add(dev, &device_info);
 	if (ret != 0) {
 		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
@@ -1716,6 +1733,19 @@ static int __init netvsc_drv_init(void)
 		pr_notice("Increased ring_size to %u (min allowed)\n",
 			  ring_size);
 	}
+
+	if (recv_buffer_size < RECV_BUFFER_MIN) {
+		recv_buffer_size = RECV_BUFFER_MIN;
+		pr_notice("Increased receive buffer size to %u (min allowed)\n",
+			  recv_buffer_size);
+	}
+
+	if (send_buffer_size < SEND_BUFFER_MIN) {
+		send_buffer_size = SEND_BUFFER_MIN;
+		pr_notice("Increased receive buffer size to %u (min allowed)\n",
+			  send_buffer_size);
+	}
+
 	ret = vmbus_driver_register(&netvsc_drv);
 
 	if (ret)
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 09/15] netvsc: optimize netvsc_send_pkt
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (7 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 08/15] netvsc: allow overriding send/recv buffer size Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 10/15] netvsc: replace modulus with mask for alignment Stephen Hemminger
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

Hand-optimize netvsc_send_pkt by adding likely/unlikely annotations.
Also don't print the packet pointer in the warning message; instead dump
the queue id, send buffer index and packet length (rate limited).

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/netvsc.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 59ca5fd6797d..d9bd1a2db4db 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -751,9 +751,9 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 	return msg_size;
 }
 
-static inline int netvsc_send_pkt(
+static int netvsc_send_pkt(
 	struct hv_device *device,
-	struct hv_netvsc_packet *packet,
+	const struct hv_netvsc_packet *packet,
 	struct netvsc_device *net_device,
 	struct hv_page_buffer **pb,
 	struct sk_buff *skb)
@@ -766,7 +766,6 @@ static inline int netvsc_send_pkt(
 	struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
 	u64 req_id;
 	int ret;
-	struct hv_page_buffer *pgbuf;
 	u32 ring_avail = hv_ringbuf_avail_percent(out_channel);
 
 	nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
@@ -788,10 +787,12 @@ static inline int netvsc_send_pkt(
 
 	req_id = (ulong)skb;
 
-	if (out_channel->rescind)
+	if (unlikely(out_channel->rescind))
 		return -ENODEV;
 
 	if (packet->page_buf_cnt) {
+		struct hv_page_buffer *pgbuf;
+
 		pgbuf = packet->cp_partial ? (*pb) +
 			packet->rmsg_pgcnt : (*pb);
 		ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
@@ -809,20 +810,23 @@ static inline int netvsc_send_pkt(
 					   VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	}
 
-	if (ret == 0) {
-		atomic_inc_return(&nvchan->queue_sends);
+	if (likely(ret == 0)) {
+		atomic_inc(&nvchan->queue_sends);
 
 		if (ring_avail < RING_AVAIL_PERCENT_LOWATER)
 			netif_tx_stop_queue(txq);
-	} else if (ret == -EAGAIN) {
+	} else if (likely(ret == -EAGAIN)) {
 		netif_tx_stop_queue(txq);
 		if (atomic_read(&nvchan->queue_sends) < 1) {
 			netif_tx_wake_queue(txq);
 			ret = -ENOSPC;
 		}
 	} else {
-		netdev_err(ndev, "Unable to send packet %p ret %d\n",
-			   packet, ret);
+		if (net_ratelimit())
+			netdev_warn(ndev,
+				    "Unable to send packet qid %u index %d len %u (%d)\n",
+				    packet->q_idx, packet->send_buf_index,
+				    packet->total_data_buflen, ret);
 	}
 
 	return ret;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 10/15] netvsc: replace modulus with mask for alignment
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (8 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 09/15] netvsc: optimize netvsc_send_pkt Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 11/15] netvsc: reduce unnecessary memset Stephen Hemminger
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

Since packet alignment (pkt_align) is always a power of 2, it
is safe to replace the expensive modulus (divide) with a bit mask.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/netvsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index d9bd1a2db4db..767ff20d659e 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -722,7 +722,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 	int i;
 	u32 msg_size = 0;
 	u32 padding = 0;
-	u32 remain = packet->total_data_buflen % net_device->pkt_align;
+	u32 remain = packet->total_data_buflen & (net_device->pkt_align - 1);
 	u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
 		packet->page_buf_cnt;
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 11/15] netvsc: reduce unnecessary memset
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (9 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 10/15] netvsc: replace modulus with mask for alignment Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 12/15] netvsc: size receive completion ring based on receive area Stephen Hemminger
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

Only part of the headroom may be used on a typical packet. Avoid doing
a memset of the whole area.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/netvsc_drv.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index ef3a3a46790f..d9a690ad7cd5 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -195,6 +195,7 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
 	ppi->size = ppi_size;
 	ppi->type = pkt_type;
 	ppi->ppi_offset = sizeof(struct rndis_per_packet_info);
+	memset(ppi + 1, 0, ppi_size - sizeof(struct rndis_per_packet_info));
 
 	rndis_pkt->per_pkt_info_len += ppi_size;
 
@@ -461,12 +462,12 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 
 	rndis_msg = (struct rndis_message *)skb->head;
 
-	memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);
-
 	/* Add the rndis header */
 	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
 	rndis_msg->msg_len = packet->total_data_buflen;
+
 	rndis_pkt = &rndis_msg->msg.pkt;
+	memset(rndis_pkt, 0, sizeof(*rndis_pkt));
 	rndis_pkt->data_offset = sizeof(struct rndis_packet);
 	rndis_pkt->data_len = packet->total_data_buflen;
 	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 12/15] netvsc: size receive completion ring based on receive area
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (10 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 11/15] netvsc: reduce unnecessary memset Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 13/15] netvsc: convert open count from atomic to refcount Stephen Hemminger
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

Now that the receive area size is parameterized, the size of the ring
for receive completions also needs to be adjusted based on the receive area.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h   |  4 +---
 drivers/net/hyperv/netvsc.c       | 22 ++++++++++++++--------
 drivers/net/hyperv/rndis_filter.c |  5 ++---
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 21666df4cd35..5627003bd8b6 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -653,13 +653,11 @@ struct recv_comp_data {
 	u32 status;
 };
 
-/* Netvsc Receive Slots Max */
-#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1)
-
 struct multi_recv_comp {
 	void *buf; /* queued receive completions */
 	u32 first; /* first data entry */
 	u32 next; /* next entry for writing */
+	u32 size; /* number of slots in ring */
 };
 
 struct netvsc_stats {
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 767ff20d659e..56e0721f703c 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -61,16 +61,22 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
 			       VM_PKT_DATA_INBAND, 0);
 }
 
-static struct netvsc_device *alloc_net_device(void)
+static struct netvsc_device *alloc_net_device(u32 recvslot_max)
 {
 	struct netvsc_device *net_device;
+	struct multi_recv_comp *mrc;
 
 	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
 	if (!net_device)
 		return NULL;
 
-	net_device->chan_table[0].mrc.buf
-		= vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
+	mrc = &net_device->chan_table[0].mrc;
+	mrc->size = recvslot_max;
+	mrc->buf = vzalloc(recvslot_max * sizeof(struct recv_comp_data));
+	if (!mrc->buf) {
+		kfree(net_device);
+		return NULL;
+	}
 
 	init_waitqueue_head(&net_device->wait_drain);
 	net_device->destroy = false;
@@ -993,10 +999,10 @@ static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
 	u32 first = mrc->first;
 	u32 next = mrc->next;
 
-	*filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
+	*filled = (first > next) ? mrc->size - first + next :
 		  next - first;
 
-	*avail = NETVSC_RECVSLOT_MAX - *filled - 1;
+	*avail = mrc->size - *filled - 1;
 }
 
 /* Read the first filled slot, no change to index */
@@ -1022,7 +1028,7 @@ static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
 	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
 	int num_recv;
 
-	mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX;
+	mrc->first = (mrc->first + 1) % mrc->size;
 
 	num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
 
@@ -1077,7 +1083,7 @@ static inline struct recv_comp_data *get_recv_comp_slot(
 
 	next = mrc->next;
 	rcd = mrc->buf + next * sizeof(struct recv_comp_data);
-	mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX;
+	mrc->next = (next + 1) % mrc->size;
 
 	atomic_inc(&nvdev->num_outstanding_recvs);
 
@@ -1315,7 +1321,7 @@ int netvsc_device_add(struct hv_device *device,
 	struct net_device *ndev = hv_get_drvdata(device);
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 
-	net_device = alloc_net_device();
+	net_device = alloc_net_device(device_info->recv_buf_size / ETH_DATA_LEN + 1);
 	if (!net_device)
 		return -ENOMEM;
 
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index f9d5b0b8209a..bfeaa0549f7f 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -996,9 +996,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 		return;
 
 	nvchan = nvscdev->chan_table + chn_index;
-	nvchan->mrc.buf
-		= vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
-
+	nvchan->mrc.size = nvscdev->recv_buf_size / ETH_DATA_LEN + 1;
+	nvchan->mrc.buf = vzalloc(nvchan->mrc.size * sizeof(struct recv_comp_data));
 	if (!nvchan->mrc.buf)
 		return;
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 13/15] netvsc: convert open count from atomic to refcount
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (11 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 12/15] netvsc: size receive completion ring based on receive area Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH net-next 14/15] netvsc: optimize receive completions Stephen Hemminger
                   ` (2 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

Refcount provides wraparound protection.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h   | 2 +-
 drivers/net/hyperv/netvsc.c       | 3 ++-
 drivers/net/hyperv/rndis_filter.c | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 5627003bd8b6..29555317ca05 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -772,7 +772,7 @@ struct netvsc_device {
 
 	atomic_t num_outstanding_recvs;
 
-	atomic_t open_cnt;
+	refcount_t open_cnt;
 
 	struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
 
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 56e0721f703c..eb9f3e517fa5 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -80,7 +80,8 @@ static struct netvsc_device *alloc_net_device(u32 recvslot_max)
 
 	init_waitqueue_head(&net_device->wait_drain);
 	net_device->destroy = false;
-	atomic_set(&net_device->open_cnt, 0);
+
+	refcount_set(&net_device->open_cnt, 0);
 	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 	init_completion(&net_device->channel_init_wait);
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index bfeaa0549f7f..2a89bbd6e42b 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1259,7 +1259,7 @@ int rndis_filter_open(struct netvsc_device *nvdev)
 	if (!nvdev)
 		return -EINVAL;
 
-	if (atomic_inc_return(&nvdev->open_cnt) != 1)
+	if (refcount_inc_not_zero(&nvdev->open_cnt))
 		return 0;
 
 	return rndis_filter_open_device(nvdev->extension);
@@ -1270,7 +1270,7 @@ int rndis_filter_close(struct netvsc_device *nvdev)
 	if (!nvdev)
 		return -EINVAL;
 
-	if (atomic_dec_return(&nvdev->open_cnt) != 0)
+	if (refcount_dec_not_one(&nvdev->open_cnt))
 		return 0;
 
 	return rndis_filter_close_device(nvdev->extension);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH net-next 14/15] netvsc: optimize receive completions
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (12 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 13/15] netvsc: convert open count from atomic to refcount Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:01 ` [PATCH 15/15] netvsc: use vzalloc_node for receive completion data Stephen Hemminger
  2017-05-03 23:35 ` [PATCH net-next 00/15] netvsc: misc patches David Miller
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

Handle receive completions better:
 * format message directly in ring rather than in different bookkeeping structure
 * eliminate atomic operation
 * get rid of modulus (divide) on ring wrap
 * avoid potential stall if ring gets full
 * don't make ring element opaque

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h   |  16 +++-
 drivers/net/hyperv/netvsc.c       | 168 +++++++++++---------------------------
 drivers/net/hyperv/rndis_filter.c |  11 +--
 3 files changed, 64 insertions(+), 131 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 29555317ca05..a4417100a040 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -650,16 +650,24 @@ struct multi_send_data {
 
 struct recv_comp_data {
 	u64 tid; /* transaction id */
-	u32 status;
+	struct  {
+		struct nvsp_message_header hdr;
+		u32 status;
+	} msg __packed;
 };
 
 struct multi_recv_comp {
-	void *buf; /* queued receive completions */
-	u32 first; /* first data entry */
-	u32 next; /* next entry for writing */
+	struct recv_comp_data *ring;
+	u32 read;
+	u32 write;
 	u32 size; /* number of slots in ring */
 };
 
+static inline bool recv_complete_ring_empty(const struct multi_recv_comp *mrc)
+{
+	return mrc->read == mrc->write;
+}
+
 struct netvsc_stats {
 	u64 packets;
 	u64 bytes;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index eb9f3e517fa5..2938f1a2b765 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -72,8 +72,8 @@ static struct netvsc_device *alloc_net_device(u32 recvslot_max)
 
 	mrc = &net_device->chan_table[0].mrc;
 	mrc->size = recvslot_max;
-	mrc->buf = vzalloc(recvslot_max * sizeof(struct recv_comp_data));
-	if (!mrc->buf) {
+	mrc->ring = vzalloc(recvslot_max * sizeof(struct recv_comp_data));
+	if (!mrc->ring) {
 		kfree(net_device);
 		return NULL;
 	}
@@ -96,7 +96,7 @@ static void free_netvsc_device(struct rcu_head *head)
 	int i;
 
 	for (i = 0; i < VRSS_CHANNEL_MAX; i++)
-		vfree(nvdev->chan_table[i].mrc.buf);
+		vfree(nvdev->chan_table[i].mrc.ring);
 
 	kfree(nvdev);
 }
@@ -974,120 +974,51 @@ int netvsc_send(struct hv_device *device,
 	return ret;
 }
 
-static int netvsc_send_recv_completion(struct vmbus_channel *channel,
-				       u64 transaction_id, u32 status)
-{
-	struct nvsp_message recvcompMessage;
-	int ret;
-
-	recvcompMessage.hdr.msg_type =
-				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
-
-	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
-
-	/* Send the completion */
-	ret = vmbus_sendpacket(channel, &recvcompMessage,
-			       sizeof(struct nvsp_message_header) + sizeof(u32),
-			       transaction_id, VM_PKT_COMP, 0);
-
-	return ret;
-}
-
-static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
-					u32 *filled, u32 *avail)
-{
-	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
-	u32 first = mrc->first;
-	u32 next = mrc->next;
-
-	*filled = (first > next) ? mrc->size - first + next :
-		  next - first;
-
-	*avail = mrc->size - *filled - 1;
-}
 
-/* Read the first filled slot, no change to index */
-static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
-							 *nvdev, u16 q_idx)
+/* Check and send pending recv completions */
+static int send_receive_comp(struct netvsc_device *nvdev,
+			     struct vmbus_channel *channel, u16 q_idx)
 {
 	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
-	u32 filled, avail;
 
-	if (unlikely(!mrc->buf))
-		return NULL;
+	while (!recv_complete_ring_empty(mrc)) {
+		struct recv_comp_data *rcd = mrc->ring + mrc->read;
+		int ret;
 
-	count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
-	if (!filled)
-		return NULL;
+		ret = vmbus_sendpacket(channel, &rcd->msg, sizeof(rcd->msg),
+				       rcd->tid, VM_PKT_COMP, 0);
 
-	return mrc->buf + mrc->first * sizeof(struct recv_comp_data);
-}
+		/* if ring to host gets full, retry later */
+		if (unlikely(ret != 0))
+			return ret;
 
-/* Put the first filled slot back to available pool */
-static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
-{
-	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
-	int num_recv;
-
-	mrc->first = (mrc->first + 1) % mrc->size;
-
-	num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
+		if (++mrc->read == mrc->size)
+			mrc->read = 0;
+	}
 
-	if (nvdev->destroy && num_recv == 0)
+	/* ring now empty */
+	if (unlikely(nvdev->destroy))
 		wake_up(&nvdev->wait_drain);
+	return 0;
 }
 
-/* Check and send pending recv completions */
-static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
-				 struct vmbus_channel *channel, u16 q_idx)
-{
-	struct recv_comp_data *rcd;
-	int ret;
-
-	while (true) {
-		rcd = read_recv_comp_slot(nvdev, q_idx);
-		if (!rcd)
-			break;
-
-		ret = netvsc_send_recv_completion(channel, rcd->tid,
-						  rcd->status);
-		if (ret)
-			break;
-
-		put_recv_comp_slot(nvdev, q_idx);
-	}
-}
-
-#define NETVSC_RCD_WATERMARK 80
-
 /* Get next available slot */
-static inline struct recv_comp_data *get_recv_comp_slot(
-	struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
+static struct recv_comp_data *
+get_recv_comp_slot(struct netvsc_device *nvdev,
+		   struct vmbus_channel *channel, u16 q_idx)
 {
 	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
-	u32 filled, avail, next;
 	struct recv_comp_data *rcd;
+	u32 next = mrc->write;
 
-	if (unlikely(!nvdev->recv_section))
-		return NULL;
-
-	if (unlikely(!mrc->buf))
-		return NULL;
-
-	if (atomic_read(&nvdev->num_outstanding_recvs) >
-	    nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100)
-		netvsc_chk_recv_comp(nvdev, channel, q_idx);
+	if (++next == mrc->size)
+		next = 0;
 
-	count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
-	if (!avail)
+	if (unlikely(next == mrc->read))
 		return NULL;
 
-	next = mrc->next;
-	rcd = mrc->buf + next * sizeof(struct recv_comp_data);
-	mrc->next = (next + 1) % mrc->size;
-
-	atomic_inc(&nvdev->num_outstanding_recvs);
-
+	rcd = mrc->ring + mrc->write;
+	mrc->write = next;
 	return rcd;
 }
 
@@ -1104,9 +1035,8 @@ static int netvsc_receive(struct net_device *ndev,
 	u16 q_idx = channel->offermsg.offer.sub_channel_index;
 	char *recv_buf = net_device->recv_buf;
 	u32 status = NVSP_STAT_SUCCESS;
-	int i;
-	int count = 0;
-	int ret;
+	struct recv_comp_data *rcd;
+	int i, count = 0;
 
 	/* Make sure this is a valid nvsp packet */
 	if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
@@ -1137,25 +1067,16 @@ static int netvsc_receive(struct net_device *ndev,
 					      channel, data, buflen);
 	}
 
-	if (net_device->chan_table[q_idx].mrc.buf) {
-		struct recv_comp_data *rcd;
-
-		rcd = get_recv_comp_slot(net_device, channel, q_idx);
-		if (rcd) {
-			rcd->tid = vmxferpage_packet->d.trans_id;
-			rcd->status = status;
-		} else {
-			netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
-				   q_idx, vmxferpage_packet->d.trans_id);
-		}
+	rcd = get_recv_comp_slot(net_device, channel, q_idx);
+	if (likely(rcd)) {
+		rcd->tid = vmxferpage_packet->d.trans_id;
+		rcd->msg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
+		rcd->msg.status = status;
 	} else {
-		ret = netvsc_send_recv_completion(channel,
-						  vmxferpage_packet->d.trans_id,
-						  status);
-		if (ret)
-			netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
-				   q_idx, vmxferpage_packet->d.trans_id, ret);
+		netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+			   q_idx, vmxferpage_packet->d.trans_id);
 	}
+
 	return count;
 }
 
@@ -1258,6 +1179,9 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 	struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
 	int work_done = 0;
 
+	/* If ring has leftover completions flush them now */
+	send_receive_comp(net_device, channel, q_idx);
+
 	/* If starting a new interval */
 	if (!nvchan->desc)
 		nvchan->desc = hv_pkt_iter_first(channel);
@@ -1270,14 +1194,14 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 
 	hv_pkt_iter_close(channel);
 
-	netvsc_chk_recv_comp(net_device, channel, q_idx);
-
-	/* If receive ring was exhausted
+	/* If all receive completions sent to host
+	 * and budget was not used up
 	 * and not doing busy poll
 	 * then re-enable host interrupts
 	 *  and reschedule if ring is not empty.
 	 */
-	if (work_done < budget &&
+	if (send_receive_comp(net_device, channel, q_idx) == 0 &&
+	    work_done < budget &&
 	    napi_complete_done(napi, work_done) &&
 	    hv_end_read(&channel->inbound) != 0) {
 		/* special case if new messages are available */
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 2a89bbd6e42b..1b8ce9bc0ce7 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -901,12 +901,12 @@ static bool netvsc_device_idle(const struct netvsc_device *nvdev)
 {
 	int i;
 
-	if (atomic_read(&nvdev->num_outstanding_recvs) > 0)
-		return false;
-
 	for (i = 0; i < nvdev->num_chn; i++) {
 		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
 
+		if (!recv_complete_ring_empty(&nvchan->mrc))
+			return false;
+
 		if (atomic_read(&nvchan->queue_sends) > 0)
 			return false;
 	}
@@ -997,8 +997,9 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 
 	nvchan = nvscdev->chan_table + chn_index;
 	nvchan->mrc.size = nvscdev->recv_buf_size / ETH_DATA_LEN + 1;
-	nvchan->mrc.buf = vzalloc(nvchan->mrc.size * sizeof(struct recv_comp_data));
-	if (!nvchan->mrc.buf)
+	nvchan->mrc.ring = vzalloc(nvchan->mrc.size
+				   * sizeof(struct recv_comp_data));
+	if (!nvchan->mrc.ring)
 		return;
 
 	/* Because the device uses NAPI, all the interrupt batching and
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 15/15] netvsc: use vzalloc_node for receive completion data
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (13 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH net-next 14/15] netvsc: optimize receive completions Stephen Hemminger
@ 2017-05-03 23:01 ` Stephen Hemminger
  2017-05-03 23:35 ` [PATCH net-next 00/15] netvsc: misc patches David Miller
  15 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2017-05-03 23:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger

Put the receive completion ring on the NUMA node of the CPU
assigned to the channel.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h   |  3 +++
 drivers/net/hyperv/netvsc.c       | 30 +++++++++++++++++++++++-------
 drivers/net/hyperv/rndis_filter.c |  8 ++++----
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index a4417100a040..779c77a1638b 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -668,6 +668,9 @@ static inline bool recv_complete_ring_empty(const struct multi_recv_comp *mrc)
 	return mrc->read == mrc->write;
 }
 
+int netvsc_recv_comp_alloc(const struct vmbus_channel *chan,
+			   struct multi_recv_comp *mrc, u32 recv_slots);
+
 struct netvsc_stats {
 	u64 packets;
 	u64 bytes;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 2938f1a2b765..ee42aa56460c 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -61,19 +61,33 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
 			       VM_PKT_DATA_INBAND, 0);
 }
 
-static struct netvsc_device *alloc_net_device(u32 recvslot_max)
+int netvsc_recv_comp_alloc(const struct vmbus_channel *channel,
+			   struct multi_recv_comp *mrc, u32 recv_slots)
+{
+	int node = cpu_to_node(channel->target_cpu);
+	size_t size = recv_slots * sizeof(struct recv_comp_data);
+
+	mrc->ring = vzalloc_node(size, node);
+	if (!mrc->ring)
+		mrc->ring = vzalloc(size);
+	if (!mrc->ring)
+		return -ENOMEM;
+
+	mrc->size = recv_slots;
+	return 0;
+}
+
+static struct netvsc_device *alloc_net_device(const struct vmbus_channel *chan,
+					      u32 recvslot_max)
 {
 	struct netvsc_device *net_device;
-	struct multi_recv_comp *mrc;
 
 	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
 	if (!net_device)
 		return NULL;
 
-	mrc = &net_device->chan_table[0].mrc;
-	mrc->size = recvslot_max;
-	mrc->ring = vzalloc(recvslot_max * sizeof(struct recv_comp_data));
-	if (!mrc->ring) {
+	if (netvsc_recv_comp_alloc(chan, &net_device->chan_table[0].mrc,
+				   recvslot_max) != 0) {
 		kfree(net_device);
 		return NULL;
 	}
@@ -1246,7 +1260,9 @@ int netvsc_device_add(struct hv_device *device,
 	struct net_device *ndev = hv_get_drvdata(device);
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 
-	net_device = alloc_net_device(device_info->recv_buf_size / ETH_DATA_LEN + 1);
+	net_device = alloc_net_device(device->channel,
+				      device_info->recv_buf_size
+				      / ETH_DATA_LEN + 1);
 	if (!net_device)
 		return -ENOMEM;
 
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 1b8ce9bc0ce7..da4992b26397 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -996,10 +996,10 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 		return;
 
 	nvchan = nvscdev->chan_table + chn_index;
-	nvchan->mrc.size = nvscdev->recv_buf_size / ETH_DATA_LEN + 1;
-	nvchan->mrc.ring = vzalloc(nvchan->mrc.size
-				   * sizeof(struct recv_comp_data));
-	if (!nvchan->mrc.ring)
+	ret = netvsc_recv_comp_alloc(new_sc,
+				     &nvchan->mrc,
+				     nvscdev->recv_buf_size / ETH_DATA_LEN + 1);
+	if (ret)
 		return;
 
 	/* Because the device uses NAPI, all the interrupt batching and
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next 00/15] netvsc: misc patches
  2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
                   ` (14 preceding siblings ...)
  2017-05-03 23:01 ` [PATCH 15/15] netvsc: use vzalloc_node for receive completion data Stephen Hemminger
@ 2017-05-03 23:35 ` David Miller
  15 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2017-05-03 23:35 UTC (permalink / raw)
  To: stephen; +Cc: netdev, sthemmin

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Wed,  3 May 2017 16:01:02 -0700

> Mostly these are performance related. There is also one bug fix for
> incorrect handling of NAPI on device removal

Stephen, the net-next tree is closed as we are in the merge window.

Please resubmit this when the net-next tree opens back up.

Thank you.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2017-05-03 23:35 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-03 23:01 [PATCH net-next 00/15] netvsc: misc patches Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 01/15] vmbus: simplify hv_ringbuffer_read Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 02/15] vmbus: fix unnecessary signal events as result of NAPI Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 03/15] netvsc: make sure napi enabled before vmbus_open Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 04/15] netvsc: don't reacquire rtnl on device removal Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 05/15] netvsc: optimize avail percent calculation Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 06/15] netvsc: prefetch the first incoming ring element Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 07/15] netvsc: convert ring_size to unsigned Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 08/15] netvsc: allow overriding send/recv buffer size Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 09/15] netvsc: optimize netvsc_send_pkt Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 10/15] netvsc: replace modulus with mask for alignment Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 11/15] netvsc: reduce unnecessary memset Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 12/15] netvsc: size receive completion ring based on receive area Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 13/15] netvsc: convert open count from atomic to refcount Stephen Hemminger
2017-05-03 23:01 ` [PATCH net-next 14/15] netvsc: optimize receive completions Stephen Hemminger
2017-05-03 23:01 ` [PATCH 15/15] netvsc: use vzalloc_node for receive completion data Stephen Hemminger
2017-05-03 23:35 ` [PATCH net-next 00/15] netvsc: misc patches David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).