Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next RFC 1/5] vhost: split out ring head fetching logic
From: Jason Wang @ 2017-09-22  8:02 UTC (permalink / raw)
  To: mst, jasowang, virtualization, netdev, linux-kernel; +Cc: kvm
In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com>

This patch splits out ring head fetching logic and leave the
descriptor fetching and translation logic. This makes it is possible
to batch fetching the descriptor indices.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 75 +++++++++++++++++++++++++++++++++------------------
 drivers/vhost/vhost.h |  5 ++++
 2 files changed, 54 insertions(+), 26 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index d6dbb28..f87ec75 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1984,41 +1984,23 @@ static int get_indirect(struct vhost_virtqueue *vq,
 	return 0;
 }
 
-/* This looks in the virtqueue and for the first available buffer, and converts
- * it to an iovec for convenient access.  Since descriptors consist of some
- * number of output then some number of input descriptors, it's actually two
- * iovecs, but we pack them into one and note how many of each there were.
- *
- * This function returns the descriptor number found, or vq->num (which is
- * never a valid descriptor number) if none was found.  A negative code is
- * returned on error. */
-int vhost_get_vq_desc(struct vhost_virtqueue *vq,
-		      struct iovec iov[], unsigned int iov_size,
-		      unsigned int *out_num, unsigned int *in_num,
-		      struct vhost_log *log, unsigned int *log_num)
+static unsigned int vhost_get_vq_head(struct vhost_virtqueue *vq, int *err)
 {
-	struct vring_desc desc;
-	unsigned int i, head, found = 0;
-	u16 last_avail_idx;
-	__virtio16 avail_idx;
-	__virtio16 ring_head;
-	int ret, access;
-
-	/* Check it isn't doing very strange things with descriptor numbers. */
-	last_avail_idx = vq->last_avail_idx;
+	u16 last_avail_idx = vq->last_avail_idx;
+	__virtio16 avail_idx, ring_head;
 
 	if (vq->avail_idx == vq->last_avail_idx) {
 		if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
 			vq_err(vq, "Failed to access avail idx at %p\n",
 				&vq->avail->idx);
-			return -EFAULT;
+			goto err;
 		}
 		vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
 
 		if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
 			vq_err(vq, "Guest moved used index from %u to %u",
 				last_avail_idx, vq->avail_idx);
-			return -EFAULT;
+			goto err;
 		}
 
 		/* If there's nothing new since last we looked, return
@@ -2040,13 +2022,35 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 		vq_err(vq, "Failed to read head: idx %d address %p\n",
 		       last_avail_idx,
 		       &vq->avail->ring[last_avail_idx % vq->num]);
-		return -EFAULT;
+		goto err;
 	}
 
-	head = vhost16_to_cpu(vq, ring_head);
+	return vhost16_to_cpu(vq, ring_head);
+err:
+	*err = -EFAULT;
+	return 0;
+}
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access.  Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found.  A negative code is
+ * returned on error. */
+int __vhost_get_vq_desc(struct vhost_virtqueue *vq,
+			struct iovec iov[], unsigned int iov_size,
+			unsigned int *out_num, unsigned int *in_num,
+			struct vhost_log *log, unsigned int *log_num,
+			__virtio16 head)
+{
+	struct vring_desc desc;
+	unsigned int i, found = 0;
+	int ret = 0, access;
 
 	/* If their number is silly, that's an error. */
-	if (unlikely(head >= vq->num)) {
+	if (unlikely(head > vq->num)) {
 		vq_err(vq, "Guest says index %u > %u is available",
 		       head, vq->num);
 		return -EINVAL;
@@ -2133,6 +2137,25 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 	BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
 	return head;
 }
+EXPORT_SYMBOL_GPL(__vhost_get_vq_desc);
+
+int vhost_get_vq_desc(struct vhost_virtqueue *vq,
+		      struct iovec iov[], unsigned int iov_size,
+		      unsigned int *out_num, unsigned int *in_num,
+		      struct vhost_log *log, unsigned int *log_num)
+{
+	int ret = 0;
+	unsigned int head = vhost_get_vq_head(vq, &ret);
+
+	if (ret)
+		return ret;
+
+	if (head == vq->num)
+		return head;
+
+	return __vhost_get_vq_desc(vq, iov, iov_size, out_num, in_num,
+				   log, log_num, head);
+}
 EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
 
 /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index d59a9cc..39ff897 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -191,6 +191,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
 		      struct iovec iov[], unsigned int iov_count,
 		      unsigned int *out_num, unsigned int *in_num,
 		      struct vhost_log *log, unsigned int *log_num);
+int __vhost_get_vq_desc(struct vhost_virtqueue *vq,
+			struct iovec iov[], unsigned int iov_count,
+			unsigned int *out_num, unsigned int *in_num,
+			struct vhost_log *log, unsigned int *log_num,
+			__virtio16 ring_head);
 void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
 
 int vhost_vq_init_access(struct vhost_virtqueue *);
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next RFC 2/5] vhost: introduce helper to prefetch desc index
From: Jason Wang @ 2017-09-22  8:02 UTC (permalink / raw)
  To: mst, jasowang, virtualization, netdev, linux-kernel; +Cc: kvm
In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com>

This patch introduces vhost_prefetch_desc_indices() which could batch
descriptor indices fetching and used ring updating. This intends to
reduce the cache misses of indices fetching and updating and reduce
cache line bounce when virtqueue is almost full. copy_to_user() was
used in order to benefit from modern cpus that support fast string
copy. Batched virtqueue processing will be the first user.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/vhost/vhost.h |  3 +++
 2 files changed, 58 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index f87ec75..8424166d 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2437,6 +2437,61 @@ struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
 }
 EXPORT_SYMBOL_GPL(vhost_dequeue_msg);
 
+int vhost_prefetch_desc_indices(struct vhost_virtqueue *vq,
+				struct vring_used_elem *heads,
+				u16 num, bool used_update)
+{
+	int ret, ret2;
+	u16 last_avail_idx, last_used_idx, total, copied;
+	__virtio16 avail_idx;
+	struct vring_used_elem __user *used;
+	int i;
+
+	if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
+		vq_err(vq, "Failed to access avail idx at %p\n",
+		       &vq->avail->idx);
+		return -EFAULT;
+	}
+	last_avail_idx = vq->last_avail_idx & (vq->num - 1);
+	vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+	total = vq->avail_idx - vq->last_avail_idx;
+	ret = total = min(total, num);
+
+	for (i = 0; i < ret; i++) {
+		ret2 = vhost_get_avail(vq, heads[i].id,
+				      &vq->avail->ring[last_avail_idx]);
+		if (unlikely(ret2)) {
+			vq_err(vq, "Failed to get descriptors\n");
+			return -EFAULT;
+		}
+		last_avail_idx = (last_avail_idx + 1) & (vq->num - 1);
+	}
+
+	if (!used_update)
+		return ret;
+
+	last_used_idx = vq->last_used_idx & (vq->num - 1);
+	while (total) {
+		copied = min((u16)(vq->num - last_used_idx), total);
+		ret2 = vhost_copy_to_user(vq,
+					  &vq->used->ring[last_used_idx],
+					  &heads[ret - total],
+					  copied * sizeof(*used));
+
+		if (unlikely(ret2)) {
+			vq_err(vq, "Failed to update used ring!\n");
+			return -EFAULT;
+		}
+
+		last_used_idx = 0;
+		total -= copied;
+	}
+
+	/* Only get avail ring entries after they have been exposed by guest. */
+	smp_rmb();
+	return ret;
+}
+EXPORT_SYMBOL(vhost_prefetch_desc_indices);
 
 static int __init vhost_init(void)
 {
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 39ff897..16c2cb6 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -228,6 +228,9 @@ ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
 ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
 			     struct iov_iter *from);
 int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
+int vhost_prefetch_desc_indices(struct vhost_virtqueue *vq,
+				struct vring_used_elem *heads,
+				u16 num, bool used_update);
 
 #define vq_err(vq, fmt, ...) do {                                  \
 		pr_debug(pr_fmt(fmt), ##__VA_ARGS__);       \
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next RFC 3/5] vhost: introduce vhost_add_used_idx()
From: Jason Wang @ 2017-09-22  8:02 UTC (permalink / raw)
  To: mst, jasowang, virtualization, netdev, linux-kernel; +Cc: kvm
In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com>

This patch introduces a helper which just increase the used idx. This
will be used in pair with vhost_prefetch_desc_indices() by batching
code.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 33 +++++++++++++++++++++++++++++++++
 drivers/vhost/vhost.h |  1 +
 2 files changed, 34 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 8424166d..6532cda 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2178,6 +2178,39 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
 }
 EXPORT_SYMBOL_GPL(vhost_add_used);
 
+int vhost_add_used_idx(struct vhost_virtqueue *vq, int n)
+{
+	u16 old, new;
+
+	old = vq->last_used_idx;
+	new = (vq->last_used_idx += n);
+	/* If the driver never bothers to signal in a very long while,
+	 * used index might wrap around. If that happens, invalidate
+	 * signalled_used index we stored. TODO: make sure driver
+	 * signals at least once in 2^16 and remove this.
+	 */
+	if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
+		vq->signalled_used_valid = false;
+
+	/* Make sure buffer is written before we update index. */
+	smp_wmb();
+	if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
+			   &vq->used->idx)) {
+		vq_err(vq, "Failed to increment used idx");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used)) {
+		/* Log used index update. */
+		log_write(vq->log_base,
+			  vq->log_addr + offsetof(struct vring_used, idx),
+			  sizeof(vq->used->idx));
+		if (vq->log_ctx)
+			eventfd_signal(vq->log_ctx, 1);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vhost_add_used_idx);
+
 static int __vhost_add_used_n(struct vhost_virtqueue *vq,
 			    struct vring_used_elem *heads,
 			    unsigned count)
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 16c2cb6..5dd6c05 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -199,6 +199,7 @@ int __vhost_get_vq_desc(struct vhost_virtqueue *vq,
 void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
 
 int vhost_vq_init_access(struct vhost_virtqueue *);
+int vhost_add_used_idx(struct vhost_virtqueue *vq, int n);
 int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
 int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
 		     unsigned count);
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next RFC 4/5] vhost_net: rename VHOST_RX_BATCH to VHOST_NET_BATCH
From: Jason Wang @ 2017-09-22  8:02 UTC (permalink / raw)
  To: mst, jasowang, virtualization, netdev, linux-kernel; +Cc: kvm
In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com>

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 58585ec..c89640e 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -87,7 +87,7 @@ struct vhost_net_ubuf_ref {
 	struct vhost_virtqueue *vq;
 };
 
-#define VHOST_RX_BATCH 64
+#define VHOST_NET_BATCH 64
 struct vhost_net_buf {
 	struct sk_buff **queue;
 	int tail;
@@ -159,7 +159,7 @@ static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
 
 	rxq->head = 0;
 	rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue,
-					      VHOST_RX_BATCH);
+					      VHOST_NET_BATCH);
 	return rxq->tail;
 }
 
@@ -912,7 +912,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 		return -ENOMEM;
 	}
 
-	queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *),
+	queue = kmalloc_array(VHOST_NET_BATCH, sizeof(struct sk_buff *),
 			      GFP_KERNEL);
 	if (!queue) {
 		kfree(vqs);
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next RFC 5/5] vhost_net: basic tx virtqueue batched processing
From: Jason Wang @ 2017-09-22  8:02 UTC (permalink / raw)
  To: mst, jasowang, virtualization, netdev, linux-kernel; +Cc: kvm
In-Reply-To: <1506067355-5771-1-git-send-email-jasowang@redhat.com>

This patch implements basic batched processing of tx virtqueue by
prefetching desc indices and updating used ring in a batch. For
non-zerocopy case, vq->heads were used for storing the prefetched
indices and updating used ring. It is also a requirement for doing
more batching on top. For zerocopy case and for simplicity, batched
processing were simply disabled by only fetching and processing one
descriptor at a time, this could be optimized in the future.

XDP_DROP (without touching skb) on tun (with Moongen in guest) with
zercopy disabled:

Intel(R) Xeon(R) CPU E5-2650 0 @ 2.00GHz:
Before: 3.20Mpps
After:  3.90Mpps (+22%)

No differences were seen with zerocopy enabled.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c   | 215 ++++++++++++++++++++++++++++----------------------
 drivers/vhost/vhost.c |   2 +-
 2 files changed, 121 insertions(+), 96 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c89640e..c439892 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -408,27 +408,25 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 	return vhost_poll_start(poll, sock->file);
 }
 
-static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
-				    struct vhost_virtqueue *vq,
-				    struct iovec iov[], unsigned int iov_size,
-				    unsigned int *out_num, unsigned int *in_num)
+static bool vhost_net_tx_avail(struct vhost_net *net,
+			       struct vhost_virtqueue *vq)
 {
 	unsigned long uninitialized_var(endtime);
-	int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-				  out_num, in_num, NULL, NULL);
 
-	if (r == vq->num && vq->busyloop_timeout) {
-		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
-		while (vhost_can_busy_poll(vq->dev, endtime) &&
-		       vhost_vq_avail_empty(vq->dev, vq))
-			cpu_relax();
-		preempt_enable();
-		r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-				      out_num, in_num, NULL, NULL);
-	}
+	if (!vq->busyloop_timeout)
+		return false;
 
-	return r;
+	if (!vhost_vq_avail_empty(vq->dev, vq))
+		return true;
+
+	preempt_disable();
+	endtime = busy_clock() + vq->busyloop_timeout;
+	while (vhost_can_busy_poll(vq->dev, endtime) &&
+		vhost_vq_avail_empty(vq->dev, vq))
+		cpu_relax();
+	preempt_enable();
+
+	return !vhost_vq_avail_empty(vq->dev, vq);
 }
 
 static bool vhost_exceeds_maxpend(struct vhost_net *net)
@@ -446,8 +444,9 @@ static void handle_tx(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
 	struct vhost_virtqueue *vq = &nvq->vq;
+	struct vring_used_elem used, *heads = vq->heads;
 	unsigned out, in;
-	int head;
+	int avails, head;
 	struct msghdr msg = {
 		.msg_name = NULL,
 		.msg_namelen = 0,
@@ -461,6 +460,7 @@ static void handle_tx(struct vhost_net *net)
 	struct socket *sock;
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
 	bool zcopy, zcopy_used;
+	int i, batched = VHOST_NET_BATCH;
 
 	mutex_lock(&vq->mutex);
 	sock = vq->private_data;
@@ -475,6 +475,12 @@ static void handle_tx(struct vhost_net *net)
 	hdr_size = nvq->vhost_hlen;
 	zcopy = nvq->ubufs;
 
+	/* Disable zerocopy batched fetching for simplicity */
+	if (zcopy) {
+		heads = &used;
+		batched = 1;
+	}
+
 	for (;;) {
 		/* Release DMAs done buffers first */
 		if (zcopy)
@@ -486,95 +492,114 @@ static void handle_tx(struct vhost_net *net)
 		if (unlikely(vhost_exceeds_maxpend(net)))
 			break;
 
-		head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
-						ARRAY_SIZE(vq->iov),
-						&out, &in);
+		avails = vhost_prefetch_desc_indices(vq, heads, batched, !zcopy);
 		/* On error, stop handling until the next kick. */
-		if (unlikely(head < 0))
+		if (unlikely(avails < 0))
 			break;
-		/* Nothing new?  Wait for eventfd to tell us they refilled. */
-		if (head == vq->num) {
+		/* Nothing new?  Busy poll for a while or wait for
+		 * eventfd to tell us they refilled. */
+		if (!avails) {
+			if (vhost_net_tx_avail(net, vq))
+				continue;
 			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				vhost_disable_notify(&net->dev, vq);
 				continue;
 			}
 			break;
 		}
-		if (in) {
-			vq_err(vq, "Unexpected descriptor format for TX: "
-			       "out %d, int %d\n", out, in);
-			break;
-		}
-		/* Skip header. TODO: support TSO. */
-		len = iov_length(vq->iov, out);
-		iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
-		iov_iter_advance(&msg.msg_iter, hdr_size);
-		/* Sanity check */
-		if (!msg_data_left(&msg)) {
-			vq_err(vq, "Unexpected header len for TX: "
-			       "%zd expected %zd\n",
-			       len, hdr_size);
-			break;
-		}
-		len = msg_data_left(&msg);
-
-		zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
-				   && (nvq->upend_idx + 1) % UIO_MAXIOV !=
-				      nvq->done_idx
-				   && vhost_net_tx_select_zcopy(net);
-
-		/* use msg_control to pass vhost zerocopy ubuf info to skb */
-		if (zcopy_used) {
-			struct ubuf_info *ubuf;
-			ubuf = nvq->ubuf_info + nvq->upend_idx;
-
-			vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
-			vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
-			ubuf->callback = vhost_zerocopy_callback;
-			ubuf->ctx = nvq->ubufs;
-			ubuf->desc = nvq->upend_idx;
-			refcount_set(&ubuf->refcnt, 1);
-			msg.msg_control = ubuf;
-			msg.msg_controllen = sizeof(ubuf);
-			ubufs = nvq->ubufs;
-			atomic_inc(&ubufs->refcount);
-			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
-		} else {
-			msg.msg_control = NULL;
-			ubufs = NULL;
-		}
+		for (i = 0; i < avails; i++) {
+			head = __vhost_get_vq_desc(vq, vq->iov,
+						   ARRAY_SIZE(vq->iov),
+						   &out, &in, NULL, NULL,
+					       vhost16_to_cpu(vq, heads[i].id));
+			if (in) {
+				vq_err(vq, "Unexpected descriptor format for "
+					   "TX: out %d, int %d\n", out, in);
+				goto out;
+			}
 
-		total_len += len;
-		if (total_len < VHOST_NET_WEIGHT &&
-		    !vhost_vq_avail_empty(&net->dev, vq) &&
-		    likely(!vhost_exceeds_maxpend(net))) {
-			msg.msg_flags |= MSG_MORE;
-		} else {
-			msg.msg_flags &= ~MSG_MORE;
-		}
+			/* Skip header. TODO: support TSO. */
+			len = iov_length(vq->iov, out);
+			iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
+			iov_iter_advance(&msg.msg_iter, hdr_size);
+			/* Sanity check */
+			if (!msg_data_left(&msg)) {
+				vq_err(vq, "Unexpected header len for TX: "
+					"%zd expected %zd\n",
+					len, hdr_size);
+				goto out;
+			}
+			len = msg_data_left(&msg);
 
-		/* TODO: Check specific error and bomb out unless ENOBUFS? */
-		err = sock->ops->sendmsg(sock, &msg, len);
-		if (unlikely(err < 0)) {
+			zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
+				     && (nvq->upend_idx + 1) % UIO_MAXIOV !=
+					nvq->done_idx
+				     && vhost_net_tx_select_zcopy(net);
+
+			/* use msg_control to pass vhost zerocopy ubuf
+			 * info to skb
+			 */
 			if (zcopy_used) {
-				vhost_net_ubuf_put(ubufs);
-				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
-					% UIO_MAXIOV;
+				struct ubuf_info *ubuf;
+				ubuf = nvq->ubuf_info + nvq->upend_idx;
+
+				vq->heads[nvq->upend_idx].id =
+					cpu_to_vhost32(vq, head);
+				vq->heads[nvq->upend_idx].len =
+					VHOST_DMA_IN_PROGRESS;
+				ubuf->callback = vhost_zerocopy_callback;
+				ubuf->ctx = nvq->ubufs;
+				ubuf->desc = nvq->upend_idx;
+				refcount_set(&ubuf->refcnt, 1);
+				msg.msg_control = ubuf;
+				msg.msg_controllen = sizeof(ubuf);
+				ubufs = nvq->ubufs;
+				atomic_inc(&ubufs->refcount);
+				nvq->upend_idx =
+					(nvq->upend_idx + 1) % UIO_MAXIOV;
+			} else {
+				msg.msg_control = NULL;
+				ubufs = NULL;
+			}
+
+			total_len += len;
+			if (total_len < VHOST_NET_WEIGHT &&
+				!vhost_vq_avail_empty(&net->dev, vq) &&
+				likely(!vhost_exceeds_maxpend(net))) {
+				msg.msg_flags |= MSG_MORE;
+			} else {
+				msg.msg_flags &= ~MSG_MORE;
+			}
+
+			/* TODO: Check specific error and bomb out
+			 * unless ENOBUFS?
+			 */
+			err = sock->ops->sendmsg(sock, &msg, len);
+			if (unlikely(err < 0)) {
+				if (zcopy_used) {
+					vhost_net_ubuf_put(ubufs);
+					nvq->upend_idx =
+				   ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV;
+				}
+				vhost_discard_vq_desc(vq, 1);
+				goto out;
+			}
+			if (err != len)
+				pr_debug("Truncated TX packet: "
+					" len %d != %zd\n", err, len);
+			if (!zcopy) {
+				vhost_add_used_idx(vq, 1);
+				vhost_signal(&net->dev, vq);
+			} else if (!zcopy_used) {
+				vhost_add_used_and_signal(&net->dev,
+							  vq, head, 0);
+			} else
+				vhost_zerocopy_signal_used(net, vq);
+			vhost_net_tx_packet(net);
+			if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+				vhost_poll_queue(&vq->poll);
+				goto out;
 			}
-			vhost_discard_vq_desc(vq, 1);
-			break;
-		}
-		if (err != len)
-			pr_debug("Truncated TX packet: "
-				 " len %d != %zd\n", err, len);
-		if (!zcopy_used)
-			vhost_add_used_and_signal(&net->dev, vq, head, 0);
-		else
-			vhost_zerocopy_signal_used(net, vq);
-		vhost_net_tx_packet(net);
-		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-			vhost_poll_queue(&vq->poll);
-			break;
 		}
 	}
 out:
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 6532cda..8764df5 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -392,7 +392,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 		vq->indirect = kmalloc(sizeof *vq->indirect * UIO_MAXIOV,
 				       GFP_KERNEL);
 		vq->log = kmalloc(sizeof *vq->log * UIO_MAXIOV, GFP_KERNEL);
-		vq->heads = kmalloc(sizeof *vq->heads * UIO_MAXIOV, GFP_KERNEL);
+		vq->heads = kzalloc(sizeof *vq->heads * UIO_MAXIOV, GFP_KERNEL);
 		if (!vq->indirect || !vq->log || !vq->heads)
 			goto err_nomem;
 	}
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH net-next RFC 1/5] vhost: split out ring head fetching logic
From: Stefan Hajnoczi @ 2017-09-22  8:31 UTC (permalink / raw)
  To: Jason Wang; +Cc: mst, virtualization, netdev, linux-kernel, kvm
In-Reply-To: <1506067355-5771-2-git-send-email-jasowang@redhat.com>

On Fri, Sep 22, 2017 at 04:02:31PM +0800, Jason Wang wrote:
> +/* This looks in the virtqueue and for the first available buffer, and converts
> + * it to an iovec for convenient access.  Since descriptors consist of some
> + * number of output then some number of input descriptors, it's actually two
> + * iovecs, but we pack them into one and note how many of each there were.
> + *
> + * This function returns the descriptor number found, or vq->num (which is
> + * never a valid descriptor number) if none was found.  A negative code is
> + * returned on error. */
> +int __vhost_get_vq_desc(struct vhost_virtqueue *vq,
> +			struct iovec iov[], unsigned int iov_size,
> +			unsigned int *out_num, unsigned int *in_num,
> +			struct vhost_log *log, unsigned int *log_num,
> +			__virtio16 head)
[...]
> +int vhost_get_vq_desc(struct vhost_virtqueue *vq,
> +		      struct iovec iov[], unsigned int iov_size,
> +		      unsigned int *out_num, unsigned int *in_num,
> +		      struct vhost_log *log, unsigned int *log_num)

Please document vhost_get_vq_desc().

Please also explain the difference between __vhost_get_vq_desc() and
vhost_get_vq_desc() in the documentation.

^ permalink raw reply

* Re: [patch net-next 07/12] mlxsw: spectrum: Add the multicast routing offloading logic
From: Yotam Gigi @ 2017-09-22  8:36 UTC (permalink / raw)
  To: Andrew Lunn, Jiri Pirko; +Cc: netdev, davem, idosch, mlxsw
In-Reply-To: <20170921152654.GF27589@lunn.ch>

On 09/21/2017 06:26 PM, Andrew Lunn wrote:
>> +static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
>> +					   struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +	u64 packets, bytes;
>> +
>> +	if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
>> +		return;
>> +
>> +	mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets,
>> +				&bytes);
>> +
>> +	switch (mr_route->mr_table->proto) {
>> +	case MLXSW_SP_L3_PROTO_IPV4:
>> +		mr_route->mfc4->mfc_un.res.pkt = packets;
>> +		mr_route->mfc4->mfc_un.res.bytes = bytes;
> What about wrong_if and lastuse? 

wronf_if is updated by ipmr as it is trapped to the CPU. We did not
address lastuse currently, though it can be easily addressed here.

>
> Is an mfc with iif on the host, not the switch, not offloaded?


I am not sure I followed. What do you mean MFC with iif on the host? you mean
MFC with iif that is an external NIC which is not part of the spectrum ASIC? in
this case, the route will not be offloaded and all traffic will pass in slowpath.


>
>    Andrew

^ permalink raw reply

* Re: [PATCH 2/2] ip_tunnel: add mpls over gre encapsulation
From: Amine Kherbouche @ 2017-09-22  8:39 UTC (permalink / raw)
  To: Roopa Prabhu; +Cc: netdev@vger.kernel.org, xeb, David Lamparter
In-Reply-To: <CAJieiUicOTaxeT4t-y6QzoPibi1i7WJ31X978OP6sqjMjVuVNw@mail.gmail.com>



On 09/22/2017 12:35 AM, Roopa Prabhu wrote:
>> > diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
>> > index 36ea2ad..060ed07 100644
>> > --- a/net/mpls/af_mpls.c
>> > +++ b/net/mpls/af_mpls.c
>> > @@ -16,6 +16,7 @@
>> >  #include <net/arp.h>
>> >  #include <net/ip_fib.h>
>> >  #include <net/netevent.h>
>> > +#include <net/ip_tunnels.h>
>> >  #include <net/netns/generic.h>
>> >  #if IS_ENABLED(CONFIG_IPV6)
>> >  #include <net/ipv6.h>
>> > @@ -39,6 +40,40 @@ static int one = 1;
>> >  static int label_limit = (1 << 20) - 1;
>> >  static int ttl_max = 255;
>> >
>> > +size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
>> > +{
>> > +       return sizeof(struct mpls_shim_hdr);
>> > +}
>> > +
>> > +int ipgre_mpls_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
>> > +                           u8 *protocol, struct flowi4 *fl4)
>> > +{
>> > +       return 0;
>> > +}
>
> any reason you are supporting only rx ?

Tx path doesn't need changes, all gre hdr fields remain the same except 
for the protocol type which is loaded from skb->protocol this last is 
set by the mpls stack before entering gre device.

^ permalink raw reply

* Re: [net-next 1/2] dummy: add device MTU validation check
From: Sabrina Dubroca @ 2017-09-22  8:56 UTC (permalink / raw)
  To: Eric Dumazet, Jarod Wilson; +Cc: Zhang Shengju, davem, willemb, stephen, netdev
In-Reply-To: <1506006138.29839.132.camel@edumazet-glaptop3.roam.corp.google.com>

2017-09-21, 08:02:18 -0700, Eric Dumazet wrote:
> On Thu, 2017-09-21 at 21:32 +0800, Zhang Shengju wrote:
> > Currently, any mtu value can be assigned when adding a new dummy device:
> > [~]# ip link add name dummy1 mtu 100000 type dummy
> > [~]# ip link show dummy1
> > 15: dummy1: <BROADCAST,NOARP> mtu 100000 qdisc noop state DOWN mode DEFAULT group default qlen 1000
> >     link/ether 0a:61:6b:16:14:ce brd ff:ff:ff:ff:ff:ff
> > 
> > This patch adds device MTU validation check.
> 
> What is wrong with big MTU on dummy ?

It looks like the "centralize MTU checking" series broke that, but
only for changing the MTU on an existing dummy device. Commit
a52ad514fdf3 defined min_mtu/max_mtu in ether_setup, which dummy uses,
but there was no MTU check in dummy prior to that commit.


> If this is a generic rule, this check should belong in core network
> stack.
> 
> > 
> > Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
> > ---
> >  drivers/net/dummy.c | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
> > index e31ab3b..0276b2b 100644
> > --- a/drivers/net/dummy.c
> > +++ b/drivers/net/dummy.c
> > @@ -365,6 +365,14 @@ static int dummy_validate(struct nlattr *tb[], struct nlattr *data[],
> >  		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
> >  			return -EADDRNOTAVAIL;
> >  	}
> > +
> > +	if (tb[IFLA_MTU]) {
> > +		u32 mtu = nla_get_u32(tb[IFLA_MTU]);
> 
> You do not verify/validate nla_len(tb[IFLA_MTU]).

I think ifla_policy already performs that check:


static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[...]
	[IFLA_MTU]		= { .type = NLA_U32 },


static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
[...]
	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);


-- 
Sabrina

^ permalink raw reply

* Re: [PATCH net-next RFC 2/5] vhost: introduce helper to prefetch desc index
From: Stefan Hajnoczi @ 2017-09-22  9:02 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, virtualization, linux-kernel, kvm, mst
In-Reply-To: <1506067355-5771-3-git-send-email-jasowang@redhat.com>

On Fri, Sep 22, 2017 at 04:02:32PM +0800, Jason Wang wrote:
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index f87ec75..8424166d 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -2437,6 +2437,61 @@ struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
>  }
>  EXPORT_SYMBOL_GPL(vhost_dequeue_msg);
>  
> +int vhost_prefetch_desc_indices(struct vhost_virtqueue *vq,
> +				struct vring_used_elem *heads,
> +				u16 num, bool used_update)

Missing doc comment.

> +{
> +	int ret, ret2;
> +	u16 last_avail_idx, last_used_idx, total, copied;
> +	__virtio16 avail_idx;
> +	struct vring_used_elem __user *used;
> +	int i;

The following variable names are a little confusing:

last_avail_idx vs vq->last_avail_idx.  last_avail_idx is a wrapped
avail->ring[] index, vq->last_avail_idx is a free-running counter.  The
same for last_used_idx vs vq->last_used_idx.

num argument vs vq->num.  The argument could be called nheads instead to
make it clear that this is heads[] and not the virtqueue size.

Not a bug but it took me a while to figure out what was going on.

^ permalink raw reply

* Re: [PATCH net-next RFC 3/5] vhost: introduce vhost_add_used_idx()
From: Stefan Hajnoczi @ 2017-09-22  9:07 UTC (permalink / raw)
  To: Jason Wang; +Cc: mst, virtualization, netdev, linux-kernel, kvm
In-Reply-To: <1506067355-5771-4-git-send-email-jasowang@redhat.com>

On Fri, Sep 22, 2017 at 04:02:33PM +0800, Jason Wang wrote:
> This patch introduces a helper which just increase the used idx. This
> will be used in pair with vhost_prefetch_desc_indices() by batching
> code.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/vhost/vhost.c | 33 +++++++++++++++++++++++++++++++++
>  drivers/vhost/vhost.h |  1 +
>  2 files changed, 34 insertions(+)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

^ permalink raw reply

* Re: [PATCH] wireless: iwlegacy:  make const array static to shink object code size Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit
From: Stanislaw Gruszka @ 2017-09-22  9:56 UTC (permalink / raw)
  To: Colin King
  Cc: Kalle Valo, linux-wireless, netdev, kernel-janitors, linux-kernel
In-Reply-To: <20170921225630.21916-1-colin.king@canonical.com>

On Thu, Sep 21, 2017 at 11:56:30PM +0100, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
> 
> Don't populate const array ac_to_fifo on the stack in an inlined
> function, instead make it static.  Makes the object code smaller
> by over 800 bytes:
> 
>    text	   data	    bss	    dec	    hex	filename
>  159029	  33154	   1216	 193399	  2f377	4965-mac.o
> 
>    text	   data	    bss	    dec	    hex	filename
>  158122	  33250	   1216	 192588	  2f04c	4965-mac.o
> 
> (gcc version 7.2.0 x86_64)
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Content type information was added at the end of the topic, but
I think Kalle can fix that when he will be committing the patch.

Acked-by: Stanislaw Gruszka <sgruszka@redhat.com>

^ permalink raw reply

* Re: [PATCH] wireless: iwlegacy: make const array static to shink object code size Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit
From: Joe Perches @ 2017-09-22 10:03 UTC (permalink / raw)
  To: Julia Lawall, Colin King
  Cc: Stanislaw Gruszka, Kalle Valo, linux-wireless, netdev,
	kernel-janitors, linux-kernel
In-Reply-To: <alpine.DEB.2.20.1709220921350.2024@hadrien>

On Fri, 2017-09-22 at 09:23 +0200, Julia Lawall wrote:
> 
> On Thu, 21 Sep 2017, Colin King wrote:
> 
> > From: Colin Ian King <colin.king@canonical.com>
> > 
> > Don't populate const array ac_to_fifo on the stack in an inlined
> > function, instead make it static.  Makes the object code smaller
> > by over 800 bytes:
> > 
> >    text	   data	    bss	    dec	    hex	filename
> >  159029	  33154	   1216	 193399	  2f377	4965-mac.o
> > 
> >    text	   data	    bss	    dec	    hex	filename
> >  158122	  33250	   1216	 192588	  2f04c	4965-mac.o
> > 
> > (gcc version 7.2.0 x86_64)
> 
> Gcc 7 must be much more aggressive about inlining than gcc 4.  Here is the
> change that I got on this file:
> 
>      text          data     bss     dec     hex filename
> -   76346          1494     152   77992   130a8 drivers/net/wireless/intel/iwlegacy/4965-mac.o
> +   76298          1494     152   77944   13078 drivers/net/wireless/intel/iwlegacy/4965-mac.o
> decrease of 48

More likely different CONFIG options.

e.g. allyesconfig vs defconfig with wireless

^ permalink raw reply

* Re: [PATCH] wireless: iwlegacy: make const array static to shink object code size Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit
From: Colin Ian King @ 2017-09-22 10:05 UTC (permalink / raw)
  To: Joe Perches, Julia Lawall
  Cc: Stanislaw Gruszka, Kalle Valo, linux-wireless, netdev,
	kernel-janitors, linux-kernel
In-Reply-To: <1506074588.12311.37.camel@perches.com>

On 22/09/17 11:03, Joe Perches wrote:
> On Fri, 2017-09-22 at 09:23 +0200, Julia Lawall wrote:
>>
>> On Thu, 21 Sep 2017, Colin King wrote:
>>
>>> From: Colin Ian King <colin.king@canonical.com>
>>>
>>> Don't populate const array ac_to_fifo on the stack in an inlined
>>> function, instead make it static.  Makes the object code smaller
>>> by over 800 bytes:
>>>
>>>    text	   data	    bss	    dec	    hex	filename
>>>  159029	  33154	   1216	 193399	  2f377	4965-mac.o
>>>
>>>    text	   data	    bss	    dec	    hex	filename
>>>  158122	  33250	   1216	 192588	  2f04c	4965-mac.o
>>>
>>> (gcc version 7.2.0 x86_64)
>>
>> Gcc 7 must be much more aggressive about inlining than gcc 4.  Here is the
>> change that I got on this file:
>>
>>      text          data     bss     dec     hex filename
>> -   76346          1494     152   77992   130a8 drivers/net/wireless/intel/iwlegacy/4965-mac.o
>> +   76298          1494     152   77944   13078 drivers/net/wireless/intel/iwlegacy/4965-mac.o
>> decrease of 48
> 
> More likely different CONFIG options.
> 
> e.g. allyesconfig vs defconfig with wireless
> 
yup, I used allyesconfig

^ permalink raw reply

* Re: [PATCH] wireless: iwlegacy: make const array static to shink object code size Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit
From: Julia Lawall @ 2017-09-22 10:06 UTC (permalink / raw)
  To: Colin Ian King
  Cc: Joe Perches, Stanislaw Gruszka, Kalle Valo, linux-wireless,
	netdev, kernel-janitors, linux-kernel
In-Reply-To: <f6cb5b73-4ed5-92d9-3909-32b8e3aafc45@canonical.com>



On Fri, 22 Sep 2017, Colin Ian King wrote:

> On 22/09/17 11:03, Joe Perches wrote:
> > On Fri, 2017-09-22 at 09:23 +0200, Julia Lawall wrote:
> >>
> >> On Thu, 21 Sep 2017, Colin King wrote:
> >>
> >>> From: Colin Ian King <colin.king@canonical.com>
> >>>
> >>> Don't populate const array ac_to_fifo on the stack in an inlined
> >>> function, instead make it static.  Makes the object code smaller
> >>> by over 800 bytes:
> >>>
> >>>    text	   data	    bss	    dec	    hex	filename
> >>>  159029	  33154	   1216	 193399	  2f377	4965-mac.o
> >>>
> >>>    text	   data	    bss	    dec	    hex	filename
> >>>  158122	  33250	   1216	 192588	  2f04c	4965-mac.o
> >>>
> >>> (gcc version 7.2.0 x86_64)
> >>
> >> Gcc 7 must be much more aggressive about inlining than gcc 4.  Here is the
> >> change that I got on this file:
> >>
> >>      text          data     bss     dec     hex filename
> >> -   76346          1494     152   77992   130a8 drivers/net/wireless/intel/iwlegacy/4965-mac.o
> >> +   76298          1494     152   77944   13078 drivers/net/wireless/intel/iwlegacy/4965-mac.o
> >> decrease of 48
> >
> > More likely different CONFIG options.
> >
> > e.g. allyesconfig vs defconfig with wireless
> >
> yup, I used allyesconfig

Mine is also allyesconfig.

julia

>
> --
> To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* tools: selftests: psock_tpacket: skip un-supported tpacket_v3 test
From: Orson Zhai @ 2017-09-22 10:17 UTC (permalink / raw)
  To: Shuah Khan
  Cc: David S . Miller, milosz.wasilewski, sumit.semwal, netdev,
	linux-kselftest, Orson Zhai

The TPACKET_V3 test of PACKET_TX_RING will fail with kernel version
lower than v4.11. Supported code of tx ring was add with commit id
<7f953ab2ba46: af_packet: TX_RING support for TPACKET_V3> at Jan. 3
of 2017.

So skip this item test instead of reporting failing for old kernels.

Signed-off-by: Orson Zhai <orson.zhai@linaro.org>
---
 tools/testing/selftests/net/psock_tpacket.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 7f6cd9fdacf3..f0cfc18c3726 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -57,6 +57,7 @@
 #include <net/if.h>
 #include <inttypes.h>
 #include <poll.h>
+#include <errno.h>
 
 #include "psock_lib.h"
 
@@ -676,7 +677,7 @@ static void __v3_fill(struct ring *ring, unsigned int blocks, int type)
 	ring->flen = ring->req3.tp_block_size;
 }
 
-static void setup_ring(int sock, struct ring *ring, int version, int type)
+static int setup_ring(int sock, struct ring *ring, int version, int type)
 {
 	int ret = 0;
 	unsigned int blocks = 256;
@@ -703,7 +704,11 @@ static void setup_ring(int sock, struct ring *ring, int version, int type)
 
 	if (ret == -1) {
 		perror("setsockopt");
-		exit(1);
+		if (errno == EINVAL) {
+			printf("[SKIP] This type seems un-supported in current kernel, skipped.\n");
+			return -1;
+		} else
+			exit(1);
 	}
 
 	ring->rd_len = ring->rd_num * sizeof(*ring->rd);
@@ -715,6 +720,7 @@ static void setup_ring(int sock, struct ring *ring, int version, int type)
 
 	total_packets = 0;
 	total_bytes = 0;
+	return 0;
 }
 
 static void mmap_ring(int sock, struct ring *ring)
@@ -830,7 +836,12 @@ static int test_tpacket(int version, int type)
 
 	sock = pfsocket(version);
 	memset(&ring, 0, sizeof(ring));
-	setup_ring(sock, &ring, version, type);
+	if(setup_ring(sock, &ring, version, type)) {
+		/* skip test when error of invalid argument */
+		close(sock);
+		return 0;
+	}
+
 	mmap_ring(sock, &ring);
 	bind_ring(sock, &ring);
 	walk_ring(sock, &ring);
-- 
2.12.2

^ permalink raw reply related

* Re: [PATCH] wireless: iwlegacy:  make const array static to shink object code size Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit
From: Kalle Valo @ 2017-09-22 10:32 UTC (permalink / raw)
  To: Stanislaw Gruszka
  Cc: Colin King, linux-wireless, netdev, kernel-janitors, linux-kernel
In-Reply-To: <20170922095609.GA27551@redhat.com>

Stanislaw Gruszka <sgruszka@redhat.com> writes:

> On Thu, Sep 21, 2017 at 11:56:30PM +0100, Colin King wrote:
>> From: Colin Ian King <colin.king@canonical.com>
>> 
>> Don't populate const array ac_to_fifo on the stack in an inlined
>> function, instead make it static.  Makes the object code smaller
>> by over 800 bytes:
>> 
>>    text	   data	    bss	    dec	    hex	filename
>>  159029	  33154	   1216	 193399	  2f377	4965-mac.o
>> 
>>    text	   data	    bss	    dec	    hex	filename
>>  158122	  33250	   1216	 192588	  2f04c	4965-mac.o
>> 
>> (gcc version 7.2.0 x86_64)
>> 
>> Signed-off-by: Colin Ian King <colin.king@canonical.com>
>
> Content type information was added at the end of the topic, but
> I think Kalle can fix that when he will be committing the patch.

Yeah, I'll fix that when I commit this. But very good that you pointed
it out, I might miss stuff like this.

I'll also remove the "wireless:" prefix from the title.

-- 
Kalle Valo

^ permalink raw reply

* Re: [PATCH] wireless: iwlegacy: make const array static to shink object code size Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit
From: Joe Perches @ 2017-09-22 10:46 UTC (permalink / raw)
  To: Julia Lawall, Colin Ian King
  Cc: Stanislaw Gruszka, Kalle Valo, linux-wireless, netdev,
	kernel-janitors, linux-kernel
In-Reply-To: <alpine.DEB.2.20.1709221206050.3170@hadrien>

On Fri, 2017-09-22 at 12:06 +0200, Julia Lawall wrote:
> 
> On Fri, 22 Sep 2017, Colin Ian King wrote:
> 
> > On 22/09/17 11:03, Joe Perches wrote:
> > > On Fri, 2017-09-22 at 09:23 +0200, Julia Lawall wrote:
> > > > 
> > > > On Thu, 21 Sep 2017, Colin King wrote:
> > > > 
> > > > > From: Colin Ian King <colin.king@canonical.com>
> > > > > 
> > > > > Don't populate const array ac_to_fifo on the stack in an inlined
> > > > > function, instead make it static.  Makes the object code smaller
> > > > > by over 800 bytes:
> > > > > 
> > > > >    text	   data	    bss	    dec	    hex	filename
> > > > >  159029	  33154	   1216	 193399	  2f377	4965-mac.o
> > > > > 
> > > > >    text	   data	    bss	    dec	    hex	filename
> > > > >  158122	  33250	   1216	 192588	  2f04c	4965-mac.o
> > > > > 
> > > > > (gcc version 7.2.0 x86_64)
> > > > 
> > > > Gcc 7 must be much more aggressive about inlining than gcc 4.  Here is the
> > > > change that I got on this file:
> > > > 
> > > >      text          data     bss     dec     hex filename
> > > > -   76346          1494     152   77992   130a8 drivers/net/wireless/intel/iwlegacy/4965-mac.o
> > > > +   76298          1494     152   77944   13078 drivers/net/wireless/intel/iwlegacy/4965-mac.o
> > > > decrease of 48
> > > 
> > > More likely different CONFIG options.
> > > 
> > > e.g. allyesconfig vs defconfig with wireless
> > > 
> > 
> > yup, I used allyesconfig
> 
> Mine is also allyesconfig.

Odd.

Here is what I get: (Sorry, no gcc-7)

gcc4: gcc-4.9 (Ubuntu 4.9.4-2ubuntu1) 4.9.4
gcc5: gcc-5 (Ubuntu 5.4.1-8ubuntu1) 5.4.1 20170304
gcc6: gcc-6 (Ubuntu 6.3.0-12ubuntu2) 6.3.0 20170406

$ size drivers/net/wireless/intel/iwlegacy/4965-mac.o*
   text	   data	    bss	    dec	    hex	filename
 118559	   1766	    152	 120477	  1d69d	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc4.allyesconfig.new
 118623	   1766	    152	 120541	  1d6dd	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc4.allyesconfig.old
  51595	   1156	      0	  52751	   ce0f	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc4.defconfig.new
  51659	   1156	      0	  52815	   ce4f	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc4.defconfig.old
 141956	  29790	   1216	 172962	  2a3a2	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc5.allyesconfig.new
 142671	  29702	   1216	 173589	  2a615	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc5.allyesconfig.old
  51733	   1156	      0	  52889	   ce99	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc5.defconfig.new
  51813	   1156	      0	  52969	   cee9	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc5.defconfig.old
 152991	  29790	   1216	 183997	  2cebd	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc6.allyesconfig.new
 153722	  29702	   1216	 184640	  2d140	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc6.allyesconfig.old
  51813	   1156	      0	  52969	   cee9	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc6.defconfig.new
  51893	   1156	      0	  53049	   cf39	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc6.defconfig.old

^ permalink raw reply

* Re: [PATCH] wireless: iwlegacy: make const array static to shink object code size Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit
From: Julia Lawall @ 2017-09-22 10:54 UTC (permalink / raw)
  To: Joe Perches
  Cc: Colin Ian King, Stanislaw Gruszka, Kalle Valo, linux-wireless,
	netdev, kernel-janitors, linux-kernel
In-Reply-To: <1506077181.12311.39.camel@perches.com>

[-- Attachment #1: Type: text/plain, Size: 3584 bytes --]



On Fri, 22 Sep 2017, Joe Perches wrote:

> On Fri, 2017-09-22 at 12:06 +0200, Julia Lawall wrote:
> >
> > On Fri, 22 Sep 2017, Colin Ian King wrote:
> >
> > > On 22/09/17 11:03, Joe Perches wrote:
> > > > On Fri, 2017-09-22 at 09:23 +0200, Julia Lawall wrote:
> > > > >
> > > > > On Thu, 21 Sep 2017, Colin King wrote:
> > > > >
> > > > > > From: Colin Ian King <colin.king@canonical.com>
> > > > > >
> > > > > > Don't populate const array ac_to_fifo on the stack in an inlined
> > > > > > function, instead make it static.  Makes the object code smaller
> > > > > > by over 800 bytes:
> > > > > >
> > > > > >    text	   data	    bss	    dec	    hex	filename
> > > > > >  159029	  33154	   1216	 193399	  2f377	4965-mac.o
> > > > > >
> > > > > >    text	   data	    bss	    dec	    hex	filename
> > > > > >  158122	  33250	   1216	 192588	  2f04c	4965-mac.o
> > > > > >
> > > > > > (gcc version 7.2.0 x86_64)
> > > > >
> > > > > Gcc 7 must be much more aggressive about inlining than gcc 4.  Here is the
> > > > > change that I got on this file:
> > > > >
> > > > >      text          data     bss     dec     hex filename
> > > > > -   76346          1494     152   77992   130a8 drivers/net/wireless/intel/iwlegacy/4965-mac.o
> > > > > +   76298          1494     152   77944   13078 drivers/net/wireless/intel/iwlegacy/4965-mac.o
> > > > > decrease of 48
> > > >
> > > > More likely different CONFIG options.
> > > >
> > > > e.g. allyesconfig vs defconfig with wireless
> > > >
> > >
> > > yup, I used allyesconfig
> >
> > Mine is also allyesconfig.
>
> Odd.
>
> Here is what I get: (Sorry, no gcc-7)
>
> gcc4: gcc-4.9 (Ubuntu 4.9.4-2ubuntu1) 4.9.4
> gcc5: gcc-5 (Ubuntu 5.4.1-8ubuntu1) 5.4.1 20170304
> gcc6: gcc-6 (Ubuntu 6.3.0-12ubuntu2) 6.3.0 20170406
>
> $ size drivers/net/wireless/intel/iwlegacy/4965-mac.o*
>    text	   data	    bss	    dec	    hex	filename
>  118559	   1766	    152	 120477	  1d69d	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc4.allyesconfig.new
>  118623	   1766	    152	 120541	  1d6dd	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc4.allyesconfig.old

My gcc is 4.8.4 (Ubuntu 4.8.4-2ubuntu1~14.04.3)

You get a savings of 64 and I got a savings of 48, but it's not that big a
difference, compared to what the later versions give.

julia

>   51595	   1156	      0	  52751	   ce0f	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc4.defconfig.new
>   51659	   1156	      0	  52815	   ce4f	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc4.defconfig.old
>  141956	  29790	   1216	 172962	  2a3a2	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc5.allyesconfig.new
>  142671	  29702	   1216	 173589	  2a615	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc5.allyesconfig.old
>   51733	   1156	      0	  52889	   ce99	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc5.defconfig.new
>   51813	   1156	      0	  52969	   cee9	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc5.defconfig.old
>  152991	  29790	   1216	 183997	  2cebd	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc6.allyesconfig.new
>  153722	  29702	   1216	 184640	  2d140	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc6.allyesconfig.old
>   51813	   1156	      0	  52969	   cee9	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc6.defconfig.new
>   51893	   1156	      0	  53049	   cf39	drivers/net/wireless/intel/iwlegacy/4965-mac.o.gcc6.defconfig.old
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* [PATCH 0/5] use setup_timer() helper function.
From: Allen Pais @ 2017-09-22 10:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, sameo, Allen Pais

This series uses setup_timer() helper function. The series
addresses the files under net/*.



Allen Pais (5):
  net: nfc: hci: use setup_timer() helper.
  net: nfc: hci: llc_shdlc: use setup_timer() helper.
  net: af_packet: use setup_timer() helper.
  net: nfc: core: use setup_timer() helper.
  net: nfc: llcp_core: use setup_timer() helper.

 net/nfc/core.c          |  5 ++---
 net/nfc/hci/core.c      |  5 ++---
 net/nfc/hci/llc_shdlc.c | 15 ++++++---------
 net/nfc/llcp_core.c     | 10 ++++------
 net/packet/af_packet.c  |  4 +---
 5 files changed, 15 insertions(+), 24 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH 1/5] net: nfc: hci: use setup_timer() helper.
From: Allen Pais @ 2017-09-22 10:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506077902-1796-1-git-send-email-allen.lkml@gmail.com>

   Use setup_timer function instead of initializing timer with the
   function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
 net/nfc/hci/core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index b740fef..a8a6e78 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -1004,9 +1004,8 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
 
 	INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
 
-	init_timer(&hdev->cmd_timer);
-	hdev->cmd_timer.data = (unsigned long)hdev;
-	hdev->cmd_timer.function = nfc_hci_cmd_timeout;
+	setup_timer(&hdev->cmd_timer, nfc_hci_cmd_timeout,
+		    (unsigned long)hdev);
 
 	skb_queue_head_init(&hdev->rx_hcp_frags);
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH 2/5] net: nfc: hci: llc_shdlc: use setup_timer() helper.
From: Allen Pais @ 2017-09-22 10:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506077902-1796-1-git-send-email-allen.lkml@gmail.com>

   Use setup_timer function instead of initializing timer with the
   function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
 net/nfc/hci/llc_shdlc.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 17e59a0..58df37e 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -763,17 +763,14 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
 	mutex_init(&shdlc->state_mutex);
 	shdlc->state = SHDLC_DISCONNECTED;
 
-	init_timer(&shdlc->connect_timer);
-	shdlc->connect_timer.data = (unsigned long)shdlc;
-	shdlc->connect_timer.function = llc_shdlc_connect_timeout;
+	setup_timer(&shdlc->connect_timer, llc_shdlc_connect_timeout,
+		    (unsigned long)shdlc);
 
-	init_timer(&shdlc->t1_timer);
-	shdlc->t1_timer.data = (unsigned long)shdlc;
-	shdlc->t1_timer.function = llc_shdlc_t1_timeout;
+	setup_timer(&shdlc->t1_timer, llc_shdlc_t1_timeout,
+		    (unsigned long)shdlc);
 
-	init_timer(&shdlc->t2_timer);
-	shdlc->t2_timer.data = (unsigned long)shdlc;
-	shdlc->t2_timer.function = llc_shdlc_t2_timeout;
+	setup_timer(&shdlc->t2_timer, llc_shdlc_t2_timeout,
+		    (unsigned long)shdlc);
 
 	shdlc->w = SHDLC_MAX_WINDOW;
 	shdlc->srej_support = SHDLC_SREJ_SUPPORT;
-- 
2.7.4

^ permalink raw reply related

* [PATCH 3/5] net: af_packet: use setup_timer() helper.
From: Allen Pais @ 2017-09-22 10:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506077902-1796-1-git-send-email-allen.lkml@gmail.com>

   Use setup_timer function instead of initializing timer with the
   function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
 net/packet/af_packet.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c261729..1d3e3ce 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -544,9 +544,7 @@ static void prb_init_blk_timer(struct packet_sock *po,
 		struct tpacket_kbdq_core *pkc,
 		void (*func) (unsigned long))
 {
-	init_timer(&pkc->retire_blk_timer);
-	pkc->retire_blk_timer.data = (long)po;
-	pkc->retire_blk_timer.function = func;
+	setup_timer(&pkc->retire_blk_timer, func, (long)po);
 	pkc->retire_blk_timer.expires = jiffies;
 }
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH 4/5] net: nfc: core: use setup_timer() helper.
From: Allen Pais @ 2017-09-22 10:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506077902-1796-1-git-send-email-allen.lkml@gmail.com>

   Use setup_timer function instead of initializing timer with the
   function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
 net/nfc/core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/nfc/core.c b/net/nfc/core.c
index 5cf33df..e5e23c2 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1094,9 +1094,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
 	dev->targets_generation = 1;
 
 	if (ops->check_presence) {
-		init_timer(&dev->check_pres_timer);
-		dev->check_pres_timer.data = (unsigned long)dev;
-		dev->check_pres_timer.function = nfc_check_pres_timeout;
+		setup_timer(&dev->check_pres_timer, nfc_check_pres_timeout,
+			    (unsigned long)dev);
 
 		INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
 	}
-- 
2.7.4

^ permalink raw reply related

* [PATCH 5/5] net: nfc: llcp_core: use setup_timer() helper.
From: Allen Pais @ 2017-09-22 10:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506077902-1796-1-git-send-email-allen.lkml@gmail.com>

   Use setup_timer function instead of initializing timer with the
   function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
 net/nfc/llcp_core.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 02eef5c..7988185 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1573,9 +1573,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 	INIT_LIST_HEAD(&local->list);
 	kref_init(&local->ref);
 	mutex_init(&local->sdp_lock);
-	init_timer(&local->link_timer);
-	local->link_timer.data = (unsigned long) local;
-	local->link_timer.function = nfc_llcp_symm_timer;
+	setup_timer(&local->link_timer, nfc_llcp_symm_timer,
+		    (unsigned long)local);
 
 	skb_queue_head_init(&local->tx_queue);
 	INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
@@ -1601,9 +1600,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 
 	mutex_init(&local->sdreq_lock);
 	INIT_HLIST_HEAD(&local->pending_sdreqs);
-	init_timer(&local->sdreq_timer);
-	local->sdreq_timer.data = (unsigned long) local;
-	local->sdreq_timer.function = nfc_llcp_sdreq_timer;
+	setup_timer(&local->sdreq_timer, nfc_llcp_sdreq_timer,
+		    (unsigned long)local);
 	INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
 
 	list_add(&local->list, &llcp_devices);
-- 
2.7.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox