Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next V5 8/9] tap: support receiving skb from msg_control
From: Jason Wang @ 2017-05-17  4:14 UTC (permalink / raw)
  To: mst, netdev, linux-kernel; +Cc: Jason Wang
In-Reply-To: <1494994485-12994-1-git-send-email-jasowang@redhat.com>

This patch makes tap_recvmsg() can receive from skb from its caller
through msg_control. Vhost_net will be the first user.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tap.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index abdaf86..9af3239 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -824,15 +824,17 @@ static ssize_t tap_put_user(struct tap_queue *q,
 
 static ssize_t tap_do_read(struct tap_queue *q,
 			   struct iov_iter *to,
-			   int noblock)
+			   int noblock, struct sk_buff *skb)
 {
 	DEFINE_WAIT(wait);
-	struct sk_buff *skb;
 	ssize_t ret = 0;
 
 	if (!iov_iter_count(to))
 		return 0;
 
+	if (skb)
+		goto put;
+
 	while (1) {
 		if (!noblock)
 			prepare_to_wait(sk_sleep(&q->sk), &wait,
@@ -856,6 +858,7 @@ static ssize_t tap_do_read(struct tap_queue *q,
 	if (!noblock)
 		finish_wait(sk_sleep(&q->sk), &wait);
 
+put:
 	if (skb) {
 		ret = tap_put_user(q, skb, to);
 		if (unlikely(ret < 0))
@@ -872,7 +875,7 @@ static ssize_t tap_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	struct tap_queue *q = file->private_data;
 	ssize_t len = iov_iter_count(to), ret;
 
-	ret = tap_do_read(q, to, file->f_flags & O_NONBLOCK);
+	ret = tap_do_read(q, to, file->f_flags & O_NONBLOCK, NULL);
 	ret = min_t(ssize_t, ret, len);
 	if (ret > 0)
 		iocb->ki_pos = ret;
@@ -1155,7 +1158,8 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m,
 	int ret;
 	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
 		return -EINVAL;
-	ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT);
+	ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT,
+			  m->msg_control);
 	if (ret > total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next V5 1/9] ptr_ring: add ptr_ring_unconsume
From: Jason Wang @ 2017-05-17  4:14 UTC (permalink / raw)
  To: mst, netdev, linux-kernel; +Cc: Jason Wang
In-Reply-To: <1494994485-12994-1-git-send-email-jasowang@redhat.com>

From: "Michael S. Tsirkin" <mst@redhat.com>

Applications that consume a batch of entries in one go
can benefit from ability to return some of them back
into the ring.

Add an API for that - assuming there's space. If there's no space
naturally can't do this and have to drop entries, but this implies ring
is full so we'd likely drop some anyway.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 include/linux/ptr_ring.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6b2e0dd..796b90f 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -403,6 +403,61 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
 	return 0;
 }
 
+/*
+ * Return entries into ring. Destroy entries that don't fit.
+ *
+ * Note: this is expected to be a rare slow path operation.
+ *
+ * Note: producer lock is nested within consumer lock, so if you
+ * resize you must make sure all uses nest correctly.
+ * In particular if you consume ring in interrupt or BH context, you must
+ * disable interrupts/BH when doing so.
+ */
+static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
+				      void (*destroy)(void *))
+{
+	unsigned long flags;
+	int head;
+
+	spin_lock_irqsave(&r->consumer_lock, flags);
+	spin_lock(&r->producer_lock);
+
+	if (!r->size)
+		goto done;
+
+	/*
+	 * Clean out buffered entries (for simplicity). This way following code
+	 * can test entries for NULL and if not assume they are valid.
+	 */
+	head = r->consumer_head - 1;
+	while (likely(head >= r->consumer_tail))
+		r->queue[head--] = NULL;
+	r->consumer_tail = r->consumer_head;
+
+	/*
+	 * Go over entries in batch, start moving head back and copy entries.
+	 * Stop when we run into previously unconsumed entries.
+	 */
+	while (n) {
+		head = r->consumer_head - 1;
+		if (head < 0)
+			head = r->size - 1;
+		if (r->queue[head]) {
+			/* This batch entry will have to be destroyed. */
+			goto done;
+		}
+		r->queue[head] = batch[--n];
+		r->consumer_tail = r->consumer_head = head;
+	}
+
+done:
+	/* Destroy all entries left in the batch. */
+	while (n)
+		destroy(batch[--n]);
+	spin_unlock(&r->producer_lock);
+	spin_unlock_irqrestore(&r->consumer_lock, flags);
+}
+
 static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
 					   int size, gfp_t gfp,
 					   void (*destroy)(void *))
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next V5 5/9] tun: export skb_array
From: Jason Wang @ 2017-05-17  4:14 UTC (permalink / raw)
  To: mst, netdev, linux-kernel; +Cc: Jason Wang
In-Reply-To: <1494994485-12994-1-git-send-email-jasowang@redhat.com>

This patch exports skb_array through tun_get_skb_array(). Caller can
then manipulate skb array directly.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c      | 13 +++++++++++++
 include/linux/if_tun.h |  5 +++++
 2 files changed, 18 insertions(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index bbd707b..3cbfc5c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2626,6 +2626,19 @@ struct socket *tun_get_socket(struct file *file)
 }
 EXPORT_SYMBOL_GPL(tun_get_socket);
 
+struct skb_array *tun_get_skb_array(struct file *file)
+{
+	struct tun_file *tfile;
+
+	if (file->f_op != &tun_fops)
+		return ERR_PTR(-EINVAL);
+	tfile = file->private_data;
+	if (!tfile)
+		return ERR_PTR(-EBADFD);
+	return &tfile->tx_array;
+}
+EXPORT_SYMBOL_GPL(tun_get_skb_array);
+
 module_init(tun_init);
 module_exit(tun_cleanup);
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index ed6da2e..bf9bdf4 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -19,6 +19,7 @@
 
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
+struct skb_array *tun_get_skb_array(struct file *file);
 #else
 #include <linux/err.h>
 #include <linux/errno.h>
@@ -28,5 +29,9 @@ static inline struct socket *tun_get_socket(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
+static inline struct skb_array *tun_get_skb_array(struct file *f)
+{
+	return ERR_PTR(-EINVAL);
+}
 #endif /* CONFIG_TUN */
 #endif /* __IF_TUN_H */
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next V5 7/9] tun: support receiving skb through msg_control
From: Jason Wang @ 2017-05-17  4:14 UTC (permalink / raw)
  To: mst, netdev, linux-kernel; +Cc: Jason Wang
In-Reply-To: <1494994485-12994-1-git-send-email-jasowang@redhat.com>

This patch makes tun_recvmsg() can receive from skb from its caller
through msg_control. Vhost_net will be the first user.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 3cbfc5c..f8041f9c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1510,9 +1510,8 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
 
 static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 			   struct iov_iter *to,
-			   int noblock)
+			   int noblock, struct sk_buff *skb)
 {
-	struct sk_buff *skb;
 	ssize_t ret;
 	int err;
 
@@ -1521,10 +1520,12 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 	if (!iov_iter_count(to))
 		return 0;
 
-	/* Read frames from ring */
-	skb = tun_ring_recv(tfile, noblock, &err);
-	if (!skb)
-		return err;
+	if (!skb) {
+		/* Read frames from ring */
+		skb = tun_ring_recv(tfile, noblock, &err);
+		if (!skb)
+			return err;
+	}
 
 	ret = tun_put_user(tun, tfile, skb, to);
 	if (unlikely(ret < 0))
@@ -1544,7 +1545,7 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
 
 	if (!tun)
 		return -EBADFD;
-	ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK);
+	ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK, NULL);
 	ret = min_t(ssize_t, ret, len);
 	if (ret > 0)
 		iocb->ki_pos = ret;
@@ -1646,7 +1647,8 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 					 SOL_PACKET, TUN_TX_TIMESTAMP);
 		goto out;
 	}
-	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT);
+	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT,
+			  m->msg_control);
 	if (ret > (ssize_t)total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next V5 9/9] vhost_net: try batch dequing from skb array
From: Jason Wang @ 2017-05-17  4:14 UTC (permalink / raw)
  To: mst, netdev, linux-kernel; +Cc: Jason Wang
In-Reply-To: <1494994485-12994-1-git-send-email-jasowang@redhat.com>

We used to dequeue one skb during recvmsg() from skb_array, this could
be inefficient because of the bad cache utilization and spinlock
touching for each packet. This patch tries to batch them by calling
batch dequeuing helpers explicitly on the exported skb array and pass
the skb back through msg_control for underlayer socket to finish the
userspace copying. Batch dequeuing is also the requirement for more
batching improvement on receive path.

Tests were done by pktgen on tap with XDP1 in guest. Host is Intel(R)
Xeon(R) CPU E5-2650 0 @ 2.00GHz.

rx batch | pps

0   2.25Mpps
1   2.33Mpps (+3.56%)
4   2.33Mpps (+3.56%)
16  2.35Mpps (+4.44%)
64  2.42Mpps (+7.56%) <- Default rx batching
128 2.40Mpps (+6.67%)
256 2.38Mpps (+5.78%)

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 122 insertions(+), 6 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f61f852..e3d7ea1 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -28,6 +28,8 @@
 #include <linux/if_macvlan.h>
 #include <linux/if_tap.h>
 #include <linux/if_vlan.h>
+#include <linux/skb_array.h>
+#include <linux/skbuff.h>
 
 #include <net/sock.h>
 
@@ -85,6 +87,13 @@ struct vhost_net_ubuf_ref {
 	struct vhost_virtqueue *vq;
 };
 
+#define VHOST_RX_BATCH 64
+struct vhost_net_buf {
+	struct sk_buff **queue;
+	int tail;
+	int head;
+};
+
 struct vhost_net_virtqueue {
 	struct vhost_virtqueue vq;
 	size_t vhost_hlen;
@@ -99,6 +108,8 @@ struct vhost_net_virtqueue {
 	/* Reference counting for outstanding ubufs.
 	 * Protected by vq mutex. Writers must also take device mutex. */
 	struct vhost_net_ubuf_ref *ubufs;
+	struct skb_array *rx_array;
+	struct vhost_net_buf rxq;
 };
 
 struct vhost_net {
@@ -117,6 +128,71 @@ struct vhost_net {
 
 static unsigned vhost_net_zcopy_mask __read_mostly;
 
+static void *vhost_net_buf_get_ptr(struct vhost_net_buf *rxq)
+{
+	if (rxq->tail != rxq->head)
+		return rxq->queue[rxq->head];
+	else
+		return NULL;
+}
+
+static int vhost_net_buf_get_size(struct vhost_net_buf *rxq)
+{
+	return rxq->tail - rxq->head;
+}
+
+static int vhost_net_buf_is_empty(struct vhost_net_buf *rxq)
+{
+	return rxq->tail == rxq->head;
+}
+
+static void *vhost_net_buf_consume(struct vhost_net_buf *rxq)
+{
+	void *ret = vhost_net_buf_get_ptr(rxq);
+	++rxq->head;
+	return ret;
+}
+
+static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
+{
+	struct vhost_net_buf *rxq = &nvq->rxq;
+
+	rxq->head = 0;
+	rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue,
+					      VHOST_RX_BATCH);
+	return rxq->tail;
+}
+
+static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
+{
+	struct vhost_net_buf *rxq = &nvq->rxq;
+
+	if (nvq->rx_array && !vhost_net_buf_is_empty(rxq)) {
+		skb_array_unconsume(nvq->rx_array, rxq->queue + rxq->head,
+				    vhost_net_buf_get_size(rxq));
+		rxq->head = rxq->tail = 0;
+	}
+}
+
+static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
+{
+	struct vhost_net_buf *rxq = &nvq->rxq;
+
+	if (!vhost_net_buf_is_empty(rxq))
+		goto out;
+
+	if (!vhost_net_buf_produce(nvq))
+		return 0;
+
+out:
+	return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq));
+}
+
+static void vhost_net_buf_init(struct vhost_net_buf *rxq)
+{
+	rxq->head = rxq->tail = 0;
+}
+
 static void vhost_net_enable_zcopy(int vq)
 {
 	vhost_net_zcopy_mask |= 0x1 << vq;
@@ -201,6 +277,7 @@ static void vhost_net_vq_reset(struct vhost_net *n)
 		n->vqs[i].ubufs = NULL;
 		n->vqs[i].vhost_hlen = 0;
 		n->vqs[i].sock_hlen = 0;
+		vhost_net_buf_init(&n->vqs[i].rxq);
 	}
 
 }
@@ -503,15 +580,14 @@ static void handle_tx(struct vhost_net *net)
 	mutex_unlock(&vq->mutex);
 }
 
-static int peek_head_len(struct sock *sk)
+static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
 {
-	struct socket *sock = sk->sk_socket;
 	struct sk_buff *head;
 	int len = 0;
 	unsigned long flags;
 
-	if (sock->ops->peek_len)
-		return sock->ops->peek_len(sock);
+	if (rvq->rx_array)
+		return vhost_net_buf_peek(rvq);
 
 	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
 	head = skb_peek(&sk->sk_receive_queue);
@@ -537,10 +613,11 @@ static int sk_has_rx_data(struct sock *sk)
 
 static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 {
+	struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
 	struct vhost_virtqueue *vq = &nvq->vq;
 	unsigned long uninitialized_var(endtime);
-	int len = peek_head_len(sk);
+	int len = peek_head_len(rvq, sk);
 
 	if (!len && vq->busyloop_timeout) {
 		/* Both tx vq and rx socket were polled here */
@@ -561,7 +638,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 			vhost_poll_queue(&vq->poll);
 		mutex_unlock(&vq->mutex);
 
-		len = peek_head_len(sk);
+		len = peek_head_len(rvq, sk);
 	}
 
 	return len;
@@ -699,6 +776,8 @@ static void handle_rx(struct vhost_net *net)
 		/* On error, stop handling until the next kick. */
 		if (unlikely(headcount < 0))
 			goto out;
+		if (nvq->rx_array)
+			msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
 		/* On overrun, truncate and discard */
 		if (unlikely(headcount > UIO_MAXIOV)) {
 			iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
@@ -815,6 +894,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	struct vhost_net *n;
 	struct vhost_dev *dev;
 	struct vhost_virtqueue **vqs;
+	struct sk_buff **queue;
 	int i;
 
 	n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_REPEAT);
@@ -826,6 +906,15 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 		return -ENOMEM;
 	}
 
+	queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *),
+			      GFP_KERNEL);
+	if (!queue) {
+		kfree(vqs);
+		kvfree(n);
+		return -ENOMEM;
+	}
+	n->vqs[VHOST_NET_VQ_RX].rxq.queue = queue;
+
 	dev = &n->dev;
 	vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq;
 	vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq;
@@ -838,6 +927,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 		n->vqs[i].done_idx = 0;
 		n->vqs[i].vhost_hlen = 0;
 		n->vqs[i].sock_hlen = 0;
+		vhost_net_buf_init(&n->vqs[i].rxq);
 	}
 	vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
 
@@ -853,11 +943,14 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
 					struct vhost_virtqueue *vq)
 {
 	struct socket *sock;
+	struct vhost_net_virtqueue *nvq =
+		container_of(vq, struct vhost_net_virtqueue, vq);
 
 	mutex_lock(&vq->mutex);
 	sock = vq->private_data;
 	vhost_net_disable_vq(n, vq);
 	vq->private_data = NULL;
+	vhost_net_buf_unproduce(nvq);
 	mutex_unlock(&vq->mutex);
 	return sock;
 }
@@ -912,6 +1005,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
 	/* We do an extra flush before freeing memory,
 	 * since jobs can re-queue themselves. */
 	vhost_net_flush(n);
+	kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue);
 	kfree(n->dev.vqs);
 	kvfree(n);
 	return 0;
@@ -950,6 +1044,25 @@ static struct socket *get_raw_socket(int fd)
 	return ERR_PTR(r);
 }
 
+static struct skb_array *get_tap_skb_array(int fd)
+{
+	struct skb_array *array;
+	struct file *file = fget(fd);
+
+	if (!file)
+		return NULL;
+	array = tun_get_skb_array(file);
+	if (!IS_ERR(array))
+		goto out;
+	array = tap_get_skb_array(file);
+	if (!IS_ERR(array))
+		goto out;
+	array = NULL;
+out:
+	fput(file);
+	return array;
+}
+
 static struct socket *get_tap_socket(int fd)
 {
 	struct file *file = fget(fd);
@@ -1026,6 +1139,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 
 		vhost_net_disable_vq(n, vq);
 		vq->private_data = sock;
+		vhost_net_buf_unproduce(nvq);
+		if (index == VHOST_NET_VQ_RX)
+			nvq->rx_array = get_tap_skb_array(fd);
 		r = vhost_vq_init_access(vq);
 		if (r)
 			goto err_used;
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next] net: fix __skb_try_recv_from_queue to return the old behavior
From: Andrei Vagin @ 2017-05-17  4:47 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Andrei Vagin, Paolo Abeni, Eric Dumazet

This function has to return NULL on a error case, because there is a
separate error variable.

The offset has to be changed only if skb is returned

Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: David S. Miller <davem@davemloft.net>
Fixes: 65101aeca522 ("net/sock: factor out dequeue/peek with offset cod")
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 net/core/datagram.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/core/datagram.c b/net/core/datagram.c
index a4592b4..bc46118 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -170,20 +170,21 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
 					  struct sk_buff **last)
 {
 	struct sk_buff *skb;
+	int _off = *off;
 
 	*last = queue->prev;
 	skb_queue_walk(queue, skb) {
 		if (flags & MSG_PEEK) {
-			if (*off >= skb->len && (skb->len || *off ||
+			if (_off >= skb->len && (skb->len || _off ||
 						 skb->peeked)) {
-				*off -= skb->len;
+				_off -= skb->len;
 				continue;
 			}
 			if (!skb->len) {
 				skb = skb_set_peeked(skb);
 				if (unlikely(IS_ERR(skb))) {
 					*err = PTR_ERR(skb);
-					return skb;
+					return NULL;
 				}
 			}
 			*peeked = 1;
@@ -193,6 +194,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
 			if (destructor)
 				destructor(sk, skb);
 		}
+		*off = _off;
 		return skb;
 	}
 	return NULL;
@@ -253,8 +255,6 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 
 	*peeked = 0;
 	do {
-		int _off = *off;
-
 		/* Again only user level code calls this function, so nothing
 		 * interrupt level will suddenly eat the receive_queue.
 		 *
@@ -263,8 +263,10 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 		 */
 		spin_lock_irqsave(&queue->lock, cpu_flags);
 		skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
-						peeked, &_off, err, last);
+						peeked, off, &error, last);
 		spin_unlock_irqrestore(&queue->lock, cpu_flags);
+		if (error)
+			goto no_packet;
 		if (skb)
 			return skb;
 
-- 
2.9.3

^ permalink raw reply related

* Re: [patch net-next v3 02/10] net: sched: introduce tcf block infractructure
From: Jiri Pirko @ 2017-05-17  5:42 UTC (permalink / raw)
  To: Cong Wang
  Cc: David Miller, Linux Kernel Network Developers, Jamal Hadi Salim,
	David Ahern, Eric Dumazet, Stephen Hemminger, Daniel Borkmann,
	Alexander Duyck, Simon Horman, mlxsw
In-Reply-To: <CAM_iQpUZKjR1SXVaHNq4fRiiFx8-X=mjK+k=4TDvxd5CuK4gyw@mail.gmail.com>

Wed, May 17, 2017 at 12:34:04AM CEST, xiyou.wangcong@gmail.com wrote:
>On Tue, May 16, 2017 at 2:34 PM, David Miller <davem@davemloft.net> wrote:
>> From: Cong Wang <xiyou.wangcong@gmail.com>
>> Date: Tue, 16 May 2017 13:51:30 -0700
>>
>>> On Tue, May 16, 2017 at 10:27 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>>>> +int tcf_block_get(struct tcf_block **p_block,
>>>> +                 struct tcf_proto __rcu **p_filter_chain)
>>>> +{
>>>> +       struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
>>>> +
>>>> +       if (!block)
>>>> +               return -ENOMEM;
>>>> +       block->p_filter_chain = p_filter_chain;
>>>> +       *p_block = block;
>>>> +       return 0;
>>>> +}
>>>> +EXPORT_SYMBOL(tcf_block_get);
>>>
>>>
>>> XXX_get() is usually for refcnt'ing, here you only allocate
>>> a block, so please rename it to tcf_block_alloc().
>>
>> Later in the series he adds refcounting to these objects.
>>
>> He explained this to Jamal too.
>
>I have read all patches, unless I miss something, block itself
>is not refcn'ted, only chains are, so it makes no sense to get
>a block, right?

It's not in this series. I just prepare the design so later on I can
easily add the block sharing between qdiscs.

^ permalink raw reply

* Re: [patch net-next v3 05/10] net: sched: move TC_H_MAJ macro call into tcf_auto_prio
From: Jiri Pirko @ 2017-05-17  5:47 UTC (permalink / raw)
  To: Cong Wang
  Cc: Linux Kernel Network Developers, David Miller, Jamal Hadi Salim,
	David Ahern, Eric Dumazet, Stephen Hemminger, Daniel Borkmann,
	Alexander Duyck, Simon Horman, mlxsw
In-Reply-To: <CAM_iQpUcoK04Xim8PbHg3GVykhioCA1MrBd3Ddd7HERaVYELQg@mail.gmail.com>

Wed, May 17, 2017 at 12:38:08AM CEST, xiyou.wangcong@gmail.com wrote:
>On Tue, May 16, 2017 at 2:03 PM, Jiri Pirko <jiri@resnulli.us> wrote:
>> Tue, May 16, 2017 at 11:01:52PM CEST, xiyou.wangcong@gmail.com wrote:
>>>On Tue, May 16, 2017 at 10:27 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>>>> From: Jiri Pirko <jiri@mellanox.com>
>>>>
>>>> Call the helper from the function rather than to always adjust the
>>>> return value of the function.
>>>
>>>And rename the function name to reflect this change?
>>
>> ? What do you suggest?
>
>tcf_auto_major_prio()?

That makes no sense. prio is passed from user in upper 2 bytes (god
knows why but that is how it is). The helper returns prio, that's it.
Nothing major about it...

^ permalink raw reply

* Re: [patch net-next v3 06/10] net: sched: introduce helpers to work with filter chains
From: Jiri Pirko @ 2017-05-17  5:50 UTC (permalink / raw)
  To: Cong Wang
  Cc: Linux Kernel Network Developers, David Miller, Jamal Hadi Salim,
	David Ahern, Eric Dumazet, Stephen Hemminger, Daniel Borkmann,
	Alexander Duyck, Simon Horman, mlxsw
In-Reply-To: <CAM_iQpWDp3kFfCDL0JJ4c1_e9FZQ_-Wg+5HV0txz=KrTgWsq-w@mail.gmail.com>

Wed, May 17, 2017 at 12:17:11AM CEST, xiyou.wangcong@gmail.com wrote:
>On Tue, May 16, 2017 at 10:27 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>> +static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
>> +{
>> +       return rtnl_dereference(*chain_info->pprev);
>> +}
>> +
>> +static void tcf_chain_tp_insert(struct tcf_chain *chain,
>> +                               struct tcf_chain_info *chain_info,
>> +                               struct tcf_proto *tp)
>> +{
>> +       if (chain->p_filter_chain &&
>> +           *chain_info->pprev == chain->filter_chain)
>> +               *chain->p_filter_chain = tp;
>> +       RCU_INIT_POINTER(tp->next, rtnl_dereference(*chain_info->pprev));
>
>Use tcf_chain_tp_prev()?

Ok. Will do that.

>
>
>> +       rcu_assign_pointer(*chain_info->pprev, tp);
>> +}
>> +
>> +static void tcf_chain_tp_remove(struct tcf_chain *chain,
>> +                               struct tcf_chain_info *chain_info,
>> +                               struct tcf_proto *tp)
>> +{
>> +       struct tcf_proto *next = rtnl_dereference(chain_info->next);
>> +
>> +       if (chain->p_filter_chain && tp == chain->filter_chain)
>> +               *chain->p_filter_chain = next;
>> +       RCU_INIT_POINTER(*chain_info->pprev, next);
>> +}
>> +
>> +static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
>> +                                          struct tcf_chain_info *chain_info,
>> +                                          u32 protocol, u32 prio,
>> +                                          bool prio_allocate)
>> +{
>> +       struct tcf_proto **pprev;
>> +       struct tcf_proto *tp;
>> +
>> +       /* Check the chain for existence of proto-tcf with this priority */
>> +       for (pprev = &chain->filter_chain;
>> +            (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
>
>Use tcf_chain_tp_prev()?

Can't be done.

^ permalink raw reply

* RE: [PATCH net 5/5] qede: Split PF/VF ndos.
From: Mintz, Yuval @ 2017-05-17  6:16 UTC (permalink / raw)
  To: davem@davemloft.net, netdev@vger.kernel.org
In-Reply-To: <1494331671-16273-6-git-send-email-Yuval.Mintz@cavium.com>

> PFs and VFs share the same structure of NDOs today, and the VFs explicitly
> fails the ndo_xdp() callback stating it doesn't support XDP.
> 
> This results in lots of:
> 
>   [qede_xdp:1032(enp131s2)]VFs don't support XDP
>   ------------[ cut here ]------------
>   WARNING: CPU: 4 PID: 1426 at net/core/rtnetlink.c:1637
> rtnl_dump_ifinfo+0x354/0x3c0
>   ...
>   Call Trace:
>     ? __alloc_skb+0x9b/0x1d0
>     netlink_dump+0x122/0x290
>     netlink_recvmsg+0x27d/0x430
>     sock_recvmsg+0x3d/0x50
>   ...
> 
> As every dump request for the VF interface info would fail due to
> rtnl_xdp_fill() returning an error code.
> 
> To resolve this, introduce a subset of the NDOs meant for the VF in a
> seperate structure and register that one instead for VFs, and omit the
> ndo_xdp initialization.
> 
> Fixes: 40b8c45492ef ("qede: Prevent VFs from using XDP")
> Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>

I'm currently working on adding XDP support for qede VFs.
Problem is there's quite a bit of IOV infrastructure involved in this,
and legacy [well, current] PFs would prevent their VFs from utilizing it.

Once I have it ready, if I'd add back the ndo_xdp unconditionally for VFs,
then when working over legacy PFs above issue would resurface.

Looking at my alternatives for solving this, I can't see any 'good' options -
it seems mightily unorthodox to modify net_device_ops, I.e., add/remove
an NDO function-pointer based on some device property, and having
multiple ops declared for the sake of a single feature seems unscalable.

Any better solutions?

^ permalink raw reply

* [PATCH v5] net: ethernet: faraday: To support device tree usage.
From: Greentime Hu @ 2017-05-17  7:28 UTC (permalink / raw)
  To: green.hu, netdev, devicetree, andrew, arnd, jiri, linux-kernel,
	f.fainelli, robh+dt, mark.rutland, davem

To support device tree usage for ftmac100.

Signed-off-by: Greentime Hu <green.hu@gmail.com>
---
Changes in v5:
  - Nothing changed in this patch and I have committed faraday,ftmac.txt.
Changes in v4:
  - Use the same binding document to describe the same faraday ethernet controller and add faraday to vendor-prefixes.txt.
Changes in v3:
  - Nothing changed in this patch but I have committed andestech to vendor-prefixes.txt.
Changes in v2:
  - Change atmac100_of_ids to ftmac100_of_ids
---
 drivers/net/ethernet/faraday/ftmac100.c |    7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 6ac336b..1536356 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1174,11 +1174,17 @@ static int ftmac100_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id ftmac100_of_ids[] = {
+	{ .compatible = "andestech,atmac100" },
+	{ }
+};
+
 static struct platform_driver ftmac100_driver = {
 	.probe		= ftmac100_probe,
 	.remove		= ftmac100_remove,
 	.driver		= {
 		.name	= DRV_NAME,
+		.of_match_table = ftmac100_of_ids
 	},
 };
 
@@ -1202,3 +1208,4 @@ static void __exit ftmac100_exit(void)
 MODULE_AUTHOR("Po-Yu Chuang <ratbert@faraday-tech.com>");
 MODULE_DESCRIPTION("FTMAC100 driver");
 MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(of, ftmac100_of_ids);
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v4] bridge: netlink: check vlan_default_pvid range
From: Tobias Jungel @ 2017-05-17  7:29 UTC (permalink / raw)
  To: Sabrina Dubroca, Nikolay Aleksandrov, Stephen Hemminger,
	David S. Miller, netdev

Currently it is allowed to set the default pvid of a bridge to a value
above VLAN_VID_MASK (0xfff). This patch adds a check to br_validate and
returns -EINVAL in case the pvid is out of bounds.

Reproduce by calling:

[root@test ~]# ip l a type bridge
[root@test ~]# ip l a type dummy
[root@test ~]# ip l s bridge0 type bridge vlan_filtering 1
[root@test ~]# ip l s bridge0 type bridge vlan_default_pvid 9999
[root@test ~]# ip l s dummy0 master bridge0
[root@test ~]# bridge vlan
port	vlan ids
bridge0	 9999 PVID Egress Untagged

dummy0	 9999 PVID Egress Untagged

Fixes: 0f963b7592ef ("bridge: netlink: add support for default_pvid")
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Tobias Jungel <tobias.jungel@bisdn.de>
---
 net/bridge/br_netlink.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index c5ce774..574f788 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -835,6 +835,13 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
 			return -EPROTONOSUPPORT;
 		}
 	}
+
+	if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
+		__u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
+
+		if (defpvid >= VLAN_VID_MASK)
+			return -EINVAL;
+	}
 #endif
 
 	return 0;
-- 
2.9.4

^ permalink raw reply related

* Alerte
From: ZIMBRA @ 2017-05-17  6:36 UTC (permalink / raw)


LA MISE À JOUR DU COMPTE EST EN COURS, FÉZ-VOUS PARTIE AUJOURD'HUI POUR UN FONCTIONNEMENT DE COMPTE PARFAIT. CLIQUEZ SUR LE LIEN CI-DESSOUS ET Mettez à jour votre compte IMMÉDIATEMENT.

http://kssdjklssja.tripod.com/

ZIMBRA WEBMASTER

^ permalink raw reply

* Alerte
From: ZIMBRA @ 2017-05-17  6:36 UTC (permalink / raw)


LA MISE À JOUR DU COMPTE EST EN COURS, FÉZ-VOUS PARTIE AUJOURD'HUI POUR UN FONCTIONNEMENT DE COMPTE PARFAIT. CLIQUEZ SUR LE LIEN CI-DESSOUS ET Mettez à jour votre compte IMMÉDIATEMENT.

http://kssdjklssja.tripod.com/

ZIMBRA WEBMASTER

^ permalink raw reply

* Re: [RFC V1 1/1] net: cdc_ncm: Reduce memory use when kernel memory low
From: Oliver Neukum @ 2017-05-17  7:44 UTC (permalink / raw)
  To: Bjørn Mork, Jim Baxter
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <87shk4fynp.fsf-3F4PFWf5pNjpjLOzFPqGjWGXanvQGlWp@public.gmane.org>

Am Dienstag, den 16.05.2017, 20:24 +0200 schrieb Bjørn Mork:
> 
> I must say that I don't like the additional complexity added here.  If
> there are memory issues and you can reduce the buffer size to
> USB_CDC_NCM_NTB_MIN_OUT_SIZE, then why don't you just set a lower tx_max
> buffer size in the first place?
> 
>   echo 2048 > /sys/class/net/wwan0/cdc_ncm/tx_max
> 

Hi,

that would hurt performance across the board though.
Can we trigger memory compactation earlier?

	Regards
		Oliver

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v1] samples/bpf: Add a .gitignore for binaries
From: Alexander Alemayhu @ 2017-05-17  8:18 UTC (permalink / raw)
  To: David Ahern
  Cc: Mickaël Salaün, David Ahern, Alexei Starovoitov,
	Daniel Borkmann, linux-kernel, Arnaldo Carvalho de Melo, Wang Nan,
	netdev
In-Reply-To: <ad02fd5c-6812-8f1b-b348-b2ec81b9524b@gmail.com>

On Tue, May 16, 2017 at 04:27:36PM -0700, David Ahern wrote:
> 
> The problem stems from the fact that bpf samples do not really fall into
> the 'hostprogs' category (see "4 Host Program support" in
> Documentation/kbuild/makefiles.txt). Fixing samples/bpf to not rely on
> it is the better long term solution. Building of tools/ for example does
> not rely on it so there is an existing example of leveraging kernel
> headers without the overhead.
+1

I have looked into this but found it to be not easy and all attempts to
change the Makefile has resulted in obscure errors :/

Getting clang to output in a different directory was easy[0], but I guess
this is not the right approach either. Have you tried making the change?


[0]:
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 6c7468eb3684..79268d310ba5 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -1,6 +1,13 @@
 # kbuild trick to avoid linker error. Can be omitted if a module is built.
 obj- := dummy.o
 
+ifndef O
+  OUTPUT := $(shell pwd)/samples/bpf/_build/
+else
+  OUTPUT := $O/
+endif
+$(shell mkdir -p $(OUTPUT))
+
 # List of programs to build
 hostprogs-y := test_lru_dist
 hostprogs-y += sock_example
@@ -190,4 +197,4 @@ $(obj)/%.o: $(src)/%.c
 		-Wno-gnu-variable-sized-type-not-at-end \
 		-Wno-address-of-packed-member -Wno-tautological-compare \
 		-Wno-unknown-warning-option \
-		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $(OUTPUT)$(shell basename $@)

-- 
Mit freundlichen Grüßen

Alexander Alemayhu

^ permalink raw reply related

* Re: [PATCH 4.4-only] openvswitch: clear sender cpu before forwarding packets
From: Greg KH @ 2017-05-17  8:19 UTC (permalink / raw)
  To: Anoob Soman
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	stable-u79uwXL29TY76Z2rM5mHXA, davem-fT/PcQaiUtIeIZ0/mPfg9Q
In-Reply-To: <1494944710-7901-1-git-send-email-anoob.soman-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>

On Tue, May 16, 2017 at 03:25:10PM +0100, Anoob Soman wrote:
> Similar to commit c29390c6dfee ("xps: must clear sender_cpu before
> forwarding") the skb->sender_cpu needs to be cleared before forwarding
> packets.
> 
> Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices")
> Signed-off-by: Anoob Soman <anoob.soman-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
> ---
>  net/openvswitch/vport.c | 1 +
>  1 file changed, 1 insertion(+)

Why is this a non-upstream patch?  What commit in Linus's tree fixed
this?  Why not just backport that?

thanks,

greg k-h

^ permalink raw reply

* Re: [PATCH 1/2] net-next: stmmac: add adjust_link function
From: Corentin Labbe @ 2017-05-17  8:21 UTC (permalink / raw)
  To: Florian Fainelli; +Cc: peppe.cavallaro, alexandre.torgue, netdev, linux-kernel
In-Reply-To: <b54c0535-f3b2-6184-1422-ec9eed22e67f@gmail.com>

On Tue, May 16, 2017 at 09:43:04AM -0700, Florian Fainelli wrote:
> On 05/15/2017 04:41 AM, Corentin Labbe wrote:
> > My dwmac-sun8i serie will add some if (has_sun8i) to
> > stmmac_adjust_link()
> > Since the current stmmac_adjust_link() alreaady have lots of if (has_gmac/gmac4),
> > It is now better to create an adjust_link() function for each dwmac.
> 
> Is it really, because the diffstat really seems to indicate otherwise
> and by looking at the code, I am definitively not convinced this is an
> improvement other the current code.
> 
> > 
> > So this patch add an adjust_link() function pointer, and move code out
> > of stmmac_adjust_link to it.
> 
> Can't we keep the existing adjust_link() function and just have a
> different one for dwmac-sun8i that either re-uses portions of the
> existing, or duplicate just what it needs?
> 

I found another way, removing port/speed variable and use new struct links member speed10/speed100/speed1000/speedmask 
I will make the current adjust_link function simplier. (and no if has_xxx for any dwmac type)

I will send that try soon.

^ permalink raw reply

* [PATCH] net: always include tcp.h in sock.c
From: Arnd Bergmann @ 2017-05-17  8:47 UTC (permalink / raw)
  To: David S. Miller; +Cc: Arnd Bergmann, Eric Dumazet, netdev, linux-kernel

kernelci reports a build regression in the latest 4.12 snapshots
in configurations without TCP:

net/core/sock.c: In function 'skb_orphan_partial':
net/core/sock.c:1810:6: error: implicit declaration of function 'skb_is_tcp_pure_ack' [-Werror=implicit-function-declaration]

The declaration is simply missing because the header is included
conditionally. There is no real reason to leave out the header, so
this just removes the #ifdef.

Fixes: f6ba8d33cfbb ("netem: fix skb_orphan_partial()")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 net/core/sock.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index e43e71d7856b..727f924b7f91 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,10 +139,7 @@

 #include <trace/events/sock.h>

-#ifdef CONFIG_INET
 #include <net/tcp.h>
-#endif
-
 #include <net/busy_poll.h>

 static DEFINE_MUTEX(proto_list_mutex);
-- 
2.9.0

^ permalink raw reply related

* [PATCH] udp: make function udp_skb_dtor_locked static
From: Colin King @ 2017-05-17  8:50 UTC (permalink / raw)
  To: Paolo Abeni, David S . Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev
  Cc: kernel-janitors, linux-kernel

From: Colin Ian King <colin.king@canonical.com>

Function udp_skb_dtor_locked does not need to be in global scope
so make it static to fix sparse warning:

net/ipv4/udp.c: warning: symbol 'udp_skb_dtor_locked' was not
declared. Should it be static?

Fixes: 6dfb4367cd911d ("udp: keep the sk_receive_queue held when splicing")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 net/ipv4/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7bd56c9889b3..922a62d45714 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1218,7 +1218,7 @@ void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
 EXPORT_SYMBOL(udp_skb_destructor);
 
 /* as above, but the caller held the rx queue lock, too */
-void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb)
+static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb)
 {
 	udp_rmem_release(sk, skb->dev_scratch, 1, true);
 }
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH net-next] net: fix __skb_try_recv_from_queue to return the old behavior
From: Paolo Abeni @ 2017-05-17  9:04 UTC (permalink / raw)
  To: Andrei Vagin, David S. Miller; +Cc: netdev, Eric Dumazet
In-Reply-To: <20170517044710.28300-1-avagin@openvz.org>

On Tue, 2017-05-16 at 21:47 -0700, Andrei Vagin wrote:
> This function has to return NULL on a error case, because there is a
> separate error variable.
> 
> The offset has to be changed only if skb is returned
> 
> Cc: Paolo Abeni <pabeni@redhat.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: David S. Miller <davem@davemloft.net>
> Fixes: 65101aeca522 ("net/sock: factor out dequeue/peek with offset cod")
> Signed-off-by: Andrei Vagin <avagin@openvz.org>
> ---
>  net/core/datagram.c | 14 ++++++++------
>  1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/net/core/datagram.c b/net/core/datagram.c
> index a4592b4..bc46118 100644
> --- a/net/core/datagram.c
> +++ b/net/core/datagram.c
> @@ -170,20 +170,21 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
>  					  struct sk_buff **last)
>  {
>  	struct sk_buff *skb;
> +	int _off = *off;
>  
>  	*last = queue->prev;
>  	skb_queue_walk(queue, skb) {
>  		if (flags & MSG_PEEK) {
> -			if (*off >= skb->len && (skb->len || *off ||
> +			if (_off >= skb->len && (skb->len || _off ||
>  						 skb->peeked)) {
> -				*off -= skb->len;
> +				_off -= skb->len;
>  				continue;
>  			}
>  			if (!skb->len) {
>  				skb = skb_set_peeked(skb);
>  				if (unlikely(IS_ERR(skb))) {
>  					*err = PTR_ERR(skb);
> -					return skb;
> +					return NULL;
>  				}
>  			}
>  			*peeked = 1;
> @@ -193,6 +194,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
>  			if (destructor)
>  				destructor(sk, skb);
>  	
> 	}
> +		*off = _off;
>  		return skb;
>  	}
>  	return NULL;
> @@ -253,8 +255,6 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
>  
>  	*peeked = 0;
>  	do {
> -		int _off = *off;
> -
>  		/* Again only user level code calls this function, so nothing
>  		 * interrupt level will suddenly eat the receive_queue.
>  		 *
> @@ -263,8 +263,10 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
>  		 */
>  		spin_lock_irqsave(&queue->lock, cpu_flags);
>  		skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
> -						peeked, &_off, err, last);
> +						peeked, off, &error, last);
>  		spin_unlock_irqrestore(&queue->lock, cpu_flags);
> +		if (error)
> +			goto no_packet;
>  		if (skb)
>  			return skb;
>  

Thank you for catching this so early! 

If __skb_try_recv_from_queue() updates the offset if/only if the skb is
returned, than we can also add something like the following ?
(only compile tested, should not bring any functional changes, only
code cleanup)

BTW can you please share some entry pointer/walk-through for the CRIU
tests ? I'd like to try to integrate them in my workflow, thank you!

Paolo
---
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8ad5862..e65c7ed 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1555,16 +1555,13 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
                error = -EAGAIN;
                *peeked = 0;
                do {
-                       int _off = *off;
-
                        spin_lock_bh(&queue->lock);
                        skb = __skb_try_recv_from_queue(sk, queue, flags,
                                                        udp_skb_destructor,
-                                                       peeked, &_off, err,
+                                                       peeked, off, err,
                                                        &last);
                        if (skb) {
                                spin_unlock_bh(&queue->lock);
-                               *off = _off;
                                return skb;
                        }
 
@@ -1578,20 +1575,17 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
                         * the sk_receive_queue lock if fwd memory scheduling
                         * is needed.
                         */
-                       _off = *off;
                        spin_lock(&sk_queue->lock);
                        skb_queue_splice_tail_init(sk_queue, queue);
 
                        skb = __skb_try_recv_from_queue(sk, queue, flags,
                                                        udp_skb_dtor_locked,
-                                                       peeked, &_off, err,
+                                                       peeked, off, err,
                                                        &last);
                        spin_unlock(&sk_queue->lock);
                        spin_unlock_bh(&queue->lock);
-                       if (skb) {
-                               *off = _off;
+                       if (skb)
                                return skb;
-                       }

^ permalink raw reply related

* [patch net-next v4 00/10] net: sched: introduce multichain support for filters
From: Jiri Pirko @ 2017-05-17  9:07 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, dsa, edumazet, stephen, daniel,
	alexander.h.duyck, simon.horman, mlxsw

From: Jiri Pirko <jiri@mellanox.com>

Currently, each classful qdisc holds one chain of filters.
This chain is traversed and each filter could be matched on, which
may lead to execution of list of actions. One of such action
could be "reclassify", which would "reset" the processing of the
filter chain.

So this filter chain could be looked at as a flat table.
Sometimes it is convenient for user to configure a hierarchy
of tables. Example usecase is encapsulation.

Hierarchy of tables is a common way how it is done in HW pipelines.
So it is much more convenient to offload this.

This patchset contains two major patches:
8/10 - This patch introduces the support for having multiple
       chains of filters.
10/10 - This patch adds new control action to allow going to specified chain

The rest of the patches are smaller or bigger depencies of those 2.
Please see individual patch descriptions for details.

Corresponding iproute2 patches are appended as a reply to this cover letter.

Simple example:
$ tc qdisc add dev eth0 ingress
$ tc filter add dev eth0 parent ffff: protocol ip pref 33 flower dst_mac 52:54:00:3d:c7:6d action goto chain 11
$ tc filter add dev eth0 parent ffff: protocol ip pref 22 chain 11 flower dst_ip 192.168.40.1 action drop
$ tc filter show dev eth0 root
filter parent ffff: protocol ip pref 33 flower chain 0
filter parent ffff: protocol ip pref 33 flower chain 0 handle 0x1
  dst_mac 52:54:00:3d:c7:6d
  eth_type ipv4
        action order 1: gact action goto chain 11
         random type none pass val 0
         index 2 ref 1 bind 1

filter parent ffff: protocol ip pref 22 flower chain 11
filter parent ffff: protocol ip pref 22 flower chain 11 handle 0x1
  eth_type ipv4
  dst_ip 192.168.40.1
        action order 1: gact action drop
         random type none pass val 0
         index 3 ref 1 bind 1

---
v3->v4:
- 01/10 - bring back removed NET_CLS_ACT ifdefs as spotted by Cong
- 06/10 - use tcf_chain_tp_prev helper in tcf_chain_tp_insert as
          suggested by Cong
v2->v3:
- 10/10 - added "unlikely" to the reclassify and goto checks
          as suggested by Daniel
v1->v2:
- 09/10 - no need to push tp all the way down to actions
- 10/10 - reworked gact to generic control action as suggested by Jamal

Jiri Pirko (10):
  net: sched: move tc_classify function to cls_api.c
  net: sched: introduce tcf block infractructure
  net: sched: rename tcf_destroy_chain helper
  net: sched: replace nprio by a bool to make the function more readable
  net: sched: move TC_H_MAJ macro call into tcf_auto_prio
  net: sched: introduce helpers to work with filter chains
  net: sched: push chain dump to a separate function
  net: sched: introduce multichain support for filters
  net: sched: push tp down to action init
  net: sched: add termination action to allow goto chain

 include/net/act_api.h          |  13 +-
 include/net/pkt_cls.h          |  24 ++-
 include/net/pkt_sched.h        |   3 -
 include/net/sch_generic.h      |  26 ++-
 include/uapi/linux/pkt_cls.h   |   1 +
 include/uapi/linux/rtnetlink.h |   1 +
 net/core/dev.c                 |   5 +-
 net/sched/act_api.c            |  55 +++++-
 net/sched/cls_api.c            | 411 ++++++++++++++++++++++++++++++++---------
 net/sched/sch_api.c            |  50 +----
 net/sched/sch_atm.c            |  29 ++-
 net/sched/sch_cbq.c            |  21 ++-
 net/sched/sch_drr.c            |  15 +-
 net/sched/sch_dsmark.c         |  19 +-
 net/sched/sch_fq_codel.c       |  17 +-
 net/sched/sch_hfsc.c           |  21 ++-
 net/sched/sch_htb.c            |  28 ++-
 net/sched/sch_ingress.c        |  61 ++++--
 net/sched/sch_multiq.c         |  16 +-
 net/sched/sch_prio.c           |  19 +-
 net/sched/sch_qfq.c            |  16 +-
 net/sched/sch_sfb.c            |  17 +-
 net/sched/sch_sfq.c            |  17 +-
 23 files changed, 625 insertions(+), 260 deletions(-)

-- 
2.9.3

^ permalink raw reply

* [patch net-next v4 01/10] net: sched: move tc_classify function to cls_api.c
From: Jiri Pirko @ 2017-05-17  9:07 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, dsa, edumazet, stephen, daniel,
	alexander.h.duyck, simon.horman, mlxsw
In-Reply-To: <20170517090803.4461-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Move tc_classify function to cls_api.c where it belongs, rename it to
fit the namespace.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/pkt_cls.h    |  9 +++++++++
 include/net/pkt_sched.h  |  3 ---
 net/core/dev.c           |  5 +++--
 net/sched/cls_api.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/sched/sch_api.c      | 48 ------------------------------------------------
 net/sched/sch_atm.c      |  2 +-
 net/sched/sch_cbq.c      |  2 +-
 net/sched/sch_drr.c      |  2 +-
 net/sched/sch_dsmark.c   |  2 +-
 net/sched/sch_fq_codel.c |  2 +-
 net/sched/sch_hfsc.c     |  2 +-
 net/sched/sch_htb.c      |  2 +-
 net/sched/sch_multiq.c   |  2 +-
 net/sched/sch_prio.c     |  2 +-
 net/sched/sch_qfq.c      |  2 +-
 net/sched/sch_sfb.c      |  2 +-
 net/sched/sch_sfq.c      |  2 +-
 17 files changed, 72 insertions(+), 65 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 269fd78..cb74506 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -19,10 +19,19 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
 
 #ifdef CONFIG_NET_CLS
 void tcf_destroy_chain(struct tcf_proto __rcu **fl);
+int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+		 struct tcf_result *res, bool compat_mode);
+
 #else
 static inline void tcf_destroy_chain(struct tcf_proto __rcu **fl)
 {
 }
+
+static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+			       struct tcf_result *res, bool compat_mode)
+{
+	return TC_ACT_UNSPEC;
+}
 #endif
 
 static inline unsigned long
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index bec46f6..2579c20 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -113,9 +113,6 @@ static inline void qdisc_run(struct Qdisc *q)
 		__qdisc_run(q);
 }
 
-int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
-		struct tcf_result *res, bool compat_mode);
-
 static inline __be16 tc_skb_protocol(const struct sk_buff *skb)
 {
 	/* We need to take extra care in case the skb came via
diff --git a/net/core/dev.c b/net/core/dev.c
index fca407b..acd594c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -105,6 +105,7 @@
 #include <net/dst.h>
 #include <net/dst_metadata.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
 #include <linux/highmem.h>
@@ -3178,7 +3179,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 	/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
 	qdisc_bstats_cpu_update(cl->q, skb);
 
-	switch (tc_classify(skb, cl, &cl_res, false)) {
+	switch (tcf_classify(skb, cl, &cl_res, false)) {
 	case TC_ACT_OK:
 	case TC_ACT_RECLASSIFY:
 		skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -3948,7 +3949,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 	skb->tc_at_ingress = 1;
 	qdisc_bstats_cpu_update(cl->q, skb);
 
-	switch (tc_classify(skb, cl, &cl_res, false)) {
+	switch (tcf_classify(skb, cl, &cl_res, false)) {
 	case TC_ACT_OK:
 	case TC_ACT_RECLASSIFY:
 		skb->tc_index = TC_H_MIN(cl_res.classid);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 22f88b3..af58bbe 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -196,6 +196,54 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl)
 }
 EXPORT_SYMBOL(tcf_destroy_chain);
 
+/* Main classifier routine: scans classifier chain attached
+ * to this qdisc, (optionally) tests for protocol and asks
+ * specific classifiers.
+ */
+int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+		 struct tcf_result *res, bool compat_mode)
+{
+	__be16 protocol = tc_skb_protocol(skb);
+#ifdef CONFIG_NET_CLS_ACT
+	const int max_reclassify_loop = 4;
+	const struct tcf_proto *old_tp = tp;
+	int limit = 0;
+
+reclassify:
+#endif
+	for (; tp; tp = rcu_dereference_bh(tp->next)) {
+		int err;
+
+		if (tp->protocol != protocol &&
+		    tp->protocol != htons(ETH_P_ALL))
+			continue;
+
+		err = tp->classify(skb, tp, res);
+#ifdef CONFIG_NET_CLS_ACT
+		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
+			goto reset;
+#endif
+		if (err >= 0)
+			return err;
+	}
+
+	return TC_ACT_UNSPEC; /* signal: continue lookup */
+#ifdef CONFIG_NET_CLS_ACT
+reset:
+	if (unlikely(limit++ >= max_reclassify_loop)) {
+		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
+				       tp->q->ops->id, tp->prio & 0xffff,
+				       ntohs(tp->protocol));
+		return TC_ACT_SHOT;
+	}
+
+	tp = old_tp;
+	protocol = tc_skb_protocol(skb);
+	goto reclassify;
+#endif
+}
+EXPORT_SYMBOL(tcf_classify);
+
 /* Add/change/delete/get a filter node */
 
 static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e88342f..a3bcd97 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1878,54 +1878,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-/* Main classifier routine: scans classifier chain attached
- * to this qdisc, (optionally) tests for protocol and asks
- * specific classifiers.
- */
-int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
-		struct tcf_result *res, bool compat_mode)
-{
-	__be16 protocol = tc_skb_protocol(skb);
-#ifdef CONFIG_NET_CLS_ACT
-	const int max_reclassify_loop = 4;
-	const struct tcf_proto *old_tp = tp;
-	int limit = 0;
-
-reclassify:
-#endif
-	for (; tp; tp = rcu_dereference_bh(tp->next)) {
-		int err;
-
-		if (tp->protocol != protocol &&
-		    tp->protocol != htons(ETH_P_ALL))
-			continue;
-
-		err = tp->classify(skb, tp, res);
-#ifdef CONFIG_NET_CLS_ACT
-		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
-			goto reset;
-#endif
-		if (err >= 0)
-			return err;
-	}
-
-	return TC_ACT_UNSPEC; /* signal: continue lookup */
-#ifdef CONFIG_NET_CLS_ACT
-reset:
-	if (unlikely(limit++ >= max_reclassify_loop)) {
-		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
-				       tp->q->ops->id, tp->prio & 0xffff,
-				       ntohs(tp->protocol));
-		return TC_ACT_SHOT;
-	}
-
-	tp = old_tp;
-	protocol = tc_skb_protocol(skb);
-	goto reclassify;
-#endif
-}
-EXPORT_SYMBOL(tc_classify);
-
 #ifdef CONFIG_PROC_FS
 static int psched_show(struct seq_file *seq, void *v)
 {
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 40cbcee..89d32fa 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -377,7 +377,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		list_for_each_entry(flow, &p->flows, list) {
 			fl = rcu_dereference_bh(flow->filter_list);
 			if (fl) {
-				result = tc_classify(skb, fl, &res, true);
+				result = tcf_classify(skb, fl, &res, true);
 				if (result < 0)
 					continue;
 				flow = (struct atm_flow_data *)res.class;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 7415859..c543ea3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -233,7 +233,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		/*
 		 * Step 2+n. Apply classifier.
 		 */
-		result = tc_classify(skb, fl, &res, true);
+		result = tcf_classify(skb, fl, &res, true);
 		if (!fl || result < 0)
 			goto fallback;
 
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 58a8c32..446d79b 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -333,7 +333,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	fl = rcu_dereference_bh(q->filter_list);
-	result = tc_classify(skb, fl, &res, false);
+	result = tcf_classify(skb, fl, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 1c0f877..7bc638d 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -234,7 +234,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	else {
 		struct tcf_result res;
 		struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
-		int result = tc_classify(skb, fl, &res, false);
+		int result = tcf_classify(skb, fl, &res, false);
 
 		pr_debug("result %d class 0x%04x\n", result, res.classid);
 
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 9201abc..42ba81a 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -96,7 +96,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
 		return fq_codel_hash(q, skb) + 1;
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	result = tc_classify(skb, filter, &res, false);
+	result = tcf_classify(skb, filter, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5cb82f6..b0dcab1 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1142,7 +1142,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	head = &q->root;
 	tcf = rcu_dereference_bh(q->root.filter_list);
-	while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
+	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_QUEUED:
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 570ef3b..640f5f3 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -231,7 +231,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 	}
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
+	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_QUEUED:
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 43a3a10..25bb9ff 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -46,7 +46,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	int err;
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	err = tc_classify(skb, fl, &res, false);
+	err = tcf_classify(skb, fl, &res, false);
 #ifdef CONFIG_NET_CLS_ACT
 	switch (err) {
 	case TC_ACT_STOLEN:
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 92c2e6d..7997363 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -42,7 +42,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	if (TC_H_MAJ(skb->priority) != sch->handle) {
 		fl = rcu_dereference_bh(q->filter_list);
-		err = tc_classify(skb, fl, &res, false);
+		err = tcf_classify(skb, fl, &res, false);
 #ifdef CONFIG_NET_CLS_ACT
 		switch (err) {
 		case TC_ACT_STOLEN:
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 041eba3..73c7ac3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -720,7 +720,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	fl = rcu_dereference_bh(q->filter_list);
-	result = tc_classify(skb, fl, &res, false);
+	result = tcf_classify(skb, fl, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 0f77727..b2878808 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -259,7 +259,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
 	struct tcf_result res;
 	int result;
 
-	result = tc_classify(skb, fl, &res, false);
+	result = tcf_classify(skb, fl, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 332d94b..53a641f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -180,7 +180,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 		return sfq_hash(q, skb) + 1;
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	result = tc_classify(skb, fl, &res, false);
+	result = tcf_classify(skb, fl, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
-- 
2.9.3

^ permalink raw reply related

* [patch net-next v4 02/10] net: sched: introduce tcf block infractructure
From: Jiri Pirko @ 2017-05-17  9:07 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, dsa, edumazet, stephen, daniel,
	alexander.h.duyck, simon.horman, mlxsw
In-Reply-To: <20170517090803.4461-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Currently, the filter chains are direcly put into the private structures
of qdiscs. In order to be able to have multiple chains per qdisc and to
allow filter chains sharing among qdiscs, there is a need for common
object that would hold the chains. This introduces such object and calls
it "tcf_block".

Helpers to get and put the blocks are provided to be called from
individual qdisc code. Also, the original filter_list pointers are left
in qdisc privs to allow the entry into tcf_block processing without any
added overhead of possible multiple pointer dereference on fast path.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/pkt_cls.h     | 13 ++++++++--
 include/net/sch_generic.h |  7 +++++-
 net/sched/cls_api.c       | 48 +++++++++++++++++++++++++++++--------
 net/sched/sch_api.c       |  2 +-
 net/sched/sch_atm.c       | 27 ++++++++++++++-------
 net/sched/sch_cbq.c       | 19 ++++++++++-----
 net/sched/sch_drr.c       | 13 ++++++----
 net/sched/sch_dsmark.c    | 17 ++++++++-----
 net/sched/sch_fq_codel.c  | 15 ++++++++----
 net/sched/sch_hfsc.c      | 19 ++++++++++-----
 net/sched/sch_htb.c       | 26 +++++++++++++-------
 net/sched/sch_ingress.c   | 61 ++++++++++++++++++++++++++++++++++-------------
 net/sched/sch_multiq.c    | 14 +++++++----
 net/sched/sch_prio.c      | 17 +++++++++----
 net/sched/sch_qfq.c       | 14 +++++++----
 net/sched/sch_sfb.c       | 15 ++++++++----
 net/sched/sch_sfq.c       | 15 ++++++++----
 17 files changed, 243 insertions(+), 99 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index cb74506..e56e715 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -18,12 +18,21 @@ int register_tcf_proto_ops(struct tcf_proto_ops *ops);
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
 
 #ifdef CONFIG_NET_CLS
-void tcf_destroy_chain(struct tcf_proto __rcu **fl);
+int tcf_block_get(struct tcf_block **p_block,
+		  struct tcf_proto __rcu **p_filter_chain);
+void tcf_block_put(struct tcf_block *block);
 int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		 struct tcf_result *res, bool compat_mode);
 
 #else
-static inline void tcf_destroy_chain(struct tcf_proto __rcu **fl)
+static inline
+int tcf_block_get(struct tcf_block **p_block,
+		  struct tcf_proto __rcu **p_filter_chain)
+{
+	return 0;
+}
+
+static inline void tcf_block_put(struct tcf_block *block)
 {
 }
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 22e5209..98cf2f2 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -153,7 +153,7 @@ struct Qdisc_class_ops {
 	void			(*walk)(struct Qdisc *, struct qdisc_walker * arg);
 
 	/* Filter manipulation */
-	struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long);
+	struct tcf_block *	(*tcf_block)(struct Qdisc *, unsigned long);
 	bool			(*tcf_cl_offload)(u32 classid);
 	unsigned long		(*bind_tcf)(struct Qdisc *, unsigned long,
 					u32 classid);
@@ -236,6 +236,7 @@ struct tcf_proto {
 	struct Qdisc		*q;
 	void			*data;
 	const struct tcf_proto_ops	*ops;
+	struct tcf_block	*block;
 	struct rcu_head		rcu;
 };
 
@@ -247,6 +248,10 @@ struct qdisc_skb_cb {
 	unsigned char		data[QDISC_CB_PRIV_LEN];
 };
 
+struct tcf_block {
+	struct tcf_proto __rcu **p_filter_chain;
+};
+
 static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
 {
 	struct qdisc_skb_cb *qcb;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index af58bbe..d30116f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -129,7 +129,8 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
 }
 
 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
-					  u32 prio, u32 parent, struct Qdisc *q)
+					  u32 prio, u32 parent, struct Qdisc *q,
+					  struct tcf_block *block)
 {
 	struct tcf_proto *tp;
 	int err;
@@ -165,6 +166,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 	tp->prio = prio;
 	tp->classid = parent;
 	tp->q = q;
+	tp->block = block;
 
 	err = tp->ops->init(tp);
 	if (err) {
@@ -185,7 +187,7 @@ static void tcf_proto_destroy(struct tcf_proto *tp)
 	kfree_rcu(tp, rcu);
 }
 
-void tcf_destroy_chain(struct tcf_proto __rcu **fl)
+static void tcf_destroy_chain(struct tcf_proto __rcu **fl)
 {
 	struct tcf_proto *tp;
 
@@ -194,7 +196,28 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl)
 		tcf_proto_destroy(tp);
 	}
 }
-EXPORT_SYMBOL(tcf_destroy_chain);
+
+int tcf_block_get(struct tcf_block **p_block,
+		  struct tcf_proto __rcu **p_filter_chain)
+{
+	struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
+
+	if (!block)
+		return -ENOMEM;
+	block->p_filter_chain = p_filter_chain;
+	*p_block = block;
+	return 0;
+}
+EXPORT_SYMBOL(tcf_block_get);
+
+void tcf_block_put(struct tcf_block *block)
+{
+	if (!block)
+		return;
+	tcf_destroy_chain(block->p_filter_chain);
+	kfree(block);
+}
+EXPORT_SYMBOL(tcf_block_put);
 
 /* Main classifier routine: scans classifier chain attached
  * to this qdisc, (optionally) tests for protocol and asks
@@ -260,6 +283,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	struct Qdisc  *q;
 	struct tcf_proto __rcu **back;
 	struct tcf_proto __rcu **chain;
+	struct tcf_block *block;
 	struct tcf_proto *next;
 	struct tcf_proto *tp;
 	const struct Qdisc_class_ops *cops;
@@ -328,7 +352,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	if (!cops)
 		return -EINVAL;
 
-	if (cops->tcf_chain == NULL)
+	if (!cops->tcf_block)
 		return -EOPNOTSUPP;
 
 	/* Do we search for filter, attached to class? */
@@ -339,11 +363,13 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	}
 
 	/* And the last stroke */
-	chain = cops->tcf_chain(q, cl);
-	if (chain == NULL) {
+	block = cops->tcf_block(q, cl);
+	if (!block) {
 		err = -EINVAL;
 		goto errout;
 	}
+	chain = block->p_filter_chain;
+
 	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
 		tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
 		tcf_destroy_chain(chain);
@@ -387,7 +413,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 			nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
 
 		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
-				      protocol, nprio, parent, q);
+				      protocol, nprio, parent, q, block);
 		if (IS_ERR(tp)) {
 			err = PTR_ERR(tp);
 			goto errout;
@@ -556,6 +582,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	int s_t;
 	struct net_device *dev;
 	struct Qdisc *q;
+	struct tcf_block *block;
 	struct tcf_proto *tp, __rcu **chain;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
 	unsigned long cl = 0;
@@ -577,16 +604,17 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	cops = q->ops->cl_ops;
 	if (!cops)
 		goto errout;
-	if (cops->tcf_chain == NULL)
+	if (!cops->tcf_block)
 		goto errout;
 	if (TC_H_MIN(tcm->tcm_parent)) {
 		cl = cops->get(q, tcm->tcm_parent);
 		if (cl == 0)
 			goto errout;
 	}
-	chain = cops->tcf_chain(q, cl);
-	if (chain == NULL)
+	block = cops->tcf_block(q, cl);
+	if (!block)
 		goto errout;
+	chain = block->p_filter_chain;
 
 	s_t = cb->args[0];
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a3bcd97..5d95401 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -163,7 +163,7 @@ int register_qdisc(struct Qdisc_ops *qops)
 		if (!(cops->get && cops->put && cops->walk && cops->leaf))
 			goto out_einval;
 
-		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
+		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
 			goto out_einval;
 	}
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 89d32fa..f435546 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -43,6 +43,7 @@
 struct atm_flow_data {
 	struct Qdisc		*q;	/* FIFO, TBF, etc. */
 	struct tcf_proto __rcu	*filter_list;
+	struct tcf_block	*block;
 	struct atm_vcc		*vcc;	/* VCC; NULL if VCC is closed */
 	void			(*old_pop)(struct atm_vcc *vcc,
 					   struct sk_buff *skb); /* chaining */
@@ -143,7 +144,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 	list_del_init(&flow->list);
 	pr_debug("atm_tc_put: qdisc %p\n", flow->q);
 	qdisc_destroy(flow->q);
-	tcf_destroy_chain(&flow->filter_list);
+	tcf_block_put(flow->block);
 	if (flow->sock) {
 		pr_debug("atm_tc_put: f_count %ld\n",
 			file_count(flow->sock->file));
@@ -274,7 +275,13 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		error = -ENOBUFS;
 		goto err_out;
 	}
-	RCU_INIT_POINTER(flow->filter_list, NULL);
+
+	error = tcf_block_get(&flow->block, &flow->filter_list);
+	if (error) {
+		kfree(flow);
+		goto err_out;
+	}
+
 	flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
 	if (!flow->q)
 		flow->q = &noop_qdisc;
@@ -346,14 +353,13 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 	}
 }
 
-static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,
-						unsigned long cl)
+static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
 
 	pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
-	return flow ? &flow->filter_list : &p->link.filter_list;
+	return flow ? flow->block : p->link.block;
 }
 
 /* --------------------------- Qdisc operations ---------------------------- */
@@ -524,6 +530,7 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
 static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
+	int err;
 
 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 	INIT_LIST_HEAD(&p->flows);
@@ -534,7 +541,11 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!p->link.q)
 		p->link.q = &noop_qdisc;
 	pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
-	RCU_INIT_POINTER(p->link.filter_list, NULL);
+
+	err = tcf_block_get(&p->link.block, &p->link.filter_list);
+	if (err)
+		return err;
+
 	p->link.vcc = NULL;
 	p->link.sock = NULL;
 	p->link.classid = sch->handle;
@@ -561,7 +572,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 
 	pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
 	list_for_each_entry(flow, &p->flows, list)
-		tcf_destroy_chain(&flow->filter_list);
+		tcf_block_put(flow->block);
 
 	list_for_each_entry_safe(flow, tmp, &p->flows, list) {
 		if (flow->ref > 1)
@@ -646,7 +657,7 @@ static const struct Qdisc_class_ops atm_class_ops = {
 	.change		= atm_tc_change,
 	.delete		= atm_tc_delete,
 	.walk		= atm_tc_walk,
-	.tcf_chain	= atm_tc_find_tcf,
+	.tcf_block	= atm_tc_tcf_block,
 	.bind_tcf	= atm_tc_bind_filter,
 	.unbind_tcf	= atm_tc_put,
 	.dump		= atm_tc_dump_class,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index c543ea3..8dd6d0a 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -127,6 +127,7 @@ struct cbq_class {
 	struct tc_cbq_xstats	xstats;
 
 	struct tcf_proto __rcu	*filter_list;
+	struct tcf_block	*block;
 
 	int			refcnt;
 	int			filters;
@@ -1405,7 +1406,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 
 	WARN_ON(cl->filters);
 
-	tcf_destroy_chain(&cl->filter_list);
+	tcf_block_put(cl->block);
 	qdisc_destroy(cl->q);
 	qdisc_put_rtab(cl->R_tab);
 	gen_kill_estimator(&cl->rate_est);
@@ -1430,7 +1431,7 @@ static void cbq_destroy(struct Qdisc *sch)
 	 */
 	for (h = 0; h < q->clhash.hashsize; h++) {
 		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
-			tcf_destroy_chain(&cl->filter_list);
+			tcf_block_put(cl->block);
 	}
 	for (h = 0; h < q->clhash.hashsize; h++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
@@ -1585,12 +1586,19 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	if (cl == NULL)
 		goto failure;
 
+	err = tcf_block_get(&cl->block, &cl->filter_list);
+	if (err) {
+		kfree(cl);
+		return err;
+	}
+
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
 					NULL,
 					qdisc_root_sleeping_running(sch),
 					tca[TCA_RATE]);
 		if (err) {
+			tcf_block_put(cl->block);
 			kfree(cl);
 			goto failure;
 		}
@@ -1688,8 +1696,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
 	return 0;
 }
 
-static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
-					     unsigned long arg)
+static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = (struct cbq_class *)arg;
@@ -1697,7 +1704,7 @@ static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
 	if (cl == NULL)
 		cl = &q->link;
 
-	return &cl->filter_list;
+	return cl->block;
 }
 
 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
@@ -1756,7 +1763,7 @@ static const struct Qdisc_class_ops cbq_class_ops = {
 	.change		=	cbq_change_class,
 	.delete		=	cbq_delete,
 	.walk		=	cbq_walk,
-	.tcf_chain	=	cbq_find_tcf,
+	.tcf_block	=	cbq_tcf_block,
 	.bind_tcf	=	cbq_bind_filter,
 	.unbind_tcf	=	cbq_unbind_filter,
 	.dump		=	cbq_dump_class,
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 446d79b..5db2a28 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -36,6 +36,7 @@ struct drr_class {
 struct drr_sched {
 	struct list_head		active;
 	struct tcf_proto __rcu		*filter_list;
+	struct tcf_block		*block;
 	struct Qdisc_class_hash		clhash;
 };
 
@@ -190,15 +191,14 @@ static void drr_put_class(struct Qdisc *sch, unsigned long arg)
 		drr_destroy_class(sch, cl);
 }
 
-static struct tcf_proto __rcu **drr_tcf_chain(struct Qdisc *sch,
-					      unsigned long cl)
+static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct drr_sched *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
 
-	return &q->filter_list;
+	return q->block;
 }
 
 static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
@@ -431,6 +431,9 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	struct drr_sched *q = qdisc_priv(sch);
 	int err;
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
 	err = qdisc_class_hash_init(&q->clhash);
 	if (err < 0)
 		return err;
@@ -462,7 +465,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch)
 	struct hlist_node *next;
 	unsigned int i;
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -477,7 +480,7 @@ static const struct Qdisc_class_ops drr_class_ops = {
 	.delete		= drr_delete_class,
 	.get		= drr_get_class,
 	.put		= drr_put_class,
-	.tcf_chain	= drr_tcf_chain,
+	.tcf_block	= drr_tcf_block,
 	.bind_tcf	= drr_bind_tcf,
 	.unbind_tcf	= drr_unbind_tcf,
 	.graft		= drr_graft_class,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 7bc638d..ba45102 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -44,6 +44,7 @@ struct mask_value {
 struct dsmark_qdisc_data {
 	struct Qdisc		*q;
 	struct tcf_proto __rcu	*filter_list;
+	struct tcf_block	*block;
 	struct mask_value	*mv;
 	u16			indices;
 	u8			set_tc_index;
@@ -183,11 +184,11 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 	}
 }
 
-static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch,
-						       unsigned long cl)
+static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	return &p->filter_list;
+
+	return p->block;
 }
 
 /* --------------------------- Qdisc operations ---------------------------- */
@@ -332,7 +333,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	struct nlattr *tb[TCA_DSMARK_MAX + 1];
-	int err = -EINVAL;
+	int err;
 	u32 default_index = NO_DEFAULT_INDEX;
 	u16 indices;
 	int i;
@@ -342,6 +343,10 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!opt)
 		goto errout;
 
+	err = tcf_block_get(&p->block, &p->filter_list);
+	if (err)
+		return err;
+
 	err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL);
 	if (err < 0)
 		goto errout;
@@ -400,7 +405,7 @@ static void dsmark_destroy(struct Qdisc *sch)
 
 	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
 
-	tcf_destroy_chain(&p->filter_list);
+	tcf_block_put(p->block);
 	qdisc_destroy(p->q);
 	if (p->mv != p->embedded)
 		kfree(p->mv);
@@ -468,7 +473,7 @@ static const struct Qdisc_class_ops dsmark_class_ops = {
 	.change		=	dsmark_change,
 	.delete		=	dsmark_delete,
 	.walk		=	dsmark_walk,
-	.tcf_chain	=	dsmark_find_tcf,
+	.tcf_block	=	dsmark_tcf_block,
 	.bind_tcf	=	dsmark_bind_filter,
 	.unbind_tcf	=	dsmark_put,
 	.dump		=	dsmark_dump_class,
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 42ba81a..f201e73 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -55,6 +55,7 @@ struct fq_codel_flow {
 
 struct fq_codel_sched_data {
 	struct tcf_proto __rcu *filter_list; /* optional external classifier */
+	struct tcf_block *block;
 	struct fq_codel_flow *flows;	/* Flows table [flows_cnt] */
 	u32		*backlogs;	/* backlog table [flows_cnt] */
 	u32		flows_cnt;	/* number of flows */
@@ -450,7 +451,7 @@ static void fq_codel_destroy(struct Qdisc *sch)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 	kvfree(q->backlogs);
 	kvfree(q->flows);
 }
@@ -459,6 +460,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 	int i;
+	int err;
 
 	sch->limit = 10*1024;
 	q->flows_cnt = 1024;
@@ -478,6 +480,10 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
+
 	if (!q->flows) {
 		q->flows = kvzalloc(q->flows_cnt *
 					   sizeof(struct fq_codel_flow), GFP_KERNEL);
@@ -589,14 +595,13 @@ static void fq_codel_put(struct Qdisc *q, unsigned long cl)
 {
 }
 
-static struct tcf_proto __rcu **fq_codel_find_tcf(struct Qdisc *sch,
-						  unsigned long cl)
+static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
-	return &q->filter_list;
+	return q->block;
 }
 
 static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -679,7 +684,7 @@ static const struct Qdisc_class_ops fq_codel_class_ops = {
 	.leaf		=	fq_codel_leaf,
 	.get		=	fq_codel_get,
 	.put		=	fq_codel_put,
-	.tcf_chain	=	fq_codel_find_tcf,
+	.tcf_block	=	fq_codel_tcf_block,
 	.bind_tcf	=	fq_codel_bind,
 	.unbind_tcf	=	fq_codel_put,
 	.dump		=	fq_codel_dump_class,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b0dcab1..a324f84 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,6 +116,7 @@ struct hfsc_class {
 	struct gnet_stats_queue qstats;
 	struct net_rate_estimator __rcu *rate_est;
 	struct tcf_proto __rcu *filter_list; /* filter list */
+	struct tcf_block *block;
 	unsigned int	filter_cnt;	/* filter count */
 	unsigned int	level;		/* class level in hierarchy */
 
@@ -1040,12 +1041,19 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl == NULL)
 		return -ENOBUFS;
 
+	err = tcf_block_get(&cl->block, &cl->filter_list);
+	if (err) {
+		kfree(cl);
+		return err;
+	}
+
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
 					NULL,
 					qdisc_root_sleeping_running(sch),
 					tca[TCA_RATE]);
 		if (err) {
+			tcf_block_put(cl->block);
 			kfree(cl);
 			return err;
 		}
@@ -1091,7 +1099,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&cl->filter_list);
+	tcf_block_put(cl->block);
 	qdisc_destroy(cl->qdisc);
 	gen_kill_estimator(&cl->rate_est);
 	if (cl != &q->root)
@@ -1261,8 +1269,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
 	cl->filter_cnt--;
 }
 
-static struct tcf_proto __rcu **
-hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
+static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
@@ -1270,7 +1277,7 @@ hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
 	if (cl == NULL)
 		cl = &q->root;
 
-	return &cl->filter_list;
+	return cl->block;
 }
 
 static int
@@ -1515,7 +1522,7 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode)
-			tcf_destroy_chain(&cl->filter_list);
+			tcf_block_put(cl->block);
 	}
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1662,7 +1669,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = {
 	.put		= hfsc_put_class,
 	.bind_tcf	= hfsc_bind_tcf,
 	.unbind_tcf	= hfsc_unbind_tcf,
-	.tcf_chain	= hfsc_tcf_chain,
+	.tcf_block	= hfsc_tcf_block,
 	.dump		= hfsc_dump_class,
 	.dump_stats	= hfsc_dump_class_stats,
 	.walk		= hfsc_walk
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 640f5f3..195bbca9 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -105,6 +105,7 @@ struct htb_class {
 	int			quantum;	/* but stored for parent-to-leaf return */
 
 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
+	struct tcf_block	*block;
 	int			filter_cnt;
 	int			refcnt;		/* usage count of this class */
 
@@ -156,6 +157,7 @@ struct htb_sched {
 
 	/* filters for qdisc itself */
 	struct tcf_proto __rcu	*filter_list;
+	struct tcf_block	*block;
 
 #define HTB_WARN_TOOMANYEVENTS	0x1
 	unsigned int		warned;	/* only one warning */
@@ -1017,6 +1019,10 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!opt)
 		return -EINVAL;
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
+
 	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL);
 	if (err < 0)
 		return err;
@@ -1230,7 +1236,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 		qdisc_destroy(cl->un.leaf.q);
 	}
 	gen_kill_estimator(&cl->rate_est);
-	tcf_destroy_chain(&cl->filter_list);
+	tcf_block_put(cl->block);
 	kfree(cl);
 }
 
@@ -1248,11 +1254,11 @@ static void htb_destroy(struct Qdisc *sch)
 	 * because filter need its target class alive to be able to call
 	 * unbind_filter on it (without Oops).
 	 */
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
-			tcf_destroy_chain(&cl->filter_list);
+			tcf_block_put(cl->block);
 	}
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1396,6 +1402,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if (!cl)
 			goto failure;
 
+		err = tcf_block_get(&cl->block, &cl->filter_list);
+		if (err) {
+			kfree(cl);
+			goto failure;
+		}
 		if (htb_rate_est || tca[TCA_RATE]) {
 			err = gen_new_estimator(&cl->bstats, NULL,
 						&cl->rate_est,
@@ -1403,6 +1414,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 						qdisc_root_sleeping_running(sch),
 						tca[TCA_RATE] ? : &est.nla);
 			if (err) {
+				tcf_block_put(cl->block);
 				kfree(cl);
 				goto failure;
 			}
@@ -1521,14 +1533,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	return err;
 }
 
-static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch,
-					     unsigned long arg)
+static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
-	struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list;
 
-	return fl;
+	return cl ? cl->block : q->block;
 }
 
 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
@@ -1591,7 +1601,7 @@ static const struct Qdisc_class_ops htb_class_ops = {
 	.change		=	htb_change_class,
 	.delete		=	htb_delete,
 	.walk		=	htb_walk,
-	.tcf_chain	=	htb_find_tcf,
+	.tcf_block	=	htb_tcf_block,
 	.bind_tcf	=	htb_bind_filter,
 	.unbind_tcf	=	htb_unbind_filter,
 	.dump		=	htb_dump_class,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 3bab5f6..d8a9beb 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -18,6 +18,10 @@
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 
+struct ingress_sched_data {
+	struct tcf_block *block;
+};
+
 static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
 {
 	return NULL;
@@ -47,16 +51,23 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 }
 
-static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
-						 unsigned long cl)
+static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
-	struct net_device *dev = qdisc_dev(sch);
+	struct ingress_sched_data *q = qdisc_priv(sch);
 
-	return &dev->ingress_cl_list;
+	return q->block;
 }
 
 static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 {
+	struct ingress_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	int err;
+
+	err = tcf_block_get(&q->block, &dev->ingress_cl_list);
+	if (err)
+		return err;
+
 	net_inc_ingress_queue();
 	sch->flags |= TCQ_F_CPUSTATS;
 
@@ -65,9 +76,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 
 static void ingress_destroy(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
+	struct ingress_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&dev->ingress_cl_list);
+	tcf_block_put(q->block);
 	net_dec_ingress_queue();
 }
 
@@ -91,7 +102,7 @@ static const struct Qdisc_class_ops ingress_class_ops = {
 	.get		=	ingress_get,
 	.put		=	ingress_put,
 	.walk		=	ingress_walk,
-	.tcf_chain	=	ingress_find_tcf,
+	.tcf_block	=	ingress_tcf_block,
 	.tcf_cl_offload	=	ingress_cl_offload,
 	.bind_tcf	=	ingress_bind_filter,
 	.unbind_tcf	=	ingress_put,
@@ -100,12 +111,18 @@ static const struct Qdisc_class_ops ingress_class_ops = {
 static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
 	.cl_ops		=	&ingress_class_ops,
 	.id		=	"ingress",
+	.priv_size	=	sizeof(struct ingress_sched_data),
 	.init		=	ingress_init,
 	.destroy	=	ingress_destroy,
 	.dump		=	ingress_dump,
 	.owner		=	THIS_MODULE,
 };
 
+struct clsact_sched_data {
+	struct tcf_block *ingress_block;
+	struct tcf_block *egress_block;
+};
+
 static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
 {
 	switch (TC_H_MIN(classid)) {
@@ -128,16 +145,15 @@ static unsigned long clsact_bind_filter(struct Qdisc *sch,
 	return clsact_get(sch, classid);
 }
 
-static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch,
-						unsigned long cl)
+static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
-	struct net_device *dev = qdisc_dev(sch);
+	struct clsact_sched_data *q = qdisc_priv(sch);
 
 	switch (cl) {
 	case TC_H_MIN(TC_H_MIN_INGRESS):
-		return &dev->ingress_cl_list;
+		return q->ingress_block;
 	case TC_H_MIN(TC_H_MIN_EGRESS):
-		return &dev->egress_cl_list;
+		return q->egress_block;
 	default:
 		return NULL;
 	}
@@ -145,6 +161,18 @@ static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch,
 
 static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 {
+	struct clsact_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	int err;
+
+	err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list);
+	if (err)
+		return err;
+
+	err = tcf_block_get(&q->egress_block, &dev->egress_cl_list);
+	if (err)
+		return err;
+
 	net_inc_ingress_queue();
 	net_inc_egress_queue();
 
@@ -155,10 +183,10 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 
 static void clsact_destroy(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
+	struct clsact_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&dev->ingress_cl_list);
-	tcf_destroy_chain(&dev->egress_cl_list);
+	tcf_block_put(q->egress_block);
+	tcf_block_put(q->ingress_block);
 
 	net_dec_ingress_queue();
 	net_dec_egress_queue();
@@ -169,7 +197,7 @@ static const struct Qdisc_class_ops clsact_class_ops = {
 	.get		=	clsact_get,
 	.put		=	ingress_put,
 	.walk		=	ingress_walk,
-	.tcf_chain	=	clsact_find_tcf,
+	.tcf_block	=	clsact_tcf_block,
 	.tcf_cl_offload	=	clsact_cl_offload,
 	.bind_tcf	=	clsact_bind_filter,
 	.unbind_tcf	=	ingress_put,
@@ -178,6 +206,7 @@ static const struct Qdisc_class_ops clsact_class_ops = {
 static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
 	.cl_ops		=	&clsact_class_ops,
 	.id		=	"clsact",
+	.priv_size	=	sizeof(struct clsact_sched_data),
 	.init		=	clsact_init,
 	.destroy	=	clsact_destroy,
 	.dump		=	ingress_dump,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 25bb9ff..6047674 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -32,6 +32,7 @@ struct multiq_sched_data {
 	u16 max_bands;
 	u16 curband;
 	struct tcf_proto __rcu *filter_list;
+	struct tcf_block *block;
 	struct Qdisc **queues;
 };
 
@@ -170,7 +171,7 @@ multiq_destroy(struct Qdisc *sch)
 	int band;
 	struct multiq_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 	for (band = 0; band < q->bands; band++)
 		qdisc_destroy(q->queues[band]);
 
@@ -243,6 +244,10 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
+
 	q->max_bands = qdisc_dev(sch)->num_tx_queues;
 
 	q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
@@ -367,14 +372,13 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	}
 }
 
-static struct tcf_proto __rcu **multiq_find_tcf(struct Qdisc *sch,
-						unsigned long cl)
+static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
-	return &q->filter_list;
+	return q->block;
 }
 
 static const struct Qdisc_class_ops multiq_class_ops = {
@@ -383,7 +387,7 @@ static const struct Qdisc_class_ops multiq_class_ops = {
 	.get		=	multiq_get,
 	.put		=	multiq_put,
 	.walk		=	multiq_walk,
-	.tcf_chain	=	multiq_find_tcf,
+	.tcf_block	=	multiq_tcf_block,
 	.bind_tcf	=	multiq_bind,
 	.unbind_tcf	=	multiq_put,
 	.dump		=	multiq_dump_class,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 7997363..a240468 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -25,6 +25,7 @@
 struct prio_sched_data {
 	int bands;
 	struct tcf_proto __rcu *filter_list;
+	struct tcf_block *block;
 	u8  prio2band[TC_PRIO_MAX+1];
 	struct Qdisc *queues[TCQ_PRIO_BANDS];
 };
@@ -145,7 +146,7 @@ prio_destroy(struct Qdisc *sch)
 	int prio;
 	struct prio_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 	for (prio = 0; prio < q->bands; prio++)
 		qdisc_destroy(q->queues[prio]);
 }
@@ -204,9 +205,16 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 
 static int prio_init(struct Qdisc *sch, struct nlattr *opt)
 {
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int err;
+
 	if (!opt)
 		return -EINVAL;
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
+
 	return prio_tune(sch, opt);
 }
 
@@ -317,14 +325,13 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	}
 }
 
-static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc *sch,
-					      unsigned long cl)
+static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
-	return &q->filter_list;
+	return q->block;
 }
 
 static const struct Qdisc_class_ops prio_class_ops = {
@@ -333,7 +340,7 @@ static const struct Qdisc_class_ops prio_class_ops = {
 	.get		=	prio_get,
 	.put		=	prio_put,
 	.walk		=	prio_walk,
-	.tcf_chain	=	prio_find_tcf,
+	.tcf_block	=	prio_tcf_block,
 	.bind_tcf	=	prio_bind,
 	.unbind_tcf	=	prio_put,
 	.dump		=	prio_dump_class,
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 73c7ac3..076ad03 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -182,6 +182,7 @@ struct qfq_group {
 
 struct qfq_sched {
 	struct tcf_proto __rcu *filter_list;
+	struct tcf_block	*block;
 	struct Qdisc_class_hash clhash;
 
 	u64			oldV, V;	/* Precise virtual times. */
@@ -582,15 +583,14 @@ static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
 		qfq_destroy_class(sch, cl);
 }
 
-static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch,
-					      unsigned long cl)
+static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct qfq_sched *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
 
-	return &q->filter_list;
+	return q->block;
 }
 
 static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
@@ -1438,6 +1438,10 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	int i, j, err;
 	u32 max_cl_shift, maxbudg_shift, max_classes;
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
+
 	err = qdisc_class_hash_init(&q->clhash);
 	if (err < 0)
 		return err;
@@ -1492,7 +1496,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
 	struct hlist_node *next;
 	unsigned int i;
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1508,7 +1512,7 @@ static const struct Qdisc_class_ops qfq_class_ops = {
 	.delete		= qfq_delete_class,
 	.get		= qfq_get_class,
 	.put		= qfq_put_class,
-	.tcf_chain	= qfq_tcf_chain,
+	.tcf_block	= qfq_tcf_block,
 	.bind_tcf	= qfq_bind_tcf,
 	.unbind_tcf	= qfq_unbind_tcf,
 	.graft		= qfq_graft_class,
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index b2878808..9756b1c 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -56,6 +56,7 @@ struct sfb_bins {
 struct sfb_sched_data {
 	struct Qdisc	*qdisc;
 	struct tcf_proto __rcu *filter_list;
+	struct tcf_block *block;
 	unsigned long	rehash_interval;
 	unsigned long	warmup_time;	/* double buffering warmup time in jiffies */
 	u32		max;
@@ -465,7 +466,7 @@ static void sfb_destroy(struct Qdisc *sch)
 {
 	struct sfb_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 	qdisc_destroy(q->qdisc);
 }
 
@@ -549,6 +550,11 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
 static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfb_sched_data *q = qdisc_priv(sch);
+	int err;
+
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
 
 	q->qdisc = &noop_qdisc;
 	return sfb_change(sch, opt);
@@ -657,14 +663,13 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 	}
 }
 
-static struct tcf_proto __rcu **sfb_find_tcf(struct Qdisc *sch,
-					     unsigned long cl)
+static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct sfb_sched_data *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
-	return &q->filter_list;
+	return q->block;
 }
 
 static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
@@ -682,7 +687,7 @@ static const struct Qdisc_class_ops sfb_class_ops = {
 	.change		=	sfb_change_class,
 	.delete		=	sfb_delete,
 	.walk		=	sfb_walk,
-	.tcf_chain	=	sfb_find_tcf,
+	.tcf_block	=	sfb_tcf_block,
 	.bind_tcf	=	sfb_bind,
 	.unbind_tcf	=	sfb_put,
 	.dump		=	sfb_dump_class,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 53a641f..66dfd15 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -126,6 +126,7 @@ struct sfq_sched_data {
 	u8		flags;
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct tcf_proto __rcu *filter_list;
+	struct tcf_block *block;
 	sfq_index	*ht;		/* Hash table ('divisor' slots) */
 	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
 
@@ -697,7 +698,7 @@ static void sfq_destroy(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 	q->perturb_period = 0;
 	del_timer_sync(&q->perturb_timer);
 	sfq_free(q->ht);
@@ -709,6 +710,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	int i;
+	int err;
+
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
 
 	setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
 			       (unsigned long)sch);
@@ -815,14 +821,13 @@ static void sfq_put(struct Qdisc *q, unsigned long cl)
 {
 }
 
-static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch,
-					     unsigned long cl)
+static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
-	return &q->filter_list;
+	return q->block;
 }
 
 static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -878,7 +883,7 @@ static const struct Qdisc_class_ops sfq_class_ops = {
 	.leaf		=	sfq_leaf,
 	.get		=	sfq_get,
 	.put		=	sfq_put,
-	.tcf_chain	=	sfq_find_tcf,
+	.tcf_block	=	sfq_tcf_block,
 	.bind_tcf	=	sfq_bind,
 	.unbind_tcf	=	sfq_put,
 	.dump		=	sfq_dump_class,
-- 
2.9.3

^ permalink raw reply related

* [patch net-next v4 03/10] net: sched: rename tcf_destroy_chain helper
From: Jiri Pirko @ 2017-05-17  9:07 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, dsa, edumazet, stephen, daniel,
	alexander.h.duyck, simon.horman, mlxsw
In-Reply-To: <20170517090803.4461-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Make the name consistent with the rest of the helpers around.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 net/sched/cls_api.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d30116f..c02b03e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -187,7 +187,7 @@ static void tcf_proto_destroy(struct tcf_proto *tp)
 	kfree_rcu(tp, rcu);
 }
 
-static void tcf_destroy_chain(struct tcf_proto __rcu **fl)
+static void tcf_chain_destroy(struct tcf_proto __rcu **fl)
 {
 	struct tcf_proto *tp;
 
@@ -214,7 +214,7 @@ void tcf_block_put(struct tcf_block *block)
 {
 	if (!block)
 		return;
-	tcf_destroy_chain(block->p_filter_chain);
+	tcf_chain_destroy(block->p_filter_chain);
 	kfree(block);
 }
 EXPORT_SYMBOL(tcf_block_put);
@@ -372,7 +372,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 
 	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
 		tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
-		tcf_destroy_chain(chain);
+		tcf_chain_destroy(chain);
 		err = 0;
 		goto errout;
 	}
-- 
2.9.3

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox