Netdev List

Netdev List
 help / color / mirror / Atom feed

* [bpf PATCH v2 1/6] bpf: sockmap, fix crash when ipv6 sock is added
From: John Fastabend @ 2018-06-14 16:44 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev
In-Reply-To: <20180614164148.24994.65250.stgit@john-Precision-Tower-5810>

This fixes a crash where we assign tcp_prot to IPv6 sockets instead
of tcpv6_prot.

Previously we overwrote the sk->prot field with tcp_prot even in the
AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot
are used. Further, only allow ESTABLISHED connections to join the
map per note in TLS ULP,

   /* The TLS ulp is currently supported only for TCP sockets
    * in ESTABLISHED state.
    * Supporting sockets in LISTEN state will require us
    * to modify the accept implementation to clone rather then
    * share the ulp context.
    */

Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously
crashing case here.

Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Wei Wang <weiwan@google.com>
---
 0 files changed

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 52a91d8..f6dd4cd 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -140,6 +140,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
 			    int offset, size_t size, int flags);
+static void bpf_tcp_close(struct sock *sk, long timeout);
 
 static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 {
@@ -161,7 +162,42 @@ static bool bpf_tcp_stream_read(const struct sock *sk)
 	return !empty;
 }
 
-static struct proto tcp_bpf_proto;
+enum {
+	SOCKMAP_IPV4,
+	SOCKMAP_IPV6,
+	SOCKMAP_NUM_PROTS,
+};
+
+enum {
+	SOCKMAP_BASE,
+	SOCKMAP_TX,
+	SOCKMAP_NUM_CONFIGS,
+};
+
+static struct proto *saved_tcpv6_prot;
+static DEFINE_MUTEX(tcpv6_prot_mutex);
+static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
+static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
+			 struct proto *base)
+{
+	prot[SOCKMAP_BASE]			= *base;
+	prot[SOCKMAP_BASE].close		= bpf_tcp_close;
+	prot[SOCKMAP_BASE].recvmsg		= bpf_tcp_recvmsg;
+	prot[SOCKMAP_BASE].stream_memory_read	= bpf_tcp_stream_read;
+
+	prot[SOCKMAP_TX]			= prot[SOCKMAP_BASE];
+	prot[SOCKMAP_TX].sendmsg		= bpf_tcp_sendmsg;
+	prot[SOCKMAP_TX].sendpage		= bpf_tcp_sendpage;
+}
+
+static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
+{
+	int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
+	int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;
+
+	sk->sk_prot = &bpf_tcp_prots[family][conf];
+}
+
 static int bpf_tcp_init(struct sock *sk)
 {
 	struct smap_psock *psock;
@@ -181,14 +217,17 @@ static int bpf_tcp_init(struct sock *sk)
 	psock->save_close = sk->sk_prot->close;
 	psock->sk_proto = sk->sk_prot;
 
-	if (psock->bpf_tx_msg) {
-		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
-		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
-		tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
-		tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
+	/* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
+	if (sk->sk_family == AF_INET6 &&
+	    unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
+		mutex_lock(&tcpv6_prot_mutex);
+		if (likely(sk->sk_prot != saved_tcpv6_prot)) {
+			build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
+			smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
+		}
+		mutex_unlock(&tcpv6_prot_mutex);
 	}
-
-	sk->sk_prot = &tcp_bpf_proto;
+	update_sk_prot(sk, psock);
 	rcu_read_unlock();
 	return 0;
 }
@@ -1111,8 +1150,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock,
 
 static int bpf_tcp_ulp_register(void)
 {
-	tcp_bpf_proto = tcp_prot;
-	tcp_bpf_proto.close = bpf_tcp_close;
+	build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
 	/* Once BPF TX ULP is registered it is never unregistered. It
 	 * will be in the ULP list for the lifetime of the system. Doing
 	 * duplicate registers is not a problem.

^ permalink raw reply related

* [bpf PATCH v2 2/6] bpf: sockmap only allow ESTABLISHED sock state
From: John Fastabend @ 2018-06-14 16:44 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev
In-Reply-To: <20180614164148.24994.65250.stgit@john-Precision-Tower-5810>

Per the note in the TLS ULP (which is actually a generic statement
regarding ULPs)

 /* The TLS ulp is currently supported only for TCP sockets
  * in ESTABLISHED state.
  * Supporting sockets in LISTEN state will require us
  * to modify the accept implementation to clone rather then
  * share the ulp context.
  */

After this patch we only allow socks that are in ESTABLISHED state or
are being added via a sock_ops event that is transitioning into an
ESTABLISHED state. By allowing sock_ops events we allow users to
manage sockmaps directly from sock ops programs. The two supported
sock_ops ops are BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB and
BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB.

From the userspace BPF update API the sock lock is also taken now
to ensure we don't race with state changes after the ESTABLISHED
check. The BPF program sock ops hook already has the sock lock
taken.

Also tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
'netperf -H [IPv4]'.

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 0 files changed

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index f6dd4cd..f1ab52d 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1976,13 +1976,20 @@ static int sock_map_update_elem(struct bpf_map *map,
 		return -EINVAL;
 	}
 
+	lock_sock(skops.sk);
+	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
+	 * state.
+	 */
 	if (skops.sk->sk_type != SOCK_STREAM ||
-	    skops.sk->sk_protocol != IPPROTO_TCP) {
-		fput(socket->file);
-		return -EOPNOTSUPP;
+	    skops.sk->sk_protocol != IPPROTO_TCP ||
+	    skops.sk->sk_state != TCP_ESTABLISHED) {
+		err = -EOPNOTSUPP;
+		goto out;
 	}
 
 	err = sock_map_ctx_update_elem(&skops, map, key, flags);
+out:
+	release_sock(skops.sk);
 	fput(socket->file);
 	return err;
 }
@@ -2247,10 +2254,6 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 
 	sock = skops->sk;
 
-	if (sock->sk_type != SOCK_STREAM ||
-	    sock->sk_protocol != IPPROTO_TCP)
-		return -EOPNOTSUPP;
-
 	if (unlikely(map_flags > BPF_EXIST))
 		return -EINVAL;
 
@@ -2338,7 +2341,20 @@ static int sock_hash_update_elem(struct bpf_map *map,
 		return -EINVAL;
 	}
 
+	lock_sock(skops.sk);
+	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
+	 * state.
+	 */
+	if (skops.sk->sk_type != SOCK_STREAM ||
+	    skops.sk->sk_protocol != IPPROTO_TCP ||
+	    skops.sk->sk_state != TCP_ESTABLISHED) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
 	err = sock_hash_ctx_update_elem(&skops, map, key, flags);
+out:
+	release_sock(skops.sk);
 	fput(socket->file);
 	return err;
 }
@@ -2423,10 +2439,19 @@ struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
 	.map_delete_elem = sock_hash_delete_elem,
 };
 
+static bool bpf_is_valid_sock(struct bpf_sock_ops_kern *ops)
+{
+	return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
+	       ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
+}
+
 BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
 	   struct bpf_map *, map, void *, key, u64, flags)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (!bpf_is_valid_sock(bpf_sock))
+		return -EOPNOTSUPP;
 	return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
 }
 
@@ -2445,6 +2470,9 @@ struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
 	   struct bpf_map *, map, void *, key, u64, flags)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (!bpf_is_valid_sock(bpf_sock))
+		return -EOPNOTSUPP;
 	return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
 }
 

^ permalink raw reply related

* [bpf PATCH v2 3/6] bpf: sockhash fix omitted bucket lock in sock_close
From: John Fastabend @ 2018-06-14 16:44 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev
In-Reply-To: <20180614164148.24994.65250.stgit@john-Precision-Tower-5810>

First, in tcp_close, reduce the scope of sk_callback_lock. The lock is
only needed for protecting smap_release_sock(); the ingress and cork
lists are protected by the sock lock. Having the lock in wider scope is
harmless but may confuse the reader who may infer it is in fact
needed.

Next, in sock_hash_delete_elem() the pattern is as follows,

  sock_hash_delete_elem()
     [...]
     spin_lock(bucket_lock)
     l = lookup_elem_raw()
     if (l)
        hlist_del_rcu()
        write_lock(sk_callback_lock)
         .... destroy psock ...
        write_unlock(sk_callback_lock)
     spin_unlock(bucket_lock)

The ordering is necessary because we only know the {p}sock after
dereferencing the hash table which we can't do unless we have the
bucket lock held. Once we have the bucket lock and the psock element
it is deleted from the hashmap to ensure any other path doing a lookup
will fail. Finally, the refcnt is decremented and if zero the psock
is destroyed.

In parallel with the above (or free'ing the map) a tcp close event
may trigger tcp_close(), which at the moment omits the bucket lock
altogether (oops!). The flow looks like this,

  bpf_tcp_close()
     [...]
     write_lock(sk_callback_lock)
     for each psock->maps // list of maps this sock is part of
         hlist_del_rcu(ref_hash_node);
         .... destroy psock ...
     write_unlock(sk_callback_lock)

Obviously, and demonstrated by syzbot, this is broken because
we can have multiple threads deleting entries via hlist_del_rcu().

To fix this we might be tempted to wrap the hlist operation in a
bucket lock but that would create a lock inversion problem. In
summary, to follow the locking rules the psock 'maps' list needs the
sk_callback_lock, but we need the bucket lock to do the hlist_del_rcu().
To resolve the lock inversion problem, note that when bpf_tcp_close() is
called no updates can happen in parallel, due to the ESTABLISHED state
check in the update logic, so pop the head of the list repeatedly and
remove the reference until
no more are left. If a delete happens in parallel from the BPF API
that is OK as well because it will do a similar action, lookup the
sock in the map/hash, delete it from the map/hash, and dec the refcnt.
We check for this case before doing a destroy on the psock to ensure
we don't have two threads tearing down a psock. The new logic is
as follows,

  bpf_tcp_close()
  e = psock_map_pop(psock->maps) // done with sk_callback_lock
  bucket_lock() // lock hash list bucket
  l = lookup_elem_raw(head, hash, key, key_size);
  if (l) {
     //only get here if elmnt was not already removed
     hlist_del_rcu()
     ... destroy psock...
  }
  bucket_unlock()

And finally for all the above to work add missing sk_callback_lock
around smap_list_remove in sock_hash_ctx_update_elem(). Otherwise
delete and update may corrupt maps list.

(As an aside the sk_callback_lock serves two purposes. The
 first is to update the sock callbacks sk_data_ready, sk_write_space,
 etc. The second is to protect the psock 'maps' list. The 'maps' list
 is used (as shown above) to delete all map/hash references to a
 sock when the sock is closed.)

(If we did not have the ESTABLISHED state guarantee from tcp_close
 then we could not ensure completion because updates could happen
 forever and pin thread in delete loop.)

Reported-by: syzbot+0ce137753c78f7b6acc1@syzkaller.appspotmail.com
Fixes: 81110384441a ("bpf: sockmap, add hash map support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 0 files changed

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index f1ab52d..04764f5 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -258,16 +258,54 @@ static void bpf_tcp_release(struct sock *sk)
 	rcu_read_unlock();
 }
 
+static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
+					 u32 hash, void *key, u32 key_size)
+{
+	struct htab_elem *l;
+
+	hlist_for_each_entry_rcu(l, head, hash_node) {
+		if (l->hash == hash && !memcmp(&l->key, key, key_size))
+			return l;
+	}
+
+	return NULL;
+}
+
+static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
+{
+	return &htab->buckets[hash & (htab->n_buckets - 1)];
+}
+
+static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+{
+	return &__select_bucket(htab, hash)->head;
+}
+
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
 	atomic_dec(&htab->count);
 	kfree_rcu(l, rcu);
 }
 
+struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
+					   struct smap_psock *psock)
+{
+	struct smap_psock_map_entry *e;
+
+	write_lock_bh(&sk->sk_callback_lock);
+	e = list_first_entry_or_null(&psock->maps,
+				     struct smap_psock_map_entry,
+				     list);
+	if (e)
+		list_del(&e->list);
+	write_unlock_bh(&sk->sk_callback_lock);
+	return e;
+}
+
 static void bpf_tcp_close(struct sock *sk, long timeout)
 {
 	void (*close_fun)(struct sock *sk, long timeout);
-	struct smap_psock_map_entry *e, *tmp;
+	struct smap_psock_map_entry *e;
 	struct sk_msg_buff *md, *mtmp;
 	struct smap_psock *psock;
 	struct sock *osk;
@@ -286,7 +324,6 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 	 */
 	close_fun = psock->save_close;
 
-	write_lock_bh(&sk->sk_callback_lock);
 	if (psock->cork) {
 		free_start_sg(psock->sock, psock->cork);
 		kfree(psock->cork);
@@ -299,20 +336,48 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 		kfree(md);
 	}
 
-	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+	/* Sock is in TCP_CLOSE state so any concurrent adds or updates will be
+	 * blocked by ESTABLISHED check. However, tcp_close() + delete + free
+	 * can all run at the same time. If a tcp_close + delete happens each
+	 * code path will remove the entry for the map/hash before deleting it.
+	 * In the map case a xchg and then check to verify we have a sk protects
+	 * two paths from tearing down the same object. For hash map we lock the
+	 * bucket and remove the object from the hash map before destroying to
+	 * ensure that only one reference exists. By pulling object off the head
+	 * of the list with (with sk_callback_lock) if multiple deleters are
+	 * running we avoid duplicate references.
+	 */
+	e = psock_map_pop(sk, psock);
+	while (e) {
 		if (e->entry) {
 			osk = cmpxchg(e->entry, sk, NULL);
 			if (osk == sk) {
-				list_del(&e->list);
 				smap_release_sock(psock, sk);
 			}
 		} else {
-			hlist_del_rcu(&e->hash_link->hash_node);
-			smap_release_sock(psock, e->hash_link->sk);
-			free_htab_elem(e->htab, e->hash_link);
+			struct htab_elem *link = e->hash_link;
+			struct hlist_head *head;
+			struct htab_elem *l;
+			struct bucket *b;
+
+			b = __select_bucket(e->htab, link->hash);
+			head = &b->head;
+			raw_spin_lock_bh(&b->lock);
+			l = lookup_elem_raw(head,
+					    link->hash, link->key,
+					    e->htab->elem_size);
+			/* If another thread deleted this object skip deletion.
+			 * The refcnt on psock may or may not be zero.
+			 */
+			if (l) {
+				hlist_del_rcu(&e->hash_link->hash_node);
+				smap_release_sock(psock, e->hash_link->sk);
+				free_htab_elem(e->htab, e->hash_link);
+			}
+			raw_spin_unlock_bh(&b->lock);
 		}
+		e = psock_map_pop(sk, psock);
 	}
-	write_unlock_bh(&sk->sk_callback_lock);
 	rcu_read_unlock();
 	close_fun(sk, timeout);
 }
@@ -2088,16 +2153,6 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
 	return ERR_PTR(err);
 }
 
-static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
-{
-	return &htab->buckets[hash & (htab->n_buckets - 1)];
-}
-
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
-{
-	return &__select_bucket(htab, hash)->head;
-}
-
 static void sock_hash_free(struct bpf_map *map)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
@@ -2114,10 +2169,13 @@ static void sock_hash_free(struct bpf_map *map)
 	 */
 	rcu_read_lock();
 	for (i = 0; i < htab->n_buckets; i++) {
-		struct hlist_head *head = select_bucket(htab, i);
+		struct bucket *b = __select_bucket(htab, i);
+		struct hlist_head *head;
 		struct hlist_node *n;
 		struct htab_elem *l;
 
+		raw_spin_lock_bh(&b->lock);
+		head = &b->head;
 		hlist_for_each_entry_safe(l, n, head, hash_node) {
 			struct sock *sock = l->sk;
 			struct smap_psock *psock;
@@ -2137,6 +2195,7 @@ static void sock_hash_free(struct bpf_map *map)
 			write_unlock_bh(&sock->sk_callback_lock);
 			kfree(l);
 		}
+		raw_spin_unlock_bh(&b->lock);
 	}
 	rcu_read_unlock();
 	bpf_map_area_free(htab->buckets);
@@ -2167,19 +2226,6 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
 	return l_new;
 }
 
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
-					 u32 hash, void *key, u32 key_size)
-{
-	struct htab_elem *l;
-
-	hlist_for_each_entry_rcu(l, head, hash_node) {
-		if (l->hash == hash && !memcmp(&l->key, key, key_size))
-			return l;
-	}
-
-	return NULL;
-}
-
 static inline u32 htab_map_hash(const void *key, u32 key_len)
 {
 	return jhash(key, key_len, 0);
@@ -2307,8 +2353,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		psock = smap_psock_sk(l_old->sk);
 
 		hlist_del_rcu(&l_old->hash_node);
+		write_lock_bh(&l_old->sk->sk_callback_lock);
 		smap_list_remove(psock, NULL, l_old);
 		smap_release_sock(psock, l_old->sk);
+		write_unlock_bh(&l_old->sk->sk_callback_lock);
 		free_htab_elem(htab, l_old);
 	}
 	raw_spin_unlock_bh(&b->lock);

^ permalink raw reply related

* [bpf PATCH v2 4/6] bpf: sockmap, tcp_disconnect to listen transition
From: John Fastabend @ 2018-06-14 16:45 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev
In-Reply-To: <20180614164148.24994.65250.stgit@john-Precision-Tower-5810>

After adding checks to ensure TCP is in ESTABLISHED state when a
sock is added we need to also ensure that user does not transition
through tcp_disconnect() and back into ESTABLISHED state without
sockmap removing the sock.

To do this add unhash hook and remove sock from map there.

Reported-by: Eric Dumazet <edumazet@google.com>
Fixes: 81110384441a ("bpf: sockmap, add hash map support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 0 files changed

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 04764f5..ffc5152 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -130,6 +130,7 @@ struct smap_psock {
 
 	struct proto *sk_proto;
 	void (*save_close)(struct sock *sk, long timeout);
+	void (*save_unhash)(struct sock *sk);
 	void (*save_data_ready)(struct sock *sk);
 	void (*save_write_space)(struct sock *sk);
 };
@@ -141,6 +142,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
 			    int offset, size_t size, int flags);
 static void bpf_tcp_close(struct sock *sk, long timeout);
+static void bpf_tcp_unhash(struct sock *sk);
 
 static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 {
@@ -182,6 +184,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
 {
 	prot[SOCKMAP_BASE]			= *base;
 	prot[SOCKMAP_BASE].close		= bpf_tcp_close;
+	prot[SOCKMAP_BASE].unhash		= bpf_tcp_unhash;
 	prot[SOCKMAP_BASE].recvmsg		= bpf_tcp_recvmsg;
 	prot[SOCKMAP_BASE].stream_memory_read	= bpf_tcp_stream_read;
 
@@ -215,6 +218,7 @@ static int bpf_tcp_init(struct sock *sk)
 	}
 
 	psock->save_close = sk->sk_prot->close;
+	psock->save_unhash = sk->sk_prot->unhash;
 	psock->sk_proto = sk->sk_prot;
 
 	/* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
@@ -302,28 +306,12 @@ struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
 	return e;
 }
 
-static void bpf_tcp_close(struct sock *sk, long timeout)
+static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock)
 {
-	void (*close_fun)(struct sock *sk, long timeout);
 	struct smap_psock_map_entry *e;
 	struct sk_msg_buff *md, *mtmp;
-	struct smap_psock *psock;
 	struct sock *osk;
 
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		return sk->sk_prot->close(sk, timeout);
-	}
-
-	/* The psock may be destroyed anytime after exiting the RCU critial
-	 * section so by the time we use close_fun the psock may no longer
-	 * be valid. However, bpf_tcp_close is called with the sock lock
-	 * held so the close hook and sk are still valid.
-	 */
-	close_fun = psock->save_close;
-
 	if (psock->cork) {
 		free_start_sg(psock->sock, psock->cork);
 		kfree(psock->cork);
@@ -378,6 +366,51 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 		}
 		e = psock_map_pop(sk, psock);
 	}
+}
+
+static void bpf_tcp_unhash(struct sock *sk)
+{
+	void (*unhash_fun)(struct sock *sk);
+	struct smap_psock *psock;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		return sk->sk_prot->unhash(sk);
+	}
+
+	/* The psock may be destroyed anytime after exiting the RCU critial
+	 * section so by the time we use close_fun the psock may no longer
+	 * be valid. However, bpf_tcp_close is called with the sock lock
+	 * held so the close hook and sk are still valid.
+	 */
+	unhash_fun = psock->save_unhash;
+	bpf_tcp_remove(sk, psock);
+	rcu_read_unlock();
+	unhash_fun(sk);
+
+}
+
+static void bpf_tcp_close(struct sock *sk, long timeout)
+{
+	void (*close_fun)(struct sock *sk, long timeout);
+	struct smap_psock *psock;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		return sk->sk_prot->close(sk, timeout);
+	}
+
+	/* The psock may be destroyed anytime after exiting the RCU critial
+	 * section so by the time we use close_fun the psock may no longer
+	 * be valid. However, bpf_tcp_close is called with the sock lock
+	 * held so the close hook and sk are still valid.
+	 */
+	close_fun = psock->save_close;
+	bpf_tcp_remove(sk, psock);
 	rcu_read_unlock();
 	close_fun(sk, timeout);
 }

^ permalink raw reply related

* [bpf PATCH v2 5/6] bpf: sockhash, add release routine
From: John Fastabend @ 2018-06-14 16:45 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev
In-Reply-To: <20180614164148.24994.65250.stgit@john-Precision-Tower-5810>

Add map_release_uref pointer to hashmap ops. This was dropped when
original sockhash code was ported into bpf-next before initial
commit.

Fixes: 81110384441a ("bpf: sockmap, add hash map support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 0 files changed

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index ffc5152..77fe204 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -2518,6 +2518,7 @@ struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
 	.map_get_next_key = sock_hash_get_next_key,
 	.map_update_elem = sock_hash_update_elem,
 	.map_delete_elem = sock_hash_delete_elem,
+	.map_release_uref = sock_map_release,
 };
 
 static bool bpf_is_valid_sock(struct bpf_sock_ops_kern *ops)

^ permalink raw reply related

* [bpf PATCH v2 6/6] bpf: selftest remove attempts to add LISTEN sockets to sockmap
From: John Fastabend @ 2018-06-14 16:45 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev
In-Reply-To: <20180614164148.24994.65250.stgit@john-Precision-Tower-5810>

In selftest test_maps the sockmap test case attempts to add a socket
in listening state to the sockmap. This is no longer a valid operation
so it fails as expected. However, the test wrongly reports this as an
error now. Fix the test to avoid adding sockets in listening state.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 0 files changed

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 6c25334..9fed5f0 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -564,7 +564,7 @@ static void test_sockmap(int tasks, void *data)
 	}
 
 	/* Test update without programs */
-	for (i = 0; i < 6; i++) {
+	for (i = 2; i < 6; i++) {
 		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
 		if (err) {
 			printf("Failed noprog update sockmap '%i:%i'\n",
@@ -727,7 +727,7 @@ static void test_sockmap(int tasks, void *data)
 	}
 
 	/* Test map update elem afterwards fd lives in fd and map_fd */
-	for (i = 0; i < 6; i++) {
+	for (i = 2; i < 6; i++) {
 		err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
 		if (err) {
 			printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",

^ permalink raw reply related

* Re: [bpf PATCH 4/6] bpf: sockmap, tcp_disconnect to listen transition
From: John Fastabend @ 2018-06-14 16:47 UTC (permalink / raw)
  To: Martin KaFai Lau; +Cc: ast, daniel, netdev
In-Reply-To: <2bc8beab-4b38-851b-cefa-523c5f1a1fcf@gmail.com>

On 06/13/2018 10:48 PM, John Fastabend wrote:
> On 06/13/2018 05:56 PM, Martin KaFai Lau wrote:
>> On Wed, Jun 13, 2018 at 10:50:14AM -0700, John Fastabend wrote:
>>> After adding checks to ensure TCP is in ESTABLISHED state when a
>>> sock is added we need to also ensure that user does not transition
>>> through tcp_disconnect() and back into ESTABLISHED state without
>>> sockmap removing the sock.
>>>
>>> To do this add unhash hook and remove sock from map there.
>> In bpf_tcp_init():
>>         sk->sk_prot = &tcp_bpf_proto;
>>
>> I may have missed a lock while reading sockmap.c.
>> Is it possible that tcp_disconnect() is being called while
>> the above assignment is also being done (e.g. through BPF_MAP_UPDATE_ELEM)?
>> The same situation go for the ESTABLISHED check.
>>
> 
> Right because ESTABLISHED is checked without any locking its
> possible that the state changes during the update (from userspce
> BPF_MAP_UPDATE, from sock_ops program it is locked). I have
> the below patch on my tree now, I was thinking to send it as
> a follow on but on second thought it likely makes more sense
> as part of the patch that adds the ESTABLISHED check.
> 
> Also after the below the sk_callback lock used to protect
> psock->maps is becoming increasingly pointless it allows the
> delete and map free ops to be called without taking the full
> sock lock. It might be time to just drop it in bpf-next and
> use the sock lock in the delete cases. The more annoying part
> will be the delete will have to have different userspace and
> bpf program helpers so we know when we need the lock.
> 
> --- a/kernel/bpf/sockmap.c

Hi Martin,

I went ahead and sent a v2 with the sock lock addition included.

Thanks,
John

^ permalink raw reply

* Re: [PATCH 3/3] net: dsa: Add Vitesse VSC73xx DSA router driver
From: Florian Fainelli @ 2018-06-14 16:51 UTC (permalink / raw)
  To: Linus Walleij, Andrew Lunn, Vivien Didelot
  Cc: netdev, openwrt-devel, LEDE Development List, Gabor Juhos
In-Reply-To: <20180614123534.8063-4-linus.walleij@linaro.org>



On 06/14/2018 05:35 AM, Linus Walleij wrote:
> This adds a DSA driver for:
> 
> Vitesse VSC7385 SparX-G5 5-port Integrated Gigabit Ethernet Switch
> Vitesse VSC7388 SparX-G8 8-port Integrated Gigabit Ethernet Switch
> Vitesse VSC7395 SparX-G5e 5+1-port Integrated Gigabit Ethernet Switch
> Vitesse VSC7398 SparX-G8e 8-port Integrated Gigabit Ethernet Switch
> 
> These switches have a built-in 8051 CPU and can download and execute
> firmware in this CPU. They can also be configured to use an external
> CPU handling the switch in a memory-mapped manner by connecting to
> that external CPU's memory bus.
> 
> This driver (currently) only takes control of the switch chip over
> SPI and configures it to route packages around when connected to a
> CPU port. The chip has embedded PHYs and VLAN support so we model it
> using DSA as a best fit so we can easily add VLAN support and maybe
> later also exploit the internal frame header to get more direct
> control over the switch.

Yes, having the internal frame header working would be really great.
DSA_TAG_PROTO_NONE is really difficult to use without knowing all the
DSA details, which reminds me that we should have the following action items:

- document how DSA_TAG_PROTO_NONE behaves differently with respect to
bridges/VLAN configuration and the DSA master device

- possibly introduce DSA_TAG_PROTO_8021Q which would automatically
partition ports by allocating one VLAN ID per-port (e.g: from top to
bottom) that would effectively offer the same features/paradigms as what
a proper header would offer (Port separation, if nothing else) and it
could be made seemingly automatic from within DSA

- get rid of b53's DSA_TAG_PROTO_NONE

> 
> The four built-in GPIO lines are exposed using a standard GPIO chip.

What are those typically used for out of curiosity? Is this to connect
to an EEPROM?

> 
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
> ---
>  drivers/net/dsa/Kconfig           |   12 +
>  drivers/net/dsa/Makefile          |    1 +
>  drivers/net/dsa/vitesse-vsc73xx.c | 1362 +++++++++++++++++++++++++++++
>  3 files changed, 1375 insertions(+)
>  create mode 100644 drivers/net/dsa/vitesse-vsc73xx.c
> 
> diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
> index 2b81b97e994f..2f6207b969e3 100644
> --- a/drivers/net/dsa/Kconfig
> +++ b/drivers/net/dsa/Kconfig
> @@ -76,4 +76,16 @@ config NET_DSA_SMSC_LAN9303_MDIO
>  	  Enable access functions if the SMSC/Microchip LAN9303 is configured
>  	  for MDIO managed mode.
>  
> +config NET_DSA_VITESSE_VSC73XX
> +	tristate "Vitesse VSC7385/7388/7395/7398 support"
> +	depends on OF && SPI
> +	depends on NET_DSA
> +	select FIXED_PHY
> +	select VITESSE_PHY
> +	select NET_DSA_TAG_TRAILER

You advertise DSA_TAG_PROTO_NONE, so that appears to be unnecessary right?

[snip]
> +/**
> + * struct vsc73xx - VSC73xx state container
> + */
> +struct vsc73xx {
> +	struct device		*dev;
> +	struct gpio_desc	*reset;
> +	struct spi_device	*spi;
> +	struct dsa_switch	*ds;
> +	struct gpio_chip	gc;
> +	u16			chipid;
> +	bool			is_vsc7385;
> +	bool			is_vsc7388;
> +	bool			is_vsc7395;
> +	bool			is_vsc7398;

How about having an u16/u32 chip_id instead?

> +	u8			addr[ETH_ALEN];
> +	struct mutex		lock; /* Protects SPI traffic */
> +};

[snip]

> +static enum dsa_tag_protocol vsc73xx_get_tag_protocol(struct dsa_switch *ds,
> +						      int port)
> +{
> +	/* The switch internally uses a 8 byte header with length,
> +	 * source port, tag, LPA and priority. This is supposedly
> +	 * only accessible when operating the switch using the internal
> +	 * CPU or with an external CPU mapping the device in, but not
> +	 * when operating the switch over SPI and putting frames in/out
> +	 * on port 6 (the CPU port). So far we must assume that we
> +	 * cannot access the tag. (See "Internal frame header" section
> +	 * 3.9.1 in the manual.)

It would be really good if we could get this to work even in SPI with the
CPU controlling the switch, I cannot really think of why the 8051 would
have to be involved, because having the 8051 means either the switch is
entirely standalone and runs off an EEPROM (which is additional cost on
your BOM), or the host, through SPI can entirely take over.

Is the datasheet public somehow?

> +	 */
> +	return DSA_TAG_PROTO_NONE;
> +}

[snip]

> +static void vsc73xx_adjust_link(struct dsa_switch *ds, int port,
> +				struct phy_device *phydev)
> +{
> +	struct vsc73xx *vsc = ds->priv;
> +	u32 val;

No is_pseudo_fixed_link() check, you really have to do all of this for
each front-panel port? That is really bad if that is the case... most
switches with front-panel built-in PHYs are at the very least capable of
re-configuring their internal MAC accordingly.

> +
> +	/* Special handling of the CPU-facing port */
> +	if (port == CPU_PORT) {
> +		/* Other ports are already initialized but not this one */
> +		vsc73xx_init_port(vsc, CPU_PORT);
> +		/* Select the external port for this interface (EXT_PORT)
> +		 * Enable the GMII GTX external clock
> +		 * Use double data rate (DDR mode)
> +		 */
> +		vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
> +			      CPU_PORT,
> +			      VSC73XX_ADVPORTM,
> +			      VSC73XX_ADVPORTM_EXT_PORT |
> +			      VSC73XX_ADVPORTM_ENA_GTX |
> +			      VSC73XX_ADVPORTM_DDR_MODE);
> +	}
> +
> +	/* This is the MAC confiuration that always need to happen
> +	 * after a PHY or the CPU port comes up or down.
> +	 */
> +	val = phy_read(phydev, 1);

MII_BMSR

> +	if ((val & 0x0024) != 0x0024) {

BMSR_ANEGCOMPLETE | BMSR_LSTATUS


> +		dev_info(vsc->dev, "port %d: went down\n",
> +			 port);

That would duplicate what PHYLIB already prints, please drop it or
demote it to dev_dbg().

> +
> +		/* Disable RX on this port */
> +		vsc73xx_update_bits(vsc, VSC73XX_BLOCK_MAC, port,
> +				    VSC73XX_MAC_CFG,
> +				    VSC73XX_MAC_CFG_RX_EN, 0);
> +
> +		/* Discard packets */
> +		vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ARBITER, 0,
> +				    VSC73XX_ARBDISC, BIT(port), BIT(port));
> +
> +		/* Wait until queue is empty */
> +		vsc73xx_read(vsc, VSC73XX_BLOCK_ARBITER, 0,
> +			     VSC73XX_ARBEMPTY, &val);
> +		while (!(val & BIT(port))) {
> +			msleep(1);
> +			vsc73xx_read(vsc, VSC73XX_BLOCK_ARBITER, 0,
> +				     VSC73XX_ARBEMPTY, &val);
> +		}

Possibly unbounded loop (if the HW behaves incorrectly).

> +
> +		/* Put this port into reset */
> +		vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port, VSC73XX_MAC_CFG,
> +			      VSC73XX_MAC_CFG_RESET);
> +
> +		/* Accept packets again */
> +		vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ARBITER, 0,
> +				    VSC73XX_ARBDISC, BIT(port), 0);
> +
> +		/* Allow backward dropping of frames from this port */
> +		vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ARBITER, 0,
> +				    VSC73XX_SBACKWDROP, BIT(port), BIT(port));
> +
> +		/* Receive mask (disable forwarding) */
> +		vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ANALYZER, 0,
> +				    VSC73XX_RECVMASK, BIT(port), 0);
> +
> +		return;
> +	}
> +
> +	/* Figure out what speed was negotiated */
> +	val = phy_read(phydev, 0x0a);

MII_STAT1000

> +	if (val & 0x0c00) {

LPA_1000FULL | LPA_1000HALF

> +		dev_info(vsc->dev, "port %d: 1000 Mbit mode full duplex\n",
> +			 port);

Likewise, duplicates PHYLIB messages.

> +
> +		/* Set up default for internal or external RGMII */
> +		if (port == CPU_PORT)
> +			val = VSC73XX_MAC_CFG_1000M_F_RGMII;

You need to look at the CPU port's phy_device->interface for that (which
should be a fixed-link).

> +		else
> +			val = VSC73XX_MAC_CFG_1000M_F_PHY;
> +		vsc73xx_adjust_enable_port(vsc, port, phydev, val);
> +	} else {
> +		val = phy_read(phydev, 0x05);

MII_LPA

> +		val &= 0x05e0;
> +		val >>= 5;
> +		if (val & 0x0c) {
> +			if (val & 0x08) {
> +				val = VSC73XX_MAC_CFG_100_10M_F_PHY;
> +				dev_info(vsc->dev,
> +					 "port %d: 100 Mbit full duplex mode\n",
> +					 port);
> +			} else {
> +				val = VSC73XX_MAC_CFG_100_10M_H_PHY;
> +				dev_info(vsc->dev,
> +					 "port %d: 100 Mbit half duplex mode\n",
> +					 port);
> +			}
> +			vsc73xx_adjust_enable_port(vsc, port, phydev, val);
> +		} else if (val & 0x03) {
> +			if (val & 0x02) {
> +				val = VSC73XX_MAC_CFG_100_10M_F_PHY;
> +				dev_info(vsc->dev,
> +					 "port %d: 10 Mbit full duplex mode\n",
> +					 port);
> +			} else {
> +				val = VSC73XX_MAC_CFG_100_10M_H_PHY;
> +				dev_info(vsc->dev,
> +					 "port %d: 10 Mbit half duplex mode\n",
> +					 port);
> +			}
> +			vsc73xx_adjust_enable_port(vsc, port, phydev, val);
> +		} else {
> +			dev_err(vsc->dev,
> +				"could not adjust link: unknown speed\n");
> +		}
> +	}

A lot of what you are doing here has been done by genphy_read_status()
and you should be able to extract the link speed/duplex/lpa from the
phy_device itself, and not perform additional MDIO reads.

> +
> +	/* Enable port (forwarding) in the receieve mask */
> +	vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ANALYZER, 0,
> +			    VSC73XX_RECVMASK, BIT(port), BIT(port));
> +}
> +
> +static int vsc73xx_port_enable(struct dsa_switch *ds, int port,
> +			       struct phy_device *phy)
> +{
> +	struct vsc73xx *vsc = ds->priv;
> +
> +	dev_info(vsc->dev, "enable port %d\n", port);
> +
> +	/* VSC7385 and VSC7395 have ports 0..4 accessible */
> +	if ((vsc->is_vsc7385 || vsc->is_vsc7395) && port > 4)
> +		return -ENODEV;
> +	if ((vsc->is_vsc7388 || vsc->is_vsc7398) && port > 7)
> +		return -ENODEV;

Humm no, you would not even get there if you told DSA how many ports you
have, see below.

> +
> +	vsc73xx_init_port(vsc, port);
> +
> +	return 0;
> +}
> +
> +static void vsc73xx_port_disable(struct dsa_switch *ds, int port,
> +				 struct phy_device *phy)
> +{
> +	struct vsc73xx *vsc = ds->priv;
> +
> +	/* VSC7385 and VSC7395 have ports 0..4 accessible */
> +	if ((vsc->is_vsc7385 || vsc->is_vsc7395) && port > 4)
> +		return;
> +	if ((vsc->is_vsc7388 || vsc->is_vsc7398) && port > 7)
> +		return;

Likewise.

> +
> +	/* Just put the port into reset */
> +	vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port,
> +		      VSC73XX_MAC_CFG, VSC73XX_MAC_CFG_RESET);
> +}
> +
> +const struct vsc73xx_counter *vsc73xx_find_counter(struct vsc73xx *vsc,
> +						   u8 counter,
> +						   bool tx)

Missing static?

[snip]

> +	vsc->gc.label = devm_kasprintf(dev, GFP_KERNEL, "VSC%04x",
> +				       vsc->chipid);
> +	vsc->gc.ngpio = 4;
> +	vsc->gc.owner = THIS_MODULE;
> +	vsc->gc.parent = dev;
> +	vsc->gc.of_node = dev->of_node;
> +	vsc->gc.base = -1;
> +	vsc->gc.get = vsc73xx_gpio_get;
> +	vsc->gc.set = vsc73xx_gpio_set;
> +	vsc->gc.direction_input = vsc73xx_gpio_direction_input;
> +	vsc->gc.direction_output = vsc73xx_gpio_direction_output;
> +	vsc->gc.get_direction = vsc73xx_gpio_get_direction;
> +	vsc->gc.can_sleep = true;
> +	ret = devm_gpiochip_add_data(dev, &vsc->gc, vsc);
> +	if (ret) {
> +		dev_err(dev, "unable to register GPIO chip\n");
> +		dsa_unregister_switch(vsc->ds);
> +		return ret;
> +	}

Would you consider putting this in a separate function so this can be
optionally disabled?

> +
> +	return 0;
> +}
> +
> +static int vsc73xx_remove(struct spi_device *spi)
> +{
> +	struct vsc73xx *vsc = spi_get_drvdata(spi);
> +
> +	dsa_unregister_switch(vsc->ds);
> +	gpiod_set_value(vsc->reset, 1);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id vsc73xx_of_match[] = {
> +	{
> +		.compatible = "vitesse,vsc7385",

Wouldn't you want to pass additional data here, like the possible port
layout/chip id to be read?
-- 
Florian

^ permalink raw reply

* Re: [PATCH] net: Fix device name resolving crash in default_device_exit()
From: David Ahern @ 2018-06-14 17:11 UTC (permalink / raw)
  To: Kirill Tkhai, netdev
  Cc: davem, daniel, jakub.kicinski, ast, linux, john.fastabend, brouer
In-Reply-To: <152897987484.3952.9263337756183251797.stgit@localhost.localdomain>

On 6/14/18 6:38 AM, Kirill Tkhai wrote:
> The following script makes kernel to crash since it can't obtain
> a name for a device, when the name is occupied by another device:
> 
> #!/bin/bash
> ifconfig eth0 down
> ifconfig eth1 down
> index=`cat /sys/class/net/eth1/ifindex`
> ip link set eth1 name dev$index
> unshare -n sleep 1h &
> pid=$!
> while [[ "`readlink /proc/self/ns/net`" == "`readlink /proc/$pid/ns/net`" ]]; do continue; done
> ip link set dev$index netns $pid
> ip link set eth0 name dev$index
> kill -9 $pid
> 
> Kernel messages:
> 
> virtio_net virtio1 dev3: renamed from eth1
> virtio_net virtio0 dev3: renamed from eth0
> default_device_exit: failed to move dev3 to init_net: -17
> ------------[ cut here ]------------
> kernel BUG at net/core/dev.c:8978!
> invalid opcode: 0000 [#1] PREEMPT SMP
> CPU: 1 PID: 276 Comm: kworker/u8:3 Not tainted 4.17.0+ #292
> Workqueue: netns cleanup_net
> RIP: 0010:default_device_exit+0x9c/0xb0
> [stack trace snipped]
> 
> This patch gives more variability during choosing new name
> of device and fixes the problem.
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  net/core/dev.c |    4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 6e18242a1cae..6c9b9303ded6 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -8959,7 +8959,6 @@ static void __net_exit default_device_exit(struct net *net)
>  	rtnl_lock();
>  	for_each_netdev_safe(net, dev, aux) {
>  		int err;
> -		char fb_name[IFNAMSIZ];
>  
>  		/* Ignore unmoveable devices (i.e. loopback) */
>  		if (dev->features & NETIF_F_NETNS_LOCAL)
> @@ -8970,8 +8969,7 @@ static void __net_exit default_device_exit(struct net *net)
>  			continue;
>  
>  		/* Push remaining network devices to init_net */
> -		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
> -		err = dev_change_net_namespace(dev, &init_net, fb_name);
> +		err = dev_change_net_namespace(dev, &init_net, "dev%d");
>  		if (err) {
>  			pr_emerg("%s: failed to move %s to init_net: %d\n",
>  				 __func__, dev->name, err);
> 

This could cause repeated looping over __dev_alloc_name. If init_net has
a large number of devices, it is going to be a performance bottleneck.

^ permalink raw reply

* Re: [PATCH bpf-next v5 00/10] BTF: BPF Type Format
From: Arnaldo Carvalho de Melo @ 2018-06-14 17:18 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: netdev, Alexei Starovoitov, Daniel Borkmann, kernel-team,
	Wang Nan, Jiri Olsa, Namhyung Kim, Ingo Molnar
In-Reply-To: <20180614162227.r72d7wk57unfhqvo@kafai-mbp.dhcp.thefacebook.com>

Em Thu, Jun 14, 2018 at 09:22:27AM -0700, Martin KaFai Lau escreveu:
> On Thu, Jun 14, 2018 at 12:03:34PM -0300, Arnaldo Carvalho de Melo wrote:
> 
> > > > > > 1. The tools/testing/selftests/bpf/Makefile has the CLANG_FLAGS and
> > > > > >    LLC_FLAGS needed to compile the bpf prog.  It requires a new
> > > > > >    "-mattr=dwarf" llc option which was added to the future
> > > > > >    llvm 7.0.
> 
> [ ... ]
> 
> > I tried it, but it didn't work, see:
> > 
> > [root@jouet bpf]# cat hello.c 
> > #include "stdio.h"
> > 
> > int syscall_enter(openat)(void *ctx)
> > {
> > 	puts("Hello, world\n");
> > 	return 0;
> > }
> > [root@jouet bpf]# trace -e openat,hello.c touch /tmp/kafai
> > clang-6.0: error: unknown argument: '-mattr=dwarf'
> "-mattr=dwarf" is currently a llc only option.
> 
> tools/testing/selftests/bpf/Makefile has example on how to pipe clang to llc.
 
> e.g.:
> clang -g -O2 -target bpf -emit-llvm -c hello.c -o - | llc -march=bpf -mcpu=generic -mattr=dwarfris -filetype=obj -o hello.o

Ok, so I'll probably add a llvm.opts .perfconfig entry that, if present
will tell tools/perf/util/llvm-utils.c that piping the output of clang
to llvm, so that we can use llvm specific options, needs to be done.

Probably, for the time being I'll check for -g in llvm.clang-opt and if
it is there, set up the piping...

Just out of curiosity, is there any plan to have this as a clang option?

Just to finish this thing here, lemme try a slightly modified version of
your command line:

[root@jouet bpf]# clang -D__KERNEL__ -D__NR_CPUS__=4 -DLINUX_VERSION_CODE=0x41100 -g -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h  -I/home/acme/lib/include/perf/bpf -Wno-unused-value -Wno-pointer-sign -working-directory /lib/modules/4.17.0-rc5/build -c /home/acme/bpf/hello.c -target bpf -emit-llvm -O2 -o - | llc -march=bpf -mcpu=generic -mattr=dwarfris -filetype=obj -o hello2.o
[root@jouet bpf]# 

[root@jouet bpf]# file hello2.o
hello2.o: ELF 64-bit LSB relocatable, *unknown arch 0xf7* version 1 (SYSV), with debug_info, not stripped
[root@jouet bpf]# pahole hello2.o
struct bpf_map_def {
	unsigned int               type;                 /*     0     4 */
	unsigned int               key_size;             /*     4     4 */
	unsigned int               value_size;           /*     8     4 */
	unsigned int               max_entries;          /*    12     4 */

	/* size: 16, cachelines: 1, members: 4 */
	/* last cacheline: 16 bytes */
};
[root@jouet bpf]#

Finally works, thanks.

Thanks,

- Arnaldo
 
> > ERROR:	unable to compile hello.c
> > Hint:	Check error message shown above.
> > Hint:	You can also pre-compile it into .o using:
> >      		clang -target bpf -O2 -c hello.c
> >      	with proper -I and -D options.
> > event syntax error: 'hello.c'
> >                      \___ Failed to load hello.c from source: Error when compiling BPF scriptlet
> > 
> > (add -v to see detail)
> > Run 'perf list' for a list of valid events
> > 
> >  Usage: perf trace [<options>] [<command>]
> >     or: perf trace [<options>] -- <command> [<options>]
> >     or: perf trace record [<options>] [<command>]
> >     or: perf trace record [<options>] -- <command> [<options>]
> > 
> >     -e, --event <event>   event/syscall selector. use 'perf list' to list available events
> > [root@jouet bpf]#
> > 
> > The full command line with that is:
> > 
> > [root@jouet bpf]# trace -v -e openat,hello.c touch /tmp/kafai |& grep mattr
> > set env: CLANG_OPTIONS=-g -mattr=dwarf
> > llvm compiling command : /usr/local/bin/clang -D__KERNEL__ -D__NR_CPUS__=4 -DLINUX_VERSION_CODE=0x41100 -g -mattr=dwarf  -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h  -I/home/acme/lib/include/perf/bpf -Wno-unused-value -Wno-pointer-sign -working-directory /lib/modules/4.17.0-rc5/build -c /home/acme/bpf/hello.c -target bpf -O2 -o -
> > clang-6.0: error: unknown argument: '-mattr=dwarf'
> > [root@jouet bpf]#
> > 
> > This is with these llvm and clang trees:
> > 
> > [root@jouet llvm]# git log --oneline -5
> > 98c78e82f54 (HEAD -> master, origin/master, origin/HEAD) [asan] Instrument comdat globals on COFF targets
> > 6ad988b5998 [DAGCombiner] clean up comments; NFC
> > a735ba5b795 [X86][SSE] Support v8i16/v16i16 rotations
> > 1503b9f6fe8 [x86] add tests for node-level FMF; NFC
> > 4a49826736f [x86] regenerate test checks; NFC
> > [root@jouet llvm]#
> > 
> > [root@jouet llvm]# cd tools/clang/
> > [root@jouet clang]# git log --oneline -5
> > 8c873daccc (HEAD -> master, origin/master, origin/HEAD) [X86] Add builtins for vpermq/vpermpd instructions to enable target feature checking.
> > a344be6ba4 [X86] Change immediate type for some builtins from char to int.
> > dcdd53793e [CUDA] Fix emission of constant strings in sections
> > a90c85acaf [X86] Add builtins for shufps and shufpd to enable target feature and immediate range checking.
> > ff71c0eccc [X86] Add builtins for pshufd, pshuflw, and pshufhw to enable target feature and immediate range checking.
> > [root@jouet clang]#
> > 
> > [root@jouet clang]# git log | grep mattr=dwarf
> > [root@jouet clang]# cd -
> > /home/acme/git.tmp/git/llvm
> > [root@jouet llvm]# git log | grep mattr=dwarf
> >     bpf: introduce -mattr=dwarfris to disable DwarfUsesRelocationsAcrossSections
> >     This patch introduces a new flag -mattr=dwarfris
> > [root@jouet llvm]#
> > 
> > Humm, so its -mattr=dwarfris and not -attr=dwarf?
> > 
> > Didn't help :-\
> > 
> > commit 0e0047f8c9ada2f0fe0c5f01579a80e2455b8df5
> > Author: Yonghong Song <yhs@fb.com>
> > Date:   Thu Mar 1 23:04:59 2018 +0000
> > 
> >     bpf: introduce -mattr=dwarfris to disable DwarfUsesRelocationsAcrossSections
> >     
> >     Commit e4507fb8c94b ("bpf: disable DwarfUsesRelocationsAcrossSections")
> >     disables MCAsmInfo DwarfUsesRelocationsAcrossSections unconditionally
> >     so that dwarf will not use cross section (between dwarf and symbol table)
> >     relocations. This new debug format enables pahole to dump structures
> >     correctly as libdwarves.so does not have BPF backend support yet.
> >     
> >     This new debug format, however, breaks bcc (https://github.com/iovisor/bcc)
> >     source debug output as llvm in-memory Dwarf support has some issues to
> >     handle it. More specifically, with DwarfUsesRelocationsAcrossSections
> >     disabled, JIT compiler does not generate .debug_abbrev and Dwarf
> >     DIE (debug info entry) processing is not happy about this.
> >     
> >     This patch introduces a new flag -mattr=dwarfris
> >     (dwarf relocation in section) to disable DwarfUsesRelocationsAcrossSections.
> >     DwarfUsesRelocationsAcrossSections is true by default.
> >     
> >     Signed-off-by: Yonghong Song <yhs@fb.com>
> >     
> >     git-svn-id: https://urldefense.proofpoint.com/v2/url?u=https-3A__llvm.org_svn_llvm-2Dproject_llvm_trunk-40326505&d=DwIBAg&c=5VD0RTtNlTh3ycd41b3MUw&r=VQnoQ7LvghIj0gVEaiQSUw&m=LO28-RE-2ZJTXto_gff4BgnxXkbUq8d2CEz1jD_wDl4&s=VCR3pGVfY54-OsZ3BqRsOr3FF5JVyltwbnbzu30_4EY&e= 91177308-0d34-0410-b5e6-96231b3b80d8
> > 
> > 

^ permalink raw reply

* Re: [PATCH net-next,RFC 00/13] New fast forwarding path
From: David Miller @ 2018-06-14 17:18 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev, steffen.klassert
In-Reply-To: <20180614141947.3580-1-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 14 Jun 2018 16:19:34 +0200

> This patchset proposes a new fast forwarding path infrastructure
> that combines the GRO/GSO and the flowtable infrastructures. The
> idea is to add a hook at the GRO layer that is invoked before the
> standard GRO protocol offloads. This allows us to build custom
> packet chains that we can quickly pass in one go to the neighbour
> layer to define fast forwarding path for flows.

We have full, complete, customizability of the packet path via XDP
and eBPF.

XDP and eBPF supports everything necessary to accomplish that,
there are implementations of forwarding implementations in
the tree and elsewhere.

And most importantly, XDP and eBPF are optimized in drivers and
offloaded to hardware.

There really is no need for something like what you are proposing.

^ permalink raw reply

* Re: [PATCH] SUNRPC: Move inline xprt_alloc_xid() up to fix compiler warning
From: Chuck Lever @ 2018-06-14 17:19 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Bruce Fields, Jeff Layton, Trond Myklebust, Anna Schumaker,
	David S . Miller, Linux NFS Mailing List, netdev, linux-kernel
In-Reply-To: <1528891298-26694-1-git-send-email-geert@linux-m68k.org>



> On Jun 13, 2018, at 8:01 AM, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> 
> With gcc 4.1.2:
> 
>    net/sunrpc/xprt.c:69: warning: ‘xprt_alloc_xid’ declared inline after being called
>    net/sunrpc/xprt.c:69: warning: previous declaration of ‘xprt_alloc_xid’ was here
> 
> To fix this, move the function up, before its caller, and remove the no
> longer needed forward declaration.
> 
> Fixes: 37ac86c3a76c1136 ("SUNRPC: Initialize rpc_rqst outside of xprt->reserve_lock")
> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
> ---
> net/sunrpc/xprt.c | 11 +++++------
> 1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index 3c85af058227d14b..60a8b9f91cf94b54 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -66,7 +66,6 @@
>  * Local functions
>  */
> static void	 xprt_init(struct rpc_xprt *xprt, struct net *net);
> -static __be32	xprt_alloc_xid(struct rpc_xprt *xprt);
> static void	xprt_connect_status(struct rpc_task *task);
> static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
> static void     __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
> @@ -956,6 +955,11 @@ static void xprt_timer(struct rpc_task *task)
> 		task->tk_status = 0;
> }
> 
> +static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
> +{
> +	return (__force __be32)xprt->xid++;
> +}
> +
> /**
>  * xprt_prepare_transmit - reserve the transport before sending a request
>  * @task: RPC task about to send a request
> @@ -1296,11 +1300,6 @@ void xprt_retry_reserve(struct rpc_task *task)
> 	xprt->ops->alloc_slot(xprt, task);
> }
> 
> -static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
> -{
> -	return (__force __be32)xprt->xid++;
> -}
> -

For code organization, we might want to keep xprt_alloc_xid
together with xprt_init_xid. Would it be better to simply
remove the "inline" directive from these two and let the
compiler choose the best optimization?


> static inline void xprt_init_xid(struct rpc_xprt *xprt)
> {
> 	xprt->xid = prandom_u32();
> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever

^ permalink raw reply

* Re: [PATCH bpf-next v5 00/10] BTF: BPF Type Format
From: Alexei Starovoitov @ 2018-06-14 17:21 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Martin KaFai Lau
  Cc: netdev, Daniel Borkmann, kernel-team, Wang Nan, Jiri Olsa,
	Namhyung Kim, Ingo Molnar
In-Reply-To: <20180614171806.GG30043@kernel.org>

On 6/14/18 10:18 AM, Arnaldo Carvalho de Melo wrote:
>
> Just out of curiosity, is there any plan to have this as a clang option?

I think
clang ... -mllvm -mattr=dwarfris
should work.

^ permalink raw reply

* Re: [PATCH net-next] sctp: define sctp_packet_gso_append to build GSO frames
From: David Miller @ 2018-06-14 17:26 UTC (permalink / raw)
  To: lucien.xin; +Cc: netdev, linux-sctp, marcelo.leitner, nhorman, eric.dumazet
In-Reply-To: <d41bb7dda9b5c176d5c0a23a8705744f49fcb570.1528933022.git.lucien.xin@gmail.com>

From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 14 Jun 2018 07:37:02 +0800

> Now sctp GSO uses skb_gro_receive() to append the data into head
> skb frag_list. However it actually only needs very few code from
> skb_gro_receive(). Besides, NAPI_GRO_CB has to be set while most
> of its members are not needed here.
> 
> This patch is to add sctp_packet_gso_append() to build GSO frames
> instead of skb_gro_receive(), and it would avoid many unnecessary
> checks and make the code clearer.
> 
> Note that sctp will use page frags instead of frag_list to build
> GSO frames in another patch. But it may take time, as sctp's GSO
> frames may have different size. skb_segment() can only split it
> into the frags with the same size, which would break the border
> of sctp chunks.
> 
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Applied to 'net', thanks Xin.

^ permalink raw reply

* Re: [PATCH RFC v2] rhashtable: implement rhashtable_walk_peek() using rhashtable_walk_last_seen()
From: Tom Herbert @ 2018-06-14 17:41 UTC (permalink / raw)
  To: NeilBrown
  Cc: Herbert Xu, Thomas Graf, Linux Kernel Network Developers, LKML,
	Tom Herbert
In-Reply-To: <871sdck9ds.fsf@notabene.neil.brown.name>

On Mon, Jun 11, 2018 at 7:48 PM, NeilBrown <neilb@suse.com> wrote:
>
> rhashtable_walk_last_seen() does most of the work that
> rhashtable_walk_peek() needs done, so use it and put
> it in a "static inline".
> Also update the documentation for rhashtable_walk_peek() to clarify
> the expected use case.
>
> Signed-off-by: NeilBrown <neilb@suse.com>

Acked-by: Tom Herbert <tom@quantonium.net>

> ---
>
> v2 as static-inline - suggested by Tom.
>
> Thanks,
> NeilBrown
>
>  include/linux/rhashtable.h | 29 ++++++++++++++++++++++++++++-
>  lib/rhashtable.c           | 34 ----------------------------------
>  2 files changed, 28 insertions(+), 35 deletions(-)
>
> diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
> index d63b472e9d50..96ebc2690027 100644
> --- a/include/linux/rhashtable.h
> +++ b/include/linux/rhashtable.h
> @@ -247,10 +247,37 @@ static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
>  }
>
>  void *rhashtable_walk_next(struct rhashtable_iter *iter);
> -void *rhashtable_walk_peek(struct rhashtable_iter *iter);
>  void *rhashtable_walk_last_seen(struct rhashtable_iter *iter);
>  void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
>
> +/**
> + * rhashtable_walk_peek - Return the next object to use in an interrupted walk
> + * @iter:      Hash table iterator
> + *
> + * Returns the "current" object or NULL when the end of the table is reached.
> + * When an rhashtable_walk is interrupted with rhashtable_walk_stop(),
> + * it is often because an object was found that could not be processed
> + * immediately, possible because there is no more space to encode details
> + * of the object (e.g. when producing a seq_file from the table).
> + * When the walk is restarted, the same object needs to be processed again,
> + * if possible.  The object might have been removed from the table while
> + * the walk was paused, so it might not be available.  In that case, the
> + * normal "next" object should be treated as "current".
> + *
> + * To support this common case, rhashtable_walk_peek() returns the
> + * appropriate object to process after an interrupted walk, either the
> + * one that was most recently returned, or if that doesn't exist - the
> + * next one.
> + *
> + * Returns -EAGAIN if resize event occurred.  In that case the iterator
> + * will rewind back to the beginning and you may continue to use it.
> + */
> +static inline void *rhashtable_walk_peek(struct rhashtable_iter *iter)
> +{
> +       return rhashtable_walk_last_seen(iter) ?:
> +               rhashtable_walk_next(iter);
> +}
> +
>  void rhashtable_free_and_destroy(struct rhashtable *ht,
>                                  void (*free_fn)(void *ptr, void *arg),
>                                  void *arg);
> diff --git a/lib/rhashtable.c b/lib/rhashtable.c
> index 45f2554399a5..354275037df3 100644
> --- a/lib/rhashtable.c
> +++ b/lib/rhashtable.c
> @@ -915,40 +915,6 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
>  }
>  EXPORT_SYMBOL_GPL(rhashtable_walk_next);
>
> -/**
> - * rhashtable_walk_peek - Return the next object but don't advance the iterator
> - * @iter:      Hash table iterator
> - *
> - * Returns the next object or NULL when the end of the table is reached.
> - *
> - * Returns -EAGAIN if resize event occurred.  Note that the iterator
> - * will rewind back to the beginning and you may continue to use it.
> - */
> -void *rhashtable_walk_peek(struct rhashtable_iter *iter)
> -{
> -       struct rhlist_head *list = iter->list;
> -       struct rhashtable *ht = iter->ht;
> -       struct rhash_head *p = iter->p;
> -
> -       if (p)
> -               return rht_obj(ht, ht->rhlist ? &list->rhead : p);
> -
> -       /* No object found in current iter, find next one in the table. */
> -
> -       if (iter->skip) {
> -               /* A nonzero skip value points to the next entry in the table
> -                * beyond that last one that was found. Decrement skip so
> -                * we find the current value. __rhashtable_walk_find_next
> -                * will restore the original value of skip assuming that
> -                * the table hasn't changed.
> -                */
> -               iter->skip--;
> -       }
> -
> -       return __rhashtable_walk_find_next(iter);
> -}
> -EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
> -
>  /**
>   * rhashtable_walk_last_seen - Return the previously returned object, if available
>   * @iter:      Hash table iterator
> --
> 2.14.0.rc0.dirty
>

^ permalink raw reply

* Re: [PATCH bpf-next v5 00/10] BTF: BPF Type Format
From: Arnaldo Carvalho de Melo @ 2018-06-14 17:41 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Martin KaFai Lau, netdev, Daniel Borkmann, kernel-team, Wang Nan,
	Jiri Olsa, Namhyung Kim, Ingo Molnar
In-Reply-To: <1e7a418d-53e8-ee72-5a13-7e2df5a48000@fb.com>

Em Thu, Jun 14, 2018 at 10:21:30AM -0700, Alexei Starovoitov escreveu:
> On 6/14/18 10:18 AM, Arnaldo Carvalho de Melo wrote:
> > 
> > Just out of curiosity, is there any plan to have this as a clang option?
> 
> I think
> clang ... -mllvm -mattr=dwarfris

thanks, trying...

- Arnaldo

^ permalink raw reply

* Re: [PATCH bpf-next v5 00/10] BTF: BPF Type Format
From: Arnaldo Carvalho de Melo @ 2018-06-14 17:47 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Martin KaFai Lau, netdev, Daniel Borkmann, kernel-team, Wang Nan,
	Jiri Olsa, Namhyung Kim, Ingo Molnar
In-Reply-To: <1e7a418d-53e8-ee72-5a13-7e2df5a48000@fb.com>

Em Thu, Jun 14, 2018 at 10:21:30AM -0700, Alexei Starovoitov escreveu:
> On 6/14/18 10:18 AM, Arnaldo Carvalho de Melo wrote:
> > Just out of curiosity, is there any plan to have this as a clang option?
 
> I think
> clang ... -mllvm -mattr=dwarfris
> should work.

[root@jouet bpf]# cat ~/.perfconfig
[llvm]
dump-obj = true
clang-opt = -g -mllvm -mattr=dwarfris
[root@jouet bpf]# trace -e openat,hello.c touch /tmp/kafai
clang (LLVM option parsing): Unknown command line argument '-mattr=dwarfris'.  Try: 'clang (LLVM option parsing) -help'
clang (LLVM option parsing): Did you mean '-mxgot=dwarfris'?
ERROR:	unable to compile hello.c
Hint:	Check error message shown above.
Hint:	You can also pre-compile it into .o using:
     		clang -target bpf -O2 -c hello.c
     	with proper -I and -D options.
event syntax error: 'hello.c'
                     \___ Failed to load hello.c from source: Error when compiling BPF scriptlet

(add -v to see detail)
<SNIP>
[root@jouet bpf]# 

[root@jouet bpf]# trace -e openat,hello.c touch /tmp/kafai |& grep clang
clang (LLVM option parsing): Unknown command line argument '-mattr=dwarfris'.  Try: 'clang (LLVM option parsing) -help'
clang (LLVM option parsing): Did you mean '-mxgot=dwarfris'?
     		clang -target bpf -O2 -c hello.c
[root@jouet bpf]# trace -v -e openat,hello.c touch /tmp/kafai |& grep clang
set env: CLANG_EXEC=/usr/local/bin/clang
llvm compiling command : /usr/local/bin/clang -D__KERNEL__ -D__NR_CPUS__=4 -DLINUX_VERSION_CODE=0x41100 -g -mllvm -mattr=dwarfris  -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h  -I/home/acme/lib/include/perf/bpf -Wno-unused-value -Wno-pointer-sign -working-directory /lib/modules/4.17.0-rc5/build -c /home/acme/bpf/hello.c -target bpf -O2 -o -
clang (LLVM option parsing): Unknown command line argument '-mattr=dwarfris'.  Try: 'clang (LLVM option parsing) -help'
clang (LLVM option parsing): Did you mean '-mxgot=dwarfris'?
     		clang -target bpf -O2 -c hello.c
[root@jouet bpf]#

The message "(LLVM option parsing)" implies what you suggest, but it didn't
work :-\

  -mllvm <value>          Additional arguments to forward to LLVM's option processing

Almost there tho :-\

- Arnaldo

^ permalink raw reply

* Re: [PATCH bpf-next v5 00/10] BTF: BPF Type Format
From: Arnaldo Carvalho de Melo @ 2018-06-14 18:00 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Martin KaFai Lau, netdev, Daniel Borkmann, kernel-team, Wang Nan,
	Jiri Olsa, Namhyung Kim, Ingo Molnar
In-Reply-To: <20180614174759.GI30043@kernel.org>

Em Thu, Jun 14, 2018 at 02:47:59PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Thu, Jun 14, 2018 at 10:21:30AM -0700, Alexei Starovoitov escreveu:
> > On 6/14/18 10:18 AM, Arnaldo Carvalho de Melo wrote:
> > > Just out of curiosity, is there any plan to have this as a clang option?
  
> > I think
> > clang ... -mllvm -mattr=dwarfris
> > should work.
 
> The message "(LLVM option parsing)" implies what you suggest, but didn't
> worked :-\
 
>   -mllvm <value>          Additional arguments to forward to LLVM's option processing
 
> Almost there tho :-\

So I thought that this -mattr=dwarfris would be available only after I
set the target, because I tried 'llc -mattr=help' and dwarfris wasn't
there:

[acme@jouet perf]$ llc -mattr=help |& grep dwarf
[acme@jouet perf]$

Only after I set the arch it appears:

[acme@jouet perf]$ llc -march=bpf -mattr=help |& grep dwarf
  dwarfris - Disable MCAsmInfo DwarfUsesRelocationsAcrossSections.
  dwarfris - Disable MCAsmInfo DwarfUsesRelocationsAcrossSections.
  dwarfris - Disable MCAsmInfo DwarfUsesRelocationsAcrossSections.
[acme@jouet perf]$ 

But even after moving the '-mllvm -mattr=dwarfris' to after '-target
bpf' it still can't grok it :-\

/usr/local/bin/clang -D__KERNEL__ -D__NR_CPUS__=4 -DLINUX_VERSION_CODE=0x41100 -g -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h  -I/home/acme/lib/include/perf/bpf -Wno-unused-value -Wno-pointer-sign -working-directory /lib/modules/4.17.0-rc5/build -c /home/acme/bpf/hello.c -target bpf -mllvm -mattr=dwarfris -O2 -o hello.o

So only with 'clang ... -target bpf -emit-llvm -O2 -o - | llc -march=bpf -mattr=dwarfris ...'
things work as we expect.

- Arnaldo

^ permalink raw reply

* [PATCH net-next 1/1] tc-testing: initial version of tunnel_key unit tests
From: Keara Leibovitz @ 2018-06-14 18:05 UTC (permalink / raw)
  To: davem; +Cc: netdev, jhs, xiyou.wangcong, jiri, lucasb, Keara Leibovitz

Signed-off-by: Keara Leibovitz <kleib@mojatatu.com>
---
 .../tc-testing/tc-tests/actions/tunnel_key.json    | 676 +++++++++++++++++++++
 1 file changed, 676 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
new file mode 100644
index 000000000000..bfe522ac8177
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
@@ -0,0 +1,676 @@
+[
+    {
+        "id": "2b11",
+        "name": "Add tunnel_key set action with mandatory parameters",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "dc6b",
+        "name": "Add tunnel_key set action with missing mandatory src_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set dst_ip 20.20.20.2 id 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*dst_ip 20.20.20.2.*key_id 100",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "7f25",
+        "name": "Add tunnel_key set action with missing mandatory dst_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 id 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*key_id 100",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "ba4e",
+        "name": "Add tunnel_key set action with missing mandatory id parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "a5e0",
+        "name": "Add tunnel_key set action with invalid src_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 300.168.100.1 dst_ip 192.168.200.1 id 7 index 1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 300.168.100.1.*dst_ip 192.168.200.1.*key_id 7.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "eaa8",
+        "name": "Add tunnel_key set action with invalid dst_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.800.1 id 10 index 11",
+        "expExitCode": "1",
+        "verifyCmd": "$TC actions get action tunnel_key index 11",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 192.168.100.1.*dst_ip 192.168.800.1.*key_id 10.*index 11 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "3b09",
+        "name": "Add tunnel_key set action with invalid id parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 112233445566778899 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 112233445566778899.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "9625",
+        "name": "Add tunnel_key set action with invalid dst_port parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 dst_port 998877 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 11.*dst_port 998877.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "05af",
+        "name": "Add tunnel_key set action with optional dst_port parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.200.1 id 789 dst_port 4000 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 10",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.100.1.*dst_ip 192.168.200.1.*key_id 789.*dst_port 4000.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "da80",
+        "name": "Add tunnel_key set action with index at 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*id 11.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "d407",
+        "name": "Add tunnel_key set action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295678",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295678",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*index 4294967295678 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "5cba",
+        "name": "Add tunnel_key set action with id value at 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 4294967295 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 4294967295.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "e84a",
+        "name": "Add tunnel_key set action with id value exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42949672955 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42949672955.*index 1",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "9c19",
+        "name": "Add tunnel_key set action with dst_port value at 16-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "3bd9",
+        "name": "Add tunnel_key set action with dst_port value exceeding 16-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535789 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535789.*index 1",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "68e2",
+        "name": "Add tunnel_key unset action",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key unset index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*unset.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "6192",
+        "name": "Add tunnel_key unset continue action",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key unset continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*unset continue.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "061d",
+        "name": "Add tunnel_key set continue action with cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "8acb",
+        "name": "Add tunnel_key set continue action with invalid cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "a07e",
+        "name": "Add tunnel_key action with no set/unset command specified",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "b227",
+        "name": "Add tunnel_key action with csum option",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 csum index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 99",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*csum pipe.*index 99",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "58a7",
+        "name": "Add tunnel_key action with nocsum option",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7823 nocsum index 234",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 234",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7823.*nocsum pipe.*index 234",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "2575",
+        "name": "Add tunnel_key action with not-supported parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 foobar 999 index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*foobar 999.*index 4",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "7a88",
+        "name": "Add tunnel_key action with cookie parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 index 4 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 4",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*dst_port 0.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "7afc",
+        "name": "Replace tunnel_key set action with all parameters",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 csum id 1 index 1"
+        ],
+        "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 nocsum id 11 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*nocsum pipe.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "364d",
+        "name": "Replace tunnel_key set action with all parameters and cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 index 1 cookie aabbccddeeff112233445566778800a"
+        ],
+        "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 id 11 csum reclassify index 1 cookie a1b1c1d1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*csum reclassify.*index 1.*cookie a1b1c1d1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "937c",
+        "name": "Fetch all existing tunnel_key actions",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+            "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 jump 10 index 2",
+            "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+            "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+        ],
+        "cmdUnderTest": "$TC actions list action tunnel_key",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*dst_port 3128.*nocsum pipe.*index 1.*set.*src_ip 11.10.10.1.*dst_ip 21.20.20.2.*key_id 2.*dst_port 3129.*csum jump 10.*index 2.*set.*src_ip 12.10.10.1.*dst_ip 22.20.20.2.*key_id 3.*dst_port 3130.*csum pass.*index 3.*set.*src_ip 13.10.10.1.*dst_ip 23.20.20.2.*key_id 4.*dst_port 3131.*nocsum continue.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "6783",
+        "name": "Flush all existing tunnel_key actions",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+            "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 reclassify index 2",
+            "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+            "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+        ],
+        "cmdUnderTest": "$TC actions flush action tunnel_key",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions flush action tunnel_key",
+        "matchPattern": "action order [0-9]+:.*",
+        "matchCount": "0",
+        "teardown": []
+    }
+]
-- 
2.7.4

^ permalink raw reply related

* [PATCH bpf 0/2] bpf: fix the load time reporting and make offload test more resilient
From: Jakub Kicinski @ 2018-06-14 18:06 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: netdev, oss-drivers, Jakub Kicinski

Hi!

This small series allows test_offload.py selftest to run on modern
distributions which may create BPF programs for cgroups at boot,
like Ubuntu 18.04.  We still expect the program list to not be
altered by any other agent while the test is running, but no longer
depend on there being no BPF programs at all at the start.

Fixing the test revealed a small problem with bpftool, which doesn't
report the program load time very accurately.  Because nanoseconds
were not taken into account reported load time would fluctuate by
1 second.  First patch of the series takes care of fixing that.

Jakub Kicinski (2):
  tools: bpftool: improve accuracy of load time
  selftests/bpf: test offloads even with BPF programs present

 tools/bpf/bpftool/prog.c                    |  4 +++-
 tools/testing/selftests/bpf/test_offload.py | 12 ++++++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)

-- 
2.17.1

^ permalink raw reply

* [PATCH bpf 1/2] tools: bpftool: improve accuracy of load time
From: Jakub Kicinski @ 2018-06-14 18:06 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: netdev, oss-drivers, Jakub Kicinski
In-Reply-To: <20180614180656.14550-1-jakub.kicinski@netronome.com>

BPF program load time is reported from the kernel relative to boot time.
If conversion to wall clock does not take nanosecond parts into account,
the load time reported by bpftool may differ by one second from run to
run.  This means JSON object reported by bpftool for a program will
randomly change.

Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 tools/bpf/bpftool/prog.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index a4f435203fef..05f42a46d6ed 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -90,7 +90,9 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
 	}
 
 	wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) +
-		nsecs / 1000000000;
+		(real_time_ts.tv_nsec - boot_time_ts.tv_nsec + nsecs) /
+		1000000000;
+
 
 	if (!localtime_r(&wallclock_secs, &load_tm)) {
 		snprintf(buf, size, "%llu", nsecs / 1000000000);
-- 
2.17.1

^ permalink raw reply related

* [PATCH bpf 2/2] selftests/bpf: test offloads even with BPF programs present
From: Jakub Kicinski @ 2018-06-14 18:06 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: netdev, oss-drivers, Jakub Kicinski
In-Reply-To: <20180614180656.14550-1-jakub.kicinski@netronome.com>

Modern distros increasingly make use of BPF programs.  Default
Ubuntu 18.04 installation boots with a number of cgroup_skb
programs loaded.

test_offload.py tries to check if programs and maps are not
leaked on error paths by confirming the list of programs on the
system is empty between tests.

Since we can no longer expect the system to have no BPF objects
at boot try to remember the programs and maps present at the start,
and skip those when scanning the system.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 tools/testing/selftests/bpf/test_offload.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index e78aad0a68bb..be800d0e7a84 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -163,6 +163,10 @@ netns = [] # net namespaces to be removed
 
 def bpftool_prog_list(expected=None, ns=""):
     _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
+    # Remove the base progs
+    for p in base_progs:
+        if p in progs:
+            progs.remove(p)
     if expected is not None:
         if len(progs) != expected:
             fail(True, "%d BPF programs loaded, expected %d" %
@@ -171,6 +175,10 @@ netns = [] # net namespaces to be removed
 
 def bpftool_map_list(expected=None, ns=""):
     _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+    # Remove the base maps
+    for m in base_maps:
+        if m in maps:
+            maps.remove(m)
     if expected is not None:
         if len(maps) != expected:
             fail(True, "%d BPF maps loaded, expected %d" %
@@ -585,8 +593,8 @@ skip(os.getuid() != 0, "test must be run as root")
 # Check tools
 ret, progs = bpftool("prog", fail=False)
 skip(ret != 0, "bpftool not installed")
-# Check no BPF programs are loaded
-skip(len(progs) != 0, "BPF programs already loaded on the system")
+base_progs = progs
+_, base_maps = bpftool("map")
 
 # Check netdevsim
 ret, out = cmd("modprobe netdevsim", fail=False)
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCH net-next,RFC 00/13] New fast forwarding path
From: Florian Fainelli @ 2018-06-14 18:14 UTC (permalink / raw)
  To: David Miller, pablo; +Cc: netfilter-devel, netdev, steffen.klassert
In-Reply-To: <20180614.101831.465275975690050595.davem@davemloft.net>



On 06/14/2018 10:18 AM, David Miller wrote:
> From: Pablo Neira Ayuso <pablo@netfilter.org>
> Date: Thu, 14 Jun 2018 16:19:34 +0200
> 
>> This patchset proposes a new fast forwarding path infrastructure
>> that combines the GRO/GSO and the flowtable infrastructures. The
>> idea is to add a hook at the GRO layer that is invoked before the
>> standard GRO protocol offloads. This allows us to build custom
>> packet chains that we can quickly pass in one go to the neighbour
>> layer to define fast forwarding path for flows.
> 
> We have full, complete, customizability of the packet path via XDP
> and eBPF.
> 
> XDP and eBPF supports everything necessary to accomplish that,
> there are implementations of forwarding implementations in
> the tree and elsewhere.
> 
> And most importantly, XDP and eBPF are optimized in drivers and
> offloaded to hardware.
> 
> There really is no need for something like what you are proposing.
> 

I see one possible upside to that approach here which is the low end
MIPS/ARM/PowerPC 32-bit based routers that do not have an eBPF JIT
available (that's only MIPS32 and PowerPC AFAICT), it would be great to
see what happens on those systems and if we do get any performance
improvements for a traditional forwarding/routing workload. On those
platforms there are a number of things that just literally kill the
routing performance: small I and D caches, small or no L2, limited
bandwidth DRAM, huge call depths, big struct sk_buff layout, you name it.
-- 
Florian

^ permalink raw reply

* List of Networking enhancements and bug fixes in a particular release
From: Joe Smith @ 2018-06-14 18:21 UTC (permalink / raw)
  To: netdev, David Miller; +Cc: Eric Dumazet

Hi Folks,

What is the best and authoritative mechanism to find out networking
enhancements in a Linux release?

Regards

-- 
JS

^ permalink raw reply

* [PATCH] bpf: attach type BPF_LIRC_MODE2 should not depend on CONFIG_CGROUP_BPF
From: Sean Young @ 2018-06-14 18:42 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Y Song, Matthias Reichl, linux-media, LKML, Alexei Starovoitov,
	Mauro Carvalho Chehab, netdev, Devin Heitmueller, Quentin Monnet
In-Reply-To: <34406f72-722d-9c23-327f-b7c5d7a3090c@iogearbox.net>

If the kernel is compiled with CONFIG_CGROUP_BPF not enabled, it is not
possible to attach, detach or query IR BPF programs to /dev/lircN devices,
making them impossible to use. For embedded devices, it should be possible
to use IR decoding without cgroups or CONFIG_CGROUP_BPF enabled.

This change requires some refactoring, since bpf_prog_{attach,detach,query}
functions are now always compiled, but their code paths for cgroups need
moving out. Rather than a #ifdef CONFIG_CGROUP_BPF in kernel/bpf/syscall.c,
moving them to kernel/bpf/cgroup.c does not require #ifdefs since that file
is already conditionally compiled.

Signed-off-by: Sean Young <sean@mess.org>
---
 include/linux/bpf-cgroup.h |  31 +++++++++++
 kernel/bpf/cgroup.c        | 110 +++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c       | 105 ++---------------------------------
 3 files changed, 145 insertions(+), 101 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 975fb4cf1bb7..ee67cd35f426 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -188,12 +188,43 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 									      \
 	__ret;								      \
 })
+int sockmap_get_from_fd(const union bpf_attr *attr, int type, bool attach);
+int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype);
+int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype);
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+			  union bpf_attr __user *uattr);
 #else
 
 struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 
+static inline int sockmap_get_from_fd(const union bpf_attr *attr,
+				      int type, bool attach)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+					 enum bpf_prog_type ptype)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+					 enum bpf_prog_type ptype)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
+					union bpf_attr __user *uattr)
+{
+	return -EINVAL;
+}
+
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f7c00bd6f8e4..d6e18f9dc0c4 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -428,6 +428,116 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	return ret;
 }
 
+int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
+				      enum bpf_attach_type attach_type)
+{
+	switch (prog->type) {
+	case BPF_PROG_TYPE_CGROUP_SOCK:
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
+	default:
+		return 0;
+	}
+}
+
+int sockmap_get_from_fd(const union bpf_attr *attr, int type, bool attach)
+{
+	struct bpf_prog *prog = NULL;
+	int ufd = attr->target_fd;
+	struct bpf_map *map;
+	struct fd f;
+	int err;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	if (attach) {
+		prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
+		if (IS_ERR(prog)) {
+			fdput(f);
+			return PTR_ERR(prog);
+		}
+	}
+
+	err = sock_map_prog(map, prog, attr->attach_type);
+	if (err) {
+		fdput(f);
+		if (prog)
+			bpf_prog_put(prog);
+		return err;
+	}
+
+	fdput(f);
+	return 0;
+}
+
+int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype)
+{
+	struct bpf_prog *prog;
+	struct cgroup *cgrp;
+	int ret;
+
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
+	cgrp = cgroup_get_from_fd(attr->target_fd);
+	if (IS_ERR(cgrp)) {
+		bpf_prog_put(prog);
+		return PTR_ERR(cgrp);
+	}
+
+	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+				attr->attach_flags);
+	if (ret)
+		bpf_prog_put(prog);
+	cgroup_put(cgrp);
+
+	return ret;
+}
+
+int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
+{
+	struct bpf_prog *prog;
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	if (IS_ERR(prog))
+		prog = NULL;
+
+	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
+	if (prog)
+		bpf_prog_put(prog);
+	cgroup_put(cgrp);
+	return ret;
+}
+
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+			  union bpf_attr __user *uattr)
+{
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->query.target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+	ret = cgroup_bpf_query(cgrp, attr, uattr);
+	cgroup_put(cgrp);
+	return ret;
+}
+
 /**
  * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
  * @sk: The socket sending or receiving traffic
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0fa20624707f..52fa44856623 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1489,65 +1489,14 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 	return err;
 }
 
-#ifdef CONFIG_CGROUP_BPF
-
-static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
-					     enum bpf_attach_type attach_type)
-{
-	switch (prog->type) {
-	case BPF_PROG_TYPE_CGROUP_SOCK:
-	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
-		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
-	default:
-		return 0;
-	}
-}
-
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
-static int sockmap_get_from_fd(const union bpf_attr *attr,
-			       int type, bool attach)
-{
-	struct bpf_prog *prog = NULL;
-	int ufd = attr->target_fd;
-	struct bpf_map *map;
-	struct fd f;
-	int err;
-
-	f = fdget(ufd);
-	map = __bpf_map_get(f);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
-
-	if (attach) {
-		prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
-		if (IS_ERR(prog)) {
-			fdput(f);
-			return PTR_ERR(prog);
-		}
-	}
-
-	err = sock_map_prog(map, prog, attr->attach_type);
-	if (err) {
-		fdput(f);
-		if (prog)
-			bpf_prog_put(prog);
-		return err;
-	}
-
-	fdput(f);
-	return 0;
-}
-
 #define BPF_F_ATTACH_MASK \
 	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
 
 static int bpf_prog_attach(const union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
-	struct bpf_prog *prog;
-	struct cgroup *cgrp;
-	int ret;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1593,28 +1542,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
-	if (IS_ERR(prog))
-		return PTR_ERR(prog);
-
-	if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
-		bpf_prog_put(prog);
-		return -EINVAL;
-	}
-
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp)) {
-		bpf_prog_put(prog);
-		return PTR_ERR(cgrp);
-	}
-
-	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
-				attr->attach_flags);
-	if (ret)
-		bpf_prog_put(prog);
-	cgroup_put(cgrp);
-
-	return ret;
+	return cgroup_bpf_prog_attach(attr, ptype);
 }
 
 #define BPF_PROG_DETACH_LAST_FIELD attach_type
@@ -1622,9 +1550,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 static int bpf_prog_detach(const union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
-	struct bpf_prog *prog;
-	struct cgroup *cgrp;
-	int ret;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1667,19 +1592,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
-	if (IS_ERR(prog))
-		prog = NULL;
-
-	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
-	if (prog)
-		bpf_prog_put(prog);
-	cgroup_put(cgrp);
-	return ret;
+	return cgroup_bpf_prog_detach(attr, ptype);
 }
 
 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
@@ -1687,9 +1600,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 static int bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr)
 {
-	struct cgroup *cgrp;
-	int ret;
-
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 	if (CHECK_ATTR(BPF_PROG_QUERY))
@@ -1717,14 +1627,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	default:
 		return -EINVAL;
 	}
-	cgrp = cgroup_get_from_fd(attr->query.target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-	ret = cgroup_bpf_query(cgrp, attr, uattr);
-	cgroup_put(cgrp);
-	return ret;
+
+	return cgroup_bpf_prog_query(attr, uattr);
 }
-#endif /* CONFIG_CGROUP_BPF */
 
 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
 
@@ -2371,7 +2276,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_OBJ_GET:
 		err = bpf_obj_get(&attr);
 		break;
-#ifdef CONFIG_CGROUP_BPF
 	case BPF_PROG_ATTACH:
 		err = bpf_prog_attach(&attr);
 		break;
@@ -2381,7 +2285,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_PROG_QUERY:
 		err = bpf_prog_query(&attr, uattr);
 		break;
-#endif
 	case BPF_PROG_TEST_RUN:
 		err = bpf_prog_test_run(&attr, uattr);
 		break;
-- 
2.17.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox