Netdev List
 help / color / mirror / Atom feed
* [bpf PATCH v4 2/4] bpf: sockmap, fix smap_list_map_remove when psock is in many maps
From: John Fastabend @ 2018-06-25 15:34 UTC (permalink / raw)
  To: john.fastabend, ast, daniel, kafai; +Cc: netdev
In-Reply-To: <20180625152948.3057.72785.stgit@john-Precision-Tower-5810>

If a hashmap is free'd with open socks it removes the reference to
the hash entry from the psock. If that is the last reference to the
psock then it will also be free'd by the reference counting logic.
However the current logic that removes the hash reference from the
list of references is broken. In smap_list_remove() we first check
if the sockmap entry matches and then check if the hashmap entry
matches. But, the sockmap entry sill always match because its NULL in
this case which causes the first entry to be removed from the list.
If this is always the "right" entry (because the user adds/removes
entries in order) then everything is OK but otherwise a subsequent
bpf_tcp_close() may reference a free'd object.

To fix this create two list handlers one for sockmap and one for
sockhash.

Reported-by: syzbot+0ce137753c78f7b6acc1@syzkaller.appspotmail.com
Fixes: 81110384441a ("bpf: sockmap, add hash map support")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 kernel/bpf/sockmap.c |   34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index d7fd17a..5adff4b 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1602,17 +1602,27 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	return ERR_PTR(err);
 }
 
-static void smap_list_remove(struct smap_psock *psock,
-			     struct sock **entry,
-			     struct htab_elem *hash_link)
+static void smap_list_map_remove(struct smap_psock *psock,
+				 struct sock **entry)
 {
 	struct smap_psock_map_entry *e, *tmp;
 
 	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-		if (e->entry == entry || e->hash_link == hash_link) {
+		if (e->entry == entry)
+			list_del(&e->list);
+	}
+}
+
+static void smap_list_hash_remove(struct smap_psock *psock,
+				  struct htab_elem *hash_link)
+{
+	struct smap_psock_map_entry *e, *tmp;
+
+	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+		struct htab_elem *c = e->hash_link;
+
+		if (c == hash_link)
 			list_del(&e->list);
-			break;
-		}
 	}
 }
 
@@ -1647,7 +1657,7 @@ static void sock_map_free(struct bpf_map *map)
 		 * to be null and queued for garbage collection.
 		 */
 		if (likely(psock)) {
-			smap_list_remove(psock, &stab->sock_map[i], NULL);
+			smap_list_map_remove(psock, &stab->sock_map[i]);
 			smap_release_sock(psock, sock);
 		}
 		write_unlock_bh(&sock->sk_callback_lock);
@@ -1706,7 +1716,7 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
 
 	if (psock->bpf_parse)
 		smap_stop_sock(psock, sock);
-	smap_list_remove(psock, &stab->sock_map[k], NULL);
+	smap_list_map_remove(psock, &stab->sock_map[k]);
 	smap_release_sock(psock, sock);
 out:
 	write_unlock_bh(&sock->sk_callback_lock);
@@ -1908,7 +1918,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		struct smap_psock *opsock = smap_psock_sk(osock);
 
 		write_lock_bh(&osock->sk_callback_lock);
-		smap_list_remove(opsock, &stab->sock_map[i], NULL);
+		smap_list_map_remove(opsock, &stab->sock_map[i]);
 		smap_release_sock(opsock, osock);
 		write_unlock_bh(&osock->sk_callback_lock);
 	}
@@ -2124,7 +2134,7 @@ static void sock_hash_free(struct bpf_map *map)
 			 * (psock) to be null and queued for garbage collection.
 			 */
 			if (likely(psock)) {
-				smap_list_remove(psock, NULL, l);
+				smap_list_hash_remove(psock, l);
 				smap_release_sock(psock, sock);
 			}
 			write_unlock_bh(&sock->sk_callback_lock);
@@ -2304,7 +2314,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		psock = smap_psock_sk(l_old->sk);
 
 		hlist_del_rcu(&l_old->hash_node);
-		smap_list_remove(psock, NULL, l_old);
+		smap_list_hash_remove(psock, l_old);
 		smap_release_sock(psock, l_old->sk);
 		free_htab_elem(htab, l_old);
 	}
@@ -2372,7 +2382,7 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
 		 * to be null and queued for garbage collection.
 		 */
 		if (likely(psock)) {
-			smap_list_remove(psock, NULL, l);
+			smap_list_hash_remove(psock, l);
 			smap_release_sock(psock, sock);
 		}
 		write_unlock_bh(&sock->sk_callback_lock);

^ permalink raw reply related

* [bpf PATCH v4 3/4] bpf: sockhash fix omitted bucket lock in sock_close
From: John Fastabend @ 2018-06-25 15:34 UTC (permalink / raw)
  To: john.fastabend, ast, daniel, kafai; +Cc: netdev
In-Reply-To: <20180625152948.3057.72785.stgit@john-Precision-Tower-5810>

First in tcp_close, reduce scope of sk_callback_lock() the lock is
only needed for protecting maps list the ingress and cork
lists are protected by sock lock. Having the lock in wider scope is
harmless but may confuse the reader who may infer it is in fact
needed.

Next, in sock_hash_delete_elem() the pattern is as follows,

  sock_hash_delete_elem()
     [...]
     spin_lock(bucket_lock)
     l = lookup_elem_raw()
     if (l)
        hlist_del_rcu()
        write_lock(sk_callback_lock)
         .... destroy psock ...
        write_unlock(sk_callback_lock)
     spin_unlock(bucket_lock)

The ordering is necessary because we only know the {p}sock after
dereferencing the hash table which we can't do unless we have the
bucket lock held. Once we have the bucket lock and the psock element
it is deleted from the hashmap to ensure any other path doing a lookup
will fail. Finally, the refcnt is decremented and if zero the psock
is destroyed.

In parallel with the above (or free'ing the map) a tcp close event
may trigger tcp_close(). Which at the moment omits the bucket lock
altogether (oops!) where the flow looks like this,

  bpf_tcp_close()
     [...]
     write_lock(sk_callback_lock)
     for each psock->maps // list of maps this sock is part of
         hlist_del_rcu(ref_hash_node);
         .... destroy psock ...
     write_unlock(sk_callback_lock)

Obviously, and demonstrated by syzbot, this is broken because
we can have multiple threads deleting entries via hlist_del_rcu().

To fix this we might be tempted to wrap the hlist operation in a
bucket lock but that would create a lock inversion problem. In
summary to follow locking rules the psocks maps list needs the
sk_callback_lock but we need the bucket lock to do the hlist_del_rcu.
To resolve the lock inversion problem pop the head of the maps list
repeatedly and remove the reference until no more are left. If a
delete happens in parallel from the BPF API that is OK as well because
it will do a similar action, lookup the lock in the map/hash, delete
it from the map/hash, and dec the refcnt. We check for this case
before doing a destroy on the psock to ensure we don't have two
threads tearing down a psock. The new logic is as follows,

  bpf_tcp_close()
  e = psock_map_pop(psock->maps) // done with sk_callback_lock
  bucket_lock() // lock hash list bucket
  l = lookup_elem_raw(head, hash, key, key_size);
  if (l) {
     //only get here if elmnt was not already removed
     hlist_del_rcu()
     ... destroy psock...
  }
  bucket_unlock()

And finally for all the above to work add missing sk_callback_lock
around smap_list_remove in sock_hash_ctx_update_elem(). Otherwise
delete and update may corrupt maps list. Then add RCU annotations and
use rcu_dereference/rcu_assign_pointer to manage values relying on
RCU so that the object is not free'd from sock_hash_free() while it
is being referenced in bpf_tcp_close().

(As an aside the sk_callback_lock serves two purposes. The
 first, is to update the sock callbacks sk_data_ready, sk_write_space,
 etc. The second is to protect the psock 'maps' list. The 'maps' list
 is used to (as shown above) to delete all map/hash references to a
 sock when the sock is closed)

Reported-by: syzbot+0ce137753c78f7b6acc1@syzkaller.appspotmail.com
Fixes: 81110384441a ("bpf: sockmap, add hash map support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 kernel/bpf/sockmap.c |  122 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 86 insertions(+), 36 deletions(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 5adff4b..ab670d5 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -72,6 +72,7 @@ struct bpf_htab {
 	u32 n_buckets;
 	u32 elem_size;
 	struct bpf_sock_progs progs;
+	struct rcu_head rcu;
 };
 
 struct htab_elem {
@@ -89,8 +90,8 @@ enum smap_psock_state {
 struct smap_psock_map_entry {
 	struct list_head list;
 	struct sock **entry;
-	struct htab_elem *hash_link;
-	struct bpf_htab *htab;
+	struct htab_elem __rcu *hash_link;
+	struct bpf_htab __rcu *htab;
 };
 
 struct smap_psock {
@@ -258,16 +259,54 @@ static void bpf_tcp_release(struct sock *sk)
 	rcu_read_unlock();
 }
 
+static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
+					 u32 hash, void *key, u32 key_size)
+{
+	struct htab_elem *l;
+
+	hlist_for_each_entry_rcu(l, head, hash_node) {
+		if (l->hash == hash && !memcmp(&l->key, key, key_size))
+			return l;
+	}
+
+	return NULL;
+}
+
+static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
+{
+	return &htab->buckets[hash & (htab->n_buckets - 1)];
+}
+
+static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+{
+	return &__select_bucket(htab, hash)->head;
+}
+
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
 	atomic_dec(&htab->count);
 	kfree_rcu(l, rcu);
 }
 
+static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
+						  struct smap_psock *psock)
+{
+	struct smap_psock_map_entry *e;
+
+	write_lock_bh(&sk->sk_callback_lock);
+	e = list_first_entry_or_null(&psock->maps,
+				     struct smap_psock_map_entry,
+				     list);
+	if (e)
+		list_del(&e->list);
+	write_unlock_bh(&sk->sk_callback_lock);
+	return e;
+}
+
 static void bpf_tcp_close(struct sock *sk, long timeout)
 {
 	void (*close_fun)(struct sock *sk, long timeout);
-	struct smap_psock_map_entry *e, *tmp;
+	struct smap_psock_map_entry *e;
 	struct sk_msg_buff *md, *mtmp;
 	struct smap_psock *psock;
 	struct sock *osk;
@@ -286,7 +325,6 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 	 */
 	close_fun = psock->save_close;
 
-	write_lock_bh(&sk->sk_callback_lock);
 	if (psock->cork) {
 		free_start_sg(psock->sock, psock->cork);
 		kfree(psock->cork);
@@ -299,20 +337,38 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 		kfree(md);
 	}
 
-	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+	e = psock_map_pop(sk, psock);
+	while (e) {
 		if (e->entry) {
 			osk = cmpxchg(e->entry, sk, NULL);
 			if (osk == sk) {
-				list_del(&e->list);
 				smap_release_sock(psock, sk);
 			}
 		} else {
-			hlist_del_rcu(&e->hash_link->hash_node);
-			smap_release_sock(psock, e->hash_link->sk);
-			free_htab_elem(e->htab, e->hash_link);
+			struct htab_elem *link = rcu_dereference(e->hash_link);
+			struct bpf_htab *htab = rcu_dereference(e->htab);
+			struct hlist_head *head;
+			struct htab_elem *l;
+			struct bucket *b;
+
+			b = __select_bucket(htab, link->hash);
+			head = &b->head;
+			raw_spin_lock_bh(&b->lock);
+			l = lookup_elem_raw(head,
+					    link->hash, link->key,
+					    htab->map.key_size);
+			/* If another thread deleted this object skip deletion.
+			 * The refcnt on psock may or may not be zero.
+			 */
+			if (l) {
+				hlist_del_rcu(&link->hash_node);
+				smap_release_sock(psock, link->sk);
+				free_htab_elem(htab, link);
+			}
+			raw_spin_unlock_bh(&b->lock);
 		}
+		e = psock_map_pop(sk, psock);
 	}
-	write_unlock_bh(&sk->sk_callback_lock);
 	rcu_read_unlock();
 	close_fun(sk, timeout);
 }
@@ -1619,7 +1675,7 @@ static void smap_list_hash_remove(struct smap_psock *psock,
 	struct smap_psock_map_entry *e, *tmp;
 
 	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-		struct htab_elem *c = e->hash_link;
+		struct htab_elem *c = rcu_dereference(e->hash_link);
 
 		if (c == hash_link)
 			list_del(&e->list);
@@ -2091,14 +2147,13 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
 	return ERR_PTR(err);
 }
 
-static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
+static void __bpf_htab_free(struct rcu_head *rcu)
 {
-	return &htab->buckets[hash & (htab->n_buckets - 1)];
-}
+	struct bpf_htab *htab;
 
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
-{
-	return &__select_bucket(htab, hash)->head;
+	htab = container_of(rcu, struct bpf_htab, rcu);
+	bpf_map_area_free(htab->buckets);
+	kfree(htab);
 }
 
 static void sock_hash_free(struct bpf_map *map)
@@ -2117,10 +2172,13 @@ static void sock_hash_free(struct bpf_map *map)
 	 */
 	rcu_read_lock();
 	for (i = 0; i < htab->n_buckets; i++) {
-		struct hlist_head *head = select_bucket(htab, i);
+		struct bucket *b = __select_bucket(htab, i);
+		struct hlist_head *head;
 		struct hlist_node *n;
 		struct htab_elem *l;
 
+		raw_spin_lock_bh(&b->lock);
+		head = &b->head;
 		hlist_for_each_entry_safe(l, n, head, hash_node) {
 			struct sock *sock = l->sk;
 			struct smap_psock *psock;
@@ -2138,12 +2196,12 @@ static void sock_hash_free(struct bpf_map *map)
 				smap_release_sock(psock, sock);
 			}
 			write_unlock_bh(&sock->sk_callback_lock);
-			kfree(l);
+			free_htab_elem(htab, l);
 		}
+		raw_spin_unlock_bh(&b->lock);
 	}
 	rcu_read_unlock();
-	bpf_map_area_free(htab->buckets);
-	kfree(htab);
+	call_rcu(&htab->rcu, __bpf_htab_free);
 }
 
 static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
@@ -2170,19 +2228,6 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
 	return l_new;
 }
 
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
-					 u32 hash, void *key, u32 key_size)
-{
-	struct htab_elem *l;
-
-	hlist_for_each_entry_rcu(l, head, hash_node) {
-		if (l->hash == hash && !memcmp(&l->key, key, key_size))
-			return l;
-	}
-
-	return NULL;
-}
-
 static inline u32 htab_map_hash(const void *key, u32 key_len)
 {
 	return jhash(key, key_len, 0);
@@ -2302,9 +2347,12 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		goto bucket_err;
 	}
 
-	e->hash_link = l_new;
-	e->htab = container_of(map, struct bpf_htab, map);
+	rcu_assign_pointer(e->hash_link, l_new);
+	rcu_assign_pointer(e->htab,
+			   container_of(map, struct bpf_htab, map));
+	write_lock_bh(&sock->sk_callback_lock);
 	list_add_tail(&e->list, &psock->maps);
+	write_unlock_bh(&sock->sk_callback_lock);
 
 	/* add new element to the head of the list, so that
 	 * concurrent search will find it before old elem
@@ -2314,8 +2362,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		psock = smap_psock_sk(l_old->sk);
 
 		hlist_del_rcu(&l_old->hash_node);
+		write_lock_bh(&l_old->sk->sk_callback_lock);
 		smap_list_hash_remove(psock, l_old);
 		smap_release_sock(psock, l_old->sk);
+		write_unlock_bh(&l_old->sk->sk_callback_lock);
 		free_htab_elem(htab, l_old);
 	}
 	raw_spin_unlock_bh(&b->lock);

^ permalink raw reply related

* [bpf PATCH v4 4/4] bpf: sockhash, add release routine
From: John Fastabend @ 2018-06-25 15:34 UTC (permalink / raw)
  To: john.fastabend, ast, daniel, kafai; +Cc: netdev
In-Reply-To: <20180625152948.3057.72785.stgit@john-Precision-Tower-5810>

Add map_release_uref pointer to hashmap ops. This was dropped when
original sockhash code was ported into bpf-next before initial
commit.

Fixes: 81110384441a ("bpf: sockmap, add hash map support")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 kernel/bpf/sockmap.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index ab670d5..e6328ab 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -2481,6 +2481,7 @@ struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
 	.map_get_next_key = sock_hash_get_next_key,
 	.map_update_elem = sock_hash_update_elem,
 	.map_delete_elem = sock_hash_delete_elem,
+	.map_release_uref = sock_map_release,
 };
 
 BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,

^ permalink raw reply related

* [PATCH 00/14] ARM: davinci: step towards removing at24_platform_data
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

Since I took over maintainership of the at24 driver I've been working
towards removing at24_platform_data in favor for device properties.

DaVinci is the only platform that's still using it - all other users
have already been converted.

One of the obstacles in case of DaVinci is removing the setup() callback
from the pdata struct, the only user of which are some davinci boards.

Most boards use the EEPROM to store the MAC address. This series adds
support for cell lookups to the nvmem framework, registers relevant
cells for all users, converts the davinci_emac driver to using them
and replaces at24_platform_data with device properties.

The only board that's still using this callback is now mityomapl138.
Unfortunately it stores more info in EEPROM than just the MAC address
and will require some more work. Unfortunately I don't have access
to this board so I can't test any actual solutions on a live hardware.

Tested on a dm365-evm board.

Bartosz Golaszewski (14):
  nvmem: add support for cell lookups
  ARM: davinci: dm365-evm: use nvmem lookup for mac address
  ARM: davinci: dm644-evm: use nvmem lookup for mac address
  ARM: davinci: dm646x-evm: use nvmem lookup for mac address
  ARM: davinci: da830-evm: use nvmem lookup for mac address
  ARM: davinci: mityomapl138: add nvmem cells lookup entries
  net: davinci_emac: use nvmem to retrieve the mac address
  ARM: davinci: mityomapl138: don't read the MAC address from machine
    code
  ARM: davinci: dm365-evm: use device properties for at24 eeprom
  ARM: davinci: da830-evm: use device properties for at24 eeprom
  ARM: davinci: dm644x-evm: use device properties for at24 eeprom
  ARM: davinci: dm646x-evm: use device properties for at24 eeprom
  ARM: davinci: sffsdr: fix the at24 eeprom device name
  ARM: davinci: sffsdr: use device properties for at24 eeprom

 arch/arm/mach-davinci/board-da830-evm.c    | 25 +++++++---
 arch/arm/mach-davinci/board-dm365-evm.c    | 25 +++++++---
 arch/arm/mach-davinci/board-dm644x-evm.c   | 24 ++++++---
 arch/arm/mach-davinci/board-dm646x-evm.c   | 25 +++++++---
 arch/arm/mach-davinci/board-mityomapl138.c | 30 +++++++++---
 arch/arm/mach-davinci/board-sffsdr.c       | 13 +++--
 drivers/net/ethernet/ti/davinci_emac.c     | 29 +++++++----
 drivers/nvmem/core.c                       | 57 +++++++++++++++++++++-
 include/linux/nvmem-consumer.h             |  6 +++
 include/linux/nvmem-provider.h             |  6 +++
 10 files changed, 182 insertions(+), 58 deletions(-)

-- 
2.17.1

^ permalink raw reply

* [PATCH 01/14] nvmem: add support for cell lookups
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We can currently only register nvmem cells from device tree or by
manually calling nvmem_add_cells(). The latter options however forces
users to make sure that the nvmem provider with which the cells are
associated is registered before the call.

This patch proposes a new solution inspired by other frameworks that
offer resource lookups (GPIO, PWM etc.). It adds a function that allows
machine code to register nvmem lookup which are later lazily used to
add corresponding nvmem cells.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/nvmem/core.c           | 57 +++++++++++++++++++++++++++++++++-
 include/linux/nvmem-consumer.h |  6 ++++
 include/linux/nvmem-provider.h |  6 ++++
 3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index b5b0cdc21d01..a2e87b464319 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -62,6 +62,9 @@ static DEFINE_IDA(nvmem_ida);
 static LIST_HEAD(nvmem_cells);
 static DEFINE_MUTEX(nvmem_cells_mutex);
 
+static LIST_HEAD(nvmem_cell_lookups);
+static DEFINE_MUTEX(nvmem_lookup_mutex);
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key eeprom_lock_key;
 #endif
@@ -247,6 +250,23 @@ static const struct attribute_group *nvmem_ro_root_dev_groups[] = {
 	NULL,
 };
 
+/**
+ * nvmem_register_lookup() - register a number of nvmem cell lookup entries
+ *
+ * @lookup: array of nvmem cell lookup entries
+ * @nentries: number of lookup entries in the array
+ */
+void nvmem_register_lookup(struct nvmem_cell_lookup *lookup, size_t nentries)
+{
+	int i;
+
+	mutex_lock(&nvmem_lookup_mutex);
+	for (i = 0; i < nentries; i++)
+		list_add_tail(&lookup[i].list, &nvmem_cell_lookups);
+	mutex_unlock(&nvmem_lookup_mutex);
+}
+EXPORT_SYMBOL_GPL(nvmem_register_lookup);
+
 static void nvmem_release(struct device *dev)
 {
 	struct nvmem_device *nvmem = to_nvmem_device(dev);
@@ -916,6 +936,37 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
 EXPORT_SYMBOL_GPL(of_nvmem_cell_get);
 #endif
 
+static struct nvmem_cell *nvmem_cell_from_lookup(const char *cell_id)
+{
+	struct nvmem_cell *cell = ERR_PTR(-EPROBE_DEFER);
+	struct nvmem_cell_lookup *lookup;
+	struct nvmem_device *nvmem;
+	int rc;
+
+	mutex_lock(&nvmem_lookup_mutex);
+
+	list_for_each_entry(lookup, &nvmem_cell_lookups, list) {
+		if (strcmp(cell_id, lookup->info.name) == 0) {
+			nvmem = nvmem_find(lookup->nvmem_name);
+			if (!nvmem)
+				goto out;
+
+			rc = nvmem_add_cells(nvmem, &lookup->info, 1);
+			if (rc) {
+				cell = ERR_PTR(rc);
+				goto out;
+			}
+
+			cell = nvmem_cell_get_from_list(cell_id);
+			break;
+		}
+	}
+
+out:
+	mutex_unlock(&nvmem_lookup_mutex);
+	return cell;
+}
+
 /**
  * nvmem_cell_get() - Get nvmem cell of device form a given cell name
  *
@@ -936,7 +987,11 @@ struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *cell_id)
 			return cell;
 	}
 
-	return nvmem_cell_get_from_list(cell_id);
+	cell = nvmem_cell_get_from_list(cell_id);
+	if (!IS_ERR(cell) || PTR_ERR(cell) == -EPROBE_DEFER)
+		return cell;
+
+	return nvmem_cell_from_lookup(cell_id);
 }
 EXPORT_SYMBOL_GPL(nvmem_cell_get);
 
diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index 4e85447f7860..f4b5d3186e94 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -29,6 +29,12 @@ struct nvmem_cell_info {
 	unsigned int		nbits;
 };
 
+struct nvmem_cell_lookup {
+	struct nvmem_cell_info	info;
+	struct list_head	list;
+	const char		*nvmem_name;
+};
+
 #if IS_ENABLED(CONFIG_NVMEM)
 
 /* Cell based interface */
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 24def6ad09bb..766c0a96c113 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -17,6 +17,7 @@
 
 struct nvmem_device;
 struct nvmem_cell_info;
+struct nvmem_cell_lookup;
 typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset,
 				void *val, size_t bytes);
 typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset,
@@ -72,6 +73,8 @@ struct nvmem_config {
 struct nvmem_device *nvmem_register(const struct nvmem_config *cfg);
 int nvmem_unregister(struct nvmem_device *nvmem);
 
+void nvmem_register_lookup(struct nvmem_cell_lookup *lookup, size_t nentries);
+
 struct nvmem_device *devm_nvmem_register(struct device *dev,
 					 const struct nvmem_config *cfg);
 
@@ -92,6 +95,9 @@ static inline int nvmem_unregister(struct nvmem_device *nvmem)
 	return -ENOSYS;
 }
 
+static inline void
+nvmem_register_lookup(struct nvmem_cell_lookup *lookup, size_t nentries) {}
+
 static inline struct nvmem_device *
 devm_nvmem_register(struct device *dev, const struct nvmem_config *c)
 {
-- 
2.17.1

^ permalink raw reply related

* [PATCH 04/14] ARM: davinci: dm646x-evm: use nvmem lookup for mac address
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We now support nvmem lookups for machine code. Add a lookup for
mac-address.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-dm646x-evm.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 584064fdabf5..4c82d38033b6 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -37,6 +37,7 @@
 #include <linux/platform_data/i2c-davinci.h>
 #include <linux/platform_data/mtd-davinci.h>
 #include <linux/platform_data/mtd-davinci-aemif.h>
+#include <linux/nvmem-provider.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -310,6 +311,15 @@ static struct pcf857x_platform_data pcf_data = {
  *  - ... newer boards may have more
  */
 
+static struct nvmem_cell_lookup dm646x_evm_mac_address_cell = {
+	.info = {
+		.name = "mac-address",
+		.offset = 0x7f00,
+		.bytes = ETH_ALEN,
+	},
+	.nvmem_name = "1-00500",
+};
+
 static struct at24_platform_data eeprom_info = {
 	.byte_len       = (256*1024) / 8,
 	.page_size      = 64,
@@ -802,6 +812,8 @@ static __init void evm_init(void)
 		davinci_init_ide();
 
 	soc_info->emac_pdata->phy_id = DM646X_EVM_PHY_ID;
+
+	nvmem_register_lookup(&dm646x_evm_mac_address_cell, 1);
 }
 
 MACHINE_START(DAVINCI_DM6467_EVM, "DaVinci DM646x EVM")
-- 
2.17.1

^ permalink raw reply related

* [PATCH 06/14] ARM: davinci: mityomapl138: add nvmem cells lookup entries
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We now support nvmem lookups for machine code. Add a lookup for
mac-address.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-mityomapl138.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index 37b3e48a21d1..2ec31ff61dbd 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -21,6 +21,7 @@
 #include <linux/etherdevice.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
+#include <linux/nvmem-provider.h>
 
 #include <asm/io.h>
 #include <asm/mach-types.h>
@@ -160,6 +161,25 @@ static void read_factory_config(struct nvmem_device *nvmem, void *context)
 	mityomapl138_cpufreq_init(partnum);
 }
 
+static struct nvmem_cell_lookup mityomapl138_nvmem_cells[] = {
+	{
+		.info = {
+			.name = "factory-config",
+			.offset = 0x0,
+			.bytes = sizeof(struct factory_config),
+		},
+		.nvmem_name = "1-00500",
+	},
+	{
+		.info = {
+			.name = "mac-address",
+			.offset = 0x64,
+			.bytes = ETH_ALEN,
+		},
+		.nvmem_name = "1-00500",
+	}
+};
+
 static struct at24_platform_data mityomapl138_fd_chip = {
 	.byte_len	= 256,
 	.page_size	= 8,
@@ -534,6 +554,8 @@ static void __init mityomapl138_init(void)
 	if (ret)
 		pr_warn("spi 1 registration failed: %d\n", ret);
 
+	nvmem_register_lookup(mityomapl138_nvmem_cells,
+			      ARRAY_SIZE(mityomapl138_nvmem_cells));
 	mityomapl138_config_emac();
 
 	ret = da8xx_register_rtc();
-- 
2.17.1

^ permalink raw reply related

* [PATCH 07/14] net: davinci_emac: use nvmem to retrieve the mac address
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

All users which store the MAC address in EEPROM now register relevant
nvmem cells. Switch to retrieving the MAC address over the nvmem
framework.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/net/ethernet/ti/davinci_emac.c | 29 +++++++++++++++++---------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index a1a6445b5a7e..22c2322e46be 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -67,7 +67,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_net.h>
 #include <linux/mfd/syscon.h>
-
+#include <linux/nvmem-consumer.h>
 #include <asm/irq.h>
 #include <asm/page.h>
 
@@ -1696,7 +1696,6 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv)
 	const struct of_device_id *match;
 	const struct emac_platform_data *auxdata;
 	struct emac_platform_data *pdata = NULL;
-	const u8 *mac_addr;
 
 	if (!IS_ENABLED(CONFIG_OF) || !pdev->dev.of_node)
 		return dev_get_platdata(&pdev->dev);
@@ -1708,12 +1707,6 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv)
 	np = pdev->dev.of_node;
 	pdata->version = EMAC_VERSION_2;
 
-	if (!is_valid_ether_addr(pdata->mac_addr)) {
-		mac_addr = of_get_mac_address(np);
-		if (mac_addr)
-			ether_addr_copy(pdata->mac_addr, mac_addr);
-	}
-
 	of_property_read_u32(np, "ti,davinci-ctrl-reg-offset",
 			     &pdata->ctrl_reg_offset);
 
@@ -1783,7 +1776,9 @@ static int davinci_emac_probe(struct platform_device *pdev)
 	struct cpdma_params dma_params;
 	struct clk *emac_clk;
 	unsigned long emac_bus_frequency;
-
+	struct nvmem_cell *cell;
+	void *mac_addr;
+	size_t mac_addr_len;
 
 	/* obtain emac clock from kernel */
 	emac_clk = devm_clk_get(&pdev->dev, NULL);
@@ -1815,8 +1810,22 @@ static int davinci_emac_probe(struct platform_device *pdev)
 		goto err_free_netdev;
 	}
 
+	cell = nvmem_cell_get(&pdev->dev, "mac-address");
+	if (!IS_ERR(cell)) {
+		mac_addr = nvmem_cell_read(cell, &mac_addr_len);
+		if (!IS_ERR(mac_addr)) {
+			if (is_valid_ether_addr(mac_addr)) {
+				dev_info(&pdev->dev,
+					 "Read MAC addr from EEPROM: %pM\n",
+					 mac_addr);
+				memcpy(priv->mac_addr, mac_addr, ETH_ALEN);
+			}
+			kfree(mac_addr);
+		}
+		nvmem_cell_put(cell);
+	}
+
 	/* MAC addr and PHY mask , RMII enable info from platform_data */
-	memcpy(priv->mac_addr, pdata->mac_addr, ETH_ALEN);
 	priv->phy_id = pdata->phy_id;
 	priv->rmii_en = pdata->rmii_en;
 	priv->version = pdata->version;
-- 
2.17.1

^ permalink raw reply related

* [PATCH 10/14] ARM: davinci: da830-evm: use device properties for at24 eeprom
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We want to work towards phasing out the at24_platform_data structure.
There are few users and its contents can be represented using generic
device properties. Using device properties only will allow us to
significantly simplify the at24 configuration code.

Remove the at24_platform_data structure and replace it with an array
of property entries. Drop the byte_len/size property, as the model name
already implies the EEPROM's size.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-da830-evm.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 3be3e93f2f18..779d09581169 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -18,7 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
 #include <linux/platform_data/pcf857x.h>
-#include <linux/platform_data/at24.h>
+#include <linux/property.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/spi/spi.h>
@@ -419,12 +419,9 @@ static struct nvmem_cell_lookup da830_evm_mac_address_cell = {
 	.nvmem_name = "1-00500",
 };
 
-static struct at24_platform_data da830_evm_i2c_eeprom_info = {
-	.byte_len	= SZ_256K / 8,
-	.page_size	= 64,
-	.flags		= AT24_FLAG_ADDR16,
-	.setup		= davinci_get_mac_addr,
-	.context	= (void *)0x7f00,
+static const struct property_entry da830_evm_i2c_eeprom_properties[] = {
+	PROPERTY_ENTRY_U32("pagesize", 64),
+	{ }
 };
 
 static int __init da830_evm_ui_expander_setup(struct i2c_client *client,
@@ -458,7 +455,7 @@ static struct pcf857x_platform_data __initdata da830_evm_ui_expander_info = {
 static struct i2c_board_info __initdata da830_evm_i2c_devices[] = {
 	{
 		I2C_BOARD_INFO("24c256", 0x50),
-		.platform_data	= &da830_evm_i2c_eeprom_info,
+		.properties = da830_evm_i2c_eeprom_properties,
 	},
 	{
 		I2C_BOARD_INFO("tlv320aic3x", 0x18),
-- 
2.17.1

^ permalink raw reply related

* [PATCH 11/14] ARM: davinci: dm644x-evm: use device properties for at24 eeprom
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We want to work towards phasing out the at24_platform_data structure.
There are few users and its contents can be represented using generic
device properties. Using device properties only will allow us to
significantly simplify the at24 configuration code.

Remove the at24_platform_data structure and replace it with an array
of property entries. Drop the byte_len/size property, as the model name
already implies the EEPROM's size.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-dm644x-evm.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index adbe8630ef19..5b26a8c5bbd8 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -16,8 +16,8 @@
 #include <linux/gpio/machine.h>
 #include <linux/i2c.h>
 #include <linux/platform_data/pcf857x.h>
-#include <linux/platform_data/at24.h>
 #include <linux/platform_data/gpio-davinci.h>
+#include <linux/property.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
@@ -486,12 +486,8 @@ static struct nvmem_cell_lookup dm6446evm_mac_address_cell = {
 	.nvmem_name = "1-00500",
 };
 
-static struct at24_platform_data eeprom_info = {
-	.byte_len	= (256*1024) / 8,
-	.page_size	= 64,
-	.flags		= AT24_FLAG_ADDR16,
-	.setup          = davinci_get_mac_addr,
-	.context	= (void *)0x7f00,
+static const struct property_entry eeprom_properties[] = {
+	PROPERTY_ENTRY_U32("pagesize", 64),
 };
 
 /*
@@ -601,7 +597,7 @@ static struct i2c_board_info __initdata i2c_info[] =  {
 	},
 	{
 		I2C_BOARD_INFO("24c256", 0x50),
-		.platform_data	= &eeprom_info,
+		.properties = eeprom_properties,
 	},
 	{
 		I2C_BOARD_INFO("tlv320aic33", 0x1b),
-- 
2.17.1

^ permalink raw reply related

* [PATCH 12/14] ARM: davinci: dm646x-evm: use device properties for at24 eeprom
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We want to work towards phasing out the at24_platform_data structure.
There are few users and its contents can be represented using generic
device properties. Using device properties only will allow us to
significantly simplify the at24 configuration code.

Remove the at24_platform_data structure and replace it with an array
of property entries. Drop the byte_len/size property, as the model name
already implies the EEPROM's size.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-dm646x-evm.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 4c82d38033b6..8c585e7be180 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -22,7 +22,7 @@
 #include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
-#include <linux/platform_data/at24.h>
+#include <linux/property.h>
 #include <linux/platform_data/pcf857x.h>
 
 #include <media/i2c/tvp514x.h>
@@ -320,12 +320,9 @@ static struct nvmem_cell_lookup dm646x_evm_mac_address_cell = {
 	.nvmem_name = "1-00500",
 };
 
-static struct at24_platform_data eeprom_info = {
-	.byte_len       = (256*1024) / 8,
-	.page_size      = 64,
-	.flags          = AT24_FLAG_ADDR16,
-	.setup          = davinci_get_mac_addr,
-	.context	= (void *)0x7f00,
+static const struct property_entry eeprom_properties[] = {
+	PROPERTY_ENTRY_U32("pagesize", 64),
+	{ }
 };
 #endif
 
@@ -396,7 +393,7 @@ static void evm_init_cpld(void)
 static struct i2c_board_info __initdata i2c_info[] =  {
 	{
 		I2C_BOARD_INFO("24c256", 0x50),
-		.platform_data  = &eeprom_info,
+		.properties  = eeprom_properties,
 	},
 	{
 		I2C_BOARD_INFO("pcf8574a", 0x38),
-- 
2.17.1

^ permalink raw reply related

* [PATCH 13/14] ARM: davinci: sffsdr: fix the at24 eeprom device name
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

The currently used 24lc64 i2c device name doesn't match against any
of the devices supported by the at24 driver. Change it to the closest
compatible chip.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-sffsdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-davinci/board-sffsdr.c b/arch/arm/mach-davinci/board-sffsdr.c
index e7c1728b0833..f6a4d094cbc3 100644
--- a/arch/arm/mach-davinci/board-sffsdr.c
+++ b/arch/arm/mach-davinci/board-sffsdr.c
@@ -100,7 +100,7 @@ static struct at24_platform_data eeprom_info = {
 
 static struct i2c_board_info __initdata i2c_info[] =  {
 	{
-		I2C_BOARD_INFO("24lc64", 0x50),
+		I2C_BOARD_INFO("24c64", 0x50),
 		.platform_data	= &eeprom_info,
 	},
 	/* Other I2C devices:
-- 
2.17.1

^ permalink raw reply related

* [PATCH 14/14] ARM: davinci: sffsdr: use device properties for at24 eeprom
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We want to work towards phasing out the at24_platform_data structure.
There are few users and its contents can be represented using generic
device properties. Using device properties only will allow us to
significantly simplify the at24 configuration code.

Remove the at24_platform_data structure and replace it with an array
of property entries. Drop the byte_len/size property, as the model name
already implies the EEPROM's size.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-sffsdr.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-davinci/board-sffsdr.c b/arch/arm/mach-davinci/board-sffsdr.c
index f6a4d094cbc3..680e5d7628a8 100644
--- a/arch/arm/mach-davinci/board-sffsdr.c
+++ b/arch/arm/mach-davinci/board-sffsdr.c
@@ -26,7 +26,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
-#include <linux/platform_data/at24.h>
+#include <linux/property.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
@@ -92,16 +92,15 @@ static struct platform_device davinci_sffsdr_nandflash_device = {
 	.resource	= davinci_sffsdr_nandflash_resource,
 };
 
-static struct at24_platform_data eeprom_info = {
-	.byte_len	= (64*1024) / 8,
-	.page_size	= 32,
-	.flags		= AT24_FLAG_ADDR16,
+static const struct property_entry eeprom_properties[] = {
+	PROPERTY_ENTRY_U32("pagesize", 32),
+	{ },
 };
 
 static struct i2c_board_info __initdata i2c_info[] =  {
 	{
 		I2C_BOARD_INFO("24c64", 0x50),
-		.platform_data	= &eeprom_info,
+		.properties = eeprom_properties,
 	},
 	/* Other I2C devices:
 	 * MSP430,  addr 0x23 (not used)
-- 
2.17.1

^ permalink raw reply related

* [PATCH 02/14] ARM: davinci: dm365-evm: use nvmem lookup for mac address
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We now support nvmem lookups for machine code. Add a lookup for
mac-address.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-dm365-evm.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index 435f7ec7d9af..df640d977bfa 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -28,6 +28,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/eeprom.h>
 #include <linux/v4l2-dv-timings.h>
+#include <linux/nvmem-provider.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -169,6 +170,15 @@ static struct platform_device davinci_nand_device = {
 	},
 };
 
+static struct nvmem_cell_lookup dm365evm_mac_address_cell = {
+	.info = {
+		.name = "mac-address",
+		.offset = 0x7f00,
+		.bytes = ETH_ALEN,
+	},
+	.nvmem_name = "1-00500",
+};
+
 static struct at24_platform_data eeprom_info = {
 	.byte_len       = (256*1024) / 8,
 	.page_size      = 64,
@@ -769,6 +779,8 @@ static __init void dm365_evm_init(void)
 
 	dm365_init_spi0(BIT(0), dm365_evm_spi_info,
 			ARRAY_SIZE(dm365_evm_spi_info));
+
+	nvmem_register_lookup(&dm365evm_mac_address_cell, 1);
 }
 
 MACHINE_START(DAVINCI_DM365_EVM, "DaVinci DM365 EVM")
-- 
2.17.1

^ permalink raw reply related

* [PATCH 03/14] ARM: davinci: dm644-evm: use nvmem lookup for mac address
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We now support nvmem lookups for machine code. Add a lookup for
mac-address.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-dm644x-evm.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 48436f74fd71..adbe8630ef19 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -28,6 +28,7 @@
 #include <linux/v4l2-dv-timings.h>
 #include <linux/export.h>
 #include <linux/leds.h>
+#include <linux/nvmem-provider.h>
 
 #include <media/i2c/tvp514x.h>
 
@@ -476,6 +477,15 @@ static struct pcf857x_platform_data pcf_data_u35 = {
  *  - ... newer boards may have more
  */
 
+static struct nvmem_cell_lookup dm6446evm_mac_address_cell = {
+	.info = {
+		.name = "mac-address",
+		.offset = 0x7f00,
+		.bytes = ETH_ALEN,
+	},
+	.nvmem_name = "1-00500",
+};
+
 static struct at24_platform_data eeprom_info = {
 	.byte_len	= (256*1024) / 8,
 	.page_size	= 64,
@@ -828,6 +838,8 @@ static __init void davinci_evm_init(void)
 		phy_register_fixup_for_uid(LXT971_PHY_ID, LXT971_PHY_MASK,
 						davinci_phy_fixup);
 	}
+
+	nvmem_register_lookup(&dm6446evm_mac_address_cell, 1);
 }
 
 MACHINE_START(DAVINCI_EVM, "DaVinci DM644x EVM")
-- 
2.17.1

^ permalink raw reply related

* [PATCH 05/14] ARM: davinci: da830-evm: use nvmem lookup for mac address
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We now support nvmem lookups for machine code. Add a lookup for
mac-address.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-da830-evm.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 14a6fc061744..3be3e93f2f18 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -29,6 +29,7 @@
 #include <linux/platform_data/spi-davinci.h>
 #include <linux/platform_data/usb-davinci.h>
 #include <linux/regulator/machine.h>
+#include <linux/nvmem-provider.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -409,6 +410,15 @@ static inline void da830_evm_init_lcdc(int mux_mode)
 static inline void da830_evm_init_lcdc(int mux_mode) { }
 #endif
 
+static struct nvmem_cell_lookup da830_evm_mac_address_cell = {
+	.info = {
+		.name = "mac-address",
+		.offset = 0x7f00,
+		.bytes = ETH_ALEN,
+	},
+	.nvmem_name = "1-00500",
+};
+
 static struct at24_platform_data da830_evm_i2c_eeprom_info = {
 	.byte_len	= SZ_256K / 8,
 	.page_size	= 64,
@@ -618,6 +628,8 @@ static __init void da830_evm_init(void)
 		pr_warn("%s: spi 0 registration failed: %d\n", __func__, ret);
 
 	regulator_has_full_constraints();
+
+	nvmem_register_lookup(&da830_evm_mac_address_cell, 1);
 }
 
 #ifdef CONFIG_SERIAL_8250_CONSOLE
-- 
2.17.1

^ permalink raw reply related

* [PATCH 08/14] ARM: davinci: mityomapl138: don't read the MAC address from machine code
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

This is now done by the emac driver using a registered nvmem cell.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-mityomapl138.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index 2ec31ff61dbd..6263e6afcbf0 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -120,7 +120,6 @@ static void read_factory_config(struct nvmem_device *nvmem, void *context)
 {
 	int ret;
 	const char *partnum = NULL;
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
 
 	if (!IS_BUILTIN(CONFIG_NVMEM)) {
 		pr_warn("Factory Config not available without CONFIG_NVMEM\n");
@@ -146,13 +145,6 @@ static void read_factory_config(struct nvmem_device *nvmem, void *context)
 		goto bad_config;
 	}
 
-	pr_info("Found MAC = %pM\n", factory_config.mac);
-	if (is_valid_ether_addr(factory_config.mac))
-		memcpy(soc_info->emac_pdata->mac_addr,
-			factory_config.mac, ETH_ALEN);
-	else
-		pr_warn("Invalid MAC found in factory config block\n");
-
 	partnum = factory_config.partnum;
 	pr_info("Part Number = %s\n", partnum);
 
-- 
2.17.1

^ permalink raw reply related

* [PATCH 09/14] ARM: davinci: dm365-evm: use device properties for at24 eeprom
From: Bartosz Golaszewski @ 2018-06-25 15:50 UTC (permalink / raw)
  To: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman
  Cc: linux-arm-kernel, linux-kernel, linux-omap, netdev,
	Bartosz Golaszewski
In-Reply-To: <20180625155025.12567-1-brgl@bgdev.pl>

From: Bartosz Golaszewski <bgolaszewski@baylibre.com>

We want to work towards phasing out the at24_platform_data structure.
There are few users and its contents can be represented using generic
device properties. Using device properties only will allow us to
significantly simplify the at24 configuration code.

Remove the at24_platform_data structure and replace it with an array
of property entries. Drop the byte_len/size property, as the model name
already implies the EEPROM's size.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 arch/arm/mach-davinci/board-dm365-evm.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index df640d977bfa..ffe93265f565 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -18,7 +18,7 @@
 #include <linux/i2c.h>
 #include <linux/io.h>
 #include <linux/clk.h>
-#include <linux/platform_data/at24.h>
+#include <linux/property.h>
 #include <linux/leds.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
@@ -179,18 +179,15 @@ static struct nvmem_cell_lookup dm365evm_mac_address_cell = {
 	.nvmem_name = "1-00500",
 };
 
-static struct at24_platform_data eeprom_info = {
-	.byte_len       = (256*1024) / 8,
-	.page_size      = 64,
-	.flags          = AT24_FLAG_ADDR16,
-	.setup          = davinci_get_mac_addr,
-	.context	= (void *)0x7f00,
+static const struct property_entry eeprom_properties[] = {
+	PROPERTY_ENTRY_U32("pagesize", 64),
+	{ }
 };
 
 static struct i2c_board_info i2c_info[] = {
 	{
 		I2C_BOARD_INFO("24c256", 0x50),
-		.platform_data	= &eeprom_info,
+		.properties = eeprom_properties,
 	},
 	{
 		I2C_BOARD_INFO("tlv320aic3x", 0x18),
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCH v2 ipsec-next] xfrm: policy: remove pcpu policy cache
From: Steffen Klassert @ 2018-06-25 15:55 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netdev
In-Reply-To: <20180625152602.23111-1-fw@strlen.de>

On Mon, Jun 25, 2018 at 05:26:02PM +0200, Florian Westphal wrote:
> Kristian Evensen says:
>   In a project I am involved in, we are running ipsec (Strongswan) on
>   different mt7621-based routers. Each router is configured as an
>   initiator and has around ~30 tunnels to different responders (running
>   on misc. devices). Before the flow cache was removed (kernel 4.9), we
>   got a combined throughput of around 70Mbit/s for all tunnels on one
>   router. However, we recently switched to kernel 4.14 (4.14.48), and
>   the total throughput is somewhere around 57Mbit/s (best-case). I.e., a
>   drop of around 20%. Reverting the flow cache removal restores, as
>   expected, performance levels to that of kernel 4.9.
> 
> When pcpu xdst exists, it has to be validated first before it can be
> used.
> 
> A negative hit thus increases cost vs. no-cache.
> 
> As number of tunnels increases, hit rate decreases so this pcpu caching
> isn't a viable strategy.
> 
> Furthermore, the xdst cache also needs to run with BH off, so when
> removing this the bh disable/enable pairs can be removed too.
> 
> Kristian tested a 4.14.y backport of this change and reported
> increased performance:
> 
>   In our tests, the throughput reduction has been reduced from around -20%
>   to -5%. We also see that the overall throughput is independent of the
>   number of tunnels, while before the throughput was reduced as the number
>   of tunnels increased.
> 
> Reported-by: Kristian Evensen <kristian.evensen@gmail.com>
> Signed-off-by: Florian Westphal <fw@strlen.de>

Applied to ipsec-next, thanks a lot!

^ permalink raw reply

* [PATCH net-next] net: preserve sock reference when scrubbing the skb.
From: Flavio Leitner @ 2018-06-25 15:56 UTC (permalink / raw)
  To: netdev
  Cc: Eric Dumazet, Paolo Abeni, David Miller, Florian Westphal,
	netfilter-devel, Flavio Leitner

The sock reference is lost when scrubbing the packet and that breaks
TSQ (TCP Small Queues) and XPS (Transmit Packet Steering) causing
performance impacts of about 50% in a single TCP stream when crossing
network namespaces.

XPS breaks because the queue mapping stored in the socket is not
available, so another random queue might be selected when the stack
needs to transmit something like a TCP ACK, or TCP Retransmissions.
That causes packet re-ordering and/or performance issues.

TSQ breaks because it orphans the packet while it is still in the
host, so packets are queued contributing to the buffer bloat problem.

Preserving the sock reference fixes both issues. The socket is
orphaned anyways in the receiving path before any relevant action,
but the transmit side needs some extra checking included in the
patch.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
---
 include/net/netfilter/nf_log.h         | 3 ++-
 net/core/skbuff.c                      | 1 -
 net/ipv4/netfilter/nf_log_ipv4.c       | 8 ++++----
 net/ipv6/netfilter/nf_log_ipv6.c       | 8 ++++----
 net/netfilter/nf_conntrack_broadcast.c | 2 +-
 net/netfilter/nf_log_common.c          | 5 +++--
 net/netfilter/nf_nat_core.c            | 6 +++++-
 net/netfilter/nft_meta.c               | 9 ++++++---
 net/netfilter/nft_socket.c             | 5 ++++-
 net/netfilter/xt_cgroup.c              | 6 ++++--
 net/netfilter/xt_owner.c               | 2 +-
 net/netfilter/xt_recent.c              | 3 ++-
 net/netfilter/xt_socket.c              | 6 ++++++
 13 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index e811ac07ea94..0d3920896d50 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -106,7 +106,8 @@ int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 			   u8 proto, int fragment, unsigned int offset,
 			   unsigned int logflags);
-void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk);
+void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+			    struct sock *sk);
 void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
 			       unsigned int hooknum, const struct sk_buff *skb,
 			       const struct net_device *in,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c642304f178c..37263095545a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4911,7 +4911,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 		return;
 
 	ipvs_reset(skb);
-	skb_orphan(skb);
 	skb->mark = 0;
 }
 EXPORT_SYMBOL_GPL(skb_scrub_packet);
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 4388de0e5380..1e6f28c97d3a 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -35,7 +35,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* One level of recursion won't kill us */
-static void dump_ipv4_packet(struct nf_log_buf *m,
+static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
 			     const struct nf_loginfo *info,
 			     const struct sk_buff *skb, unsigned int iphoff)
 {
@@ -183,7 +183,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
 			/* Max length: 3+maxlen */
 			if (!iphoff) { /* Only recurse once. */
 				nf_log_buf_add(m, "[");
-				dump_ipv4_packet(m, info, skb,
+				dump_ipv4_packet(net, m, info, skb,
 					    iphoff + ih->ihl*4+sizeof(_icmph));
 				nf_log_buf_add(m, "] ");
 			}
@@ -251,7 +251,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
 
 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & NF_LOG_UID) && !iphoff)
-		nf_log_dump_sk_uid_gid(m, skb->sk);
+		nf_log_dump_sk_uid_gid(net, m, skb->sk);
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
 	if (!iphoff && skb->mark)
@@ -333,7 +333,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
 	if (in != NULL)
 		dump_ipv4_mac_header(m, loginfo, skb);
 
-	dump_ipv4_packet(m, loginfo, skb, 0);
+	dump_ipv4_packet(net, m, loginfo, skb, 0);
 
 	nf_log_buf_close(m);
 }
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index b397a8fe88b9..c6bf580d0f33 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -36,7 +36,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* One level of recursion won't kill us */
-static void dump_ipv6_packet(struct nf_log_buf *m,
+static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
 			     const struct nf_loginfo *info,
 			     const struct sk_buff *skb, unsigned int ip6hoff,
 			     int recurse)
@@ -258,7 +258,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
 			/* Max length: 3+maxlen */
 			if (recurse) {
 				nf_log_buf_add(m, "[");
-				dump_ipv6_packet(m, info, skb,
+				dump_ipv6_packet(net, m, info, skb,
 						 ptr + sizeof(_icmp6h), 0);
 				nf_log_buf_add(m, "] ");
 			}
@@ -278,7 +278,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
 
 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & NF_LOG_UID) && recurse)
-		nf_log_dump_sk_uid_gid(m, skb->sk);
+		nf_log_dump_sk_uid_gid(net, m, skb->sk);
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
 	if (recurse && skb->mark)
@@ -365,7 +365,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
 	if (in != NULL)
 		dump_ipv6_mac_header(m, loginfo, skb);
 
-	dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1);
+	dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
 
 	nf_log_buf_close(m);
 }
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index a1086bdec242..5423b197d98a 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -32,7 +32,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
 	__be32 mask = 0;
 
 	/* we're only interested in locally generated packets */
-	if (skb->sk == NULL)
+	if (skb->sk == NULL || !net_eq(nf_ct_net(ct), sock_net(skb->sk)))
 		goto out;
 	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
 		goto out;
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index dc61399e30be..a8c5c846aec1 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -132,9 +132,10 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
 
-void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
+void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+			    struct sock *sk)
 {
-	if (!sk || !sk_fullsock(sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk)))
 		return;
 
 	read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 46f9df99d276..86df2a1666fd 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -108,6 +108,7 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 	struct flowi fl;
 	unsigned int hh_len;
 	struct dst_entry *dst;
+	struct sock *sk = skb->sk;
 	int err;
 
 	err = xfrm_decode_session(skb, &fl, family);
@@ -119,7 +120,10 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 		dst = ((struct xfrm_dst *)dst)->route;
 	dst_hold(dst);
 
-	dst = xfrm_lookup(net, dst, &fl, skb->sk, 0);
+	if (sk && !net_eq(net, sock_net(sk)))
+		sk = NULL;
+
+	dst = xfrm_lookup(net, dst, &fl, sk, 0);
 	if (IS_ERR(dst))
 		return PTR_ERR(dst);
 
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 1105a23bda5e..2b94dcc43456 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -107,7 +107,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 		break;
 	case NFT_META_SKUID:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 
 		read_lock_bh(&sk->sk_callback_lock);
@@ -123,7 +124,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 		break;
 	case NFT_META_SKGID:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 
 		read_lock_bh(&sk->sk_callback_lock);
@@ -214,7 +216,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 #ifdef CONFIG_CGROUP_NET_CLASSID
 	case NFT_META_CGROUP:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 		*dest = sock_cgroup_classid(&sk->sk_cgrp_data);
 		break;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 74e1b3bd6954..998c2b546f6d 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -23,6 +23,9 @@ static void nft_socket_eval(const struct nft_expr *expr,
 	struct sock *sk = skb->sk;
 	u32 *dest = &regs->data[priv->dreg];
 
+	if (sk && !net_eq(nft_net(pkt), sock_net(sk)))
+		sk = NULL;
+
 	if (!sk)
 		switch(nft_pf(pkt)) {
 		case NFPROTO_IPV4:
@@ -39,7 +42,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
 			return;
 		}
 
-	if(!sk) {
+	if (!sk) {
 		nft_reg_store8(dest, 0);
 		return;
 	}
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 7df2dece57d3..5d92e1781980 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -72,8 +72,9 @@ static bool
 cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_cgroup_info_v0 *info = par->matchinfo;
+	struct sock *sk = skb->sk;
 
-	if (skb->sk == NULL || !sk_fullsock(skb->sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
 		return false;
 
 	return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^
@@ -85,8 +86,9 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_cgroup_info_v1 *info = par->matchinfo;
 	struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
 	struct cgroup *ancestor = info->priv;
+	struct sock *sk = skb->sk;
 
-	if (!skb->sk || !sk_fullsock(skb->sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
 		return false;
 
 	if (ancestor)
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 3d705c688a27..46686fb73784 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -67,7 +67,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	struct sock *sk = skb_to_full_sk(skb);
 	struct net *net = xt_net(par);
 
-	if (sk == NULL || sk->sk_socket == NULL)
+	if (!sk || !sk->sk_socket || !net_eq(net, sock_net(sk)))
 		return (info->match ^ info->invert) == 0;
 	else if (info->match & info->invert & XT_OWNER_SOCKET)
 		/*
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 07085c22b19c..f44de4bc2100 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -265,7 +265,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	}
 
 	/* use TTL as seen before forwarding */
-	if (xt_out(par) != NULL && skb->sk == NULL)
+	if (xt_out(par) != NULL &&
+	    (!skb->sk || !net_eq(net, sock_net(skb->sk))))
 		ttl++;
 
 	spin_lock_bh(&recent_lock);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 5c0779c4fa3c..e2795f5f7000 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -58,6 +58,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 
 	if (!sk)
 		sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));
+	else if (!net_eq(xt_net(par), sock_net(sk)))
+		sk = NULL;
+
 	if (sk) {
 		bool wildcard;
 		bool transparent = true;
@@ -115,6 +118,9 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
 
 	if (!sk)
 		sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));
+	else if (!net_eq(xt_net(par), sock_net(sk)))
+		sk = NULL;
+
 	if (sk) {
 		bool wildcard;
 		bool transparent = true;
-- 
2.14.3

^ permalink raw reply related

* [PATCH nf-next v2] openvswitch: use nf_ct_get_tuplepr, invert_tuplepr
From: Florian Westphal @ 2018-06-25 15:55 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, pshelar, dev, Florian Westphal

These versions deal with the l3proto/l4proto details internally.
It removes only caller of nf_ct_get_tuple, so make it static.

After this, l3proto->get_l4proto() can be removed in a followup patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 No changes since v1.

 This is a preparation patch to remove the l3proto indirections.
 Evanutally nf_conntrack_l3proto will be removed.

 ipv4 and ipv6 protocol trackers will be part of nf_conntrack itself.

 include/net/netfilter/nf_conntrack_core.h |  7 -------
 net/netfilter/nf_conntrack_core.c         |  3 +--
 net/openvswitch/conntrack.c               | 17 +++--------------
 3 files changed, 4 insertions(+), 23 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 9b5e7634713e..90df45022c51 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -40,13 +40,6 @@ void nf_conntrack_cleanup_start(void);
 void nf_conntrack_init_end(void);
 void nf_conntrack_cleanup_end(void);
 
-bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff,
-		     unsigned int dataoff, u_int16_t l3num, u_int8_t protonum,
-		     struct net *net,
-		     struct nf_conntrack_tuple *tuple,
-		     const struct nf_conntrack_l3proto *l3proto,
-		     const struct nf_conntrack_l4proto *l4proto);
-
 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 			const struct nf_conntrack_tuple *orig,
 			const struct nf_conntrack_l3proto *l3proto,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3465da2a98bd..160493f95fed 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -222,7 +222,7 @@ static u32 hash_conntrack(const struct net *net,
 	return scale_hash(hash_conntrack_raw(tuple, net));
 }
 
-bool
+static bool
 nf_ct_get_tuple(const struct sk_buff *skb,
 		unsigned int nhoff,
 		unsigned int dataoff,
@@ -244,7 +244,6 @@ nf_ct_get_tuple(const struct sk_buff *skb,
 
 	return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
 }
-EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
 
 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 		       u_int16_t l3num,
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 284aca2a252d..e05bd3e53f0f 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -607,23 +607,12 @@ static struct nf_conn *
 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 		     u8 l3num, struct sk_buff *skb, bool natted)
 {
-	const struct nf_conntrack_l3proto *l3proto;
-	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
-	unsigned int dataoff;
-	u8 protonum;
 
-	l3proto = __nf_ct_l3proto_find(l3num);
-	if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
-				 &protonum) <= 0) {
-		pr_debug("ovs_ct_find_existing: Can't get protonum\n");
-		return NULL;
-	}
-	l4proto = __nf_ct_l4proto_find(l3num, protonum);
-	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
-			     protonum, net, &tuple, l3proto, l4proto)) {
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num,
+			       net, &tuple)) {
 		pr_debug("ovs_ct_find_existing: Can't get tuple\n");
 		return NULL;
 	}
@@ -632,7 +621,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 	if (natted) {
 		struct nf_conntrack_tuple inverse;
 
-		if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
+		if (!nf_ct_invert_tuplepr(&inverse, &tuple)) {
 			pr_debug("ovs_ct_find_existing: Inversion failed!\n");
 			return NULL;
 		}
-- 
2.16.4

^ permalink raw reply related

* Re: Grant
From: M. M. Fridman @ 2018-06-25 11:50 UTC (permalink / raw)




-- 
I Mikhail Fridman has selected you specially as one of my beneficiaries for my
Charitable Donation.

Reply as soon as possible with further directives.

Best Regards,
Mikhail Fridman.

^ permalink raw reply

* Re: [PATCH net-next 3/5] sctp: add spp_ipv6_flowlabel and spp_dscp for sctp_paddrparams
From: 吉藤英明 @ 2018-06-25 16:12 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner
  Cc: Neil Horman, David Miller, lucien.xin, netdev, linux-sctp,
	吉藤英明, yoshfuji
In-Reply-To: <20180625130319.GA820@localhost.localdomain>

Hi,

2018-06-25 22:03 GMT+09:00 Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>:
> On Mon, Jun 25, 2018 at 07:28:47AM -0400, Neil Horman wrote:
>> On Mon, Jun 25, 2018 at 04:31:26PM +0900, David Miller wrote:
>> > From: Xin Long <lucien.xin@gmail.com>
>> > Date: Mon, 25 Jun 2018 10:14:35 +0800
>> >
>> > >  struct sctp_paddrparams {
>> > > @@ -773,6 +775,8 @@ struct sctp_paddrparams {
>> > >   __u32                   spp_pathmtu;
>> > >   __u32                   spp_sackdelay;
>> > >   __u32                   spp_flags;
>> > > + __u32                   spp_ipv6_flowlabel;
>> > > + __u8                    spp_dscp;
>> > >  } __attribute__((packed, aligned(4)));
>> >
>> > I don't think you can change the size of this structure like this.
>> >
>> > This check in sctp_setsockopt_peer_addr_params():
>> >
>> >     if (optlen != sizeof(struct sctp_paddrparams))
>> >             return -EINVAL;
>> >
>> > is going to trigger in old kernels when executing programs
>> > built against the new struct definition.
>
> That will happen, yes, but do we really care about being future-proof
> here? I mean: if we also update such check(s) to support dealing with
> smaller-than-supported structs, newer kernels will be able to run
> programs built against the old struct, and the new one; while building
> using newer headers and running on older kernel may fool the
> application in other ways too (like enabling support for something
> that is available on newer kernel and that is not present in the older
> one).

We should not break existing apps.
We still accept apps of pre-2.4 era without sin6_scope_id
(e.g., net/ipv6/af_inet6.c:inet6_bind()).

>
>> >
>> I think thats also the reason its a packed aligned attribute, it can't be
>> changed, or older kernels won't be able to fill it out properly.
>> Neil
>
> It's more for supporting running 32-bits apps on 64-bit kernels
> (according to 20c9c825b12fc).
>
>   Marcelo

^ permalink raw reply

* Re: [PATCH net-next 3/5] sctp: add spp_ipv6_flowlabel and spp_dscp for sctp_paddrparams
From: Marcelo Ricardo Leitner @ 2018-06-25 16:31 UTC (permalink / raw)
  To: 吉藤英明
  Cc: Neil Horman, David Miller, lucien.xin, netdev, linux-sctp,
	yoshfuji
In-Reply-To: <CAPA1RqB48gBWKOQck6Ke5yH3cm1B5QtJKh1CL4SYfqes8Tc+Nw@mail.gmail.com>

Hi,

On Tue, Jun 26, 2018 at 01:12:00AM +0900, 吉藤英明 wrote:
> Hi,
> 
> 2018-06-25 22:03 GMT+09:00 Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>:
> > On Mon, Jun 25, 2018 at 07:28:47AM -0400, Neil Horman wrote:
> >> On Mon, Jun 25, 2018 at 04:31:26PM +0900, David Miller wrote:
> >> > From: Xin Long <lucien.xin@gmail.com>
> >> > Date: Mon, 25 Jun 2018 10:14:35 +0800
> >> >
> >> > >  struct sctp_paddrparams {
> >> > > @@ -773,6 +775,8 @@ struct sctp_paddrparams {
> >> > >   __u32                   spp_pathmtu;
> >> > >   __u32                   spp_sackdelay;
> >> > >   __u32                   spp_flags;
> >> > > + __u32                   spp_ipv6_flowlabel;
> >> > > + __u8                    spp_dscp;
> >> > >  } __attribute__((packed, aligned(4)));
> >> >
> >> > I don't think you can change the size of this structure like this.
> >> >
> >> > This check in sctp_setsockopt_peer_addr_params():
> >> >
> >> >     if (optlen != sizeof(struct sctp_paddrparams))
> >> >             return -EINVAL;
> >> >
> >> > is going to trigger in old kernels when executing programs
> >> > built against the new struct definition.
> >
> > That will happen, yes, but do we really care about being future-proof
> > here? I mean: if we also update such check(s) to support dealing with
> > smaller-than-supported structs, newer kernels will be able to run
> > programs built against the old struct, and the new one; while building
> > using newer headers and running on older kernel may fool the
> > application in other ways too (like enabling support for something
> > that is available on newer kernel and that is not present in the older
> > one).
> 
> We should not break existing apps.
> We still accept apps of pre-2.4 era without sin6_scope_id
> (e.g., net/ipv6/af_inet6.c:inet6_bind()).

Yes. That's what I tried to say. That is supporting an old app built
with old kernel headers and running on a newer kernel, and not the
other way around (an app built with fresh headers and running on an
old kernel).

> 
> >
> >> >
> >> I think thats also the reason its a packed aligned attribute, it can't be
> >> changed, or older kernels won't be able to fill it out properly.
> >> Neil
> >
> > It's more for supporting running 32-bits apps on 64-bit kernels
> > (according to 20c9c825b12fc).
> >
> >   Marcelo

^ permalink raw reply

* Re: [bpf PATCH v4 3/4] bpf: sockhash fix omitted bucket lock in sock_close
From: Martin KaFai Lau @ 2018-06-25 16:40 UTC (permalink / raw)
  To: John Fastabend; +Cc: ast, daniel, netdev
In-Reply-To: <20180625153417.3057.19275.stgit@john-Precision-Tower-5810>

On Mon, Jun 25, 2018 at 08:34:17AM -0700, John Fastabend wrote:
> First in tcp_close, reduce scope of sk_callback_lock() the lock is
> only needed for protecting maps list the ingress and cork
> lists are protected by sock lock. Having the lock in wider scope is
> harmless but may confuse the reader who may infer it is in fact
> needed.
> 
> Next, in sock_hash_delete_elem() the pattern is as follows,
> 
>   sock_hash_delete_elem()
>      [...]
>      spin_lock(bucket_lock)
>      l = lookup_elem_raw()
>      if (l)
>         hlist_del_rcu()
>         write_lock(sk_callback_lock)
>          .... destroy psock ...
>         write_unlock(sk_callback_lock)
>      spin_unlock(bucket_lock)
> 
> The ordering is necessary because we only know the {p}sock after
> dereferencing the hash table which we can't do unless we have the
> bucket lock held. Once we have the bucket lock and the psock element
> it is deleted from the hashmap to ensure any other path doing a lookup
> will fail. Finally, the refcnt is decremented and if zero the psock
> is destroyed.
> 
> In parallel with the above (or free'ing the map) a tcp close event
> may trigger tcp_close(). Which at the moment omits the bucket lock
> altogether (oops!) where the flow looks like this,
> 
>   bpf_tcp_close()
>      [...]
>      write_lock(sk_callback_lock)
>      for each psock->maps // list of maps this sock is part of
>          hlist_del_rcu(ref_hash_node);
>          .... destroy psock ...
>      write_unlock(sk_callback_lock)
> 
> Obviously, and demonstrated by syzbot, this is broken because
> we can have multiple threads deleting entries via hlist_del_rcu().
> 
> To fix this we might be tempted to wrap the hlist operation in a
> bucket lock but that would create a lock inversion problem. In
> summary to follow locking rules the psocks maps list needs the
> sk_callback_lock but we need the bucket lock to do the hlist_del_rcu.
> To resolve the lock inversion problem pop the head of the maps list
> repeatedly and remove the reference until no more are left. If a
> delete happens in parallel from the BPF API that is OK as well because
> it will do a similar action, lookup the lock in the map/hash, delete
> it from the map/hash, and dec the refcnt. We check for this case
> before doing a destroy on the psock to ensure we don't have two
> threads tearing down a psock. The new logic is as follows,
> 
>   bpf_tcp_close()
>   e = psock_map_pop(psock->maps) // done with sk_callback_lock
>   bucket_lock() // lock hash list bucket
>   l = lookup_elem_raw(head, hash, key, key_size);
>   if (l) {
>      //only get here if elmnt was not already removed
>      hlist_del_rcu()
>      ... destroy psock...
>   }
>   bucket_unlock()
> 
> And finally for all the above to work add missing sk_callback_lock
> around smap_list_remove in sock_hash_ctx_update_elem(). Otherwise
> delete and update may corrupt maps list. Then add RCU annotations and
> use rcu_dereference/rcu_assign_pointer to manage values relying on
> RCU so that the object is not free'd from sock_hash_free() while it
> is being referenced in bpf_tcp_close().
> 
> (As an aside the sk_callback_lock serves two purposes. The
>  first, is to update the sock callbacks sk_data_ready, sk_write_space,
>  etc. The second is to protect the psock 'maps' list. The 'maps' list
>  is used to (as shown above) to delete all map/hash references to a
>  sock when the sock is closed)
> 
> Reported-by: syzbot+0ce137753c78f7b6acc1@syzkaller.appspotmail.com
> Fixes: 81110384441a ("bpf: sockmap, add hash map support")
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox