* [PATCH 1/4] mac80211: Use linked list instead of rhashtable walk for mesh tables
From: Herbert Xu @ 2019-02-13 14:39 UTC (permalink / raw)
To: David Miller, johannes, linux-wireless, netdev, j, tgraf,
johannes.berg
In-Reply-To: <20190213143853.labj6zdcsoupkris@gondor.apana.org.au>
The mesh table code walks over hash tables for two purposes. First of
all it's used as part of a netlink dump process, but it is also used
for looking up entries to delete using criteria other than the hash
key.
The second purpose is directly contrary to the design specification
of rhashtable walks. It is only meant for use by netlink dumps.
This is because rhashtable is resizable and you cannot obtain a
stable walk over it during a resize process.
In fact mesh's use of rhashtable for dumping is bogus too. Rather
than using rhashtable walk's iterator to keep track of the current
position, it always converts the current position to an integer
which defeats the purpose of the iterator.
Therefore this patch converts all uses of rhashtable walk into a
simple linked list.
This patch also adds a new spin lock to protect the hash table
insertion/removal as well as the walk list modifications. In fact
the previous code was buggy as the removals can race with each
other, potentially resulting in a double-free.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
net/mac80211/mesh.h | 6 +
net/mac80211/mesh_pathtbl.c | 138 +++++++++++---------------------------------
2 files changed, 43 insertions(+), 101 deletions(-)
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index cad6592c52a1..2ec7011a4d07 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -70,6 +70,7 @@ enum mesh_deferred_task_flags {
* @dst: mesh path destination mac address
* @mpp: mesh proxy mac address
* @rhash: rhashtable list pointer
+ * @walk_list: linked list containing all mesh_path objects.
* @gate_list: list pointer for known gates list
* @sdata: mesh subif
* @next_hop: mesh neighbor to which frames for this destination will be
@@ -105,6 +106,7 @@ struct mesh_path {
u8 dst[ETH_ALEN];
u8 mpp[ETH_ALEN]; /* used for MPP or MAP */
struct rhash_head rhash;
+ struct hlist_node walk_list;
struct hlist_node gate_list;
struct ieee80211_sub_if_data *sdata;
struct sta_info __rcu *next_hop;
@@ -133,12 +135,16 @@ struct mesh_path {
* gate's mpath may or may not be resolved and active.
* @gates_lock: protects updates to known_gates
* @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
+ * @walk_head: linked list containging all mesh_path objects
+ * @walk_lock: lock protecting walk_head
* @entries: number of entries in the table
*/
struct mesh_table {
struct hlist_head known_gates;
spinlock_t gates_lock;
struct rhashtable rhead;
+ struct hlist_head walk_head;
+ spinlock_t walk_lock;
atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */
};
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index a5125624a76d..884a0d212e8b 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -59,8 +59,10 @@ static struct mesh_table *mesh_table_alloc(void)
return NULL;
INIT_HLIST_HEAD(&newtbl->known_gates);
+ INIT_HLIST_HEAD(&newtbl->walk_head);
atomic_set(&newtbl->entries, 0);
spin_lock_init(&newtbl->gates_lock);
+ spin_lock_init(&newtbl->walk_lock);
return newtbl;
}
@@ -249,28 +251,15 @@ mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst)
static struct mesh_path *
__mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
{
- int i = 0, ret;
- struct mesh_path *mpath = NULL;
- struct rhashtable_iter iter;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return NULL;
-
- rhashtable_walk_start(&iter);
+ int i = 0;
+ struct mesh_path *mpath;
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) {
if (i++ == idx)
break;
}
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
- if (IS_ERR(mpath) || !mpath)
+ if (!mpath)
return NULL;
if (mpath_expired(mpath)) {
@@ -432,6 +421,7 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
return ERR_PTR(-ENOMEM);
tbl = sdata->u.mesh.mesh_paths;
+ spin_lock_bh(&tbl->walk_lock);
do {
ret = rhashtable_lookup_insert_fast(&tbl->rhead,
&new_mpath->rhash,
@@ -441,8 +431,10 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
mpath = rhashtable_lookup_fast(&tbl->rhead,
dst,
mesh_rht_params);
-
+ else if (!ret)
+ hlist_add_head(&new_mpath->walk_list, &tbl->walk_head);
} while (unlikely(ret == -EEXIST && !mpath));
+ spin_unlock_bh(&tbl->walk_lock);
if (ret && ret != -EEXIST)
return ERR_PTR(ret);
@@ -480,9 +472,14 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
memcpy(new_mpath->mpp, mpp, ETH_ALEN);
tbl = sdata->u.mesh.mpp_paths;
+
+ spin_lock_bh(&tbl->walk_lock);
ret = rhashtable_lookup_insert_fast(&tbl->rhead,
&new_mpath->rhash,
mesh_rht_params);
+ if (!ret)
+ hlist_add_head_rcu(&new_mpath->walk_list, &tbl->walk_head);
+ spin_unlock_bh(&tbl->walk_lock);
sdata->u.mesh.mpp_paths_generation++;
return ret;
@@ -503,20 +500,9 @@ void mesh_plink_broken(struct sta_info *sta)
struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) {
if (rcu_access_pointer(mpath->next_hop) == sta &&
mpath->flags & MESH_PATH_ACTIVE &&
!(mpath->flags & MESH_PATH_FIXED)) {
@@ -530,8 +516,7 @@ void mesh_plink_broken(struct sta_info *sta)
WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast);
}
}
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ rcu_read_unlock();
}
static void mesh_path_free_rcu(struct mesh_table *tbl,
@@ -551,6 +536,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl,
static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath)
{
+ hlist_del_rcu(&mpath->walk_list);
rhashtable_remove_fast(&tbl->rhead, &mpath->rhash, mesh_rht_params);
mesh_path_free_rcu(tbl, mpath);
}
@@ -571,27 +557,14 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ struct hlist_node *n;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
if (rcu_access_pointer(mpath->next_hop) == sta)
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
@@ -599,51 +572,26 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
{
struct mesh_table *tbl = sdata->u.mesh.mpp_paths;
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ struct hlist_node *n;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
if (ether_addr_equal(mpath->mpp, proxy))
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
static void table_flush_by_iface(struct mesh_table *tbl)
{
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
-
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
+ struct hlist_node *n;
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
/**
@@ -675,7 +623,7 @@ static int table_path_del(struct mesh_table *tbl,
{
struct mesh_path *mpath;
- rcu_read_lock();
+ spin_lock_bh(&tbl->walk_lock);
mpath = rhashtable_lookup_fast(&tbl->rhead, addr, mesh_rht_params);
if (!mpath) {
rcu_read_unlock();
@@ -683,7 +631,7 @@ static int table_path_del(struct mesh_table *tbl,
}
__mesh_path_del(tbl, mpath);
- rcu_read_unlock();
+ spin_unlock_bh(&tbl->walk_lock);
return 0;
}
@@ -854,28 +802,16 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
struct mesh_table *tbl)
{
struct mesh_path *mpath;
- struct rhashtable_iter iter;
- int ret;
+ struct hlist_node *n;
- ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_KERNEL);
- if (ret)
- return;
-
- rhashtable_walk_start(&iter);
-
- while ((mpath = rhashtable_walk_next(&iter))) {
- if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
- continue;
- if (IS_ERR(mpath))
- break;
+ spin_lock_bh(&tbl->walk_lock);
+ hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) {
if ((!(mpath->flags & MESH_PATH_RESOLVING)) &&
(!(mpath->flags & MESH_PATH_FIXED)) &&
time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
__mesh_path_del(tbl, mpath);
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ spin_unlock_bh(&tbl->walk_lock);
}
void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
^ permalink raw reply related
* [PATCH 4/4] rhashtable: Remove obsolete rhashtable_walk_init function
From: Herbert Xu @ 2019-02-13 14:39 UTC (permalink / raw)
To: David Miller, johannes, linux-wireless, netdev, j, tgraf,
johannes.berg
In-Reply-To: <20190213143853.labj6zdcsoupkris@gondor.apana.org.au>
The rhashtable_walk_init function has been obsolete for more than
two years. This patch finally converts its last users over to
rhashtable_walk_enter and removes it.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
include/linux/rhashtable.h | 8 --------
lib/rhashtable.c | 2 +-
lib/test_rhashtable.c | 9 ++-------
net/ipv6/ila/ila_xlat.c | 15 +++------------
net/netlink/af_netlink.c | 10 +---------
5 files changed, 7 insertions(+), 37 deletions(-)
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 20f9c6af7473..ae9c0f71f311 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -1113,14 +1113,6 @@ static inline int rhashtable_replace_fast(
return err;
}
-/* Obsolete function, do not use in new code. */
-static inline int rhashtable_walk_init(struct rhashtable *ht,
- struct rhashtable_iter *iter, gfp_t gfp)
-{
- rhashtable_walk_enter(ht, iter);
- return 0;
-}
-
/**
* rhltable_walk_enter - Initialise an iterator
* @hlt: Table to walk over
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 852ffa5160f1..0a105d4af166 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -682,7 +682,7 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
* rhashtable_walk_exit - Free an iterator
* @iter: Hash table Iterator
*
- * This function frees resources allocated by rhashtable_walk_init.
+ * This function frees resources allocated by rhashtable_walk_enter.
*/
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 6a8ac7626797..d96962eea942 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -177,16 +177,11 @@ static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array,
static void test_bucket_stats(struct rhashtable *ht, unsigned int entries)
{
- unsigned int err, total = 0, chain_len = 0;
+ unsigned int total = 0, chain_len = 0;
struct rhashtable_iter hti;
struct rhash_head *pos;
- err = rhashtable_walk_init(ht, &hti, GFP_KERNEL);
- if (err) {
- pr_warn("Test failed: allocation error");
- return;
- }
-
+ rhashtable_walk_enter(ht, &hti);
rhashtable_walk_start(&hti);
while ((pos = rhashtable_walk_next(&hti))) {
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 17c455ff69ff..ae6cd4cef8db 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -385,10 +385,7 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info)
spinlock_t *lock;
int ret;
- ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter, GFP_KERNEL);
- if (ret)
- goto done;
-
+ rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter);
rhashtable_walk_start(&iter);
for (;;) {
@@ -509,23 +506,17 @@ int ila_xlat_nl_dump_start(struct netlink_callback *cb)
struct net *net = sock_net(cb->skb->sk);
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_dump_iter *iter;
- int ret;
iter = kmalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return -ENOMEM;
- ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter->rhiter,
- GFP_KERNEL);
- if (ret) {
- kfree(iter);
- return ret;
- }
+ rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter->rhiter);
iter->skip = 0;
cb->args[0] = (long)iter;
- return ret;
+ return 0;
}
int ila_xlat_nl_dump_done(struct netlink_callback *cb)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3c023d6120f6..f369cc751cea 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2541,15 +2541,7 @@ struct nl_seq_iter {
static int netlink_walk_start(struct nl_seq_iter *iter)
{
- int err;
-
- err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti,
- GFP_KERNEL);
- if (err) {
- iter->link = MAX_LINKS;
- return err;
- }
-
+ rhashtable_walk_enter(&nl_table[iter->link].hash, &iter->hti);
rhashtable_walk_start(&iter->hti);
return 0;
^ permalink raw reply related
* [PATCH 3/4] mac80211: Use rhashtable_lookup_get_insert_fast instead of racy code
From: Herbert Xu @ 2019-02-13 14:39 UTC (permalink / raw)
To: David Miller, johannes, linux-wireless, netdev, j, tgraf,
johannes.berg
In-Reply-To: <20190213143853.labj6zdcsoupkris@gondor.apana.org.au>
The code in mesh_path_add tries to handle the case where a duplicate
entry is added to the rhashtable by doing a lookup after a failed
insertion. It also tries to handle races by repeating the insertion
should the lookup fail.
This is now unnecessary as we have rhashtable API functions that can
directly return the mathcing object.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
net/mac80211/mesh_pathtbl.c | 31 ++++++++++---------------------
1 file changed, 10 insertions(+), 21 deletions(-)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index db5a1aef22db..8902395e406e 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -404,7 +404,6 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
{
struct mesh_table *tbl;
struct mesh_path *mpath, *new_mpath;
- int ret;
if (ether_addr_equal(dst, sdata->vif.addr))
/* never add ourselves as neighbours */
@@ -422,32 +421,22 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
tbl = sdata->u.mesh.mesh_paths;
spin_lock_bh(&tbl->walk_lock);
- do {
- ret = rhashtable_lookup_insert_fast(&tbl->rhead,
- &new_mpath->rhash,
- mesh_rht_params);
-
- if (ret == -EEXIST)
- mpath = rhashtable_lookup_fast(&tbl->rhead,
- dst,
- mesh_rht_params);
- else if (!ret)
- hlist_add_head(&new_mpath->walk_list, &tbl->walk_head);
- } while (unlikely(ret == -EEXIST && !mpath));
+ mpath = rhashtable_lookup_get_insert_fast(&tbl->rhead,
+ &new_mpath->rhash,
+ mesh_rht_params);
+ if (!mpath)
+ hlist_add_head(&new_mpath->walk_list, &tbl->walk_head);
spin_unlock_bh(&tbl->walk_lock);
- if (ret)
+ if (mpath) {
kfree(new_mpath);
- if (ret != -EEXIST)
- return ERR_PTR(ret);
+ if (IS_ERR(mpath))
+ return mpath;
- /* At this point either new_mpath was added, or we found a
- * matching entry already in the table; in the latter case
- * free the unnecessary new entry.
- */
- if (ret == -EEXIST)
new_mpath = mpath;
+ }
+
sdata->u.mesh.mesh_paths_generation++;
return new_mpath;
}
^ permalink raw reply related
* [PATCH iproute2] iplink: document XDP subcommand to force the XDP mode.
From: Matteo Croce @ 2019-02-13 14:40 UTC (permalink / raw)
To: netdev; +Cc: David Ahern, Stephen Hemminger, Jakub Kicinski
When attaching an eBPF program to a device, ip link can force the XDP mode
by using the xdp{generic,drv,offload} keyword instead of just 'xdp'.
Document this behaviour also in the help output.
Signed-off-by: Matteo Croce <mcroce@redhat.com>
Fixes: 14683814 ("bpf: add xdpdrv for requesting XDP driver mode")
Fixes: 1b5e8094 ("bpf: allow requesting XDP HW offload")
---
ip/iplink.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ip/iplink.c b/ip/iplink.c
index b5519201..3a0cf459 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -98,7 +98,7 @@ void iplink_usage(void)
" [ trust { on | off} ] ]\n"
" [ node_guid { eui64 } ]\n"
" [ port_guid { eui64 } ]\n"
- " [ xdp { off |\n"
+ " [ { xdp | xdpgeneric | xdpdrv | xdpoffload } { off |\n"
" object FILE [ section NAME ] [ verbose ] |\n"
" pinned FILE } ]\n"
" [ master DEVICE ][ vrf NAME ]\n"
--
2.20.1
^ permalink raw reply related
* Re: [v2 PATCH 0/4] mac80211: Fix incorrect usage of rhashtable walk API
From: Johannes Berg @ 2019-02-13 14:55 UTC (permalink / raw)
To: Herbert Xu, David Miller, linux-wireless, netdev, j, tgraf
In-Reply-To: <20190213143853.labj6zdcsoupkris@gondor.apana.org.au>
On Wed, 2019-02-13 at 22:38 +0800, Herbert Xu wrote:
> Hi:
>
> The first two patches in this series are bug fixes and should be
> backported to stable.
>
> They fixes a number of issues with the use of the rhashtable API
> in mac80211. First of all it converts the use of rashtable walks over
> to a simple linked list. This is because an rhashtable walk is
> inherently unstable and not meant for uses that require stability,
> e.g., when you're trying to lookup an object to delete.
>
> It also fixes a potential memory leak when the rhashtable insertion
> fails (which can occur due to OOM).
Thanks a lot Herbert! That looks simpler than I thought it would be.
johannes
^ permalink raw reply
* Re: [PATCH net-next 09/12] mlxsw: core: Extend hwmon interface with fan fault attribute
From: Guenter Roeck @ 2019-02-13 15:02 UTC (permalink / raw)
To: Andrew Lunn, Ido Schimmel
Cc: netdev@vger.kernel.org, davem@davemloft.net, Jiri Pirko, mlxsw,
Vadim Pasternak
In-Reply-To: <20190213135331.GB24589@lunn.ch>
On 2/13/19 5:53 AM, Andrew Lunn wrote:
> On Wed, Feb 13, 2019 at 11:28:53AM +0000, Ido Schimmel wrote:
>> From: Vadim Pasternak <vadimp@mellanox.com>
>>
>> Add new fan hwmon attribute for exposing fan faults (fault indication is
>> read from Fan Out of Range Event Register).
>>
>> Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
>> Reviewed-by: Jiri Pirko <jiri@mellanox.com>
>> Signed-off-by: Ido Schimmel <idosch@mellanox.com>
>
> Hi Ido
>
> You should include the HWMON maintainer in the Cc: list.
>
> I would not be too surprised if he says to use
> hwmon_device_register_with_info().
>
I would ask to do that for new drivers, but this is is not a new driver.
On top of that, I wasn't included in its initial review. Since I wasn't
involved, I have no idea what shape the driver is in, and for sure won't
review it now (to retain my sanity).
Only comment I have is that using the _with_info API and using devm_
functions might simplify the driver a lot. I'll be happy to do a review
if/when that is done.
Guenter
^ permalink raw reply
* Re: [PATCH 2/4] mac80211: Free mpath object when rhashtable insertion fails
From: Johannes Berg @ 2019-02-13 15:04 UTC (permalink / raw)
To: Herbert Xu, David Miller, linux-wireless, netdev, j, tgraf
In-Reply-To: <E1gtvgu-0006bT-4G@gondobar>
On Wed, 2019-02-13 at 22:39 +0800, Herbert Xu wrote:
> + if (ret != -EEXIST)
> return ERR_PTR(ret);
Surely that should still be "if (ret && ret != -EEXIST)" otherwise you
return for the success case too? or "else if"?
I'd probably say we should combine all those ifs into something like this?
if (ret == 0) {
sdata->u.mesh.mesh_paths_generation++;
return new_mpath;
} else if (ret == -EEXIST) {
kfree(new_mpath);
return mpath;
} else {
kfree(new_mpath);
return NULL;
}
Yes, that's a small change in behaviour as to when the generation
counter is updated, but I'm pretty sure it really only needs updating
when we inserted something, not if we found an old mpath.
johannes
^ permalink raw reply
* [PATCH net] net: dlink: sundance: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-13 15:12 UTC (permalink / raw)
To: netdev; +Cc: kda, davem, yang.wei9, albin_yang
From: Yang Wei <yang.wei9@zte.com.cn>
dev_consume_skb_irq() should be called in intr_handler() when skb
xmit done. It makes drop profiles(dropwatch, perf) more friendly.
Remove a redundant blank line in intr_handler().
Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
drivers/net/ethernet/dlink/sundance.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index 1a27176..4a37a69 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -1193,7 +1193,6 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
int handled = 0;
int i;
-
do {
int intr_status = ioread16(ioaddr + IntrStatus);
iowrite16(intr_status, ioaddr + IntrStatus);
@@ -1286,7 +1285,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
dma_unmap_single(&np->pci_dev->dev,
le32_to_cpu(np->tx_ring[entry].frag[0].addr),
skb->len, DMA_TO_DEVICE);
- dev_kfree_skb_irq (np->tx_skbuff[entry]);
+ dev_consume_skb_irq(np->tx_skbuff[entry]);
np->tx_skbuff[entry] = NULL;
np->tx_ring[entry].frag[0].addr = 0;
np->tx_ring[entry].frag[0].length = 0;
@@ -1305,7 +1304,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
dma_unmap_single(&np->pci_dev->dev,
le32_to_cpu(np->tx_ring[entry].frag[0].addr),
skb->len, DMA_TO_DEVICE);
- dev_kfree_skb_irq (np->tx_skbuff[entry]);
+ dev_consume_skb_irq(np->tx_skbuff[entry]);
np->tx_skbuff[entry] = NULL;
np->tx_ring[entry].frag[0].addr = 0;
np->tx_ring[entry].frag[0].length = 0;
--
2.7.4
^ permalink raw reply related
* [PATCH net] net: amd: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-13 15:14 UTC (permalink / raw)
To: netdev; +Cc: davem, yang.wei9, albin_yang
From: Yang Wei <yang.wei9@zte.com.cn>
dev_consume_skb_irq() should be called when skb xmit done. It makes
drop profiles(dropwatch, perf) more friendly.
Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
drivers/net/ethernet/amd/lance.c | 2 +-
drivers/net/ethernet/amd/ni65.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index b56d84c..f90b454 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c
@@ -1084,7 +1084,7 @@ static irqreturn_t lance_interrupt(int irq, void *dev_id)
/* We must free the original skb if it's not a data-only copy
in the bounce buffer. */
if (lp->tx_skbuff[entry]) {
- dev_kfree_skb_irq(lp->tx_skbuff[entry]);
+ dev_consume_skb_irq(lp->tx_skbuff[entry]);
lp->tx_skbuff[entry] = NULL;
}
dirty_tx++;
diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c
index 8931ce6..87ff5d6 100644
--- a/drivers/net/ethernet/amd/ni65.c
+++ b/drivers/net/ethernet/amd/ni65.c
@@ -1028,7 +1028,7 @@ static void ni65_xmit_intr(struct net_device *dev,int csr0)
#ifdef XMT_VIA_SKB
if(p->tmd_skb[p->tmdlast]) {
- dev_kfree_skb_irq(p->tmd_skb[p->tmdlast]);
+ dev_consume_skb_irq(p->tmd_skb[p->tmdlast]);
p->tmd_skb[p->tmdlast] = NULL;
}
#endif
--
2.7.4
^ permalink raw reply related
* [PATCH net] net: myri10ge: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-13 15:15 UTC (permalink / raw)
To: netdev; +Cc: christopher.lee, davem, yang.wei9, albin_yang
From: Yang Wei <yang.wei9@zte.com.cn>
dev_consume_skb_irq() should be called in myri10ge_tx_done() when
skb xmit done. It makes drop profiles(dropwatch, perf) more friendly.
Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 19ce0e6..e0340f7 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -1408,7 +1408,7 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
if (skb) {
ss->stats.tx_bytes += skb->len;
ss->stats.tx_packets++;
- dev_kfree_skb_irq(skb);
+ dev_consume_skb_irq(skb);
if (len)
pci_unmap_single(pdev,
dma_unmap_addr(&tx->info[idx],
--
2.7.4
^ permalink raw reply related
* [PATCH net] net: sgi: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-13 15:17 UTC (permalink / raw)
To: netdev; +Cc: ralf, davem, yang.wei9, albin_yang
From: Yang Wei <yang.wei9@zte.com.cn>
dev_consume_skb_irq() should be called when skb xmit done. It makes
drop profiles(dropwatch, perf) more friendly.
Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
drivers/net/ethernet/sgi/ioc3-eth.c | 2 +-
drivers/net/ethernet/sgi/meth.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 3140999..358e66b 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -666,7 +666,7 @@ static inline void ioc3_tx(struct net_device *dev)
packets++;
skb = ip->tx_skbs[o_entry];
bytes += skb->len;
- dev_kfree_skb_irq(skb);
+ dev_consume_skb_irq(skb);
ip->tx_skbs[o_entry] = NULL;
o_entry = (o_entry + 1) & 127; /* Next */
diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c
index 0e1b7e9..a80971d 100644
--- a/drivers/net/ethernet/sgi/meth.c
+++ b/drivers/net/ethernet/sgi/meth.c
@@ -521,7 +521,7 @@ static void meth_tx_cleanup(struct net_device* dev, unsigned long int_status)
DPRINTK("RPTR points us here, but packet not done?\n");
break;
}
- dev_kfree_skb_irq(skb);
+ dev_consume_skb_irq(skb);
priv->tx_skbs[priv->tx_read] = NULL;
priv->tx_ring[priv->tx_read].header.raw = 0;
priv->tx_read = (priv->tx_read+1)&(TX_RING_ENTRIES-1);
--
2.7.4
^ permalink raw reply related
* [PATCH net] net: micrel: ks8695net: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-13 15:18 UTC (permalink / raw)
To: netdev; +Cc: davem, yang.wei9, albin_yang
From: Yang Wei <yang.wei9@zte.com.cn>
dev_consume_skb_irq() should be called in ks8695_tx_irq() when skb
xmit done. It makes drop profiles(dropwatch, perf) more friendly.
Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
drivers/net/ethernet/micrel/ks8695net.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c
index b881f5d..6006d47 100644
--- a/drivers/net/ethernet/micrel/ks8695net.c
+++ b/drivers/net/ethernet/micrel/ks8695net.c
@@ -391,7 +391,7 @@ ks8695_tx_irq(int irq, void *dev_id)
ksp->tx_buffers[buff_n].dma_ptr,
ksp->tx_buffers[buff_n].length,
DMA_TO_DEVICE);
- dev_kfree_skb_irq(ksp->tx_buffers[buff_n].skb);
+ dev_consume_skb_irq(ksp->tx_buffers[buff_n].skb);
ksp->tx_buffers[buff_n].skb = NULL;
ksp->tx_ring_used--;
}
--
2.7.4
^ permalink raw reply related
* [PATCH net] net: natsemi: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-13 15:19 UTC (permalink / raw)
To: netdev; +Cc: tsbogend, davem, yang.wei9, albin_yang
From: Yang Wei <yang.wei9@zte.com.cn>
dev_consume_skb_irq() should be called when skb xmit done. It makes
drop profiles(dropwatch, perf) more friendly.
Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
drivers/net/ethernet/natsemi/natsemi.c | 2 +-
drivers/net/ethernet/natsemi/ns83820.c | 2 +-
drivers/net/ethernet/natsemi/sonic.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index b9a1a9f..1a2634c 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -2173,7 +2173,7 @@ static void netdev_tx_done(struct net_device *dev)
np->tx_skbuff[entry]->len,
PCI_DMA_TODEVICE);
/* Free the original skb. */
- dev_kfree_skb_irq(np->tx_skbuff[entry]);
+ dev_consume_skb_irq(np->tx_skbuff[entry]);
np->tx_skbuff[entry] = NULL;
}
if (netif_queue_stopped(dev) &&
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index 958fced..000009e 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -1003,7 +1003,7 @@ static void do_tx_done(struct net_device *ndev)
addr,
len,
PCI_DMA_TODEVICE);
- dev_kfree_skb_irq(skb);
+ dev_consume_skb_irq(skb);
atomic_dec(&dev->nr_tx_skbs);
} else
pci_unmap_page(dev->pci_dev,
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
index c805dcb..aaec009 100644
--- a/drivers/net/ethernet/natsemi/sonic.c
+++ b/drivers/net/ethernet/natsemi/sonic.c
@@ -328,7 +328,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
}
/* We must free the original skb */
- dev_kfree_skb_irq(lp->tx_skb[entry]);
+ dev_consume_skb_irq(lp->tx_skb[entry]);
lp->tx_skb[entry] = NULL;
/* and unmap DMA buffer */
dma_unmap_single(lp->device, lp->tx_laddr[entry], lp->tx_len[entry], DMA_TO_DEVICE);
--
2.7.4
^ permalink raw reply related
* [PATCH net] net: nuvoton: w90p910_ether: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-02-13 15:21 UTC (permalink / raw)
To: netdev, linux-arm-kernel; +Cc: mcuos.com, davem, yang.wei9, albin_yang
From: Yang Wei <yang.wei9@zte.com.cn>
dev_consume_skb_irq() should be called in w90p910_ether_start_xmit()
when skb xmit done. It makes drop profiles(dropwatch, perf) more
friendly.
Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
drivers/net/ethernet/nuvoton/w90p910_ether.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c
index c662c6f..67bf02b 100644
--- a/drivers/net/ethernet/nuvoton/w90p910_ether.c
+++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c
@@ -630,7 +630,7 @@ static int w90p910_ether_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (!(w90p910_send_frame(dev, skb->data, skb->len))) {
ether->skb = skb;
- dev_kfree_skb_irq(skb);
+ dev_consume_skb_irq(skb);
return 0;
}
return -EAGAIN;
--
2.7.4
^ permalink raw reply related
* RE: PURCHASE ORDER- #0023/2019, SMA35578__INTERNATIONAL Inbox x
From: Fong, Erica @ 2019-02-13 15:25 UTC (permalink / raw)
To: netdev
[-- Attachment #1: SOA.zip --]
[-- Type: application/zip, Size: 240594 bytes --]
^ permalink raw reply
* Re: [PATCH net-next v4] ipmr: ip6mr: Create new sockopt to clear mfc cache or vifs
From: Nicolas Dichtel @ 2019-02-13 15:31 UTC (permalink / raw)
To: Callum Sinclair, davem, kuznet, yoshfuji, nikolay, netdev,
linux-kernel
In-Reply-To: <20190212031255.16121-2-callum.sinclair@alliedtelesis.co.nz>
Le 12/02/2019 à 04:12, Callum Sinclair a écrit :
[snip]
> /* Wipe the cache */
> - list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
> - if (!all && (c->mfc_flags & MFC_STATIC))
> - continue;
> - rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
> - list_del_rcu(&c->list);
> - cache = (struct mfc_cache *)c;
> - call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
> - mrt->id);
> - mroute_netlink_event(mrt, cache, RTM_DELROUTE);
> - mr_cache_put(c);
> - }
> -
> - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
> - spin_lock_bh(&mfc_unres_lock);
> - list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
> - list_del(&c->list);
> + if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
> + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
> + if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
> + (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
> + continue;
> + rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
> + list_del_rcu(&c->list);
> cache = (struct mfc_cache *)c;
> + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
> + mrt->id);
> mroute_netlink_event(mrt, cache, RTM_DELROUTE);
> - ipmr_destroy_unres(mrt, cache);
> + mr_cache_put(c);
> + }
> +
> + if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
I wonder if the mfc_unres_queue must be cleaned up when only
MRT_FLUSH_MFC_STATIC is provided. My first intuition would be to do it only with
MRT_FLUSH_MFC.
Any opinion?
Regards,
Nicolas
^ permalink raw reply
* Re: [RFC 03/19] net/ice: Add support for ice peer devices and drivers
From: Jeff Kirsher @ 2019-02-13 15:40 UTC (permalink / raw)
To: Jason Gunthorpe, Shiraz Saleem
Cc: dledford, davem, linux-rdma, netdev, mustafa.ismail,
Anirudh Venkataramanan
In-Reply-To: <20190213034104.GA8751@ziepe.ca>
[-- Attachment #1: Type: text/plain, Size: 1803 bytes --]
On Tue, 2019-02-12 at 20:41 -0700, Jason Gunthorpe wrote:
> On Tue, Feb 12, 2019 at 03:43:46PM -0600, Shiraz Saleem wrote:
> > From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
> >
> > The E800 series of Ethernet devices has multiple hardware blocks,
> > of
> > which RDMA is one. The RDMA block isn't interfaced directly to PCI
> > or any other bus. The RDMA driver (irdma) thus depends on the ice
> > driver to provide access to the RDMA hardware block.
> >
> > The ice driver first creates a pseudo bus and then creates and
> > attaches
> > a new device to the pseudo bus using device_register(). This new
> > device
> > is referred to as a "peer device" and the associated driver (i.e.
> > irdma)
> > is a "peer driver" to ice. Once the peer driver loads, it can call
> > ice driver functions exposed to it via struct ice_ops. Similarly,
> > ice can
> > call peer driver functions exposed to it via struct ice_peer_ops.
>
> This seems quite big for this straightforward description..
>
> I was going to say I like the idea of using the driver model to
> connect the drivers, but if it takes so much code ...
Part of the reason why the ice driver patches are much larger than the
i40e patch is because currently there is zero RDMA support for our ice
driver. The ice developers wanted to wait for the new RDMA interface
implementation before adding the RDMA support to the ice driver.
>
> > + /* check for reset in progress before proceeding */
> > + pf = pci_get_drvdata(peer_dev->pdev);
> > + for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
> > + if (!ice_is_reset_in_progress(pf->state))
> > + break;
> > + msleep(100);
> > + }
>
> Use proper locking, not loops with sleeps.
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* RE: [PATCH net] sctp: make sctp_setsockopt_events() less strict about the option length
From: David Laight @ 2019-02-13 16:17 UTC (permalink / raw)
To: 'Marcelo Ricardo Leitner', David Miller
Cc: julien@arista.com, netdev@vger.kernel.org,
linux-sctp@vger.kernel.org, linux-kernel@vger.kernel.org,
nhorman@tuxdriver.com, vyasevich@gmail.com, lucien.xin@gmail.com
In-Reply-To: <20190210201559.GE10665@localhost.localdomain>
From: Marcelo Ricardo Leitner
> Sent: 10 February 2019 20:16
...
> We have issues on read path too. 52ccb8e90c0a ("[SCTP]: Update
> SCTP_PEER_ADDR_PARAMS socket option to the latest api draft.")
> extended struct sctp_paddrparams and its getsockopt goes with:
The API shouldn't change like this at all.
Is this from the RFC or elsewhere??
If the structure changes the socket option name and value
should also change.
IMHO large chunks of the sctp rfc are just horrid.
In particular all the places where is states that API functions are
implemented using setsockopt() - that should be an implementation detail.
Also ISTR that some of the structures are defined to have holes in them...
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
^ permalink raw reply
* Re: [RFC PATCH net-next 2/5] net: 8021q: vlan_dev: add vid tag for uc and mc address lists
From: Ivan Khoronzhuk @ 2019-02-13 16:17 UTC (permalink / raw)
To: Florian Fainelli, davem, linux-omap, netdev, linux-kernel, jiri,
andrew
In-Reply-To: <20190122131239.GC3125@khorivan>
On Tue, Jan 22, 2019 at 03:12:41PM +0200, Ivan Khoronzhuk wrote:
>On Mon, Jan 21, 2019 at 03:37:41PM -0800, Florian Fainelli wrote:
>>On 12/4/18 3:42 PM, Ivan Khoronzhuk wrote:
>>>On Tue, Dec 04, 2018 at 11:49:27AM -0800, Florian Fainelli wrote:
>
>[...]
>
>>
>>Ivan, based on the recent submission I copied you on [1], it sounds like
>>we want to move ahead with your proposal to extend netdev_hw_addr with a
>>vid member.
>>
>>On second thought, your approach is good and if we enclose the vid
>>member within an #if IS_ENABLED(CONFIG_VLAN)8021Q) we should be good for
>>most foreseeable use cases, if not, we can always introduce a variable
>>size/defined context in the future.
>>
>>Can you resubmit this patch series as non-RFC in the next few days so I
>>can also repost mine [1] and take advantage of these changes for
>>multicast over VLAN when VLAN filtering is globally enabled on the device.
>>
>>[1]: https://www.spinics.net/lists/netdev/msg544722.html
>>
>>Thanks!
>
>Yes, sure. I can start to do that in several days.
>Just a little busy right now.
>
>Just before doing this, maybe some comments could be added as it has more
>attention now. Meanwhile I can send alternative variant but based on
>real dev splitting addresses between vlans. In this approach it leaves address
>space w/o vid extension but requires more changes to vlan core. Drawback here
>that to change one address alg traverses all related vlan addresses, it can be
>cpu/time wasteful, if it's done regularly, but saves memory....
>
>Basically it's implemented locally in cpsw and requires more changes to move
>it as some vlan core auxiliary functions to be reused. But it can work only
>with vlans directly on top of real dev, which is fixable.
>
>Core function here:
>__hw_addr_ref_sync_dev
>it is called only for address the link of which was increased/decreased, thus
>update made only on one address, comparing it for every vlan dev.
>
>It was added with this patch:
>[1] net: core: dev_addr_lists: add auxiliary func to handle reference
>address update e7946760de5852f32
>
>And used by this patch:
>[2] net: ethernet: ti: cpsw: fix vlan mcast 15180eca569bfe1d4d
>
>So, idea is to move [2] to be vlan core auxiliary function to be reused
>by NIC drivers.
>
>But potentially it can bring a little more changes I assume:
>
>1) add priv_flag |= IFF_IV_FLT (independent vlan filtering). It allows to reuse
>this flag for farther changes, probably for per vlan allmulti or so.
>
>2) real dev has to have complete list for vlans, not only their vids, but also
>all vlandevs in device chain above it. So changes in add_vid can be required.
>Vlan core can assign vlan dev pointer to real device only after it's completely
>initialized. And for propagation reasons it requires every device in
>infrastructure to be aware. That seems doable, but depends not only on me.
>
>3) Move code from [2] to be auxiliary vlan core API for setting mc and uc.
>From this patch only one function is cpsw specific: cpsw_set_mc(). The rest can
>be applicable on every NIC supporting IFF_IV_FLT.
>
>4) Move code from link below to do the same but for uc addresses:
>https://git.linaro.org/people/ivan.khoronzhuk/tsn_kernel.git/commit/?h=ucast_vlan_fix&id=ebc88a7d8758759322d9ff88f25f8bac51ce7219
>here only one func cpsw specific: cpsw_set_uc()
>the rest can be generic.
>
>As third alternative, we can think about how to reduce memory for addresses by
>reusing them or else, but this is as continuation of addr+vid approach, and API
>probably would be the same.
>
>Then all this can be compared for proper decision.
Hi Florian,
After several more investigations and tries probably better left this
idea as is.
Here actually several explanations for this:
1) If even assume that we can get access to vlan devices in the above ndev
tree (we can) that doesn't guarantee that receive vlan filters are set
replicating this structure. For example bond device can have one active slave
but both of them in the tree having vid set, in this case addresses are
syched only with active slave, no filters should be applied to not active slave.
this can be achieved only each address has vid context.
2) According to 1) rx filters device structure can be created while mc_sync()
in each rx_mode(), and then used as orthogonal info. I've tried and it looks
not cool and consumes anyway memory and even if it's less it's still not very
scalable. (+ no normal signal "in complex structure case" when address should
be undated to avoid redundant cpu cycles). Not sure it can have practical
results and be universal enouph.
3) Assuming that every device in the tree (bond, team or else) is legal to
modify its own address space, the real end device cannot be sure the vlan device
address spaces reflects vid addresses that device tree want's from him.
According to this each address in address space must hold its own context at
every device and this context is comparable with address size.
>-- Regards,
>Ivan Khoronzhuk
--
Regards,
Ivan Khoronzhuk
^ permalink raw reply
* Re: [PATCH iproute2 net-next v2 3/4] ss: Buffer raw fields first, then render them as a table
From: Stephen Hemminger @ 2019-02-13 16:51 UTC (permalink / raw)
To: Stefano Brivio; +Cc: Eric Dumazet, netdev, Sabrina Dubroca
In-Reply-To: <20190213093711.13ab560e@redhat.com>
On Wed, 13 Feb 2019 09:37:11 +0100
Stefano Brivio <sbrivio@redhat.com> wrote:
> On Tue, 12 Feb 2019 16:42:04 -0800
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> > I do not get it.
> >
> > "ss -emoi " uses almost 1KB per socket.
> >
> > 10,000,000 sockets -> we need about 10GB of memory ???
> >
> > This is a serious regression.
>
> I guess this is rather subjective: the worst case I considered back then
> was the output of 'ss -tei0' (less than 500 bytes) for one million
> sockets, which gives 500M of memory, which should in turn be fine on a
> machine handling one million sockets.
>
> Now, if 'ss -emoi' on 10 million sockets is an actual use case (out of
> curiosity: how are you going to process that output? Would JSON help?),
> I see two easy options to solve this:
>
> 1. flush the output every time we reach a given buffer size (1M
> perhaps). This might make the resulting blocks slightly unaligned,
> with occasional loss of readability on lines occurring every 1k to
> 10k sockets approximately, even though after 1k sockets column sizes
> won't change much (it looks anyway better than the original), and I
> don't expect anybody to actually scroll that output
>
> 2. add a switch for unbuffered output, but then you need to remember to
> pass it manually, and the whole output would be as bad as the
> original in case you need the switch.
>
> I'd rather go with 1., it's easy to implement (we already have partial
> flushing with '--events') and it looks like a good compromise on
> usability. Thoughts?
>
I agree with eric. The benefits of buffering are not worth it.
Let's just choose a reasonable field width, if something is too big, columns won't line up
which i snot a big deal.
Unless you come up with a better solution, I am going to revert this.
^ permalink raw reply
* [PATCH -next] net: ipvlan_l3s: fix kconfig dependency warning
From: Randy Dunlap @ 2019-02-13 16:55 UTC (permalink / raw)
To: netdev@vger.kernel.org; +Cc: Mahesh Bandewar, Daniel Borkmann, David Miller
From: Randy Dunlap <rdunlap@infradead.org>
Fix the kconfig warning in IPVLAN_L3S when neither INET nor IPV6
is enabled:
WARNING: unmet direct dependencies detected for NET_L3_MASTER_DEV
Depends on [n]: NET [=y] && (INET [=n] || IPV6 [=n])
Selected by [y]:
- IPVLAN_L3S [=y] && NETDEVICES [=y] && NET_CORE [=y] && NETFILTER [=y]
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Mahesh Bandewar <maheshb@google.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
---
v2: simplify the dependency to IPVLAN
Seen in mmotm but applies to linux-next.
drivers/net/Kconfig | 1 +
1 file changed, 1 insertion(+)
--- linux-next-20190213.orig/drivers/net/Kconfig
+++ linux-next-20190213/drivers/net/Kconfig
@@ -147,6 +147,7 @@ config MACVTAP
config IPVLAN_L3S
depends on NETFILTER
+ depends on IPVLAN
def_bool y
select NET_L3_MASTER_DEV
^ permalink raw reply
* Re: [PATCH net] net: macb: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Claudiu.Beznea @ 2019-02-13 16:55 UTC (permalink / raw)
To: albin_yang, netdev; +Cc: Nicolas.Ferre, davem, yang.wei9
In-Reply-To: <1549987202-5393-1-git-send-email-albin_yang@163.com>
On 12.02.2019 18:00, Yang Wei wrote:
> From: Yang Wei <yang.wei9@zte.com.cn>
>
> dev_consume_skb_irq() should be called in at91ether_interrupt() when
> skb xmit done. It makes drop profiles(dropwatch, perf) more friendly.
>
> Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
Reviewed-by: Claudiu Beznea <claudiu.beznea@microchip.com>
> ---
> drivers/net/ethernet/cadence/macb_main.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
> index 2b28826..835cc58 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -3763,7 +3763,7 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
> dev->stats.tx_errors++;
>
> if (lp->skb) {
> - dev_kfree_skb_irq(lp->skb);
> + dev_consume_skb_irq(lp->skb);
> lp->skb = NULL;
> dma_unmap_single(NULL, lp->skb_physaddr,
> lp->skb_length, DMA_TO_DEVICE);
>
^ permalink raw reply
* Re: [PATCH net-next 1/1] flow_offload: fix block stats
From: Pablo Neira Ayuso @ 2019-02-13 16:59 UTC (permalink / raw)
To: John Hurley; +Cc: jiri, davem, netdev, oss-drivers
In-Reply-To: <1550017432-26306-1-git-send-email-john.hurley@netronome.com>
On Wed, Feb 13, 2019 at 12:23:52AM +0000, John Hurley wrote:
> With the introduction of flow_stats_update(), drivers now update the stats
> fields of the passed tc_cls_flower_offload struct, rather than call
> tcf_exts_stats_update() directly to update the stats of offloaded TC
> flower rules. However, if multiple qdiscs are registered to a TC shared
> block and a flower rule is applied, then, when getting stats for the rule,
> multiple callbacks may be made.
>
> Take this into consideration by modifying flow_stats_update to gather the
> stats from all callbacks. Currently, the values in tc_cls_flower_offload
> only account for the last stats callback in the list.
>
> Fixes: 3b1903ef97c0 ("flow_offload: add statistics retrieval infrastructure and use it")
> Signed-off-by: John Hurley <john.hurley@netronome.com>
> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
^ permalink raw reply
* [PATCH net] net: stmmac: Fix NAPI poll in TX path when in multi-queue
From: Jose Abreu @ 2019-02-13 17:00 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: Jose Abreu, Joao Pinto, David S . Miller, Giuseppe Cavallaro,
Alexandre Torgue
Commit 8fce33317023 introduced the concept of NAPI per-channel and
independent cleaning of TX path.
This is currently breaking performance in some cases. The scenario
happens when all packets are being received in Queue 0 but the TX is
performed in Queue != 0.
I didn't look very deep but it seems that NAPI for Queue 0 will clean
the RX path but as TX is in different NAPI, this last one is called at a
slower rate which kills performance in TX. I suspect this is due to TX
cleaning takes much longer than RX and because NAPI will get canceled
once we return with 0 budget consumed (e.g. when TX is still not done it
will return 0 budget).
Fix this by looking at all TX channels in NAPI poll function.
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Fixes: 8fce33317023 ("net: stmmac: Rework coalesce timer and fix multi-queue races")
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
---
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 -
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 +++++------
2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 63e1064b27a2..8f6741a626d8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -82,7 +82,6 @@ struct stmmac_channel {
struct stmmac_priv *priv_data;
u32 index;
int has_rx;
- int has_tx;
};
struct stmmac_tc_entry {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 685d20472358..5bf5f8ebb4b6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2031,13 +2031,13 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
struct stmmac_channel *ch = &priv->channel[chan];
bool needs_work = false;
- if ((status & handle_rx) && ch->has_rx) {
+ if (status & handle_rx) {
needs_work = true;
} else {
status &= ~handle_rx;
}
- if ((status & handle_tx) && ch->has_tx) {
+ if (status & handle_tx) {
needs_work = true;
} else {
status &= ~handle_tx;
@@ -3528,11 +3528,12 @@ static int stmmac_napi_poll(struct napi_struct *napi, int budget)
struct stmmac_priv *priv = ch->priv_data;
int work_done, rx_done = 0, tx_done = 0;
u32 chan = ch->index;
+ int i;
priv->xstats.napi_poll++;
- if (ch->has_tx)
- tx_done = stmmac_tx_clean(priv, budget, chan);
+ for (i = 0; i < priv->plat->tx_queues_to_use; i++)
+ tx_done += stmmac_tx_clean(priv, budget, i);
if (ch->has_rx)
rx_done = stmmac_rx(priv, budget, chan);
@@ -4325,8 +4326,6 @@ int stmmac_dvr_probe(struct device *device,
if (queue < priv->plat->rx_queues_to_use)
ch->has_rx = true;
- if (queue < priv->plat->tx_queues_to_use)
- ch->has_tx = true;
netif_napi_add(ndev, &ch->napi, stmmac_napi_poll,
NAPI_POLL_WEIGHT);
--
2.7.4
^ permalink raw reply related
* [PATCH bpf-next] net: bpf: remove XDP_QUERY_XSK_UMEM enumerator
From: Björn Töpel @ 2019-02-13 17:07 UTC (permalink / raw)
To: ast, daniel, netdev
Cc: Jan Sokolowski, magnus.karlsson, magnus.karlsson, intel-wired-lan
From: Jan Sokolowski <jan.sokolowski@intel.com>
Commit c9b47cc1fabc ("xsk: fix bug when trying to use both copy and
zero-copy on one queue id") moved the umem query code to the AF_XDP
core, and therefore removed the need to query the netdevice for a
umem.
This patch removes XDP_QUERY_XSK_UMEM and all code that implement that
behavior, which is just dead code.
Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_main.c | 3 --
drivers/net/ethernet/intel/i40e/i40e_xsk.c | 28 -------------------
drivers/net/ethernet/intel/i40e/i40e_xsk.h | 2 --
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 --
.../ethernet/intel/ixgbe/ixgbe_txrx_common.h | 2 --
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 17 -----------
include/linux/netdevice.h | 7 ++---
7 files changed, 3 insertions(+), 59 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 44856a84738d..5e74a5127849 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -12128,9 +12128,6 @@ static int i40e_xdp(struct net_device *dev,
case XDP_QUERY_PROG:
xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
return 0;
- case XDP_QUERY_XSK_UMEM:
- return i40e_xsk_umem_query(vsi, &xdp->xsk.umem,
- xdp->xsk.queue_id);
case XDP_SETUP_XSK_UMEM:
return i40e_xsk_umem_setup(vsi, xdp->xsk.umem,
xdp->xsk.queue_id);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 96d849460d9b..e190a2c2b9ff 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -154,34 +154,6 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
return 0;
}
-/**
- * i40e_xsk_umem_query - Queries a certain ring/qid for its UMEM
- * @vsi: Current VSI
- * @umem: UMEM associated to the ring, if any
- * @qid: Rx ring to associate UMEM to
- *
- * This function will store, if any, the UMEM associated to certain ring.
- *
- * Returns 0 on success, <0 on failure
- **/
-int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
- u16 qid)
-{
- struct net_device *netdev = vsi->netdev;
- struct xdp_umem *queried_umem;
-
- if (vsi->type != I40E_VSI_MAIN)
- return -EINVAL;
-
- queried_umem = xdp_get_umem_from_qid(netdev, qid);
-
- if (!queried_umem)
- return -EINVAL;
-
- *umem = queried_umem;
- return 0;
-}
-
/**
* i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid
* @vsi: Current VSI
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index 9038c5d5cf08..8cc0a2e7d9a2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -10,8 +10,6 @@ struct zero_copy_allocator;
int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
-int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
- u16 qid);
int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
u16 qid);
void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index b53087a980ef..38c430b94ae3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10280,9 +10280,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
xdp->prog_id = adapter->xdp_prog ?
adapter->xdp_prog->aux->id : 0;
return 0;
- case XDP_QUERY_XSK_UMEM:
- return ixgbe_xsk_umem_query(adapter, &xdp->xsk.umem,
- xdp->xsk.queue_id);
case XDP_SETUP_XSK_UMEM:
return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem,
xdp->xsk.queue_id);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
index 53d4089f5644..d93a690aff74 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -30,8 +30,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring);
struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
struct ixgbe_ring *ring);
-int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem,
- u16 qid);
int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
u16 qid);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 65c3e2c979d4..98870707b51a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -174,23 +174,6 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
return 0;
}
-int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem,
- u16 qid)
-{
- if (qid >= adapter->num_rx_queues)
- return -EINVAL;
-
- if (adapter->xsk_umems) {
- if (qid >= adapter->num_xsk_umems)
- return -EINVAL;
- *umem = adapter->xsk_umems[qid];
- return 0;
- }
-
- *umem = NULL;
- return 0;
-}
-
int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
u16 qid)
{
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1d95e634f3fe..6aedaf1e9a25 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -868,7 +868,6 @@ enum bpf_netdev_command {
/* BPF program for offload callbacks, invoked at program load time. */
BPF_OFFLOAD_MAP_ALLOC,
BPF_OFFLOAD_MAP_FREE,
- XDP_QUERY_XSK_UMEM,
XDP_SETUP_XSK_UMEM,
};
@@ -895,10 +894,10 @@ struct netdev_bpf {
struct {
struct bpf_offloaded_map *offmap;
};
- /* XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM */
+ /* XDP_SETUP_XSK_UMEM */
struct {
- struct xdp_umem *umem; /* out for query*/
- u16 queue_id; /* in for query */
+ struct xdp_umem *umem;
+ u16 queue_id;
} xsk;
};
};
--
2.19.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox