* [RFC PATCH 16/17] fib_trie: Remove checks for index >= tnode_child_length from tnode_get_child
From: Alexander Duyck @ 2014-12-22 17:42 UTC (permalink / raw)
To: netdev
In-Reply-To: <20141222172632.1119.51469.stgit@ahduyck-vm-fedora20>
For some reason the compiler doesn't seem to understand that when we are in
a loop that runs from tnode_child_length - 1 to 0 we don't expect the value
of tn->bits to change. As such every call to tnode_get_child was rerunning
tnode_child_length which ended up consuming quite a bit of space in the
resultant assembly code.
I have gone through and verified that in all cases where tnode_get_child
is used we are either winding through a fixed loop from tnode_child_length -
1 to 0, or are in a fastpath case where we are verifying the value by
either checking for any remaining bits after shifting index by bits and
testing for leaf, or by using tnode_child_length.
size net/ipv4/fib_trie.o
Before:
text data bss dec hex filename
15506 376 8 15890 3e12 net/ipv4/fib_trie.o
After:
text data bss dec hex filename
14827 376 8 15211 3b6b net/ipv4/fib_trie.o
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
net/ipv4/fib_trie.c | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1a0f9c5..b6298ed 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -186,8 +186,6 @@ static inline unsigned long tnode_child_length(const struct tnode *tn)
static inline struct tnode *tnode_get_child(const struct tnode *tn,
unsigned long i)
{
- BUG_ON(i >= tnode_child_length(tn));
-
return rtnl_dereference(tn->child[i]);
}
@@ -195,8 +193,6 @@ static inline struct tnode *tnode_get_child(const struct tnode *tn,
static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn,
unsigned long i)
{
- BUG_ON(i >= tnode_child_length(tn));
-
return rcu_dereference_rtnl(tn->child[i]);
}
@@ -371,7 +367,7 @@ static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
*/
static void put_child(struct tnode *tn, unsigned long i, struct tnode *n)
{
- struct tnode *chi = rtnl_dereference(tn->child[i]);
+ struct tnode *chi = tnode_get_child(tn, i);
int isfull, wasfull;
BUG_ON(i >= tnode_child_length(tn));
@@ -867,7 +863,7 @@ static struct tnode *fib_find_node(struct trie *t, u32 key)
if (IS_LEAF(n))
break;
- n = rcu_dereference_rtnl(n->child[index]);
+ n = tnode_get_child_rcu(n, index);
}
return n;
@@ -934,7 +930,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
}
tp = n;
- n = rcu_dereference_rtnl(n->child[index]);
+ n = tnode_get_child_rcu(n, index);
}
l = leaf_new(key);
@@ -1210,7 +1206,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
pn = n;
cindex = index;
- n = rcu_dereference_rtnl(n->child[index]);
+ n = tnode_get_child_rcu(n, index);
if (unlikely(!n))
goto backtrace;
}
@@ -1829,7 +1825,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
if (n->bits < MAX_STAT_DEPTH)
s->nodesizes[n->bits]++;
- for (i = 0; i < tnode_child_length(n); i++) {
+ for (i = tnode_child_length(n); i--;) {
if (!rcu_access_pointer(n->child[i]))
s->nullpointers++;
}
^ permalink raw reply related
* [RFC PATCH 17/17] fib_trie: Add tracking value for suffix length
From: Alexander Duyck @ 2014-12-22 17:42 UTC (permalink / raw)
To: netdev
In-Reply-To: <20141222172632.1119.51469.stgit@ahduyck-vm-fedora20>
This change adds a tracking value for the maximum suffix length of all
prefixes stored in any given tnode. With this value we can determine if we
need to backtrace or not based on if the suffix is greater than the pos
value.
By doing this we can reduce the CPU overhead for lookups in the local table
as many of the prefixes there are 32b long and have a suffix length of 0
meaning we can immediately backtrace to the root node without needing to
test any of the nodes between it and where we ended up.
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
net/ipv4/fib_trie.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 122 insertions(+), 7 deletions(-)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b6298ed..d366dcc 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -96,6 +96,7 @@ struct tnode {
t_key key;
unsigned char bits; /* 2log(KEYLENGTH) bits needed */
unsigned char pos; /* 2log(KEYLENGTH) bits needed */
+ unsigned char slen;
struct tnode __rcu *parent;
struct rcu_head rcu;
union {
@@ -311,6 +312,7 @@ static struct tnode *leaf_new(t_key key)
* as the nodes are searched
*/
l->key = key;
+ l->slen = 0;
l->pos = 0;
/* set bits to 0 indicating we are not a tnode */
l->bits = 0;
@@ -342,6 +344,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
if (tn) {
tn->parent = NULL;
+ tn->slen = pos;
tn->pos = pos;
tn->bits = bits;
tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
@@ -387,6 +390,9 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n)
else if (!wasfull && isfull)
tn->full_children++;
+ if (n && (tn->slen < n->slen))
+ tn->slen = n->slen;
+
rcu_assign_pointer(tn->child[i], n);
}
@@ -635,6 +641,41 @@ static int halve(struct trie *t, struct tnode *oldtnode)
return 0;
}
+static unsigned char update_suffix(struct tnode *tn)
+{
+ unsigned char slen = tn->pos;
+ unsigned long stride, i;
+
+ /* search though the list of children looking for nodes that might
+ * have a suffix greater than the one we currently have. This is
+ * why we start with a stride of 2 since a stride of 1 would
+ * represent the nodes with suffix length equal to tn->pos
+ */
+ for (i = 0, stride = 0x2ul ; i < tnode_child_length(tn); i += stride) {
+ struct tnode *n = tnode_get_child(tn, i);
+
+ if (!n || (n->slen <= slen))
+ continue;
+
+ /* update stride and slen based on new value */
+ stride <<= (n->slen - slen);
+ slen = n->slen;
+ i &= ~(stride - 1);
+
+ /* if slen covers all but the last bit we can stop here
+ * there will be nothing longer than that since only node
+ * 0 and 1 << (bits - 1) could have that as their suffix
+ * length.
+ */
+ if ((slen + 1) >= (tn->pos + tn->bits))
+ break;
+ }
+
+ tn->slen = slen;
+
+ return slen;
+}
+
/* From "Implementing a dynamic compressed trie" by Stefan Nilsson of
* the Helsinki University of Technology and Matti Tikkanen of Nokia
* Telecommunications, page 6:
@@ -790,6 +831,19 @@ no_children:
/* drop dead node */
tnode_free_init(tn);
tnode_free(tn);
+ return;
+ }
+
+ /* Return if at least one deflate was run */
+ if (max_work != MAX_WORK)
+ return;
+
+ /* push the suffix length to the parent node */
+ if (tn->slen > tn->pos) {
+ unsigned char slen = update_suffix(tn);
+
+ if (tp && (slen > tp->slen))
+ tp->slen = slen;
}
}
@@ -818,8 +872,58 @@ static inline struct list_head *get_fa_head(struct tnode *l, int plen)
return &li->falh;
}
-static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
+static void leaf_pull_suffix(struct tnode *l)
+{
+ struct tnode *tp = node_parent(l);
+
+ while (tp && (tp->slen > tp->pos) && (tp->slen > l->slen)) {
+ if (update_suffix(tp) > l->slen)
+ break;
+ tp = node_parent(tp);
+ }
+}
+
+static void leaf_push_suffix(struct tnode *l)
+{
+ struct tnode *tn = node_parent(l);
+
+ /* if this is a new leaf then tn will be NULL and we can sort
+ * out parent suffix lengths as a part of trie_rebalance
+ */
+ while (tn && (tn->slen < l->slen)) {
+ tn->slen = l->slen;
+ tn = node_parent(tn);
+ }
+}
+
+static void remove_leaf_info(struct tnode *l, struct leaf_info *old)
+{
+ struct hlist_node *prev;
+
+ /* record the location of the pointer to this object */
+ prev = rtnl_dereference(hlist_pprev_rcu(&old->hlist));
+
+ /* remove the leaf info from the list */
+ hlist_del_rcu(&old->hlist);
+
+ /* if we emptied the list this leaf will be freed and we can sort
+ * out parent suffix lengths as a part of trie_rebalance
+ */
+ if (hlist_empty(&l->list))
+ return;
+
+ /* if we removed the tail then we need to update slen */
+ if (!rcu_access_pointer(hlist_next_rcu(prev))) {
+ struct leaf_info *li = hlist_entry(prev, typeof(*li), hlist);
+
+ l->slen = KEYLENGTH - li->plen;
+ leaf_pull_suffix(l);
+ }
+}
+
+static void insert_leaf_info(struct tnode *l, struct leaf_info *new)
{
+ struct hlist_head *head = &l->list;
struct leaf_info *li = NULL, *last = NULL;
if (hlist_empty(head)) {
@@ -836,6 +940,12 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
else
hlist_add_before_rcu(&new->hlist, &li->hlist);
}
+
+ /* if we added to the tail node then we need to update slen */
+ if (!rcu_access_pointer(hlist_next_rcu(&new->hlist))) {
+ l->slen = KEYLENGTH - new->plen;
+ leaf_push_suffix(l);
+ }
}
/* rcu_read_lock needs to be hold by caller from readside */
@@ -925,7 +1035,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
/* we have found a leaf. Prefixes have already been compared */
if (IS_LEAF(n)) {
/* Case 1: n is a leaf, and prefixes match*/
- insert_leaf_info(&n->list, li);
+ insert_leaf_info(n, li);
return fa_head;
}
@@ -939,7 +1049,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
return NULL;
}
- insert_leaf_info(&l->list, li);
+ insert_leaf_info(l, li);
/* Case 2: n is a LEAF or a TNODE and the key doesn't match.
*
@@ -1203,8 +1313,13 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
if (IS_LEAF(n))
goto found;
- pn = n;
- cindex = index;
+ /* only record pn and cindex if we are going to be chopping
+ * bits later. Otherwise we are just wasting cycles.
+ */
+ if (n->slen > n->pos) {
+ pn = n;
+ cindex = index;
+ }
n = tnode_get_child_rcu(n, index);
if (unlikely(!n))
@@ -1220,7 +1335,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
* between the key and the prefix exist in the region of
* the lsb and higher in the prefix.
*/
- if (unlikely(prefix_mismatch(key, n)))
+ if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
goto backtrace;
/* exit out and process leaf */
@@ -1419,7 +1534,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
tb->tb_num_default--;
if (list_empty(fa_head)) {
- hlist_del_rcu(&li->hlist);
+ remove_leaf_info(l, li);
free_leaf_info(li);
}
^ permalink raw reply related
* Re: [BUG] rtl8192se: panic accessing unmapped memory in skb
From: Larry Finger @ 2014-12-22 17:43 UTC (permalink / raw)
To: Eric Biggers; +Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <20141221234714.GA30675@zzz>
On 12/21/2014 05:47 PM, Eric Biggers wrote:
> Hi,
>
> To get your patched version to work at all I had to update
> _rtl_pci_init_rx_ring() to account for new return value of
> _rtl_pci_init_one_rxdesc(). I will let you know if anything shows up in the
> kernel log, but I expect this is a highly sporadic problem. The system has 4 GB
> of memory, and I used the 3.18 kernel for 10 days prior to the panic with no
> issues. The panic occurred while upgrading system packages, so it's possible
> that the system was experiencing memory pressure.
>
> I upgraded from 3.17 to 3.18 on Dec 8, so I've actually only had since then to
> notice any bugs that may have been introduced since 3.17.
>
> It does appear there were changes made to pci.c between 3.17 and 3.18. It
> appears the 3.17 code will drop the incoming packet if a new skb can't be
> allocated, whereas the 3.18 code assumes a new skb can always be allocated. The
> 3.17 behavior seems more logical to me. I don't know how either of these
> behaviors compare to other networking drivers, however.
Sorry about missing the necessary changes in the rest of the driver. That is
what I get for only compile testing.
I reviewed the 3.17 => 3.18 changes and found the difference in the logic that
you noticed, and I missed earlier. As a result, I need to push this change for
3.19 with the notation for updating of 3.18. You have probably received this
patch already. As it needs to be backported, I decided to forgo changing the
return value of _rtl_pci_init_one_rxdesc(). That change should be made, but
there is no emergency there.
Thanks,
Larry
^ permalink raw reply
* [PATCH 0/5] netlink/genetlink cleanups & multicast improvements
From: Johannes Berg @ 2014-12-22 17:56 UTC (permalink / raw)
To: netdev
I'm looking at using the multicast group functionality in a way that would
benefit from knowing when there are subscribers to avoid collecting the
required data when there aren't any. During this I noticed that the unbind
for multicast groups doesn't actually work - it's never called when sockets
are closed. Luckily, nobody actually uses the functionality.
While looking at the code trying to find why it's not called and where the
multicast listeners are actually removed, I found the potential cleanup in
patch 3. Patch 2 also has a cleanup for a generic netlink API in this area.
johannes
^ permalink raw reply
* [PATCH 1/5] netlink: rename netlink_unbind() to netlink_undo_bind()
From: Johannes Berg @ 2014-12-22 17:56 UTC (permalink / raw)
To: netdev; +Cc: Johannes Berg
In-Reply-To: <1419270999-22165-1-git-send-email-johannes@sipsolutions.net>
From: Johannes Berg <johannes.berg@intel.com>
The new name is more expressive - this isn't a generic unbind
function but rather only a little undo helper for use only in
netlink_bind().
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
net/netlink/af_netlink.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ef5f77b44ec7..1117a2cc7c28 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1430,8 +1430,8 @@ static int netlink_realloc_groups(struct sock *sk)
return err;
}
-static void netlink_unbind(int group, long unsigned int groups,
- struct netlink_sock *nlk)
+static void netlink_undo_bind(int group, long unsigned int groups,
+ struct netlink_sock *nlk)
{
int undo;
@@ -1481,7 +1481,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
err = nlk->netlink_bind(group);
if (!err)
continue;
- netlink_unbind(group, groups, nlk);
+ netlink_undo_bind(group, groups, nlk);
return err;
}
}
@@ -1491,7 +1491,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
netlink_insert(sk, net, nladdr->nl_pid) :
netlink_autobind(sock);
if (err) {
- netlink_unbind(nlk->ngroups, groups, nlk);
+ netlink_undo_bind(nlk->ngroups, groups, nlk);
return err;
}
}
--
2.1.1
^ permalink raw reply related
* [PATCH 4/5] netlink: call unbind when releasing socket
From: Johannes Berg @ 2014-12-22 17:56 UTC (permalink / raw)
To: netdev; +Cc: Johannes Berg
In-Reply-To: <1419270999-22165-1-git-send-email-johannes@sipsolutions.net>
From: Johannes Berg <johannes.berg@intel.com>
Currently, netlink_unbind() is only called when the socket
explicitly unbinds, which limits its usefulness (luckily
there are no users of it yet anyway.)
Call netlink_unbind() also when a socket is released, so it
becomes possible to track listeners with this callback and
without also implementing a netlink notifier (and checking
netlink_has_listeners() in there.)
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
net/netlink/af_netlink.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index df6086f69592..07a903de4439 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1266,6 +1266,13 @@ static int netlink_release(struct socket *sock)
netlink_table_ungrab();
}
+ if (nlk->netlink_unbind) {
+ int i;
+
+ for (i = 0; i < nlk->ngroups; i++)
+ if (test_bit(i, nlk->groups))
+ nlk->netlink_unbind(i + 1);
+ }
kfree(nlk->groups);
nlk->groups = NULL;
--
2.1.1
^ permalink raw reply related
* [PATCH 2/5] genetlink: pass only network namespace to genl_has_listeners()
From: Johannes Berg @ 2014-12-22 17:56 UTC (permalink / raw)
To: netdev; +Cc: Johannes Berg
In-Reply-To: <1419270999-22165-1-git-send-email-johannes@sipsolutions.net>
From: Johannes Berg <johannes.berg@intel.com>
There's no point to force the caller to know about the internal
genl_sock to use inside struct net, just have them pass the network
namespace. This doesn't really change code generation since it's
an inline, but makes the caller less magic - there's never any
reason to pass another socket.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
include/net/genetlink.h | 4 ++--
net/openvswitch/datapath.c | 3 +--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index af10c2cf8a1d..38620da4aa7a 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -395,11 +395,11 @@ static inline int genl_set_err(struct genl_family *family, struct net *net,
}
static inline int genl_has_listeners(struct genl_family *family,
- struct sock *sk, unsigned int group)
+ struct net *net, unsigned int group)
{
if (WARN_ON_ONCE(group >= family->n_mcgrps))
return -EINVAL;
group = family->mcgrp_offset + group;
- return netlink_has_listeners(sk, group);
+ return netlink_has_listeners(net->genl_sock, group);
}
#endif /* __NET_GENERIC_NETLINK_H */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 332b5a031739..4e9a5f035cbc 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -83,8 +83,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
unsigned int group)
{
return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
- genl_has_listeners(family, genl_info_net(info)->genl_sock,
- group);
+ genl_has_listeners(family, genl_info_net(info), group);
}
static void ovs_notify(struct genl_family *family,
--
2.1.1
^ permalink raw reply related
* [PATCH 3/5] netlink: update listeners directly when removing socket
From: Johannes Berg @ 2014-12-22 17:56 UTC (permalink / raw)
To: netdev; +Cc: Johannes Berg
In-Reply-To: <1419270999-22165-1-git-send-email-johannes@sipsolutions.net>
From: Johannes Berg <johannes.berg@intel.com>
The code is now confusing to read - first in one function down
(netlink_remove) any group subscriptions are implicitly removed
by calling __sk_del_bind_node(), but the subscriber database is
only updated far later by calling netlink_update_listeners().
Move the latter call to just after removal from the list so it
is easier to follow the code.
This also enables moving the locking inside the kernel-socket
conditional, which improves the normal socket destruction path.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
net/netlink/af_netlink.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1117a2cc7c28..df6086f69592 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1111,8 +1111,10 @@ static void netlink_remove(struct sock *sk)
mutex_unlock(&nl_sk_hash_lock);
netlink_table_grab();
- if (nlk_sk(sk)->subscriptions)
+ if (nlk_sk(sk)->subscriptions) {
__sk_del_bind_node(sk);
+ netlink_update_listeners(sk);
+ }
netlink_table_ungrab();
}
@@ -1246,8 +1248,8 @@ static int netlink_release(struct socket *sock)
module_put(nlk->module);
- netlink_table_grab();
if (netlink_is_kernel(sk)) {
+ netlink_table_grab();
BUG_ON(nl_table[sk->sk_protocol].registered == 0);
if (--nl_table[sk->sk_protocol].registered == 0) {
struct listeners *old;
@@ -1261,10 +1263,8 @@ static int netlink_release(struct socket *sock)
nl_table[sk->sk_protocol].flags = 0;
nl_table[sk->sk_protocol].registered = 0;
}
- } else if (nlk->subscriptions) {
- netlink_update_listeners(sk);
+ netlink_table_ungrab();
}
- netlink_table_ungrab();
kfree(nlk->groups);
nlk->groups = NULL;
--
2.1.1
^ permalink raw reply related
* [PATCH 5/5] genetlink: pass multicast bind/unbind to families
From: Johannes Berg @ 2014-12-22 17:56 UTC (permalink / raw)
To: netdev; +Cc: Johannes Berg
In-Reply-To: <1419270999-22165-1-git-send-email-johannes@sipsolutions.net>
From: Johannes Berg <johannes.berg@intel.com>
In order to make the newly fixed multicast bind/unbind
functionality usable in generic netlink, pass them down to the
appropriate family.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
include/net/genetlink.h | 5 +++++
net/netlink/genetlink.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+)
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 38620da4aa7a..3ed31e5a445b 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -31,6 +31,9 @@ struct genl_info;
* do additional, common, filtering and return an error
* @post_doit: called after an operation's doit callback, it may
* undo operations done by pre_doit, for example release locks
+ * @mcast_bind: a socket bound to the given multicast group (which
+ * is given as the offset into the groups array)
+ * @mcast_unbind: a socket was unbound from the given multicast group
* @attrbuf: buffer to store parsed attributes
* @family_list: family list
* @mcgrps: multicast groups used by this family (private)
@@ -53,6 +56,8 @@ struct genl_family {
void (*post_doit)(const struct genl_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
+ int (*mcast_bind)(int group);
+ void (*mcast_unbind)(int group);
struct nlattr ** attrbuf; /* private */
const struct genl_ops * ops; /* private */
const struct genl_multicast_group *mcgrps; /* private */
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 76393f2f4b22..960d3a41682a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -983,11 +983,62 @@ static struct genl_multicast_group genl_ctrl_groups[] = {
{ .name = "notify", },
};
+static int genl_bind(int group)
+{
+ int i, err;
+ bool found = false;
+
+ down_read(&cb_lock);
+ for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
+ struct genl_family *f;
+
+ list_for_each_entry(f, genl_family_chain(i), family_list) {
+ if (group >= f->mcgrp_offset &&
+ group < f->mcgrp_offset + f->n_mcgrps) {
+ err = f->mcast_bind(group - f->mcgrp_offset);
+ found = true;
+ break;
+ }
+ }
+ }
+ up_read(&cb_lock);
+
+ if (WARN_ON(!found))
+ err = 0;
+
+ return err;
+}
+
+static void genl_unbind(int group)
+{
+ int i;
+ bool found = false;
+
+ down_read(&cb_lock);
+ for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
+ struct genl_family *f;
+
+ list_for_each_entry(f, genl_family_chain(i), family_list) {
+ if (group >= f->mcgrp_offset &&
+ group < f->mcgrp_offset + f->n_mcgrps) {
+ f->mcast_unbind(group - f->mcgrp_offset);
+ found = true;
+ break;
+ }
+ }
+ }
+ up_read(&cb_lock);
+
+ WARN_ON(!found);
+}
+
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
+ .bind = genl_bind,
+ .unbind = genl_unbind,
};
/* we'll bump the group number right afterwards */
--
2.1.1
^ permalink raw reply related
* Re: [RFC PATCH 00/17] fib_trie: Reduce time spent in fib_table_lookup by 35 to 75%
From: Dave Taht @ 2014-12-22 18:08 UTC (permalink / raw)
To: Alexander Duyck; +Cc: netdev@vger.kernel.org
In-Reply-To: <20141222172632.1119.51469.stgit@ahduyck-vm-fedora20>
impressive. I think. But I don't quite understand what you mean by a depth of 7?
What did your routing table actually look like?
For example, my ipv4 routing table looks like this at the moment.
What is depth? searching from /32, /31, /30, /29?
root@davedesk:~# ip route
default via 172.21.2.21 dev se00 proto babel onlink
10.1.10.0/24 via 172.21.2.21 dev se00 proto babel onlink
10.1.10.12 via 172.21.2.21 dev se00 proto babel onlink
50.197.142.144/29 via 172.21.0.1 dev ge00 proto babel onlink
73.15.8.0/23 via 172.21.0.1 dev ge00 proto babel onlink
172.20.1.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.20.2.0/27 via 172.21.0.1 dev ge00 proto babel onlink
172.20.2.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.20.2.7 via 172.21.0.1 dev ge00 proto babel onlink
172.20.4.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.20.5.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.20.6.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.20.6.1 via 172.21.0.1 dev ge00 proto babel onlink
172.20.7.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.20.8.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.20.20.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.20.47.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.20.47.1 via 172.21.0.1 dev ge00 proto babel onlink
172.20.92.64/27 via 172.21.0.1 dev ge00 proto babel onlink
172.20.92.96/27 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.2 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.3 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.4 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.5 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.6 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.7 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.9 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.10 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.11 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.16 via 172.21.0.1 dev ge00 proto babel onlink
172.20.142.45 via 172.21.0.1 dev ge00 proto babel onlink
172.20.143.4 via 172.21.0.1 dev ge00 proto babel onlink
172.20.143.6 via 172.21.0.1 dev ge00 proto babel onlink
172.20.143.7 via 172.21.0.1 dev ge00 proto babel onlink
172.20.143.20 via 172.21.0.1 dev ge00 proto babel onlink
172.20.222.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.20.223.0/24 via 172.21.0.1 dev ge00 proto babel onlink
172.21.0.0/24 dev ge00 proto kernel scope link src 172.21.0.2
172.21.0.1 via 172.21.0.1 dev ge00 proto babel onlink
172.21.2.0/27 dev se00 proto kernel scope link src 172.21.2.1
172.21.2.20 via 172.21.2.20 dev se00 proto babel onlink
172.21.2.21 via 172.21.2.21 dev se00 proto babel onlink
172.21.2.64/27 dev sw00 proto kernel scope link src 172.21.2.65
172.21.2.96/27 dev sw10 proto kernel scope link src 172.21.2.97
172.21.2.128/27 dev gw00 proto kernel scope link src 172.21.2.129
172.21.2.160/27 dev gw10 proto kernel scope link src 172.21.2.161
172.21.128.0/24 via 172.21.2.21 dev se00 proto babel onlink
172.21.128.0/20 via 172.21.2.21 dev se00 proto babel onlink
172.21.129.0/24 via 172.21.2.21 dev se00 proto babel onlink
172.23.2.0/23 via 172.21.0.1 dev ge00 proto babel onlink
172.23.6.0/23 via 172.21.0.1 dev ge00 proto babel onlink
172.23.143.3 via 172.21.0.1 dev ge00 proto babel onlink
172.23.143.7 via 172.21.0.1 dev ge00 proto babel onlink
192.168.7.2 via 172.21.0.1 dev ge00 proto babel onlink
Dave Täht
http://www.bufferbloat.net/projects/bloat/wiki/Upcoming_Talks
^ permalink raw reply
* Re: [PATCH] net: unisys: adding unisys virtnic driver
From: David Miller @ 2014-12-22 18:08 UTC (permalink / raw)
To: zyjzyj2000
Cc: earfvids, benjamin.romer, netdev, dzickus, Bruce.Vessey,
sparmaintainer, prarit
In-Reply-To: <5497D708.7070109@gmail.com>
From: zhuyj <zyjzyj2000@gmail.com>
Date: Mon, 22 Dec 2014 16:32:08 +0800
> Compared with veth, tun/tap, is there any difference about this
> virtnic?
First, please do not top-post.
Second, do not quote an entire huge patch just to make a 2-line
comment. Everyone now has to receive that huge patch again in
their inbox, so you are creating a huge burden for everyone on
the list.
^ permalink raw reply
* [PATCH RFC] ipw2200: select CFG80211_WEXT
From: Paul Bolle @ 2014-12-22 18:10 UTC (permalink / raw)
To: Johannes Berg
Cc: Stanislav Yakovlev, Kalle Valo, linux-wireless, netdev,
linux-kernel
Commit 24a0aa212ee2 ("cfg80211: make WEXT compatibility unselectable")
made it impossible to depend on CFG80211_WEXT. It does still allow to
select that symbol. (Yes, the commit summary is confusing.)
So make IPW2200 select CFG80211_WEXT, so that the ipw2200 driver can be
built again.
Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
---
Johannes,
Building v3.19-rc1 for an outdated ThinkPad X41 left me without the
ipw2200 driver. It turns out this trivial patch is all that's needed to
make ipw2200 buildable again.
(A similar patch would be needed for the drivers behind Kconfig symbol
HERMES. Ie, orinoco and friends.)
I must admit that I do not fully understand your commit. (How was
CFG80211_WEXT "marked for deprecation and removal for a little more than
two years"?) There's some terminology confusion: what you call "select"
I tend to call "set". Anyhow, your commit basically disables building
ipw2200 (and apparently orinoco and friends)?
Was that your intention?
drivers/net/wireless/ipw2x00/Kconfig | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ipw2x00/Kconfig b/drivers/net/wireless/ipw2x00/Kconfig
index 91c0cb3c368e..21de4fe6cf2d 100644
--- a/drivers/net/wireless/ipw2x00/Kconfig
+++ b/drivers/net/wireless/ipw2x00/Kconfig
@@ -65,7 +65,8 @@ config IPW2100_DEBUG
config IPW2200
tristate "Intel PRO/Wireless 2200BG and 2915ABG Network Connection"
- depends on PCI && CFG80211 && CFG80211_WEXT
+ depends on PCI && CFG80211
+ select CFG80211_WEXT
select WIRELESS_EXT
select WEXT_SPY
select WEXT_PRIV
--
2.1.0
^ permalink raw reply related
* Re: [PATCH iproute2] ip lib: Added shorter timestamp option
From: Stephen Hemminger @ 2014-12-22 18:12 UTC (permalink / raw)
To: Vadim Kochan; +Cc: netdev
In-Reply-To: <1418285526-28859-1-git-send-email-vadim4j@gmail.com>
On Thu, 11 Dec 2014 10:12:06 +0200
Vadim Kochan <vadim4j@gmail.com> wrote:
> From: Vadim Kochan <vadim4j@gmail.com>
>
> Added another timestamp format to look like more logging info:
>
> [Dec 01 01:46:20.675589] 2: enp0s25: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default
> link/ether 3c:97:0e:a3:86:2e brd ff:ff:ff:ff:ff:ff
>
> Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
I would suggest supporting RFC3339 which is a standard for timestamps instead.
[2014-12-22T01:46:20.1012] ...
^ permalink raw reply
* Re: [RFC PATCH 05/17] fib_trie: Optimize fib_table_lookup to avoid wasting time on loops/variables
From: David Miller @ 2014-12-22 18:30 UTC (permalink / raw)
To: alexander.h.duyck; +Cc: netdev
In-Reply-To: <20141222174123.1119.28780.stgit@ahduyck-vm-fedora20>
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Mon, 22 Dec 2014 09:41:24 -0800
> This patch is meant to reduce the complexity of fib_table_lookup by reducing
> the number of variables to the bare minimum while still keeping the same if
> not improved functionality versus the original.
>
> Most of this change was started off by the desire to rid the function of
> chopped_off and current_prefix_length as they actually added very little to
> the function since they only applied when computing the cindex. I was able
> to replace them mostly with just a check for the prefix match. As long as
> the prefix between the key and the node being tested was the same we know
> we can search the tnode fully versus just testing cindex 0.
>
> The second portion of the change ended up being a massive reordering.
> Originally the calls to check_leaf were up near the start of the loop, and
> the backtracing and descending into lower levels of tnodes was later. This
> didn't make much sense as the structure of the tree means the leaves are
> always the last thing to be tested. As such I reordered things so that we
> instead have a loop that will delve into the tree and only exit when we
> have either found a leaf or we have exhausted the tree. The advantage of
> rearranging things like this is that we can fully inline check_leaf since
> there is now only one reference to it in the function.
>
> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
I really like this change, in particular that you got rid of the
__fls().
That's unfortunately expensive on older sparcs, so when I was
micro-benchmarking the routing cache changes it would show up in
perf.
^ permalink raw reply
* Re: [RFC PATCH 17/17] fib_trie: Add tracking value for suffix length
From: David Miller @ 2014-12-22 18:32 UTC (permalink / raw)
To: alexander.h.duyck; +Cc: netdev
In-Reply-To: <20141222174238.1119.68562.stgit@ahduyck-vm-fedora20>
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Mon, 22 Dec 2014 09:42:38 -0800
> This change adds a tracking value for the maximum suffix length of all
> prefixes stored in any given tnode. With this value we can determine if we
> need to backtrace or not based on if the suffix is greater than the pos
> value.
>
> By doing this we can reduce the CPU overhead for lookups in the local table
> as many of the prefixes there are 32b long and have a suffix length of 0
> meaning we can immediately backtrace to the root node without needing to
> test any of the nodes between it and where we ended up.
>
> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
This is a really nice optimization.
^ permalink raw reply
* Re: [RFC PATCH 02/17] fib_trie: Make leaf and tnode more uniform
From: David Miller @ 2014-12-22 18:33 UTC (permalink / raw)
To: alexander.h.duyck; +Cc: netdev
In-Reply-To: <20141222174105.1119.71598.stgit@ahduyck-vm-fedora20>
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Mon, 22 Dec 2014 09:41:05 -0800
> -#define IS_TNODE(n) (!(n->parent & T_LEAF))
> -#define IS_LEAF(n) (n->parent & T_LEAF)
> +struct tnode {
> + t_key key;
> + unsigned char bits; /* 2log(KEYLENGTH) bits needed */
> + unsigned char pos; /* 2log(KEYLENGTH) bits needed */
> + struct tnode __rcu *parent;
> + union {
> + struct rcu_head rcu;
> + struct tnode *tnode_free;
> + };
> + unsigned int full_children; /* KEYLENGTH bits needed */
> + unsigned int empty_children; /* KEYLENGTH bits needed */
> + struct rt_trie_node __rcu *child[0];
> +};
I wonder if we can compress this even further.
The full_children and empty_children can probably both be a u16, right?
If so, you can stick at least one of them after 'bits' and 'pos' and
thus save 4 bytes on 32b.
^ permalink raw reply
* Re: [RFC PATCH 00/17] fib_trie: Reduce time spent in fib_table_lookup by 35 to 75%
From: David Miller @ 2014-12-22 18:35 UTC (permalink / raw)
To: alexander.h.duyck; +Cc: netdev
In-Reply-To: <20141222172632.1119.51469.stgit@ahduyck-vm-fedora20>
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Mon, 22 Dec 2014 09:40:52 -0800
> These patches are meant to address several performance issues I have seen
> in the fib_trie implementation, and fib_table_lookup specifically. With
> these changes in place I have seen a reduction of up to 35 to 75% for the
> total time spent in fib_table_lookup depending on the type of search being
> performed.
Fantastic work Alexander.
I had a patch series, just for micro-benchmarking, that got rid of the
local table and just put everything in the global one.
Everything works and we always only do one probe into the FIB.
That speeds things up a lot.
The only problem is that we have to take into consideration cases
where userspace tries to directly modify and do things to the local
table. Also we might have to pretend we have a local table in
dumps too.
^ permalink raw reply
* [PATCH 1/1 net-next] netfilter: remove unnecessary sizeof(char)
From: Fabian Frederick @ 2014-12-22 18:36 UTC (permalink / raw)
To: linux-kernel
Cc: davem, joe, Fabian Frederick, Pablo Neira Ayuso, Patrick McHardy,
Jozsef Kadlecsik, netfilter-devel, coreteam, netdev
sizeof(char) is always 1.
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
net/netfilter/nf_log.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 43c926c..1191f66 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -426,7 +426,7 @@ static int netfilter_log_sysctl_init(struct net *net)
nf_log_sysctl_fnames[i];
nf_log_sysctl_table[i].data = NULL;
nf_log_sysctl_table[i].maxlen =
- NFLOGGER_NAME_LEN * sizeof(char);
+ NFLOGGER_NAME_LEN;
nf_log_sysctl_table[i].mode = 0644;
nf_log_sysctl_table[i].proc_handler =
nf_log_proc_dostring;
--
1.9.1
^ permalink raw reply related
* Re: [RFC PATCH 00/17] fib_trie: Reduce time spent in fib_table_lookup by 35 to 75%
From: Alexander Duyck @ 2014-12-22 18:38 UTC (permalink / raw)
To: Dave Taht; +Cc: netdev@vger.kernel.org
In-Reply-To: <CAA93jw7Zz8_wToKOou42mXhgunZypQ6f=oGEHZk2K6CbrsrNqg@mail.gmail.com>
On 12/22/2014 10:08 AM, Dave Taht wrote:
> impressive. I think. But I don't quite understand what you mean by a depth of 7?
>
> What did your routing table actually look like?
>
> For example, my ipv4 routing table looks like this at the moment.
>
> What is depth? searching from /32, /31, /30, /29?
What I was referring to is the local trie since all routing ends up
having to do a failed lookup there before we can look in the main trie.
What I did is populate a list of addresses such that I had 15 bits set
in the lower 16 of the address. By doing that it allowed me to stress
the trie pretty hard since it can only inflate out to a tnode with 8
children.
My routing test was from my ixgbe which was on a 10.0.0.X address to a
dummy address at 192.168.255.253 which resulted in it being routed to
the dummy interface. The ixgbe to local receive was to address
192.168.255.254.
Below is all the info for my trie.
- Alex
[root@ahduyck-vm-fedora20 net]# cat /proc/net/fib_triestat
Basic info: size of leaf: 40 bytes, size of tnode: 40 bytes.
Main:
Aver depth: 2.80
Max depth: 4
Leaves: 15
Prefixes: 15
Internal nodes: 6
2: 2 3: 4
Pointers: 40
Null ptrs: 20
Total size: 2 kB
Local:
Aver depth: 3.87
Max depth: 7
Leaves: 49
Prefixes: 50
Internal nodes: 24
1: 10 2: 6 3: 7 4: 1
Pointers: 116
Null ptrs: 44
Total size: 7 kB
[root@ahduyck-vm-fedora20 ~]# cat /proc/net/fib_trie
Main:
+-- 0.0.0.0/0 3 0 5
+-- 0.0.0.0/4 2 0 2
|-- 0.0.0.0
/0 universe UNICAST
+-- 10.0.0.0/22 3 0 4
|-- 10.0.0.0
/24 link UNICAST
|-- 10.0.1.0
/24 link UNICAST
|-- 10.0.2.0
/24 link UNICAST
|-- 10.0.3.0
/24 link UNICAST
|-- 169.254.0.0
/16 link UNICAST
+-- 192.168.0.0/16 3 1 4
|-- 192.168.122.0
/24 link UNICAST
|-- 192.168.128.0
/24 link UNICAST
|-- 192.168.192.0
/24 link UNICAST
+-- 192.168.224.0/19 3 1 4
|-- 192.168.224.0
/24 link UNICAST
|-- 192.168.240.0
/24 link UNICAST
|-- 192.168.248.0
/24 link UNICAST
+-- 192.168.252.0/22 2 0 1
|-- 192.168.252.0
/24 link UNICAST
|-- 192.168.254.0
/24 link UNICAST
|-- 192.168.255.0
/24 link UNICAST
Local:
+-- 0.0.0.0/0 3 0 5
+-- 10.0.0.0/22 4 0 4
|-- 10.0.0.0
/32 link BROADCAST
|-- 10.0.0.128
/32 host LOCAL
|-- 10.0.0.255
/32 link BROADCAST
|-- 10.0.1.0
/32 link BROADCAST
|-- 10.0.1.128
/32 host LOCAL
|-- 10.0.1.255
/32 link BROADCAST
|-- 10.0.2.0
/32 link BROADCAST
|-- 10.0.2.128
/32 host LOCAL
|-- 10.0.2.255
/32 link BROADCAST
|-- 10.0.3.0
/32 link BROADCAST
|-- 10.0.3.128
/32 host LOCAL
|-- 10.0.3.255
/32 link BROADCAST
+-- 127.0.0.0/8 2 0 2
+-- 127.0.0.0/31 1 0 0
|-- 127.0.0.0
/32 link BROADCAST
/8 host LOCAL
|-- 127.0.0.1
/32 host LOCAL
|-- 127.255.255.255
/32 link BROADCAST
+-- 192.168.0.0/16 3 1 4
+-- 192.168.122.0/24 2 0 1
|-- 192.168.122.0
/32 link BROADCAST
|-- 192.168.122.173
/32 host LOCAL
|-- 192.168.122.255
/32 link BROADCAST
+-- 192.168.128.0/24 2 0 2
+-- 192.168.128.0/31 1 0 0
|-- 192.168.128.0
/32 link BROADCAST
|-- 192.168.128.1
/32 host LOCAL
|-- 192.168.128.255
/32 link BROADCAST
+-- 192.168.192.0/24 2 0 2
+-- 192.168.192.0/31 1 0 0
|-- 192.168.192.0
/32 link BROADCAST
|-- 192.168.192.1
/32 host LOCAL
|-- 192.168.192.255
/32 link BROADCAST
+-- 192.168.224.0/19 3 2 4
+-- 192.168.224.0/24 2 0 2
|-- 192.168.224.0
/32 link BROADCAST
|-- 192.168.224.1
/32 host LOCAL
|-- 192.168.224.255
/32 link BROADCAST
+-- 192.168.240.0/24 2 0 2
+-- 192.168.240.0/31 1 0 0
|-- 192.168.240.0
/32 link BROADCAST
|-- 192.168.240.1
/32 host LOCAL
|-- 192.168.240.255
/32 link BROADCAST
+-- 192.168.248.0/22 3 0 6
+-- 192.168.248.0/31 1 0 0
|-- 192.168.248.0
/32 link BROADCAST
|-- 192.168.248.1
/32 host LOCAL
|-- 192.168.248.255
/32 link BROADCAST
+-- 192.168.252.0/22 3 1 2
+-- 192.168.252.0/31 1 0 0
|-- 192.168.252.0
/32 link BROADCAST
|-- 192.168.252.1
/32 host LOCAL
|-- 192.168.252.255
/32 link BROADCAST
+-- 192.168.254.0/31 1 0 0
|-- 192.168.254.0
/32 link BROADCAST
|-- 192.168.254.1
/32 host LOCAL
|-- 192.168.254.255
/32 link BROADCAST
+-- 192.168.255.0/31 1 0 0
|-- 192.168.255.0
/32 link BROADCAST
|-- 192.168.255.1
/32 host LOCAL
+-- 192.168.255.128/25 3 1 4
|-- 192.168.255.128
/32 host LOCAL
|-- 192.168.255.192
/32 host LOCAL
|-- 192.168.255.224
/32 host LOCAL
+-- 192.168.255.240/28 3 1 4
|-- 192.168.255.240
/32 host LOCAL
|-- 192.168.255.248
/32 host LOCAL
|-- 192.168.255.252
/32 host LOCAL
+-- 192.168.255.254/31 1 0 0
|-- 192.168.255.254
/32 host LOCAL
|-- 192.168.255.255
/32 link BROADCAST
^ permalink raw reply
* Re: [PATCH iproute2] ip lib: Added shorter timestamp option
From: Vadim Kochan @ 2014-12-22 18:37 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: Vadim Kochan, netdev
In-Reply-To: <20141222101212.04877275@urahara>
On Mon, Dec 22, 2014 at 10:12:12AM -0800, Stephen Hemminger wrote:
> On Thu, 11 Dec 2014 10:12:06 +0200
> Vadim Kochan <vadim4j@gmail.com> wrote:
>
> > From: Vadim Kochan <vadim4j@gmail.com>
> >
> > Added another timestamp format to look like more logging info:
> >
> > [Dec 01 01:46:20.675589] 2: enp0s25: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default
> > link/ether 3c:97:0e:a3:86:2e brd ff:ff:ff:ff:ff:ff
> >
> > Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
>
> I would suggest supporting RFC3339 which is a standard for timestamps instead.
>
> [2014-12-22T01:46:20.1012] ...
OK, thanks, I will look at it.
^ permalink raw reply
* Re: [RFC PATCH 00/17] fib_trie: Reduce time spent in fib_table_lookup by 35 to 75%
From: David Miller @ 2014-12-22 18:53 UTC (permalink / raw)
To: dave.taht; +Cc: alexander.h.duyck, netdev
In-Reply-To: <CAA93jw7Zz8_wToKOou42mXhgunZypQ6f=oGEHZk2K6CbrsrNqg@mail.gmail.com>
From: Dave Taht <dave.taht@gmail.com>
Date: Mon, 22 Dec 2014 10:08:09 -0800
> impressive. I think. But I don't quite understand what you mean by a depth of 7?
He means the deepest path in the fib_trie datastructure that is
holding the routing table.
^ permalink raw reply
* Re: [RFC PATCH 02/17] fib_trie: Make leaf and tnode more uniform
From: Alexander Duyck @ 2014-12-22 18:55 UTC (permalink / raw)
To: David Miller; +Cc: netdev
In-Reply-To: <20141222.133353.2244861758408916536.davem@davemloft.net>
On 12/22/2014 10:33 AM, David Miller wrote:
> From: Alexander Duyck <alexander.h.duyck@redhat.com>
> Date: Mon, 22 Dec 2014 09:41:05 -0800
>
>> -#define IS_TNODE(n) (!(n->parent & T_LEAF))
>> -#define IS_LEAF(n) (n->parent & T_LEAF)
>> +struct tnode {
>> + t_key key;
>> + unsigned char bits; /* 2log(KEYLENGTH) bits needed */
>> + unsigned char pos; /* 2log(KEYLENGTH) bits needed */
>> + struct tnode __rcu *parent;
>> + union {
>> + struct rcu_head rcu;
>> + struct tnode *tnode_free;
>> + };
>> + unsigned int full_children; /* KEYLENGTH bits needed */
>> + unsigned int empty_children; /* KEYLENGTH bits needed */
>> + struct rt_trie_node __rcu *child[0];
>> +};
> I wonder if we can compress this even further.
>
> The full_children and empty_children can probably both be a u16, right?
> If so, you can stick at least one of them after 'bits' and 'pos' and
> thus save 4 bytes on 32b.
The thing is I don't think we would actually be saving any space. The
slub allocator will round us up anyway. On a 32b system the size is 28B
if I recall correctly. Dropping it to 24B would mean only a 2 child
node could be allocated from the 32B slab. Anything larger than that it
wouldn't matter.
My real concern with all of this is the fact that we have to do 2
separate memory reads per node, one for the key info and one for the
child pointer. I really think we need to get this down to 1 in order to
get there, but the overhead is the tricky part for that. What I would
look at doing is splitting the tnode into two parts. One would be a key
vector (key, pos, bits, seq) paired with a pointer to either a
tnode_info or leaf_info, the other would be something like a tnode_info
(rcu, parent pointer, full_children, empty_children, key vector
array[0]) that provides a means of backtracing and stores the nodes.
The problem is it makes insertion/deletion and backtracking more
complicated and doubles (64b) or quadruples (32b) the memory needed. As
such, I am still just throwing the idea around and haven't gotten into
implementation yet.
- Alex
^ permalink raw reply
* Re: [RFC PATCH 00/17] fib_trie: Reduce time spent in fib_table_lookup by 35 to 75%
From: Dave Taht @ 2014-12-22 18:59 UTC (permalink / raw)
To: Alexander Duyck; +Cc: netdev@vger.kernel.org
In-Reply-To: <54986514.1010502@redhat.com>
On Mon, Dec 22, 2014 at 10:38 AM, Alexander Duyck
<alexander.h.duyck@redhat.com> wrote:
>> impressive. I think. But I don't quite understand what you mean by a depth of 7?
>He means the deepest path in the fib_trie datastructure that is
>holding the routing table.
Thank you (dave and alexander) for the clarification!
re:
cat /proc/net/fib_triestat
cat /proc/net/fib_trie
are these a newish feature or merely compiled out in openwrt?
I have to admit I would love to know what your improvements do for a
large (e.g. BGP) table in these regards. Regrettably they don't let me
near those with pre-production code....
--
Dave Täht
http://www.bufferbloat.net/projects/bloat/wiki/Upcoming_Talks
^ permalink raw reply
* Re: [PATCH RFC] ipw2200: select CFG80211_WEXT
From: Johannes Berg @ 2014-12-22 19:13 UTC (permalink / raw)
To: Paul Bolle
Cc: Stanislav Yakovlev, Kalle Valo, linux-wireless, netdev,
linux-kernel
In-Reply-To: <1419271817.2317.12.camel@tiscali.nl>
On Mon, 2014-12-22 at 19:10 +0100, Paul Bolle wrote:
> Commit 24a0aa212ee2 ("cfg80211: make WEXT compatibility unselectable")
> made it impossible to depend on CFG80211_WEXT. It does still allow to
> select that symbol. (Yes, the commit summary is confusing.)
>
> So make IPW2200 select CFG80211_WEXT, so that the ipw2200 driver can be
> built again.
>
> Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
> ---
> Johannes,
>
> Building v3.19-rc1 for an outdated ThinkPad X41 left me without the
> ipw2200 driver. It turns out this trivial patch is all that's needed to
> make ipw2200 buildable again.
>
> (A similar patch would be needed for the drivers behind Kconfig symbol
> HERMES. I.e., orinoco and friends.)
>
> I must admit that I do not fully understand your commit. (How was
> CFG80211_WEXT "marked for deprecation and removal for a little more than
> two years"?) There's some terminology confusion: what you call "select"
> I tend to call "set". Anyhow, your commit basically disables building
> ipw2200 (and apparently orinoco and friends)?
>
> Was that your intention?
>
> drivers/net/wireless/ipw2x00/Kconfig | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/wireless/ipw2x00/Kconfig b/drivers/net/wireless/ipw2x00/Kconfig
> index 91c0cb3c368e..21de4fe6cf2d 100644
> --- a/drivers/net/wireless/ipw2x00/Kconfig
> +++ b/drivers/net/wireless/ipw2x00/Kconfig
> @@ -65,7 +65,8 @@ config IPW2100_DEBUG
>
> config IPW2200
> tristate "Intel PRO/Wireless 2200BG and 2915ABG Network Connection"
> - depends on PCI && CFG80211 && CFG80211_WEXT
> + depends on PCI && CFG80211
> + select CFG80211_WEXT
> select WIRELESS_EXT
I didn't realize that this driver actually depended on this symbol - I
had been under the impression that those would still use regular wext
(WIRELESS_EXT) only.
So yeah - this makes sense. FWIW, by "selectable" I meant by the user.
johannes
^ permalink raw reply
* [PATCH iproute2] ip: allow ip address show to list addresses with certain flags not being set
From: Heiner Kallweit @ 2014-12-22 19:18 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: netdev
Sometimes it's needed to have "ip address show" list only addresses
with certain flags not being set, e.g. in network scripts.
As an example one might want to exclude addresses in "tentative"
or "deprecated" state.
Support listing addresses with flags tentative, deprecated, dadfailed
not being set by prefixing the respective flag with a minus.
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
ip/ipaddress.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index 221ae1f..a071572 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -80,7 +80,7 @@ static void usage(void)
fprintf(stderr, "SCOPE-ID := [ host | link | global | NUMBER ]\n");
fprintf(stderr, "FLAG-LIST := [ FLAG-LIST ] FLAG\n");
fprintf(stderr, "FLAG := [ permanent | dynamic | secondary | primary |\n");
- fprintf(stderr, " tentative | deprecated | dadfailed | temporary |\n");
+ fprintf(stderr, " [-]tentative | [-]deprecated | [-]dadfailed | temporary |\n");
fprintf(stderr, " CONFFLAG-LIST ]\n");
fprintf(stderr, "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n");
fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute ]\n");
@@ -1261,9 +1261,15 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
} else if (strcmp(*argv, "tentative") == 0) {
filter.flags |= IFA_F_TENTATIVE;
filter.flagmask |= IFA_F_TENTATIVE;
+ } else if (strcmp(*argv, "-tentative") == 0) {
+ filter.flags &= ~IFA_F_TENTATIVE;
+ filter.flagmask |= IFA_F_TENTATIVE;
} else if (strcmp(*argv, "deprecated") == 0) {
filter.flags |= IFA_F_DEPRECATED;
filter.flagmask |= IFA_F_DEPRECATED;
+ } else if (strcmp(*argv, "-deprecated") == 0) {
+ filter.flags &= ~IFA_F_DEPRECATED;
+ filter.flagmask |= IFA_F_DEPRECATED;
} else if (strcmp(*argv, "home") == 0) {
filter.flags |= IFA_F_HOMEADDRESS;
filter.flagmask |= IFA_F_HOMEADDRESS;
@@ -1279,6 +1285,9 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
} else if (strcmp(*argv, "dadfailed") == 0) {
filter.flags |= IFA_F_DADFAILED;
filter.flagmask |= IFA_F_DADFAILED;
+ } else if (strcmp(*argv, "-dadfailed") == 0) {
+ filter.flags &= ~IFA_F_DADFAILED;
+ filter.flagmask |= IFA_F_DADFAILED;
} else if (strcmp(*argv, "label") == 0) {
NEXT_ARG();
filter.label = *argv;
--
2.2.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox