* [PATCH RFC] IPv6: RTM_GETROUTE NLM_F_MATCH handled as stated in RFC 3549
@ 2011-12-28 14:01 Matti Vaittinen
2012-01-03 20:16 ` David Miller
0 siblings, 1 reply; 3+ messages in thread
From: Matti Vaittinen @ 2011-12-28 14:01 UTC (permalink / raw)
To: netdev, davem
Hi Dee Ho!
RFC 3549 states:
Additional flag bits for GET requests on config information in
the FEC.
NLM_F_ROOT Return the complete table instead of a
single entry.
NLM_F_MATCH Return all entries matching criteria passed in
message content.
NLM_F_ATOMIC Return an atomic snapshot of the table being
referenced. This may require special
privileges because it has the potential to
interrupt service in the FE for a longer time.
Convenience macros for flag bits:
NLM_F_DUMP This is NLM_F_ROOT or'ed with NLM_F_MATCH
However, currently requests with NLM_F_ROOT or NLM_F_MATCH or both
(NLM_F_DUMP) specified will return all the (routing) entries.
To me it sounds that the NLM_F_MATCH was originally meant to
allow user to ask only entries (routes) fulfilling conditions given in
request. This would further simplify userland applications which need
to get only certain entries - and I believe that is often the case.
I believe the current operation which differs from RFC makes netlink
socket usage even more confusing. (There really is not too much
up-to-date documentation telling how these requests+all the attributes
work).
This patch makes ipv6 module to return only routes which match
attributes / filled fields in RTM_GETROUTE, if NLM_F_MATCH is
specified and NLM_F_ROOT is not. This patch has not been tested,
and is meant more to be for visualization of what I thought of doing.
If the NLM_F_MATCH support is considered to be good idea, then I
will check this more thoroughly and send another patch.
I assume this would not break *many* existing userspace applications,
since specifying NLM_F_MATCH (especially with no NLM_F_ROOT) sounds
pretty stupid - if no entries should be filtered.
I checked iproute2, and it uses NLM_F_DUMP and does filtering entries
in userspace - thus it is not affected.
I guess this same idea could be brought in RTM_GETADDR and RTM_GETLINK
too? Maybe also on IPv4 side?
Any comments?
This patch demonstrates how RTM_GETROUTE request behaviour for IPv6
could be changed to better match RFC 3549 when NLM_F_MATCH flag is
specified.
"NLM_F_MATCH Return all entries matching criteria passed in
message content."
Signed-off-by: Matti Vaittinen <Mazziesaccount@gmail.com>
---
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2ad92ca..27313ad 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -139,12 +139,21 @@ extern void rt6_pmtu_discovery(const struct in6_addr *daddr,
struct netlink_callback;
struct rt6_rtnl_dump_arg {
+ long maxtbl;
struct sk_buff *skb;
struct netlink_callback *cb;
struct net *net;
};
+struct rt6_rtnl_match_arg {
+ long maxtbl;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+ struct net *net;
+ struct fib6_config *cfg;
+};
extern int rt6_dump_route(struct rt6_info *rt, void *p_arg);
+extern int rt6_dump_route_if_match(struct rt6_info *rt, void *p_arg);
extern void rt6_ifdown(struct net *net, struct net_device *dev);
extern void rt6_mtu_change(struct net_device *dev, unsigned mtu);
extern void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2783631..13bea56 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -268,7 +268,26 @@ static void __net_init fib6_tables_init(struct net *net)
}
#endif
+static int fib6_dump_node_if_match(struct fib6_walker_t *w)
+{
+ int res;
+ struct rt6_info *rt;
+
+ for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ res = rt6_dump_route_if_match(rt, w->args);
+ if (res < 0) {
+ /* Frame is full or request was not parsed correctly.
+ * Anyways, suspend walking
+ */
+ w->leaf = rt;
+ return 1;
+ }
+ WARN_ON(res == 0);
+ }
+ w->leaf = NULL;
+ return 0;
+}
static int fib6_dump_node(struct fib6_walker_t *w)
{
int res;
@@ -292,11 +311,14 @@ static void fib6_dump_end(struct netlink_callback *cb)
struct fib6_walker_t *w = (void*)cb->args[2];
if (w) {
+ if (w->func == fib6_dump_node_if_match)
+ kfree(((struct rt6_rtnl_match_arg *)w->args)->cfg);
if (cb->args[4]) {
cb->args[4] = 0;
fib6_walker_unlink(w);
}
cb->args[2] = 0;
+ kfree(w->args);
kfree(w);
}
cb->done = (void*)cb->args[3];
@@ -356,16 +378,13 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
- struct rt6_rtnl_dump_arg arg;
+ struct rt6_rtnl_dump_arg *arg;
struct fib6_walker_t *w;
struct fib6_table *tb;
struct hlist_node *node;
struct hlist_head *head;
int res = 0;
- s_h = cb->args[0];
- s_e = cb->args[1];
-
w = (void *)cb->args[2];
if (!w) {
/* New dump:
@@ -381,17 +400,57 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
return -ENOMEM;
- w->func = fib6_dump_node;
+ if (cb->nlh && (cb->nlh->nlmsg_flags & NLM_F_MATCH) &&
+ !(cb->nlh->nlmsg_flags & NLM_F_ROOT) &&
+ nlmsg_len(cb->nlh) >= sizeof(struct rtmsg)) {
+
+ struct rtmsg *rtm = nlmsg_data(cb->nlh);
+
+ cb->done = fib6_dump_done;
+ w->args = kzalloc(sizeof(struct rt6_rtnl_match_arg), GFP_ATOMIC);
+ if (!w->args) {
+ kfree(w);
+ return -ENOMEM;
+ }
+ ((struct rt6_rtnl_match_arg *)w->args)->cfg =
+ kzalloc(sizeof(struct fib6_config), GFP_ATOMIC);
+
+ if (!((struct rt6_rtnl_match_arg *)w->args)->cfg) {
+ kfree(w->args);
+ kfree(w);
+ return -ENOMEM;
+ }
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ if (rtm->rtm_table <= FIB6_TABLE_HASHSZ) {
+ cb->args[0] = rtm->rtm_table;
+ ((struct rt6_rtnl_match_arg *)w->args)->maxtbl =
+ (rtm->rtm_table) ? rtm->rtm_table+1 : FIB6_TABLE_HASHSZ;
+ }
+#endif
+
+ ((struct rt6_rtnl_match_arg *)w->args)->cfg->fc_table = 0xFFFFFFFF;
+ w->func = fib6_dump_node_if_match;
+ } else {
+ w->func = fib6_dump_node;
+ w->args = kzalloc(sizeof(struct rt6_rtnl_dump_arg), GFP_ATOMIC);
+ if (!w->args) {
+ kfree(w);
+ return -ENOMEM;
+ }
+ ((struct rt6_rtnl_dump_arg *)w->args)->maxtbl = FIB6_TABLE_HASHSZ;
+ }
cb->args[2] = (long)w;
}
+ arg = (struct rt6_rtnl_dump_arg *)w->args;
- arg.skb = skb;
- arg.cb = cb;
- arg.net = net;
- w->args = &arg;
+ s_h = cb->args[0];
+ s_e = cb->args[1];
+ arg->skb = skb;
+ arg->cb = cb;
+ arg->net = net;
rcu_read_lock();
- for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
+ for (h = s_h; h < arg->maxtbl; h++, s_e = 0) {
e = 0;
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 35b07cc..a71d44d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2350,7 +2350,154 @@ static inline size_t rt6_nlmsg_size(void)
+ RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
+ nla_total_size(sizeof(struct rta_cacheinfo));
}
+static int rt6_cmp_fill_node(struct net *net,
+ struct sk_buff *skb, struct rt6_info *rt,
+ u32 pid,
+ int prefix, struct fib6_config *cfg)
+{
+ struct rtmsg *rtm;
+ struct nlmsghdr *nlh;
+ long expires;
+ u32 table;
+ struct neighbour *n = NULL;
+ u32 seq;
+ struct in6_addr gwaddr;
+
+
+ if (prefix) { /* user wants prefix routes only */
+ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
+ /* success since this is not a prefix route */
+ return 1;
+ }
+ }
+ if (cfg->fc_dst_len && cfg->fc_dst_len != rt->rt6i_dst.plen)
+ return 1;
+
+ if (cfg->fc_src_len && cfg->fc_src_len != rt->rt6i_src.plen)
+ return 1;
+ cfg->fc_flags = ((~RTF_UP)&cfg->fc_flags);
+ if (cfg->fc_flags && cfg->fc_flags != rt->rt6i_flags)
+ return 1;
+
+ if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
+ return 1;
+
+ if (cfg->fc_flags & RTF_GATEWAY) {
+ rcu_read_lock();
+ n = dst_get_neighbour_noref(&rt->dst);
+ if (!n) {
+ rcu_read_unlock();
+ return 1;
+ }
+ gwaddr = *(struct in6_addr *)&(n->primary_key);
+ rcu_read_unlock();
+ if (memcmp(&gwaddr, &cfg->fc_gateway, 16))
+ return 1;
+ }
+
+ if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
+ return 1;
+
+ if (cfg->fc_ifindex)
+ if (!rt->dst.dev || rt->rt6i_dev->ifindex != cfg->fc_ifindex)
+ return 1;
+
+ if (memcmp(&in6addr_any, &cfg->fc_dst, sizeof(struct in6_addr)))
+ if (memcmp(&cfg->fc_dst, &rt->rt6i_dst.addr, sizeof(struct in6_addr)))
+ return 1;
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (memcmp(&in6addr_any, &cfg->fc_src, sizeof(struct in6_addr)))
+ if (memcmp(&cfg->fc_src, &rt->rt6i_src.addr, sizeof(struct in6_addr)))
+ return 1;
+#endif
+ if (memcmp(&in6addr_any, &cfg->fc_prefsrc, sizeof(struct in6_addr)))
+ if (!rt->rt6i_prefsrc.plen ||
+ memcmp(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc, sizeof(struct in6_addr)))
+ return 1;
+ /* All values checked, now allocate and fill NLMSG */
+ seq = cfg->fc_nlinfo.nlh->nlmsg_seq;
+
+ nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+ if (nlh == NULL)
+ return -EMSGSIZE;
+
+ rtm = nlmsg_data(nlh);
+ rtm->rtm_family = AF_INET6;
+ rtm->rtm_dst_len = rt->rt6i_dst.plen;
+ rtm->rtm_src_len = rt->rt6i_src.plen;
+ rtm->rtm_tos = 0;
+ if (rt->rt6i_table)
+ table = rt->rt6i_table->tb6_id;
+ else
+ table = RT6_TABLE_UNSPEC;
+ rtm->rtm_table = table;
+ NLA_PUT_U32(skb, RTA_TABLE, table);
+ if (rt->rt6i_flags&RTF_REJECT)
+ rtm->rtm_type = RTN_UNREACHABLE;
+ else if (rt->rt6i_flags&RTF_LOCAL)
+ rtm->rtm_type = RTN_LOCAL;
+ else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
+ rtm->rtm_type = RTN_LOCAL;
+ else
+ rtm->rtm_type = RTN_UNICAST;
+ rtm->rtm_flags = 0;
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ rtm->rtm_protocol = rt->rt6i_protocol;
+ if (rt->rt6i_flags&RTF_DYNAMIC)
+ rtm->rtm_protocol = RTPROT_REDIRECT;
+ else if (rt->rt6i_flags & RTF_ADDRCONF)
+ rtm->rtm_protocol = RTPROT_KERNEL;
+ else if (rt->rt6i_flags&RTF_DEFAULT)
+ rtm->rtm_protocol = RTPROT_RA;
+
+ if (rt->rt6i_flags&RTF_CACHE)
+ rtm->rtm_flags |= RTM_F_CLONED;
+
+ if (rtm->rtm_dst_len)
+ NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+#ifdef CONFIG_IPV6_SUBTREES
+ if (rtm->rtm_src_len)
+ NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+#endif
+ if (rt->rt6i_prefsrc.plen) {
+ struct in6_addr saddr_buf;
+ saddr_buf = rt->rt6i_prefsrc.addr;
+ NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+ }
+ if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+ goto nla_put_failure;
+
+ rcu_read_lock();
+ n = dst_get_neighbour_noref(&rt->dst);
+ if (n)
+ NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
+ rcu_read_unlock();
+
+ if (rt->dst.dev)
+ NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
+
+ NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
+
+ if (!(rt->rt6i_flags & RTF_EXPIRES))
+ expires = 0;
+ else if (rt->rt6i_expires - jiffies < INT_MAX)
+ expires = rt->rt6i_expires - jiffies;
+ else
+ expires = INT_MAX;
+
+ if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
+ expires, rt->dst.error) < 0)
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+
+}
static int rt6_fill_node(struct net *net,
struct sk_buff *skb, struct rt6_info *rt,
struct in6_addr *dst, struct in6_addr *src,
@@ -2478,7 +2625,34 @@ nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
+int rt6_dump_route_if_match(struct rt6_info *rt, void *p_arg)
+{
+ struct rt6_rtnl_match_arg *arg = (struct rt6_rtnl_match_arg *) p_arg;
+ int prefix;
+ int err;
+ if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
+ struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
+ prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
+ } else
+ prefix = 0;
+ /* this is a bit of a hack. But we do not want to
+ * evaluate config struct for same nlmsg
+ * for each route we fetch from fib.
+ * It's enough to do it once.
+ */
+ if (0xFFFFFFFF == arg->cfg->fc_table) {
+ arg->cfg->fc_table = 0;
+ err = rtm_to_fib6_config(arg->cb->skb,
+ (struct nlmsghdr *)arg->cb->nlh, arg->cfg);
+ if (err)
+ return err;
+ }
+ return rt6_cmp_fill_node(arg->net,
+ arg->skb, rt,
+ NETLINK_CB(arg->cb->skb).pid,
+ prefix, arg->cfg);
+}
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
@@ -2489,7 +2663,6 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
} else
prefix = 0;
-
return rt6_fill_node(arg->net,
arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH RFC] IPv6: RTM_GETROUTE NLM_F_MATCH handled as stated in RFC 3549
2011-12-28 14:01 [PATCH RFC] IPv6: RTM_GETROUTE NLM_F_MATCH handled as stated in RFC 3549 Matti Vaittinen
@ 2012-01-03 20:16 ` David Miller
2012-01-04 6:24 ` Matti Vaittinen
0 siblings, 1 reply; 3+ messages in thread
From: David Miller @ 2012-01-03 20:16 UTC (permalink / raw)
To: matti.vaittinen; +Cc: netdev
From: Matti Vaittinen <matti.vaittinen@nsn.com>
Date: Wed, 28 Dec 2011 16:01:55 +0200
> This patch makes ipv6 module to return only routes which match
> attributes / filled fields in RTM_GETROUTE, if NLM_F_MATCH is
> specified and NLM_F_ROOT is not. This patch has not been tested,
> and is meant more to be for visualization of what I thought of doing.
> If the NLM_F_MATCH support is considered to be good idea, then I
> will check this more thoroughly and send another patch.
>
> I assume this would not break *many* existing userspace applications,
> since specifying NLM_F_MATCH (especially with no NLM_F_ROOT) sounds
> pretty stupid - if no entries should be filtered.
>
> I checked iproute2, and it uses NLM_F_DUMP and does filtering entries
> in userspace - thus it is not affected.
>
> I guess this same idea could be brought in RTM_GETADDR and RTM_GETLINK
> too? Maybe also on IPv4 side?
The problem is that you can't avoid writing the user level filters
even if we add this behavior now.
Any tool which wants to work on every single Linux system out there
right now has to accomodate the case where NLM_F_MATCH isn't done by
the kernel. It will take several years before this would be widely
deployed even if it went in right now.
This means applications are not simplified at all, in fact they become
more complex, since they have to accomodate not just one but two
possible cases.
Specifying this behavior in RFC 3549 is pretty pointless, given the
existing situation.
So the value proposition of adding this change doesn't add up to me.
I'm therefore not inclined to apply a patch like this, sorry. And even
if I was, I'd ask that ipv4 get it first or at the same time.
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH RFC] IPv6: RTM_GETROUTE NLM_F_MATCH handled as stated in RFC 3549
2012-01-03 20:16 ` David Miller
@ 2012-01-04 6:24 ` Matti Vaittinen
0 siblings, 0 replies; 3+ messages in thread
From: Matti Vaittinen @ 2012-01-04 6:24 UTC (permalink / raw)
To: ext David Miller; +Cc: netdev
On Tue, 2012-01-03 at 15:16 -0500, ext David Miller wrote:
> From: Matti Vaittinen <matti.vaittinen@nsn.com>
> Date: Wed, 28 Dec 2011 16:01:55 +0200
>
> > This patch makes ipv6 module to return only routes which match
> > attributes / filled fields in RTM_GETROUTE, if NLM_F_MATCH is
> > specified and NLM_F_ROOT is not. This patch has not been tested,
> > and is meant more to be for visualization of what I thought of doing.
> > If the NLM_F_MATCH support is considered to be good idea, then I
> > will check this more thoroughly and send another patch.
> >
> > I assume this would not break *many* existing userspace applications,
> > since specifying NLM_F_MATCH (especially with no NLM_F_ROOT) sounds
> > pretty stupid - if no entries should be filtered.
> >
> > I checked iproute2, and it uses NLM_F_DUMP and does filtering entries
> > in userspace - thus it is not affected.
> >
> > I guess this same idea could be brought in RTM_GETADDR and RTM_GETLINK
> > too? Maybe also on IPv4 side?
>
> The problem is that you can't avoid writing the user level filters
> even if we add this behavior now.
>
> Any tool which wants to work on every single Linux system out there
> right now has to accomodate the case where NLM_F_MATCH isn't done by
> the kernel. It will take several years before this would be widely
> deployed even if it went in right now.
>
> This means applications are not simplified at all, in fact they become
> more complex, since they have to accomodate not just one but two
> possible cases.
I can't argue. Like Metallica sang, "Sad but true".
> I'm therefore not inclined to apply a patch like this, sorry. And even
> if I was, I'd ask that ipv4 get it first or at the same time.
>
No need to be sorry. I guess I can live with this ;) Thanks for closing
the case.
--Matti
--
Matti Vaittinen
+358 504863070
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Told a UDP joke the other night...
...but I'm not sure everyone got it...
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2012-01-04 6:14 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-12-28 14:01 [PATCH RFC] IPv6: RTM_GETROUTE NLM_F_MATCH handled as stated in RFC 3549 Matti Vaittinen
2012-01-03 20:16 ` David Miller
2012-01-04 6:24 ` Matti Vaittinen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).