From: Matti Vaittinen <matti.vaittinen@nsn.com>
To: netdev@vger.kernel.org, davem@davemloft.net
Subject: [PATCH RFC] IPv6: RTM_GETROUTE NLM_F_MATCH handled as stated in RFC 3549
Date: Wed, 28 Dec 2011 16:01:55 +0200 [thread overview]
Message-ID: <1325080915.26559.43.camel@hakki> (raw)
Hi Dee Ho!
RFC 3549 states:
Additional flag bits for GET requests on config information in
the FEC.
NLM_F_ROOT Return the complete table instead of a
single entry.
NLM_F_MATCH Return all entries matching criteria passed in
message content.
NLM_F_ATOMIC Return an atomic snapshot of the table being
referenced. This may require special
privileges because it has the potential to
interrupt service in the FE for a longer time.
Convenience macros for flag bits:
NLM_F_DUMP This is NLM_F_ROOT or'ed with NLM_F_MATCH
However, currently requests with NLM_F_ROOT or NLM_F_MATCH or both
(NLM_F_DUMP) specified will return all the (routing) entries.
To me it sounds like NLM_F_MATCH was originally meant to
allow the user to ask only for entries (routes) fulfilling the conditions
given in the request. This would further simplify userland applications
which need to get only certain entries - and I believe that is often the case.
I believe the current behaviour, which differs from the RFC, makes netlink
socket usage even more confusing. (There really is not much
up-to-date documentation explaining how these requests and all their
attributes work.)
This patch makes the ipv6 module return only those routes which match the
attributes / filled fields in RTM_GETROUTE, if NLM_F_MATCH is
specified and NLM_F_ROOT is not. This patch has not been tested,
and is meant more as an illustration of what I was thinking of doing.
If NLM_F_MATCH support is considered to be a good idea, then I
will check this more thoroughly and send another patch.
I assume this would not break *many* existing userspace applications,
since specifying NLM_F_MATCH (especially with no NLM_F_ROOT) sounds
pretty stupid - if no entries should be filtered.
I checked iproute2, and it uses NLM_F_DUMP and filters the entries
in userspace - thus it is not affected.
I guess the same idea could be applied to RTM_GETADDR and RTM_GETLINK
too? Maybe also on the IPv4 side?
Any comments?
This patch demonstrates how RTM_GETROUTE request behaviour for IPv6
could be changed to better match RFC 3549 when NLM_F_MATCH flag is
specified.
"NLM_F_MATCH Return all entries matching criteria passed in
message content."
Signed-off-by: Matti Vaittinen <Mazziesaccount@gmail.com>
---
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2ad92ca..27313ad 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -139,12 +139,21 @@ extern void rt6_pmtu_discovery(const struct in6_addr *daddr,
struct netlink_callback;
+/*
+ * State handed to the fib6 walker (through w->args) while dumping routes.
+ * skb, cb and net are refreshed by inet6_dump_fib() on every invocation.
+ */
struct rt6_rtnl_dump_arg {
+ long maxtbl; /* inet6_dump_fib() walks table-hash slots while h < maxtbl */
struct sk_buff *skb;
struct netlink_callback *cb;
struct net *net;
};
+/*
+ * Walker argument used for NLM_F_MATCH (filtered) dumps.
+ *
+ * The leading members must stay identical, in type and order, to
+ * struct rt6_rtnl_dump_arg: inet6_dump_fib() accesses w->args through a
+ * struct rt6_rtnl_dump_arg pointer regardless of which variant was allocated.
+ */
+struct rt6_rtnl_match_arg {
+ long maxtbl;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+ struct net *net;
+ /* Filter built from the request; kzalloc()ed in inet6_dump_fib() and
+  * kfree()d when the dump ends.
+  */
+ struct fib6_config *cfg;
+};
extern int rt6_dump_route(struct rt6_info *rt, void *p_arg);
+extern int rt6_dump_route_if_match(struct rt6_info *rt, void *p_arg);
extern void rt6_ifdown(struct net *net, struct net_device *dev);
extern void rt6_mtu_change(struct net_device *dev, unsigned mtu);
extern void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2783631..13bea56 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -268,7 +268,26 @@ static void __net_init fib6_tables_init(struct net *net)
}
#endif
+/*
+ * Walker callback for filtered dumps: offer every route hanging off the
+ * current leaf to rt6_dump_route_if_match().
+ *
+ * Returns 1 (with w->leaf remembering the route to resume from) when the
+ * walk has to be suspended, 0 once the whole leaf has been processed.
+ */
+static int fib6_dump_node_if_match(struct fib6_walker_t *w)
+{
+	struct rt6_info *route = w->leaf;
+
+	while (route) {
+		int rc = rt6_dump_route_if_match(route, w->args);
+
+		if (rc < 0) {
+			/* Either the frame is full or the request could not
+			 * be parsed; in both cases stop walking here.
+			 */
+			w->leaf = route;
+			return 1;
+		}
+		WARN_ON(rc == 0);
+		route = route->dst.rt6_next;
+	}
+
+	w->leaf = NULL;
+	return 0;
+}
static int fib6_dump_node(struct fib6_walker_t *w)
{
int res;
@@ -292,11 +311,14 @@ static void fib6_dump_end(struct netlink_callback *cb)
struct fib6_walker_t *w = (void*)cb->args[2];
if (w) {
+ if (w->func == fib6_dump_node_if_match)
+ kfree(((struct rt6_rtnl_match_arg *)w->args)->cfg);
if (cb->args[4]) {
cb->args[4] = 0;
fib6_walker_unlink(w);
}
cb->args[2] = 0;
+ kfree(w->args);
kfree(w);
}
cb->done = (void*)cb->args[3];
@@ -356,16 +378,13 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
- struct rt6_rtnl_dump_arg arg;
+ struct rt6_rtnl_dump_arg *arg;
struct fib6_walker_t *w;
struct fib6_table *tb;
struct hlist_node *node;
struct hlist_head *head;
int res = 0;
- s_h = cb->args[0];
- s_e = cb->args[1];
-
w = (void *)cb->args[2];
if (!w) {
/* New dump:
@@ -381,17 +400,57 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
return -ENOMEM;
- w->func = fib6_dump_node;
+ if (cb->nlh && (cb->nlh->nlmsg_flags & NLM_F_MATCH) &&
+ !(cb->nlh->nlmsg_flags & NLM_F_ROOT) &&
+ nlmsg_len(cb->nlh) >= sizeof(struct rtmsg)) {
+
+ struct rtmsg *rtm = nlmsg_data(cb->nlh);
+
+ cb->done = fib6_dump_done;
+ w->args = kzalloc(sizeof(struct rt6_rtnl_match_arg), GFP_ATOMIC);
+ if (!w->args) {
+ kfree(w);
+ return -ENOMEM;
+ }
+ ((struct rt6_rtnl_match_arg *)w->args)->cfg =
+ kzalloc(sizeof(struct fib6_config), GFP_ATOMIC);
+
+ if (!((struct rt6_rtnl_match_arg *)w->args)->cfg) {
+ kfree(w->args);
+ kfree(w);
+ return -ENOMEM;
+ }
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ if (rtm->rtm_table <= FIB6_TABLE_HASHSZ) {
+ cb->args[0] = rtm->rtm_table;
+ ((struct rt6_rtnl_match_arg *)w->args)->maxtbl =
+ (rtm->rtm_table) ? rtm->rtm_table+1 : FIB6_TABLE_HASHSZ;
+ }
+#endif
+
+ ((struct rt6_rtnl_match_arg *)w->args)->cfg->fc_table = 0xFFFFFFFF;
+ w->func = fib6_dump_node_if_match;
+ } else {
+ w->func = fib6_dump_node;
+ w->args = kzalloc(sizeof(struct rt6_rtnl_dump_arg), GFP_ATOMIC);
+ if (!w->args) {
+ kfree(w);
+ return -ENOMEM;
+ }
+ ((struct rt6_rtnl_dump_arg *)w->args)->maxtbl = FIB6_TABLE_HASHSZ;
+ }
cb->args[2] = (long)w;
}
+ arg = (struct rt6_rtnl_dump_arg *)w->args;
- arg.skb = skb;
- arg.cb = cb;
- arg.net = net;
- w->args = &arg;
+ s_h = cb->args[0];
+ s_e = cb->args[1];
+ arg->skb = skb;
+ arg->cb = cb;
+ arg->net = net;
rcu_read_lock();
- for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
+ for (h = s_h; h < arg->maxtbl; h++, s_e = 0) {
e = 0;
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 35b07cc..a71d44d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2350,7 +2350,154 @@ static inline size_t rt6_nlmsg_size(void)
+ RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
+ nla_total_size(sizeof(struct rta_cacheinfo));
}
+/*
+ * rt6_cmp_fill_node - append @rt to @skb as an RTM_NEWROUTE message, but
+ * only when the route satisfies the filter described by @cfg.
+ *
+ * @net:    unused by this function
+ * @skb:    reply buffer being filled
+ * @rt:     candidate route
+ * @pid:    netlink pid placed in the reply header
+ * @prefix: non-zero restricts the output to RTF_PREFIX_RT routes
+ * @cfg:    filter; a zero field (or the unspecified address) means
+ *          "don't care". RTF_UP is cleared from cfg->fc_flags as a side
+ *          effect. The gateway is only compared when RTF_GATEWAY is set
+ *          in cfg->fc_flags.
+ *
+ * Returns 1 when the route was skipped because it does not match,
+ * -EMSGSIZE when @skb has no room left for the message, and otherwise the
+ * value of nlmsg_end().
+ *
+ * NOTE(review): the flag test is an exact comparison against
+ * rt->rt6i_flags with RTF_UP stripped from the request side only - confirm
+ * that this is the intended matching semantics.
+ */
+static int rt6_cmp_fill_node(struct net *net,
+			     struct sk_buff *skb, struct rt6_info *rt,
+			     u32 pid,
+			     int prefix, struct fib6_config *cfg)
+{
+	struct rtmsg *rtm;
+	struct nlmsghdr *nlh;
+	long expires;
+	u32 table;
+	struct neighbour *n = NULL;
+	u32 seq;
+	struct in6_addr gwaddr;
+
+	/* Filter stage: bail out with 1 as soon as one criterion fails. */
+
+	if (prefix) {	/* user wants prefix routes only */
+		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
+			/* success since this is not a prefix route */
+			return 1;
+		}
+	}
+	if (cfg->fc_dst_len && cfg->fc_dst_len != rt->rt6i_dst.plen)
+		return 1;
+
+	if (cfg->fc_src_len && cfg->fc_src_len != rt->rt6i_src.plen)
+		return 1;
+	cfg->fc_flags = ((~RTF_UP)&cfg->fc_flags);
+	if (cfg->fc_flags && cfg->fc_flags != rt->rt6i_flags)
+		return 1;
+
+	if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
+		return 1;
+
+	if (cfg->fc_flags & RTF_GATEWAY) {
+		/* Copy the neighbour key while under RCU, compare afterwards. */
+		rcu_read_lock();
+		n = dst_get_neighbour_noref(&rt->dst);
+		if (!n) {
+			rcu_read_unlock();
+			return 1;
+		}
+		gwaddr = *(struct in6_addr *)&(n->primary_key);
+		rcu_read_unlock();
+		if (memcmp(&gwaddr, &cfg->fc_gateway, 16))
+			return 1;
+	}
+
+	if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
+		return 1;
+
+	if (cfg->fc_ifindex)
+		if (!rt->dst.dev || rt->rt6i_dev->ifindex != cfg->fc_ifindex)
+			return 1;
+
+	if (memcmp(&in6addr_any, &cfg->fc_dst, sizeof(struct in6_addr)))
+		if (memcmp(&cfg->fc_dst, &rt->rt6i_dst.addr, sizeof(struct in6_addr)))
+			return 1;
+
+#ifdef CONFIG_IPV6_SUBTREES
+	if (memcmp(&in6addr_any, &cfg->fc_src, sizeof(struct in6_addr)))
+		if (memcmp(&cfg->fc_src, &rt->rt6i_src.addr, sizeof(struct in6_addr)))
+			return 1;
+#endif
+	if (memcmp(&in6addr_any, &cfg->fc_prefsrc, sizeof(struct in6_addr)))
+		if (!rt->rt6i_prefsrc.plen ||
+		    memcmp(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc, sizeof(struct in6_addr)))
+			return 1;
+
+	/* All values checked, now allocate and fill NLMSG */
+	seq = cfg->fc_nlinfo.nlh->nlmsg_seq;
+
+	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	rtm = nlmsg_data(nlh);
+	rtm->rtm_family = AF_INET6;
+	rtm->rtm_dst_len = rt->rt6i_dst.plen;
+	rtm->rtm_src_len = rt->rt6i_src.plen;
+	rtm->rtm_tos = 0;
+	if (rt->rt6i_table)
+		table = rt->rt6i_table->tb6_id;
+	else
+		table = RT6_TABLE_UNSPEC;
+	rtm->rtm_table = table;
+	NLA_PUT_U32(skb, RTA_TABLE, table);
+	if (rt->rt6i_flags&RTF_REJECT)
+		rtm->rtm_type = RTN_UNREACHABLE;
+	else if (rt->rt6i_flags&RTF_LOCAL)
+		rtm->rtm_type = RTN_LOCAL;
+	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
+		rtm->rtm_type = RTN_LOCAL;
+	else
+		rtm->rtm_type = RTN_UNICAST;
+	rtm->rtm_flags = 0;
+	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+	rtm->rtm_protocol = rt->rt6i_protocol;
+	if (rt->rt6i_flags&RTF_DYNAMIC)
+		rtm->rtm_protocol = RTPROT_REDIRECT;
+	else if (rt->rt6i_flags & RTF_ADDRCONF)
+		rtm->rtm_protocol = RTPROT_KERNEL;
+	else if (rt->rt6i_flags&RTF_DEFAULT)
+		rtm->rtm_protocol = RTPROT_RA;
+
+	if (rt->rt6i_flags&RTF_CACHE)
+		rtm->rtm_flags |= RTM_F_CLONED;
+
+	if (rtm->rtm_dst_len)
+		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+#ifdef CONFIG_IPV6_SUBTREES
+	if (rtm->rtm_src_len)
+		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+#endif
+	if (rt->rt6i_prefsrc.plen) {
+		struct in6_addr saddr_buf;
+		saddr_buf = rt->rt6i_prefsrc.addr;
+		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+	}
+	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+		goto nla_put_failure;
+
+	/*
+	 * NLA_PUT() must not be used inside the RCU section: on failure it
+	 * jumps straight to nla_put_failure and would leave rcu_read_lock()
+	 * unbalanced. Call nla_put() directly and unlock before bailing out.
+	 */
+	rcu_read_lock();
+	n = dst_get_neighbour_noref(&rt->dst);
+	if (n && nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
+		rcu_read_unlock();
+		goto nla_put_failure;
+	}
+	rcu_read_unlock();
+
+	if (rt->dst.dev)
+		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
+
+	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
+
+	/* Remaining lifetime in jiffies, clamped to INT_MAX; 0 = no expiry. */
+	if (!(rt->rt6i_flags & RTF_EXPIRES))
+		expires = 0;
+	else if (rt->rt6i_expires - jiffies < INT_MAX)
+		expires = rt->rt6i_expires - jiffies;
+	else
+		expires = INT_MAX;
+
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
+			       expires, rt->dst.error) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	/* Drop the partially built message so the skb stays consistent. */
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
static int rt6_fill_node(struct net *net,
struct sk_buff *skb, struct rt6_info *rt,
struct in6_addr *dst, struct in6_addr *src,
@@ -2478,7 +2625,34 @@ nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
+/*
+ * Dump @rt into the reply only if it matches the criteria carried in the
+ * request that started the dump. @p_arg is a struct rt6_rtnl_match_arg.
+ *
+ * Returns the error from rtm_to_fib6_config() if the request cannot be
+ * parsed, otherwise whatever rt6_cmp_fill_node() returns.
+ */
+int rt6_dump_route_if_match(struct rt6_info *rt, void *p_arg)
+{
+	struct rt6_rtnl_match_arg *match = (struct rt6_rtnl_match_arg *) p_arg;
+	int only_prefix = 0;
+	int rc;
+
+	if (nlmsg_len(match->cb->nlh) >= sizeof(struct rtmsg)) {
+		struct rtmsg *hdr = nlmsg_data(match->cb->nlh);
+
+		only_prefix = (hdr->rtm_flags & RTM_F_PREFIX) != 0;
+	}
+
+	/* fc_table == 0xFFFFFFFF is a sentinel meaning "request not parsed
+	 * yet". The config is built from the nlmsg the first time through
+	 * only, so the same message is not re-evaluated for every route
+	 * fetched from the fib.
+	 */
+	if (match->cfg->fc_table == 0xFFFFFFFF) {
+		match->cfg->fc_table = 0;
+		rc = rtm_to_fib6_config(match->cb->skb,
+					(struct nlmsghdr *)match->cb->nlh,
+					match->cfg);
+		if (rc)
+			return rc;
+	}
+
+	return rt6_cmp_fill_node(match->net, match->skb, rt,
+				 NETLINK_CB(match->cb->skb).pid,
+				 only_prefix, match->cfg);
+}
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
@@ -2489,7 +2663,6 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
} else
prefix = 0;
-
return rt6_fill_node(arg->net,
arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
next reply other threads:[~2011-12-28 14:11 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-12-28 14:01 Matti Vaittinen [this message]
2012-01-03 20:16 ` [PATCH RFC] IPv6: RTM_GETROUTE NLM_F_MATCH handled as stated in RFC 3549 David Miller
2012-01-04 6:24 ` Matti Vaittinen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1325080915.26559.43.camel@hakki \
--to=matti.vaittinen@nsn.com \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.