From: Patrick McHardy <kaber@trash.net>
To: Thomas Graf <tgraf@suug.ch>
Cc: David Miller <davem@davemloft.net>,
jmorris@namei.org, netdev@vger.kernel.org, vnuorval@tcs.hut.fi,
usagi-core@linux-ipv6.org, yoshfuji@linux-ipv6.org,
anttit@tcs.hut.fi
Subject: Re: [RESEND 3/5] [NET]: Protocol Independant Policy Routing Rules Framework
Date: Fri, 28 Jul 2006 01:30:42 +0200 [thread overview]
Message-ID: <44C94CA2.4090900@trash.net> (raw)
In-Reply-To: <20060727223931.GZ14627@postel.suug.ch>
Thomas Graf wrote:
> --- /dev/null
> +++ net-2.6.git/net/core/fib_rules.c
> +int fib_rules_register(struct fib_rules_ops *ops)
> +{
> + int err = -EEXIST;
> + struct fib_rules_ops *o;
> +
> + if (ops->rule_size < sizeof(struct fib_rule))
> + return -EINVAL;
> +
> + if (ops->match == NULL || ops->configure == NULL ||
> + ops->compare == NULL || ops->fill == NULL ||
> + ops->action == NULL)
> + return -EINVAL;
> +
> + spin_lock_bh(&rules_mod_lock);
This doesn't look like it needs bh protection.
> + list_for_each_entry(o, &rules_ops, list)
> + if (ops->family == o->family)
> + goto errout;
> +
> + list_add_tail_rcu(&ops->list, &rules_ops);
> + err = 0;
> +errout:
> + spin_unlock_bh(&rules_mod_lock);
> +
> + return err;
> +}
> +
> +EXPORT_SYMBOL_GPL(fib_rules_register);
> +
> +static void cleanup_ops(struct fib_rules_ops *ops)
> +{
> + struct fib_rule *rule, *tmp;
> +
> + list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
> + list_del_rcu(&rule->list);
> + fib_rule_put(rule);
> + }
> +}
> +
> +int fib_rules_unregister(struct fib_rules_ops *ops)
> +{
> + int err = 0;
> + struct fib_rules_ops *o;
> +
> + spin_lock_bh(&rules_mod_lock);
> + list_for_each_entry(o, &rules_ops, list) {
> + if (o == ops) {
> + list_del_rcu(&o->list);
> + cleanup_ops(ops);
> + goto out;
> + }
> + }
> +
> + err = -ENOENT;
> +out:
> + spin_unlock_bh(&rules_mod_lock);
> +
> + synchronize_net();
> +
> + return err;
> +}
> +
> +EXPORT_SYMBOL_GPL(fib_rules_unregister);
> +
> +int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
> + int flags, struct fib_lookup_arg *arg)
> +{
> + struct fib_rule *rule;
> + int err;
> +
> + rcu_read_lock();
> +
> + list_for_each_entry(rule, ops->rules_list, list) {
> + if (rule->ifname[0] && (rule->ifindex != fl->iif))
> + continue;
ifindex may be unset even if ifname is set (in case the interface
does not exist yet). In that case it will match falsely on
locally generated packets.
> +
> + if (!ops->match(rule, fl, flags))
> + continue;
> +
> + rcu_read_unlock();
> +
> + err = ops->action(rule, fl, flags, arg);
> + if (err != -EAGAIN) {
> + fib_rule_get(rule);
> + arg->rule = rule;
> + goto out;
> + }
This seems to race with fib_nl_delrule:
CPU1 CPU2
list_for_each_entry -> find matching entry
list_del_rcu
fib_rule_put
call_rcu(fib_rule_put_rcu)
fib_rule_get
Moving fib_rule_get inside the rcu protected section and
calling synchronize_rcu before fib_rule_put in fib_nl_delrule
looks like the easiest fix.
> + }
> +
> + err = -ENETUNREACH;
> +out:
> + rcu_read_unlock();
> +
> + return err;
> +}
> +
> +EXPORT_SYMBOL_GPL(fib_rules_lookup);
> +
> +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
> +{
> + struct fib_rule_hdr *frh = nlmsg_data(nlh);
> + struct fib_rules_ops *ops = NULL;
> + struct fib_rule *rule, *r, *last = NULL;
> + struct nlattr *tb[FRA_MAX+1];
> + int err = -EINVAL;
> +
> + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
> + goto errout;
> +
> + ops = lookup_rules_ops(frh->family);
> + if (ops == NULL) {
> + err = EAFNOSUPPORT;
> + goto errout;
> + }
> +
> + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
> + if (err < 0)
> + goto errout;
> +
> + if (tb[FRA_IFNAME] && nla_len(tb[FRA_IFNAME]) > IFNAMSIZ)
> + goto errout;
> +
> + rule = kzalloc(ops->rule_size, GFP_KERNEL);
> + if (rule == NULL) {
> + err = -ENOMEM;
> + goto errout;
> + }
> +
> + if (tb[FRA_PRIORITY])
> + rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
> +
> + if (tb[FRA_IFNAME]) {
> + struct net_device *dev;
> +
> + rule->ifindex = -1;
> + if (nla_strlcpy(rule->ifname, tb[FRA_IFNAME],
> + IFNAMSIZ) >= IFNAMSIZ)
> + goto errout_free;
> +
> + dev = __dev_get_by_name(rule->ifname);
> + if (dev)
> + rule->ifindex = dev->ifindex;
> + }
> +
> + rule->action = frh->action;
> + rule->flags = frh->flags;
> + rule->table = frh->table;
> +
> + if (!rule->pref && ops->default_pref)
> + rule->pref = ops->default_pref();
> +
> + err = ops->configure(rule, skb, nlh, frh, tb);
> + if (err < 0)
> + goto errout_free;
> +
> + list_for_each_entry(r, ops->rules_list, list) {
> + if (r->pref > rule->pref)
> + break;
> + last = r;
> + }
> +
> + fib_rule_get(rule);
> +
> + if (last)
> + list_add_rcu(&rule->list, &last->list);
> + else
> + list_add_rcu(&rule->list, ops->rules_list);
> +
> + notify_rule_change(RTM_NEWRULE, rule, ops);
> + rules_ops_put(ops);
> + return 0;
> +
> +errout_free:
> + kfree(rule);
> +errout:
> + rules_ops_put(ops);
> + return err;
> +}
> +
> +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
> +{
> + struct fib_rule_hdr *frh = nlmsg_data(nlh);
> + struct fib_rules_ops *ops = NULL;
> + struct fib_rule *rule;
> + struct nlattr *tb[FRA_MAX+1];
> + int err = -EINVAL;
> +
> + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
> + goto errout;
> +
> + ops = lookup_rules_ops(frh->family);
> + if (ops == NULL) {
> + err = EAFNOSUPPORT;
> + goto errout;
> + }
> +
> + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
> + if (err < 0)
> + goto errout;
> +
> + list_for_each_entry(rule, ops->rules_list, list) {
> + if (frh->action && (frh->action != rule->action))
> + continue;
> +
> + if (frh->table && (frh->table != rule->table))
> + continue;
> +
> + if (tb[FRA_PRIORITY] &&
> + (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
> + continue;
> +
> + if (tb[FRA_IFNAME] &&
> + nla_strcmp(tb[FRA_IFNAME], rule->ifname))
> + continue;
> +
> + if (!ops->compare(rule, frh, tb))
> + continue;
> +
> + if (rule->flags & FIB_RULE_PERMANENT) {
> + err = -EPERM;
> + goto errout;
> + }
> +
> + list_del_rcu(&rule->list);
> + notify_rule_change(RTM_DELRULE, rule, ops);
> + fib_rule_put(rule);
> + rules_ops_put(ops);
> + return 0;
> + }
> +
> + err = -ENOENT;
> +errout:
> + rules_ops_put(ops);
> + return err;
> +}
> +
> +static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
> + u32 pid, u32 seq, int type, int flags,
> + struct fib_rules_ops *ops)
> +{
> + struct nlmsghdr *nlh;
> + struct fib_rule_hdr *frh;
> +
> + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
> + if (nlh == NULL)
> + return -1;
> +
> + frh = nlmsg_data(nlh);
> + frh->table = rule->table;
> + frh->res1 = 0;
> + frh->res2 = 0;
> + frh->action = rule->action;
> + frh->flags = rule->flags;
> +
> + if (rule->ifname[0])
> + NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
> +
> + if (rule->pref)
> + NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
> +
> + if (ops->fill(rule, skb, nlh, frh) < 0)
> + goto nla_put_failure;
> +
> + return nlmsg_end(skb, nlh);
> +
> +nla_put_failure:
> + return nlmsg_cancel(skb, nlh);
> +}
> +
> +int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
> +{
> + int idx = 0;
> + struct fib_rule *rule;
> + struct fib_rules_ops *ops;
> +
> + ops = lookup_rules_ops(family);
> + if (ops == NULL)
> + return -EAFNOSUPPORT;
> +
> + rcu_read_lock();
> + list_for_each_entry(rule, ops->rules_list, list) {
> + if (idx < cb->args[0])
> + goto skip;
> +
> + if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
> + cb->nlh->nlmsg_seq, RTM_NEWRULE,
> + NLM_F_MULTI, ops) < 0)
> + break;
> +skip:
> + idx++;
> + }
> + rcu_read_unlock();
> + cb->args[0] = idx;
> + rules_ops_put(ops);
> +
> + return skb->len;
> +}
> +
> +EXPORT_SYMBOL_GPL(fib_rules_dump);
> +
> +static void notify_rule_change(int event, struct fib_rule *rule,
> + struct fib_rules_ops *ops)
> +{
> + int size = nlmsg_total_size(sizeof(struct fib_rule_hdr) + 128);
> + struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
> +
> + if (skb == NULL)
> + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS);
> + else if (fib_nl_fill_rule(skb, rule, 0, 0, event, 0, ops) < 0) {
> + kfree_skb(skb);
> + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL);
> + } else
> + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL);
> +}
Shouldn't different families use different groups? Userspace
might (rightfully, I think) expect not to see anything but
IPv4 rules on RTNLGRP_IPV4_RULE.
> +static void attach_rules(struct list_head *rules, struct net_device *dev)
> +{
> + struct fib_rule *rule;
> +
> + list_for_each_entry(rule, rules, list) {
> + if (rule->ifindex == -1 &&
> + strcmp(dev->name, rule->ifname) == 0)
> + rule->ifindex = dev->ifindex;
> + }
> +}
> +
> +static void detach_rules(struct list_head *rules, struct net_device *dev)
> +{
> + struct fib_rule *rule;
> +
> + list_for_each_entry(rule, rules, list)
> + if (rule->ifindex == dev->ifindex)
> + rule->ifindex = -1;
> +}
> +
> +
> +static int fib_rules_event(struct notifier_block *this, unsigned long event,
> + void *ptr)
> +{
> + struct net_device *dev = ptr;
> + struct fib_rules_ops *ops;
> +
> + ASSERT_RTNL();
> + rcu_read_lock();
> +
> + switch (event) {
> + case NETDEV_REGISTER:
> + list_for_each_entry(ops, &rules_ops, list)
> + attach_rules(ops->rules_list, dev);
> + break;
> +
> + case NETDEV_UNREGISTER:
> + list_for_each_entry(ops, &rules_ops, list)
> + detach_rules(ops->rules_list, dev);
> + break;
> + }
> +
> + rcu_read_unlock();
> +
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block fib_rules_notifier = {
> + .notifier_call = fib_rules_event,
> +};
> +
> +static int __init fib_rules_init(void)
> +{
> + return register_netdevice_notifier(&fib_rules_notifier);
> +}
> +
> +subsys_initcall(fib_rules_init);
next prev parent reply other threads:[~2006-07-27 23:32 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-07-26 22:11 [RFC] Multiple IPV6 Routing Tables & Policy Routing Thomas Graf
2006-07-26 22:00 ` [PATCH 1/5] [IPV6]: Remove ndiscs rt6_lock dependency Thomas Graf
2006-07-26 22:28 ` David Miller
2006-07-26 23:34 ` Tushar Gohad
2006-07-26 23:34 ` David Miller
2006-07-31 11:01 ` Ville Nuorvala
2006-07-26 22:00 ` [PATCH 2/5] [IPV6]: Multiple Routing Tables Thomas Graf
2006-07-26 22:39 ` David Miller
2006-07-26 22:48 ` Thomas Graf
2006-07-26 22:55 ` David Miller
2006-07-29 4:13 ` YOSHIFUJI Hideaki / 吉藤英明
2006-07-29 4:14 ` David Miller
2006-07-29 4:28 ` YOSHIFUJI Hideaki / 吉藤英明
2006-07-29 10:29 ` Thomas Graf
2006-07-31 13:55 ` Ville Nuorvala
2006-07-31 14:01 ` Herbert Xu
2006-07-31 14:02 ` Herbert Xu
2006-07-31 15:41 ` Thomas Graf
2006-07-31 20:09 ` David Miller
2006-07-31 15:34 ` Thomas Graf
2006-07-26 22:00 ` [PATCH 3/5] [NET]: Protocol Independant Policy Routing Rules Framework Thomas Graf
2006-07-26 22:41 ` David Miller
2006-07-27 5:58 ` James Morris
2006-07-27 6:02 ` David Miller
2006-07-27 22:39 ` [RESEND " Thomas Graf
2006-07-27 22:58 ` Patrick McHardy
2006-07-27 23:17 ` David Miller
2006-07-27 23:31 ` Patrick McHardy
2006-07-28 9:25 ` Martin Josefsson
2006-07-29 1:40 ` Patrick McHardy
2006-07-29 7:25 ` Martin Josefsson
2006-07-27 23:30 ` Patrick McHardy [this message]
2006-07-28 10:23 ` Thomas Graf
2006-07-31 14:46 ` Ville Nuorvala
2006-07-31 15:24 ` Thomas Graf
2006-07-31 18:01 ` Patrick McHardy
2006-07-31 20:01 ` Thomas Graf
2006-07-26 22:00 ` [PATCH 4/5] [IPV6]: Policy Routing Rules Thomas Graf
2006-07-26 22:42 ` David Miller
2006-07-26 23:26 ` David Miller
2006-07-26 23:33 ` David Miller
2006-07-26 23:40 ` David Miller
2006-07-27 22:40 ` [RESEND " Thomas Graf
2006-07-31 14:55 ` Ville Nuorvala
2006-07-26 22:00 ` [PATCH 5/5] [IPV4]: Use Protocol Independant Policy Routing Rules Framework Thomas Graf
2006-07-26 22:43 ` David Miller
2006-07-26 23:47 ` David Miller
2006-07-27 22:40 ` [RESEND " Thomas Graf
2006-07-28 6:10 ` [RFC] Multiple IPV6 Routing Tables & Policy Routing YOSHIFUJI Hideaki / 吉藤英明
2006-07-28 8:23 ` David Miller
2006-07-28 10:32 ` Thomas Graf
2006-07-29 4:27 ` YOSHIFUJI Hideaki / 吉藤英明
2006-07-31 11:01 ` Ville Nuorvala
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=44C94CA2.4090900@trash.net \
--to=kaber@trash.net \
--cc=anttit@tcs.hut.fi \
--cc=davem@davemloft.net \
--cc=jmorris@namei.org \
--cc=netdev@vger.kernel.org \
--cc=tgraf@suug.ch \
--cc=usagi-core@linux-ipv6.org \
--cc=vnuorval@tcs.hut.fi \
--cc=yoshfuji@linux-ipv6.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).