Re: [RESEND 3/5] [NET]: Protocol Independant Policy Routing Rules Framework

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Patrick McHardy <kaber@trash.net>
To: Thomas Graf <tgraf@suug.ch>
Cc: David Miller <davem@davemloft.net>,
	jmorris@namei.org, netdev@vger.kernel.org, vnuorval@tcs.hut.fi,
	usagi-core@linux-ipv6.org, yoshfuji@linux-ipv6.org,
	anttit@tcs.hut.fi
Subject: Re: [RESEND 3/5] [NET]: Protocol Independant Policy Routing Rules Framework
Date: Fri, 28 Jul 2006 01:30:42 +0200	[thread overview]
Message-ID: <44C94CA2.4090900@trash.net> (raw)
In-Reply-To: <20060727223931.GZ14627@postel.suug.ch>

Thomas Graf wrote:
> --- /dev/null
> +++ net-2.6.git/net/core/fib_rules.c
> +int fib_rules_register(struct fib_rules_ops *ops)
> +{
> +	int err = -EEXIST;
> +	struct fib_rules_ops *o;
> +
> +	if (ops->rule_size < sizeof(struct fib_rule))
> +		return -EINVAL;
> +
> +	if (ops->match == NULL || ops->configure == NULL ||
> +	    ops->compare == NULL || ops->fill == NULL ||
> +	    ops->action == NULL)
> +		return -EINVAL;
> +
> +	spin_lock_bh(&rules_mod_lock);

This doesn't look like it needs bh protection.

> +	list_for_each_entry(o, &rules_ops, list)
> +		if (ops->family == o->family)
> +			goto errout;
> +
> +	list_add_tail_rcu(&ops->list, &rules_ops);
> +	err = 0;
> +errout:
> +	spin_unlock_bh(&rules_mod_lock);
> +
> +	return err;
> +}
> +
> +EXPORT_SYMBOL_GPL(fib_rules_register);
> +
> +static void cleanup_ops(struct fib_rules_ops *ops)
> +{
> +	struct fib_rule *rule, *tmp;
> +
> +	list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
> +		list_del_rcu(&rule->list);
> +		fib_rule_put(rule);
> +	}
> +}
> +
> +int fib_rules_unregister(struct fib_rules_ops *ops)
> +{
> +	int err = 0;
> +	struct fib_rules_ops *o;
> +
> +	spin_lock_bh(&rules_mod_lock);
> +	list_for_each_entry(o, &rules_ops, list) {
> +		if (o == ops) {
> +			list_del_rcu(&o->list);
> +			cleanup_ops(ops);
> +			goto out;
> +		}
> +	}
> +
> +	err = -ENOENT;
> +out:
> +	spin_unlock_bh(&rules_mod_lock);
> +
> +	synchronize_net();
> +
> +	return err;
> +}
> +
> +EXPORT_SYMBOL_GPL(fib_rules_unregister);
> +
> +int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
> +		     int flags, struct fib_lookup_arg *arg)
> +{
> +	struct fib_rule *rule;
> +	int err;
> +
> +	rcu_read_lock();
> +
> +	list_for_each_entry(rule, ops->rules_list, list) {
> +		if (rule->ifname[0] && (rule->ifindex != fl->iif))
> +			continue;

ifindex may be unset even if ifname is set (in case the interface
does not exist yet). In that case it will match falsely on
locally generated packets.

> +
> +		if (!ops->match(rule, fl, flags))
> +			continue;
> +
> +		rcu_read_unlock();
> +
> +		err = ops->action(rule, fl, flags, arg);
> +		if (err != -EAGAIN) {
> +			fib_rule_get(rule);
> +			arg->rule = rule;
> +			goto out;
> +		}

This seems to race with fib_nl_delrule:

CPU1			CPU2
			list_for_each_entry -> find matching entry
list_del_rcu
fib_rule_put
call_rcu(fib_rule_put_rcu)
			fib_rule_get

Moving fib_rule_get inside the rcu protected section and
calling synchronize_rcu before fib_rule_put in fib_nl_delrule
looks like the easiest fix.

> +	}
> +
> +	err = -ENETUNREACH;
> +out:
> +	rcu_read_unlock();
> +
> +	return err;
> +}
> +
> +EXPORT_SYMBOL_GPL(fib_rules_lookup);
> +
> +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
> +{
> +	struct fib_rule_hdr *frh = nlmsg_data(nlh);
> +	struct fib_rules_ops *ops = NULL;
> +	struct fib_rule *rule, *r, *last = NULL;
> +	struct nlattr *tb[FRA_MAX+1];
> +	int err = -EINVAL;
> +
> +	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
> +		goto errout;
> +
> +	ops = lookup_rules_ops(frh->family);
> +	if (ops == NULL) {
> +		err = EAFNOSUPPORT;
> +		goto errout;
> +	}
> +
> +	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
> +	if (err < 0)
> +		goto errout;
> +
> +	if (tb[FRA_IFNAME] && nla_len(tb[FRA_IFNAME]) > IFNAMSIZ)
> +		goto errout;
> +
> +	rule = kzalloc(ops->rule_size, GFP_KERNEL);
> +	if (rule == NULL) {
> +		err = -ENOMEM;
> +		goto errout;
> +	}
> +
> +	if (tb[FRA_PRIORITY])
> +		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
> +
> +	if (tb[FRA_IFNAME]) {
> +		struct net_device *dev;
> +
> +		rule->ifindex = -1;
> +		if (nla_strlcpy(rule->ifname, tb[FRA_IFNAME],
> +				IFNAMSIZ) >= IFNAMSIZ)
> +			goto errout_free;
> +
> +		dev = __dev_get_by_name(rule->ifname);
> +		if (dev)
> +			rule->ifindex = dev->ifindex;
> +	}
> +
> +	rule->action = frh->action;
> +	rule->flags = frh->flags;
> +	rule->table = frh->table;
> +
> +	if (!rule->pref && ops->default_pref)
> +		rule->pref = ops->default_pref();
> +
> +	err = ops->configure(rule, skb, nlh, frh, tb);
> +	if (err < 0)
> +		goto errout_free;
> +
> +	list_for_each_entry(r, ops->rules_list, list) {
> +		if (r->pref > rule->pref)
> +			break;
> +		last = r;
> +	}
> +
> +	fib_rule_get(rule);
> +
> +	if (last)
> +		list_add_rcu(&rule->list, &last->list);
> +	else
> +		list_add_rcu(&rule->list, ops->rules_list);
> +
> +	notify_rule_change(RTM_NEWRULE, rule, ops);
> +	rules_ops_put(ops);
> +	return 0;
> +
> +errout_free:
> +	kfree(rule);
> +errout:
> +	rules_ops_put(ops);
> +	return err;
> +}
> +
> +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
> +{
> +	struct fib_rule_hdr *frh = nlmsg_data(nlh);
> +	struct fib_rules_ops *ops = NULL;
> +	struct fib_rule *rule;
> +	struct nlattr *tb[FRA_MAX+1];
> +	int err = -EINVAL;
> +
> +	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
> +		goto errout;
> +
> +	ops = lookup_rules_ops(frh->family);
> +	if (ops == NULL) {
> +		err = EAFNOSUPPORT;
> +		goto errout;
> +	}
> +
> +	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
> +	if (err < 0)
> +		goto errout;
> +
> +	list_for_each_entry(rule, ops->rules_list, list) {
> +		if (frh->action && (frh->action != rule->action))
> +			continue;
> +
> +		if (frh->table && (frh->table != rule->table))
> +			continue;
> +
> +		if (tb[FRA_PRIORITY] &&
> +		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
> +			continue;
> +
> +		if (tb[FRA_IFNAME] &&
> +		    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
> +			continue;
> +
> +		if (!ops->compare(rule, frh, tb))
> +			continue;
> +
> +		if (rule->flags & FIB_RULE_PERMANENT) {
> +			err = -EPERM;
> +			goto errout;
> +		}
> +
> +		list_del_rcu(&rule->list);
> +		notify_rule_change(RTM_DELRULE, rule, ops);
> +		fib_rule_put(rule);
> +		rules_ops_put(ops);
> +		return 0;
> +	}
> +
> +	err = -ENOENT;
> +errout:
> +	rules_ops_put(ops);
> +	return err;
> +}
> +
> +static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
> +			    u32 pid, u32 seq, int type, int flags,
> +			    struct fib_rules_ops *ops)
> +{
> +	struct nlmsghdr *nlh;
> +	struct fib_rule_hdr *frh;
> +
> +	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
> +	if (nlh == NULL)
> +		return -1;
> +
> +	frh = nlmsg_data(nlh);
> +	frh->table = rule->table;
> +	frh->res1 = 0;
> +	frh->res2 = 0;
> +	frh->action = rule->action;
> +	frh->flags = rule->flags;
> +
> +	if (rule->ifname[0])
> +		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
> +
> +	if (rule->pref)
> +		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
> +
> +	if (ops->fill(rule, skb, nlh, frh) < 0)
> +		goto nla_put_failure;
> +
> +	return nlmsg_end(skb, nlh);
> +
> +nla_put_failure:
> +	return nlmsg_cancel(skb, nlh);
> +}
> +
> +int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
> +{
> +	int idx = 0;
> +	struct fib_rule *rule;
> +	struct fib_rules_ops *ops;
> +
> +	ops = lookup_rules_ops(family);
> +	if (ops == NULL)
> +		return -EAFNOSUPPORT;
> +
> +	rcu_read_lock();
> +	list_for_each_entry(rule, ops->rules_list, list) {
> +		if (idx < cb->args[0])
> +			goto skip;
> +
> +		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
> +				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
> +				     NLM_F_MULTI, ops) < 0)
> +			break;
> +skip:
> +		idx++;
> +	}
> +	rcu_read_unlock();
> +	cb->args[0] = idx;
> +	rules_ops_put(ops);
> +
> +	return skb->len;
> +}
> +
> +EXPORT_SYMBOL_GPL(fib_rules_dump);
> +
> +static void notify_rule_change(int event, struct fib_rule *rule,
> +			       struct fib_rules_ops *ops)
> +{
> +	int size = nlmsg_total_size(sizeof(struct fib_rule_hdr) + 128);
> +	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
> +
> +	if (skb == NULL)
> +		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS);
> +	else if (fib_nl_fill_rule(skb, rule, 0, 0, event, 0, ops) < 0) {
> +		kfree_skb(skb);
> +		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL);
> +	} else
> +		netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL);
> +}

Shouldn't different families use different groups? Userspace
might (rightfully, I think) expect not to see anything but
IPv4 rules on RTNLGRP_IPV4_RULE.

> +static void attach_rules(struct list_head *rules, struct net_device *dev)
> +{
> +	struct fib_rule *rule;
> +
> +	list_for_each_entry(rule, rules, list) {
> +		if (rule->ifindex == -1 &&
> +		    strcmp(dev->name, rule->ifname) == 0)
> +			rule->ifindex = dev->ifindex;
> +	}
> +}
> +
> +static void detach_rules(struct list_head *rules, struct net_device *dev)
> +{
> +	struct fib_rule *rule;
> +
> +	list_for_each_entry(rule, rules, list)
> +		if (rule->ifindex == dev->ifindex)
> +			rule->ifindex = -1;
> +}
> +
> +
> +static int fib_rules_event(struct notifier_block *this, unsigned long event,
> +			    void *ptr)
> +{
> +	struct net_device *dev = ptr;
> +	struct fib_rules_ops *ops;
> +
> +	ASSERT_RTNL();
> +	rcu_read_lock();
> +
> +	switch (event) {
> +	case NETDEV_REGISTER:
> +		list_for_each_entry(ops, &rules_ops, list)
> +			attach_rules(ops->rules_list, dev);
> +		break;
> +
> +	case NETDEV_UNREGISTER:
> +		list_for_each_entry(ops, &rules_ops, list)
> +			detach_rules(ops->rules_list, dev);
> +		break;
> +	}
> +
> +	rcu_read_unlock();
> +
> +	return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block fib_rules_notifier = {
> +	.notifier_call = fib_rules_event,
> +};
> +
> +static int __init fib_rules_init(void)
> +{
> +	return register_netdevice_notifier(&fib_rules_notifier);
> +}
> +
> +subsys_initcall(fib_rules_init);

next prev parent reply	other threads:[~2006-07-27 23:32 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-07-26 22:11 [RFC] Multiple IPV6 Routing Tables & Policy Routing Thomas Graf
2006-07-26 22:00 ` [PATCH 1/5] [IPV6]: Remove ndiscs rt6_lock dependency Thomas Graf
2006-07-26 22:28   ` David Miller
2006-07-26 23:34     ` Tushar Gohad
2006-07-26 23:34       ` David Miller
2006-07-31 11:01     ` Ville Nuorvala
2006-07-26 22:00 ` [PATCH 2/5] [IPV6]: Multiple Routing Tables Thomas Graf
2006-07-26 22:39   ` David Miller
2006-07-26 22:48     ` Thomas Graf
2006-07-26 22:55       ` David Miller
2006-07-29  4:13   ` YOSHIFUJI Hideaki / 吉藤英明
2006-07-29  4:14     ` David Miller
2006-07-29  4:28       ` YOSHIFUJI Hideaki / 吉藤英明
2006-07-29 10:29     ` Thomas Graf
2006-07-31 13:55   ` Ville Nuorvala
2006-07-31 14:01     ` Herbert Xu
2006-07-31 14:02       ` Herbert Xu
2006-07-31 15:41       ` Thomas Graf
2006-07-31 20:09         ` David Miller
2006-07-31 15:34     ` Thomas Graf
2006-07-26 22:00 ` [PATCH 3/5] [NET]: Protocol Independant Policy Routing Rules Framework Thomas Graf
2006-07-26 22:41   ` David Miller
2006-07-27  5:58   ` James Morris
2006-07-27  6:02     ` David Miller
2006-07-27 22:39       ` [RESEND " Thomas Graf
2006-07-27 22:58         ` Patrick McHardy
2006-07-27 23:17           ` David Miller
2006-07-27 23:31             ` Patrick McHardy
2006-07-28  9:25           ` Martin Josefsson
2006-07-29  1:40             ` Patrick McHardy
2006-07-29  7:25               ` Martin Josefsson
2006-07-27 23:30         ` Patrick McHardy [this message]
2006-07-28 10:23           ` Thomas Graf
2006-07-31 14:46         ` Ville Nuorvala
2006-07-31 15:24           ` Thomas Graf
2006-07-31 18:01             ` Patrick McHardy
2006-07-31 20:01               ` Thomas Graf
2006-07-26 22:00 ` [PATCH 4/5] [IPV6]: Policy Routing Rules Thomas Graf
2006-07-26 22:42   ` David Miller
2006-07-26 23:26   ` David Miller
2006-07-26 23:33   ` David Miller
2006-07-26 23:40     ` David Miller
2006-07-27 22:40       ` [RESEND " Thomas Graf
2006-07-31 14:55         ` Ville Nuorvala
2006-07-26 22:00 ` [PATCH 5/5] [IPV4]: Use Protocol Independant Policy Routing Rules Framework Thomas Graf
2006-07-26 22:43   ` David Miller
2006-07-26 23:47   ` David Miller
2006-07-27 22:40     ` [RESEND " Thomas Graf
2006-07-28  6:10 ` [RFC] Multiple IPV6 Routing Tables & Policy Routing YOSHIFUJI Hideaki / 吉藤英明
2006-07-28  8:23   ` David Miller
2006-07-28 10:32   ` Thomas Graf
2006-07-29  4:27     ` YOSHIFUJI Hideaki / 吉藤英明
2006-07-31 11:01 ` Ville Nuorvala

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44C94CA2.4090900@trash.net \
    --to=kaber@trash.net \
    --cc=anttit@tcs.hut.fi \
    --cc=davem@davemloft.net \
    --cc=jmorris@namei.org \
    --cc=netdev@vger.kernel.org \
    --cc=tgraf@suug.ch \
    --cc=usagi-core@linux-ipv6.org \
    --cc=vnuorval@tcs.hut.fi \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).