All of lore.kernel.org
 help / color / mirror / Atom feed
From: John Fastabend <john.r.fastabend@intel.com>
To: Neil Horman <nhorman@tuxdriver.com>
Cc: "netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	"Love, Robert W" <robert.w.love@intel.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: Re: [PATCH 1/2] net: add network priority cgroup infrastructure (v2)
Date: Thu, 17 Nov 2011 19:17:28 -0800	[thread overview]
Message-ID: <4EC5CE48.8020603@intel.com> (raw)
In-Reply-To: <1321566472-28969-2-git-send-email-nhorman@tuxdriver.com>

On 11/17/2011 1:47 PM, Neil Horman wrote:
> This patch adds in the infrastructure code to create the network priority
> cgroup.  The cgroup, in addition to the standard processes file creates two
> control files:
> 
> 1) prioidx - This is a read-only file that exports the index of this cgroup.
> This is a value that is both arbitrary and unique to a cgroup in this subsystem,
> and is used to index the per-device priority map
> 
> 2) priomap - This is a writeable file.  On read it reports a table of 2-tuples
> <name:priority> where name is the name of a network interface and priority is
> indicates the priority assigned to frames egresessing on the named interface and
> originating from a pid in this cgroup
> 
> This cgroup allows for skb priority to be set prior to a root qdisc getting
> selected. This is benenficial for DCB enabled systems, in that it allows for any
> application to use dcb configured priorities so without application modification
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> CC: Robert Love <robert.w.love@intel.com>
> CC: "David S. Miller" <davem@davemloft.net>
> ---

one more nit... can we convert the rcu_dereference() into rtnl_dereference()
where it is relevant?

/**
 * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL
 * @p: The pointer to read, prior to dereferencing
 *
 * Return the value of the specified RCU-protected pointer, but omit
 * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
 * caller holds RTNL.
 */
#define rtnl_dereference(p)                                     \
        rcu_dereference_protected(p, lockdep_rtnl_is_held())

[...]

> +
> +static void extend_netdev_table(struct net_device *dev, u32 new_len)
> +{
> +       size_t new_size = sizeof(struct netprio_map) +
> +                          ((sizeof(u32) * new_len));
> +       struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
> +       struct netprio_map *old_priomap;
> +       int i;
> +
> +       old_priomap  = rcu_dereference(dev->priomap);
> +

This could be rtnl_dereference(dev->priomap) to annotate that we always
have the rtnl lock here.

> +       if (!new_priomap) {
> +               printk(KERN_WARNING "Unable to alloc new priomap!\n");
> +               return;
> +       }
> +
> +       for (i = 0;
> +            old_priomap && (i < old_priomap->priomap_len);
> +            i++)
> +               new_priomap->priomap[i] = old_priomap->priomap[i];
> +
> +       new_priomap->priomap_len = new_len;
> +
> +       rcu_assign_pointer(dev->priomap, new_priomap);
> +       if (old_priomap)
> +               kfree_rcu(old_priomap, rcu);
> +}
> +
> +static void update_netdev_tables(void)
> +{
> +       struct net_device *dev;
> +       u32 max_len = atomic_read(&max_prioidx);
> +       struct netprio_map *map;
> +
> +       rtnl_lock();
          ^^^^^^^^^^^
> +
> +
> +       for_each_netdev(&init_net, dev) {
> +               map = rcu_dereference(dev->priomap);

same here rtnl_dereference(dev->priomap);

> +               if ((!map) ||
> +                   (map->priomap_len < max_len))
> +                       extend_netdev_table(dev, max_len);
> +       }
> +
> +       rtnl_unlock();
> +}
> +
> +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
> +                                                struct cgroup *cgrp)
> +{
> +       struct cgroup_netprio_state *cs;
> +       int ret;
> +
> +       cs = kzalloc(sizeof(*cs), GFP_KERNEL);
> +       if (!cs)
> +               return ERR_PTR(-ENOMEM);
> +
> +       if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
> +               kfree(cs);
> +               return ERR_PTR(-EINVAL);
> +       }
> +
> +       ret = get_prioidx(&cs->prioidx);
> +       if (ret != 0) {
> +               printk(KERN_WARNING "No space in priority index array\n");
> +               kfree(cs);
> +               return ERR_PTR(ret);
> +       }
> +
> +       return &cs->css;
> +}
> +
> +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> +       struct cgroup_netprio_state *cs;
> +       struct net_device *dev;
> +       struct netprio_map *map;
> +
> +       cs = cgrp_netprio_state(cgrp);
> +       rtnl_lock();
> +       for_each_netdev(&init_net, dev) {
> +               map = rcu_dereference(dev->priomap);

map = rtnl_dereference(dev->priomap)

> +               if (map)
> +                       map->priomap[cs->prioidx] = 0;
> +       }
> +       rtnl_unlock();
> +       put_prioidx(cs->prioidx);
> +       kfree(cs);
> +}
> +
> +static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
> +{
> +       return (u64)cgrp_netprio_state(cgrp)->prioidx;
> +}
> +
> +static int read_priomap(struct cgroup *cont, struct cftype *cft,
> +                       struct cgroup_map_cb *cb)
> +{
> +       struct net_device *dev;
> +       u32 prioidx = cgrp_netprio_state(cont)->prioidx;
> +       u32 priority;
> +       struct netprio_map *map;
> +
> +       rcu_read_lock();
> +
> +       for_each_netdev_rcu(&init_net, dev) {
> +               map = rcu_dereference(dev->priomap);
> +               priority = map ? map->priomap[prioidx] : 0;
> +               cb->fill(cb, dev->name, priority);
> +       }
> +       rcu_read_unlock();
> +       return 0;
> +}
> +

[...]

> +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> +       return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
> +}
> +
> +static int netprio_device_event(struct notifier_block *unused,
> +                               unsigned long event, void *ptr)
> +{
> +       struct net_device *dev = ptr;
> +       struct netprio_map *old;
> +       u32 max_len = atomic_read(&max_prioidx);
> +
> +       old = rcu_dereference_protected(dev->priomap, 1);

This is protected because of the rtnl lock so use,

old = rtnl_dereference(dev->priomap);

> +       /*
> +        * Note this is called with rtnl_lock held so we have update side
> +        * protection on our rcu assignments
> +        */
> +
> +       switch (event) {
> +
> +       case NETDEV_REGISTER:
> +               if (max_len)
> +                       extend_netdev_table(dev, max_len);
> +               break;
> +       case NETDEV_UNREGISTER:
> +               rcu_assign_pointer(dev->priomap, NULL);
> +               if (old)
> +                       kfree_rcu(old, rcu);
> +               break;
> +       }
> +       return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block netprio_device_notifier = {
> +       .notifier_call = netprio_device_event
> +};
> +

I can roll an update if you want, just let me know.

Thanks,
John

  reply	other threads:[~2011-11-18  3:17 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-16 20:51 [PATCH 0/2] net: Add network priority cgroup Neil Horman
2011-11-16 20:51 ` [PATCH 1/2] net: add network priority cgroup infrastructure Neil Horman
2011-11-17  9:29   ` WANG Cong
2011-11-17 11:25   ` John Fastabend
2011-11-17 11:56     ` Neil Horman
2011-11-16 20:51 ` [PATCH 2/2] net: add documentation for net_prio cgroups Neil Horman
2011-11-17 21:47 ` [PATCH 0/2] net: Add network priority cgroup (v2) Neil Horman
2011-11-17 21:47   ` [PATCH 1/2] net: add network priority cgroup infrastructure (v2) Neil Horman
2011-11-18  3:17     ` John Fastabend [this message]
2011-11-18 11:50       ` Neil Horman
2011-11-17 21:47   ` [PATCH 2/2] net: add documentation for net_prio cgroups (v2) Neil Horman
2011-11-18 16:13 ` [PATCH 0/2] net: Add network priority cgroup (v3) Neil Horman
2011-11-18 16:13   ` [PATCH 1/2] net: add network priority cgroup infrastructure (v3) Neil Horman
2011-11-21 20:39     ` David Miller
2011-11-21 20:43       ` Neil Horman
2011-11-18 16:13   ` [PATCH 2/2] net: add documentation for net_prio cgroups (v3) Neil Horman
2011-11-22 15:10 ` [PATCH 0/2] net: Add network priority cgroup (v4) Neil Horman
2011-11-22 15:10   ` [PATCH 1/2] net: add network priority cgroup infrastructure (v4) Neil Horman
2011-11-22 15:10   ` [PATCH 2/2] net: add documentation for net_prio cgroups (v4) Neil Horman
2011-11-22 20:23   ` [PATCH 0/2] net: Add network priority cgroup (v4) David Miller
2011-11-22 20:39     ` Neil Horman
2011-11-22 20:45       ` David Miller
2011-11-22 21:00         ` Neil Horman
2011-11-23 10:19           ` Kirill Smelkov
2011-11-23 11:49             ` Neil Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4EC5CE48.8020603@intel.com \
    --to=john.r.fastabend@intel.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=nhorman@tuxdriver.com \
    --cc=robert.w.love@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.