All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Serge E. Hallyn" <serue@us.ibm.com>
To: Dan Smith <danms@us.ibm.com>
Cc: containers@lists.osdl.org, netdev@vger.kernel.org
Subject: Re: [PATCH 2/4] C/R: Basic support for network namespaces and devices (v3)
Date: Wed, 10 Feb 2010 13:20:19 -0600	[thread overview]
Message-ID: <20100210192019.GA18879@us.ibm.com> (raw)
In-Reply-To: <87ljf1gemh.fsf@caffeine.danplanet.com>

Quoting Dan Smith (danms@us.ibm.com):
> Guilt dropped the new checkpoint_dev.c file when I switched to the
> newer branch.  Sorry about that.  Updated patch included below.

(Just a few comments on a cursory look.  Will take a closer look
later)

> +int ckpt_netdev_inet_addrs(struct in_device *indev,
> +			   struct ckpt_netdev_addr *_abuf[])
> +{
> +	struct ckpt_netdev_addr *abuf = NULL;
> +	struct in_ifaddr *addr = indev->ifa_list;
> +	int pages = 0;
> +	int addrs = 0;
> +	int max;
> +
> +	read_lock(&dev_base_lock);
> + retry:
> +	if (++pages > 4) {
> +		addrs = -ENOMEM;
> +		goto out;
> +	}
> +
> +	*_abuf = krealloc(abuf, PAGE_SIZE * pages, GFP_KERNEL);

rw_lockt is effectively a spinlock, so I don't think you can sleep
here.

> +	if (*_abuf == NULL) {
> +		addrs = -ENOMEM;
> +		goto out;
> +	}
> +	abuf = *_abuf;
> +
> +	max = (pages * PAGE_SIZE) / sizeof(*abuf);
> +	while (addr) {
> +		abuf[addrs].type = CKPT_NETDEV_ADDR_IPV4; /* Only IPv4 now */
> +		abuf[addrs].inet4_local = addr->ifa_local;
> +		abuf[addrs].inet4_address = addr->ifa_address;
> +		abuf[addrs].inet4_mask = addr->ifa_mask;
> +		abuf[addrs].inet4_broadcast = addr->ifa_broadcast;
> +
> +		addr = addr->ifa_next;
> +		if (++addrs >= max)
> +			goto retry;
> +	}
> +
> + out:
> +	read_unlock(&dev_base_lock);
> +
> +	if (addrs < 0) {
> +		kfree(abuf);
> +		*_abuf = NULL;
> +	}
> +
> +	return addrs;
> +}
> +
> +struct ckpt_hdr_netdev *ckpt_netdev_base(struct ckpt_ctx *ctx,
> +					 struct net_device *dev,
> +					 struct ckpt_netdev_addr *addrs[])
> +{
> +	struct ckpt_hdr_netdev *h;
> +	int ret;
> +
> +	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_NETDEV);
> +	if (!h)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ret = ckpt_netdev_hwaddr(dev, h);
> +	if (ret < 0)
> +		goto out;
> +
> +	*addrs = NULL;
> +	ret = h->inet_addrs = ckpt_netdev_inet_addrs(dev->ip_ptr, addrs);
> +	if (ret < 0)
> +		goto out;
> +
> +	ret = h->netns_ref = checkpoint_obj(ctx, dev->nd_net, CKPT_OBJ_NET_NS);
> + out:
> +	if (ret < 0) {
> +		ckpt_hdr_put(ctx, h);
> +		h = ERR_PTR(ret);
> +		if (*addrs)
> +			kfree(*addrs);
> +	}
> +
> +	return h;
> +}
> +
> +int checkpoint_netdev(struct ckpt_ctx *ctx, void *ptr)
> +{
> +	struct net_device *dev = (struct net_device *)ptr;
> +
> +	if (!dev->netdev_ops->ndo_checkpoint)
> +		return -EINVAL;
> +
> +	ckpt_debug("checkpointing netdev %s\n", dev->name);
> +
> +	return dev->netdev_ops->ndo_checkpoint(ctx, dev);
> +}
> +
> +int checkpoint_netns(struct ckpt_ctx *ctx, void *ptr)
> +{
> +	struct net *net = ptr;
> +	struct net_device *dev;
> +	struct ckpt_hdr_netns *h;
> +	int ret;
> +
> +	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_NET_NS);
> +	if (!h)
> +		return -ENOMEM;
> +
> +	h->this_ref = ckpt_obj_lookup(ctx, net, CKPT_OBJ_NET_NS);
> +	BUG_ON(h->this_ref == 0);
> +
> +	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
> +	if (ret < 0)
> +		goto out;
> +
> +	for_each_netdev(net, dev) {
> +		if (!dev->netdev_ops->ndo_checkpoint)
> +			continue;

Won't the checkpoint_obj() call checkpoint_netdev(), which will return
-EINVAL if ndo_checkpoint is not defined?  But here you skip the
checkpoint_obj() call (which seems wrong to me).  Which do you want to
have happen?

> +		ret = checkpoint_obj(ctx, dev, CKPT_OBJ_NETDEV);
> +		if (ret < 0)
> +			break;
> +	}
> + out:
> +	ckpt_hdr_put(ctx, h);
> +
> +	return ret;
> +}
> +
> +static int restore_in_addrs(struct ckpt_ctx *ctx,
> +			    __u32 naddrs,
> +			    struct net *net,
> +			    struct net_device *dev)
> +{
> +	__u32 i;
> +	int ret = 0;
> +	int len = naddrs * sizeof(struct ckpt_netdev_addr);
> +	struct ckpt_netdev_addr *addrs = NULL;
> +
> +	addrs = kmalloc(len, GFP_KERNEL);
> +	if (!addrs)
> +		return -ENOMEM;
> +
> +	ret = _ckpt_read_buffer(ctx, addrs, len);
> +	if (ret < 0)
> +		goto out;
> +
> +	for (i = 0; i < naddrs; i++) {
> +		struct ckpt_netdev_addr *addr = &addrs[i];
> +		struct ifreq req;
> +		struct sockaddr_in *inaddr;
> +
> +		if (addr->type != CKPT_NETDEV_ADDR_IPV4) {
> +			ret = -EINVAL;
> +			ckpt_err(ctx, ret, "Unsupported netdev addr type %i\n",
> +				 addr->type);
> +			break;
> +		}
> +
> +		ckpt_debug("restoring %s: %x/%x/%x\n", dev->name,
> +			   addr->inet4_address,
> +			   addr->inet4_mask,
> +			   addr->inet4_broadcast);
> +
> +		memcpy(req.ifr_name, dev->name, IFNAMSIZ);
> +
> +		inaddr = (struct sockaddr_in *)&req.ifr_addr;
> +		inaddr->sin_addr.s_addr = addr->inet4_address;
> +		inaddr->sin_family = AF_INET;
> +		ret = __kern_devinet_ioctl(net, SIOCSIFADDR, &req);
> +		if (ret < 0) {
> +			ckpt_err(ctx, ret, "Failed to set address\n");
> +			break;
> +		}
> +
> +		inaddr = (struct sockaddr_in *)&req.ifr_addr;
> +		inaddr->sin_addr.s_addr = addr->inet4_mask;
> +		inaddr->sin_family = AF_INET;
> +		ret = __kern_devinet_ioctl(net, SIOCSIFNETMASK, &req);
> +		if (ret < 0) {
> +			ckpt_err(ctx, ret, "Failed to set netmask\n");
> +			break;
> +		}
> +
> +		inaddr = (struct sockaddr_in *)&req.ifr_addr;
> +		inaddr->sin_addr.s_addr = addr->inet4_broadcast;
> +		inaddr->sin_family = AF_INET;
> +		ret = __kern_devinet_ioctl(net, SIOCSIFBRDADDR, &req);
> +		if (ret < 0) {
> +			ckpt_err(ctx, ret, "Failed to set broadcast\n");
> +			break;
> +		}
> +	}
> +
> + out:
> +	kfree(addrs);
> +
> +	return ret;
> +}
> +
> +static int veth_peer_data(struct sk_buff *skb, char *peer_name)
> +{
> +	struct nlattr *linkdata;
> +	struct ifinfomsg ifm;
> +
> +	linkdata = nla_nest_start(skb, IFLA_INFO_DATA);
> +	if (!linkdata)
> +		return -ENOMEM;
> +
> +	nla_put(skb, VETH_INFO_PEER, sizeof(ifm), &ifm);
> +	nla_put_string(skb, IFLA_IFNAME, peer_name);
> +
> +	nla_nest_end(skb, linkdata);
> +
> +	return 0;
> +}
> +
> +static struct sk_buff *new_link_message(char *this_name, char *peer_name)
> +{
> +	int ret = -ENOMEM;
> +	int flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK;
> +	struct nlmsghdr *nlh;
> +	struct sk_buff *skb;
> +	struct ifinfomsg *ifm;
> +	struct nlattr *linkinfo;
> +
> +	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +	if (!skb)
> +		goto out;
> +
> +	nlh = nlmsg_put(skb, 0, 0, RTM_NEWLINK, sizeof(*ifm), flags);
> +	if (!nlh)
> +		goto out;
> +
> +	ifm = nlmsg_data(nlh);
> +	memset(ifm, 0, sizeof(*ifm));
> +
> +	ret = nla_put_string(skb, IFLA_IFNAME, this_name);
> +	if (ret)
> +		goto out;
> +
> +	ret = -ENOMEM;
> +
> +	linkinfo = nla_nest_start(skb, IFLA_LINKINFO);
> +	if (!linkinfo)
> +		goto out;
> +
> +	if (nla_put_string(skb, IFLA_INFO_KIND, "veth") < 0)
> +		goto out;
> +
> +	ret = veth_peer_data(skb, peer_name);

By hard-coding veth stuff into generic-sounding functions in
net/checkpoint_dev.c you seem to be assuming that only veth will
ever be supported for checkpoint/restart?  what about macvlan?
(Not to mention that eventually we intend to support moving
physical nics into containers)

-serge

  reply	other threads:[~2010-02-10 19:20 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-02-09 21:25 Network device and namespace checkpoint/restart (v2) Dan Smith
2010-02-09 21:25 ` [PATCH 1/4] Add checkpoint and collect hooks to net_device_ops Dan Smith
2010-02-09 21:25 ` [PATCH 2/4] C/R: Basic support for network namespaces and devices (v3) Dan Smith
     [not found]   ` <1265750713-15749-3-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-02-10 17:24     ` Serge E. Hallyn
2010-02-10 17:38       ` Dan Smith
     [not found]         ` <87pr4dgfdz.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2010-02-10 20:01           ` Oren Laadan
2010-02-10 20:30             ` Serge E. Hallyn
2010-02-10 17:55   ` Dan Smith
2010-02-10 19:20     ` Serge E. Hallyn [this message]
2010-02-10 19:30       ` Dan Smith
2010-02-10 20:25         ` Serge E. Hallyn
2010-02-10 20:31           ` Dan Smith
2010-02-10 20:34             ` Serge E. Hallyn
     [not found]     ` <87ljf1gemh.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2010-02-11 11:02       ` Louis Rilling
2010-02-11 15:59         ` Dan Smith
2010-02-11 17:20     ` Oren Laadan
2010-02-11 17:51   ` Oren Laadan
2010-02-09 21:25 ` [PATCH 3/4] Add checkpoint support for veth devices Dan Smith
2010-02-10 17:57   ` Serge E. Hallyn
2010-02-09 21:25 ` [PATCH 4/4] Add loopback checkpoint support Dan Smith
2010-02-11 17:26 ` Network device and namespace checkpoint/restart (v2) Oren Laadan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100210192019.GA18879@us.ibm.com \
    --to=serue@us.ibm.com \
    --cc=containers@lists.osdl.org \
    --cc=danms@us.ibm.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.