* C/R: Fixup IPv6 support
@ 2010-03-24 19:40 Dan Smith
[not found] ` <1269459625-21033-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Dan Smith @ 2010-03-24 19:40 UTC (permalink / raw)
To: containers-qjLDD68F18O7TbgM5vRIOg
This set enables checkpointing network interfaces that have IPv6
addresses, and brings back the bits necessary for IPv6 sockets
to work.
With this, I can migrate an sshd with active IPv4 and IPv6 connections.
It also makes general testing easier because sshd will open an IPv6
socket if the kernel supports it, which, until this patch, meant
that IPv6 had to be disabled in the kernel for sshd to be migratable.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 1/2] C/R: Support for IPv6 addresses on network devices
[not found] ` <1269459625-21033-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-03-24 19:40 ` Dan Smith
[not found] ` <1269459625-21033-2-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-03-24 19:40 ` [PATCH 2/2] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag Dan Smith
1 sibling, 1 reply; 12+ messages in thread
From: Dan Smith @ 2010-03-24 19:40 UTC (permalink / raw)
To: containers-qjLDD68F18O7TbgM5vRIOg
Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
include/linux/checkpoint.h | 2 +-
include/linux/checkpoint_hdr.h | 8 ++
net/checkpoint_dev.c | 252 +++++++++++++++++++++++++++++++---------
3 files changed, 208 insertions(+), 54 deletions(-)
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index efbc049..a8131bd 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -129,7 +129,7 @@ extern void *restore_netdev(struct ckpt_ctx *ctx);
extern int ckpt_netdev_in_init_netns(struct ckpt_ctx *ctx,
struct net_device *dev);
-extern int ckpt_netdev_inet_addrs(struct in_device *indev,
+extern int ckpt_netdev_inet_addrs(struct net_device *dev,
struct ckpt_netdev_addr *list[]);
extern int ckpt_netdev_hwaddr(struct net_device *dev,
struct ckpt_hdr_netdev *h);
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 01553b4..913d76d 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -802,6 +802,7 @@ struct ckpt_hdr_netdev {
enum ckpt_netdev_addr_types {
CKPT_NETDEV_ADDR_IPV4,
+ CKPT_NETDEV_ADDR_IPV6,
};
struct ckpt_netdev_addr {
@@ -813,6 +814,13 @@ struct ckpt_netdev_addr {
__be32 inet4_mask;
__be32 inet4_broadcast;
};
+ struct {
+ __be32 inet6_addr[4];
+ __u32 inet6_prefix_len;
+ __u32 inet6_valid_lft;
+ __u32 inet6_prefered_lft;
+ __u16 inet6_scope;
+ };
} __attribute__((aligned(8)));
} __attribute__((aligned(8)));
diff --git a/net/checkpoint_dev.c b/net/checkpoint_dev.c
index 2bb3d4d..173c2ea 100644
--- a/net/checkpoint_dev.c
+++ b/net/checkpoint_dev.c
@@ -18,8 +18,11 @@
#include <linux/checkpoint_hdr.h>
#include <linux/deferqueue.h>
+#include <net/if_inet6.h>
+#include <net/ipv6.h>
#include <net/net_namespace.h>
#include <net/sch_generic.h>
+#include <net/addrconf.h>
struct veth_newlink {
char *peer;
@@ -47,6 +50,24 @@ static int __kern_devinet_ioctl(struct net *net, unsigned int cmd, void *arg)
return ret;
}
+#ifdef CONFIG_IPV6
+static int __kern_addrconf(struct net *net, unsigned int cmd, void *arg)
+{
+ mm_segment_t fs;
+ int ret;
+
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ if (cmd == SIOCSIFADDR)
+ ret = addrconf_add_ifaddr(net, arg);
+ else
+ ret = -EINVAL;
+ set_fs(fs);
+
+ return ret;
+}
+#endif
+
static int __kern_dev_ioctl(struct net *net, unsigned int cmd, void *arg)
{
mm_segment_t fs;
@@ -149,11 +170,81 @@ int ckpt_netdev_hwaddr(struct net_device *dev, struct ckpt_hdr_netdev *h)
return 0;
}
-int ckpt_netdev_inet_addrs(struct in_device *indev,
+static int ckpt_netdev_inet4_addrs(struct in_device *indev,
+ int index, int max,
+ struct ckpt_netdev_addr *abuf)
+{
+ struct in_ifaddr *addr = indev->ifa_list;
+
+ while (addr) {
+ abuf[index].type = CKPT_NETDEV_ADDR_IPV4;
+ abuf[index].inet4_local = htonl(addr->ifa_local);
+ abuf[index].inet4_address = htonl(addr->ifa_address);
+ abuf[index].inet4_mask = htonl(addr->ifa_mask);
+ abuf[index].inet4_broadcast = htonl(addr->ifa_broadcast);
+
+ addr = addr->ifa_next;
+ if (++index >= max)
+ return -E2BIG;
+ }
+
+ return index;
+}
+
+#ifdef CONFIG_IPV6
+
+#define __BYTE_ORDER_COPY(op, dst, src) \
+ do { \
+ int i; \
+ for (i = 0; i < 4; i++) \
+ dst[i] = op(src[i]); \
+ } while (0);
+
+#define HTON_IPV6(dst, src) __BYTE_ORDER_COPY(htonl, dst, src)
+#define NTOH_IPV6(dst, src) __BYTE_ORDER_COPY(ntohl, dst, src)
+
+static int ckpt_netdev_inet6_addrs(struct inet6_dev *indev,
+ int index, int max,
+ struct ckpt_netdev_addr *abuf)
+{
+ struct inet6_ifaddr *addr = indev->addr_list;
+
+ while (addr) {
+ abuf[index].type = CKPT_NETDEV_ADDR_IPV6;
+
+ HTON_IPV6(abuf[index].inet6_addr, addr->addr.in6_u.u6_addr32);
+
+ ckpt_debug("Checkpointed inet6: %x:%x:%x:%x\n",
+ abuf[index].inet6_addr[0],
+ abuf[index].inet6_addr[1],
+ abuf[index].inet6_addr[2],
+ abuf[index].inet6_addr[3]);
+
+ abuf[index].inet6_prefix_len = addr->prefix_len;
+ abuf[index].inet6_valid_lft = addr->valid_lft;
+ abuf[index].inet6_prefered_lft = addr->prefered_lft;
+ abuf[index].inet6_scope = addr->scope;
+
+ addr = addr->if_next;
+ if (++index >= max)
+ return -E2BIG;
+ }
+
+ return index;
+}
+#else
+static int ckpt_netdev_inet6_addrs(struct inet6_dev *indev,
+ int index, int max,
+ struct ckpt_netdev_addr *abuf)
+{
+ return -ENOSYS;
+}
+#endif
+
+int ckpt_netdev_inet_addrs(struct net_device *dev,
struct ckpt_netdev_addr *_abuf[])
{
struct ckpt_netdev_addr *abuf = NULL;
- struct in_ifaddr *addr = indev->ifa_list;
int addrs = 0;
int max = 32;
@@ -167,21 +258,21 @@ int ckpt_netdev_inet_addrs(struct in_device *indev,
read_lock(&dev_base_lock);
- while (addr) {
- abuf[addrs].type = CKPT_NETDEV_ADDR_IPV4; /* Only IPv4 now */
- abuf[addrs].inet4_local = htonl(addr->ifa_local);
- abuf[addrs].inet4_address = htonl(addr->ifa_address);
- abuf[addrs].inet4_mask = htonl(addr->ifa_mask);
- abuf[addrs].inet4_broadcast = htonl(addr->ifa_broadcast);
+ addrs = 0;
- addr = addr->ifa_next;
- if (++addrs >= max) {
- read_unlock(&dev_base_lock);
- max *= 2;
- goto retry;
- }
- }
+ addrs = ckpt_netdev_inet4_addrs(dev->ip_ptr, addrs, max, abuf);
+ if (addrs == -E2BIG) {
+ read_unlock(&dev_base_lock);
+ goto retry;
+ } else if (addrs < 0)
+ goto unlock;
+ addrs = ckpt_netdev_inet6_addrs(dev->ip6_ptr, addrs, max, abuf);
+ if (addrs == -E2BIG) {
+ read_unlock(&dev_base_lock);
+ goto retry;
+ }
+ unlock:
read_unlock(&dev_base_lock);
out:
if (addrs < 0) {
@@ -208,7 +299,7 @@ struct ckpt_hdr_netdev *ckpt_netdev_base(struct ckpt_ctx *ctx,
goto out;
*addrs = NULL;
- ret = h->inet_addrs = ckpt_netdev_inet_addrs(dev->ip_ptr, addrs);
+ ret = h->inet_addrs = ckpt_netdev_inet_addrs(dev, addrs);
if (ret < 0)
goto out;
@@ -278,6 +369,93 @@ int checkpoint_netns(struct ckpt_ctx *ctx, void *ptr)
return ret;
}
+static int restore_inet4_addr(struct ckpt_ctx *ctx,
+ struct net_device *dev,
+ struct net *net,
+ struct ckpt_netdev_addr *addr)
+{
+ struct ifreq req;
+ struct sockaddr_in *inaddr;
+ int ret;
+
+ ckpt_debug("restoring %s: %x/%x/%x\n",
+ dev->name,
+ addr->inet4_address,
+ addr->inet4_mask,
+ addr->inet4_broadcast);
+
+ memcpy(req.ifr_name, dev->name, IFNAMSIZ);
+
+ inaddr = (struct sockaddr_in *)&req.ifr_addr;
+ inaddr->sin_addr.s_addr = ntohl(addr->inet4_address);
+ inaddr->sin_family = AF_INET;
+ ret = __kern_devinet_ioctl(net, SIOCSIFADDR, &req);
+ if (ret < 0) {
+ ckpt_err(ctx, ret, "Failed to set address\n");
+ return ret;
+ }
+
+ inaddr = (struct sockaddr_in *)&req.ifr_addr;
+ inaddr->sin_addr.s_addr = ntohl(addr->inet4_mask);
+ inaddr->sin_family = AF_INET;
+ ret = __kern_devinet_ioctl(net, SIOCSIFNETMASK, &req);
+ if (ret < 0) {
+ ckpt_err(ctx, ret, "Failed to set netmask\n");
+ return ret;
+ }
+
+ inaddr = (struct sockaddr_in *)&req.ifr_addr;
+ inaddr->sin_addr.s_addr = ntohl(addr->inet4_broadcast);
+ inaddr->sin_family = AF_INET;
+ ret = __kern_devinet_ioctl(net, SIOCSIFBRDADDR, &req);
+ if (ret < 0) {
+ ckpt_err(ctx, ret, "Failed to set broadcast\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_IPV6
+static int restore_inet6_addr(struct ckpt_ctx *ctx,
+ struct net_device *dev,
+ struct net *net,
+ struct ckpt_netdev_addr *addr)
+{
+ struct in6_ifreq req;
+ int ret;
+
+ ckpt_debug("restoring %s: %x:%x:%x:%x/%i\n",
+ dev->name,
+ addr->inet6_addr[0],
+ addr->inet6_addr[1],
+ addr->inet6_addr[2],
+ addr->inet6_addr[3],
+ addr->inet6_prefix_len);
+
+ req.ifr6_ifindex = dev->ifindex;
+ NTOH_IPV6(req.ifr6_addr.in6_u.u6_addr32, &addr->inet6_addr);
+ req.ifr6_prefixlen = addr->inet6_prefix_len;
+
+ ret = __kern_addrconf(net, SIOCSIFADDR, &req);
+ if (ret == -EEXIST)
+ ret = 0;
+ else if (ret < 0)
+ ckpt_err(ctx, ret, "Failed to set address");
+
+ return ret;
+}
+#else
+static int restore_inet6_addr(struct ckpt_ctx *ctx,
+ struct net_device *dev,
+ struct net *net,
+ struct ckpt_netdev_addr *addr)
+{
+ ckpt_err(ctx, -ENOSYS, "IPv6 not supported");
+ return -ENOSYS;
+}
+#endif
+
static int restore_in_addrs(struct ckpt_ctx *ctx,
__u32 naddrs,
struct net *net,
@@ -294,49 +472,17 @@ static int restore_in_addrs(struct ckpt_ctx *ctx,
for (i = 0; i < naddrs; i++) {
struct ckpt_netdev_addr *addr = &addrs[i];
- struct ifreq req;
- struct sockaddr_in *inaddr;
- if (addr->type != CKPT_NETDEV_ADDR_IPV4) {
+ if (addr->type == CKPT_NETDEV_ADDR_IPV4)
+ ret = restore_inet4_addr(ctx, dev, net, addr);
+ else if (addr->type == CKPT_NETDEV_ADDR_IPV6)
+ ret = restore_inet6_addr(ctx, dev, net, addr);
+ else {
ret = -EINVAL;
ckpt_err(ctx, ret, "Unsupported netdev addr type %i\n",
addr->type);
break;
}
-
- ckpt_debug("restoring %s: %x/%x/%x\n", dev->name,
- addr->inet4_address,
- addr->inet4_mask,
- addr->inet4_broadcast);
-
- memcpy(req.ifr_name, dev->name, IFNAMSIZ);
-
- inaddr = (struct sockaddr_in *)&req.ifr_addr;
- inaddr->sin_addr.s_addr = ntohl(addr->inet4_address);
- inaddr->sin_family = AF_INET;
- ret = __kern_devinet_ioctl(net, SIOCSIFADDR, &req);
- if (ret < 0) {
- ckpt_err(ctx, ret, "Failed to set address\n");
- break;
- }
-
- inaddr = (struct sockaddr_in *)&req.ifr_addr;
- inaddr->sin_addr.s_addr = ntohl(addr->inet4_mask);
- inaddr->sin_family = AF_INET;
- ret = __kern_devinet_ioctl(net, SIOCSIFNETMASK, &req);
- if (ret < 0) {
- ckpt_err(ctx, ret, "Failed to set netmask\n");
- break;
- }
-
- inaddr = (struct sockaddr_in *)&req.ifr_addr;
- inaddr->sin_addr.s_addr = ntohl(addr->inet4_broadcast);
- inaddr->sin_family = AF_INET;
- ret = __kern_devinet_ioctl(net, SIOCSIFBRDADDR, &req);
- if (ret < 0) {
- ckpt_err(ctx, ret, "Failed to set broadcast\n");
- break;
- }
}
out:
--
1.6.2.5
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/2] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag
[not found] ` <1269459625-21033-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-03-24 19:40 ` [PATCH 1/2] C/R: Support for IPv6 addresses on network devices Dan Smith
@ 2010-03-24 19:40 ` Dan Smith
1 sibling, 0 replies; 12+ messages in thread
From: Dan Smith @ 2010-03-24 19:40 UTC (permalink / raw)
To: containers-qjLDD68F18O7TbgM5vRIOg
The first item is a result of sockaddr_in6 being larger than the base
sockaddr structure, so the existing address fields did not reserve enough
space in the checkpoint header.
The second comes into play when things (like sshd) bind to INADDR6_ANY,
set the "ipv6only" socket flag and then bind an IPv4 socket to the same
port.
Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
include/linux/checkpoint_hdr.h | 13 +++++++++++--
net/ipv4/checkpoint.c | 34 ++++++++++++++++++++--------------
net/ipv6/af_inet6.c | 2 ++
3 files changed, 33 insertions(+), 16 deletions(-)
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 913d76d..0e8eb8b 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -756,12 +756,21 @@ struct ckpt_hdr_socket_inet {
struct in6_addr saddr;
struct in6_addr rcv_saddr;
struct in6_addr daddr;
+ __u8 ipv6only;
} inet6 __attribute__ ((aligned(8)));
__u32 laddr_len;
__u32 raddr_len;
- struct sockaddr_in laddr;
- struct sockaddr_in raddr;
+ union {
+ struct sockaddr laddr;
+ struct sockaddr_in laddr4;
+ struct sockaddr_in6 laddr6;
+ };
+ union {
+ struct sockaddr raddr;
+ struct sockaddr_in raddr4;
+ struct sockaddr_in6 raddr6;
+ };
} __attribute__((aligned(8)));
struct ckpt_hdr_file_socket {
diff --git a/net/ipv4/checkpoint.c b/net/ipv4/checkpoint.c
index b4024e7..57b185d 100644
--- a/net/ipv4/checkpoint.c
+++ b/net/ipv4/checkpoint.c
@@ -190,12 +190,14 @@ static int sock_inet_restore_connection(struct sock *sk,
struct inet_sock *inet = inet_sk(sk);
int tcp_gso = sk->sk_family == AF_INET ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
- inet->inet_daddr = hh->raddr.sin_addr.s_addr;
- inet->inet_saddr = hh->laddr.sin_addr.s_addr;
- inet->inet_rcv_saddr = inet->inet_saddr;
+ if (sk->sk_family == AF_INET) {
+ inet->inet_daddr = hh->raddr4.sin_addr.s_addr;
+ inet->inet_saddr = hh->laddr4.sin_addr.s_addr;
+ inet->inet_rcv_saddr = inet->inet_saddr;
- inet->inet_dport = hh->raddr.sin_port;
- inet->inet_sport = hh->laddr.sin_port;
+ inet->inet_dport = hh->raddr4.sin_port;
+ inet->inet_sport = hh->laddr4.sin_port;
+ }
if (sk->sk_protocol == IPPROTO_TCP)
sk->sk_gso_type = tcp_gso;
@@ -266,6 +268,7 @@ static int sock_inet_cptrst(struct ckpt_ctx *ctx,
ipv6_addr_copy(&inet6->rcv_saddr, &hh->inet6.rcv_saddr);
ipv6_addr_copy(&inet6->daddr, &hh->inet6.daddr);
}
+ CKPT_COPY(op, hh->inet6.ipv6only, inet6->ipv6only);
}
return ret;
@@ -281,8 +284,8 @@ int inet_checkpoint(struct ckpt_ctx *ctx, struct socket *sock)
return -EINVAL;
ret = ckpt_sock_getnames(ctx, sock,
- (struct sockaddr *)&in->laddr, &in->laddr_len,
- (struct sockaddr *)&in->raddr, &in->raddr_len);
+ &in->laddr, &in->laddr_len,
+ &in->raddr, &in->raddr_len);
if (ret)
goto out;
@@ -387,19 +390,19 @@ static int inet_precheck(struct socket *sock, struct ckpt_hdr_socket_inet *in)
__u8 nonagle_mask = TCP_NAGLE_OFF | TCP_NAGLE_CORK | TCP_NAGLE_PUSH;
__u8 ecn_mask = TCP_ECN_OK | TCP_ECN_QUEUE_CWR | TCP_ECN_DEMAND_CWR;
- if ((htons(in->laddr.sin_port) < PROT_SOCK) &&
+ if ((htons(in->laddr4.sin_port) < PROT_SOCK) &&
!capable(CAP_NET_BIND_SERVICE)) {
ckpt_debug("unable to bind to port %hu\n",
- htons(in->laddr.sin_port));
+ htons(in->laddr4.sin_port));
return -EINVAL;
}
- if (in->laddr_len > sizeof(struct sockaddr_in)) {
+ if (in->laddr_len > sizeof(in->laddr6)) {
ckpt_debug("laddr_len is too big\n");
return -EINVAL;
}
- if (in->raddr_len > sizeof(struct sockaddr_in)) {
+ if (in->raddr_len > sizeof(in->laddr6)) {
ckpt_debug("raddr_len is too big\n");
return -EINVAL;
}
@@ -498,13 +501,16 @@ int inet_restore(struct ckpt_ctx *ctx,
((h->sock.state == TCP_CLOSE) && (in->laddr_len > 0))) {
sock->sk->sk_reuse = 2;
inet_sk(sock->sk)->freebind = 1;
- ret = sock->ops->bind(sock,
- (struct sockaddr *)&in->laddr,
- in->laddr_len);
+ ret = sock->ops->bind(sock, &in->laddr, in->laddr_len);
ckpt_debug("inet bind: %i\n", ret);
if (ret < 0)
goto out;
+ if (in->inet6.ipv6only) {
+ struct ipv6_pinfo *np = inet6_sk(sock->sk);
+ np->ipv6only = 1;
+ }
+
if (h->sock.state == TCP_LISTEN) {
ret = sock->ops->listen(sock, h->sock.backlog);
ckpt_debug("inet listen: %i\n", ret);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 12e69d3..9acb55a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -523,6 +523,8 @@ const struct proto_ops inet6_stream_ops = {
.mmap = sock_no_mmap,
.sendpage = tcp_sendpage,
.splice_read = tcp_splice_read,
+ .checkpoint = inet_checkpoint,
+ .restore = inet_restore,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
--
1.6.2.5
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] C/R: Support for IPv6 addresses on network devices
[not found] ` <1269459625-21033-2-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-03-25 20:36 ` Brian Haley
[not found] ` <4BABC967.5090908-VXdhtT5mjnY@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Brian Haley @ 2010-03-25 20:36 UTC (permalink / raw)
To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg
Hi Dan,
Dan Smith wrote:
> struct ckpt_netdev_addr {
> @@ -813,6 +814,13 @@ struct ckpt_netdev_addr {
> __be32 inet4_mask;
> __be32 inet4_broadcast;
> };
> + struct {
> + __be32 inet6_addr[4];
It might be easier to just make this an in6_addr.
> + __u32 inet6_prefix_len;
> + __u32 inet6_valid_lft;
> + __u32 inet6_prefered_lft;
> + __u16 inet6_scope;
> + };
You'll also need to save "flags", without it I think all your addresses
would show up as "permanent" because it will look like they were added
by user-space tools. Actually, using the SIOCSIFADDR path that might
happen anyways, which wouldn't be correct.
> +#ifdef CONFIG_IPV6
> +
> +#define __BYTE_ORDER_COPY(op, dst, src) \
> + do { \
> + int i; \
> + for (i = 0; i < 4; i++) \
> + dst[i] = op(src[i]); \
> + } while (0);
> +
> +#define HTON_IPV6(dst, src) __BYTE_ORDER_COPY(htonl, dst, src)
> +#define NTOH_IPV6(dst, src) __BYTE_ORDER_COPY(ntohl, dst, src)
Yuck, this is ugly, use ipv6_addr_copy() please.
> +static int ckpt_netdev_inet6_addrs(struct inet6_dev *indev,
> + int index, int max,
> + struct ckpt_netdev_addr *abuf)
> +{
> + struct inet6_ifaddr *addr = indev->addr_list;
> +
> + while (addr) {
> + abuf[index].type = CKPT_NETDEV_ADDR_IPV6;
> +
> + HTON_IPV6(abuf[index].inet6_addr, addr->addr.in6_u.u6_addr32);
Use ipv6_addr_copy().
> + ckpt_debug("Checkpointed inet6: %x:%x:%x:%x\n",
> + abuf[index].inet6_addr[0],
> + abuf[index].inet6_addr[1],
> + abuf[index].inet6_addr[2],
> + abuf[index].inet6_addr[3]);
There was a new format specifier added to the kernel print routines
called "%pI6" for printing IPv6 addresses.
> + abuf[index].inet6_prefix_len = addr->prefix_len;
> + abuf[index].inet6_valid_lft = addr->valid_lft;
> + abuf[index].inet6_prefered_lft = addr->prefered_lft;
> + abuf[index].inet6_scope = addr->scope;
abuf[index].inet6_flags = addr->flags;
> +int ckpt_netdev_inet_addrs(struct net_device *dev,
> struct ckpt_netdev_addr *_abuf[])
> {
> struct ckpt_netdev_addr *abuf = NULL;
> - struct in_ifaddr *addr = indev->ifa_list;
> int addrs = 0;
You can drop this initialization since you're now doing it below.
> @@ -167,21 +258,21 @@ int ckpt_netdev_inet_addrs(struct in_device *indev,
>
> read_lock(&dev_base_lock);
>
> - while (addr) {
> - abuf[addrs].type = CKPT_NETDEV_ADDR_IPV4; /* Only IPv4 now */
> - abuf[addrs].inet4_local = htonl(addr->ifa_local);
> - abuf[addrs].inet4_address = htonl(addr->ifa_address);
> - abuf[addrs].inet4_mask = htonl(addr->ifa_mask);
> - abuf[addrs].inet4_broadcast = htonl(addr->ifa_broadcast);
> + addrs = 0;
>
> - addr = addr->ifa_next;
> - if (++addrs >= max) {
> - read_unlock(&dev_base_lock);
> - max *= 2;
> - goto retry;
> - }
> - }
> + addrs = ckpt_netdev_inet4_addrs(dev->ip_ptr, addrs, max, abuf);
> + if (addrs == -E2BIG) {
> + read_unlock(&dev_base_lock);
> + goto retry;
> + } else if (addrs < 0)
> + goto unlock;
When can this return value be < 0 other than -E2BIG?
> +static int restore_inet4_addr(struct ckpt_ctx *ctx,
> + struct net_device *dev,
> + struct net *net,
> + struct ckpt_netdev_addr *addr)
> +{
> + struct ifreq req;
> + struct sockaddr_in *inaddr;
> + int ret;
> +
> + ckpt_debug("restoring %s: %x/%x/%x\n",
> + dev->name,
> + addr->inet4_address,
> + addr->inet4_mask,
> + addr->inet4_broadcast);
There's a "%pI4" for IPv4 addresses now.
> +#ifdef CONFIG_IPV6
> +static int restore_inet6_addr(struct ckpt_ctx *ctx,
> + struct net_device *dev,
> + struct net *net,
> + struct ckpt_netdev_addr *addr)
> +{
> + struct in6_ifreq req;
> + int ret;
> +
> + ckpt_debug("restoring %s: %x:%x:%x:%x/%i\n",
> + dev->name,
> + addr->inet6_addr[0],
> + addr->inet6_addr[1],
> + addr->inet6_addr[2],
> + addr->inet6_addr[3],
> + addr->inet6_prefix_len);
%pI6
> +
> + req.ifr6_ifindex = dev->ifindex;
> + NTOH_IPV6(req.ifr6_addr.in6_u.u6_addr32, &addr->inet6_addr);
ipv6_addr_copy()
> + req.ifr6_prefixlen = addr->inet6_prefix_len;
> +
> + ret = __kern_addrconf(net, SIOCSIFADDR, &req);
> + if (ret == -EEXIST)
> + ret = 0;
> + else if (ret < 0)
> + ckpt_err(ctx, ret, "Failed to set address");
> +
> + return ret;
> +}
I am still worried about this. When an interface is activated and
the IPv6 module is loaded, it's going to generate a link-local address
right away. Then it will auto-configure an address based on information
in a received router advertisement. Is this code going to conflict
with that? Meaning, will you have two link-locals on this interface
once the system is running?
Also, moving these addresses around is going to increase the likelihood
of a duplicate address (link-locals are typically based off the MAC, then
the global uses the same lower 64-bits). Maybe only saving/restoring
"permanent" addresses is correct? I could be wrong since I don't know
the typical use case here, but assume migrating a VM.
There's also going to be some conflict when you get to adding the
Multicast address back, as adding a "normal" IPv6 address is usually
going to add at least one Multicast address in the process.
And what about tunnel devices? Maybe you already cover that somewhere
else?
And I won't harp on Anycast and Privacy addresses, I know this was
only a first pass :)
-Brian
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] C/R: Support for IPv6 addresses on network devices
[not found] ` <4BABC967.5090908-VXdhtT5mjnY@public.gmane.org>
@ 2010-03-25 21:01 ` Dan Smith
[not found] ` <87aatwf74u.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Dan Smith @ 2010-03-25 21:01 UTC (permalink / raw)
To: Brian Haley; +Cc: containers-qjLDD68F18O7TbgM5vRIOg
>> +#define HTON_IPV6(dst, src) __BYTE_ORDER_COPY(htonl, dst, src)
>> +#define NTOH_IPV6(dst, src) __BYTE_ORDER_COPY(ntohl, dst, src)
BH> Yuck, this is ugly, use ipv6_addr_copy() please.
So, I started with ipv6_addr_copy(), but that leaves the addresses in
the host endianness within the checkpoint image, right? Dave Miller
had previously asked to have the IPv4 addresses htonl()'d before being
written to the image, so I was doing the same here.
BH> There was a new format specifier added to the kernel print
BH> routines called "%pI6" for printing IPv6 addresses.
Neato. I didn't actually mean to leave those debug statements in
there, but I guess I will so I can use %pI6 :)
BH> When can this return value be < 0 other than -E2BIG?
It can't at the moment, but I figured I should catch it here now so
that the two checkpoint functions could throw an error later and not
have it go unnoticed.
>> + ret = __kern_addrconf(net, SIOCSIFADDR, &req);
>> + if (ret == -EEXIST)
>> + ret = 0;
>> + else if (ret < 0)
>> + ckpt_err(ctx, ret, "Failed to set address");
>> +
>> + return ret;
>> +}
BH> I am still worried about this. When an interface is activated and
BH> the IPv6 module is loaded, it's going to generate a link-local address
BH> right away. Then it will auto-configure an address based on information
BH> in a received router advertisement. Is this code going to conflict
BH> with that? Meaning, will you have two link-locals on this interface
BH> once the system is running?
I have to claim IPv6 ignorance here :)
BH> Also, moving these addresses around is going to increase the
BH> likelihood of a duplicate address (link-locals are typically based
BH> off the MAC, then the global uses the same lower 64-bits). Maybe
BH> only saving/restoring "permanent" addresses is correct?
Sounds good to me :)
BH> There's also going to be some conflict when you get to adding the
BH> Multicast address back, as adding a "normal" IPv6 address is
BH> usually going to add at least one Multicast address in the
BH> process.
/me refers to his previous statement of ignorance.
BH> And what about tunnel devices? Maybe you already cover that
BH> somewhere else?
Like sit devices? If so, I punt on those right now (somewhere else).
I guess I need to add a patch to this set to save/restore their
addresses and attributes as well.
BH> And I won't harp on Anycast and Privacy addresses, I know this was
BH> only a first pass :)
I'll have to figure out how to test anycast, privacy, multicast and
link local addresses, as well as tunnel devices so that I can move
forward with some of these things.
Man, the world seems simpler with 32-bit addresses :)
Thanks Brian!
--
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] C/R: Support for IPv6 addresses on network devices
[not found] ` <87aatwf74u.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
@ 2010-03-25 21:28 ` Brian Haley
[not found] ` <4BABD594.1020301-VXdhtT5mjnY@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Brian Haley @ 2010-03-25 21:28 UTC (permalink / raw)
To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg
Dan Smith wrote:
>>> +#define HTON_IPV6(dst, src) __BYTE_ORDER_COPY(htonl, dst, src)
>>> +#define NTOH_IPV6(dst, src) __BYTE_ORDER_COPY(ntohl, dst, src)
>
> BH> Yuck, this is ugly, use ipv6_addr_copy() please.
>
> So, I started with ipv6_addr_copy(), but that leaves the addresses in
> the host endianness within the checkpoint image, right? Dave Miller
> had previously asked to have the IPv4 addresses htonl()'d before being
> written to the image, so I was doing the same here.
Ok, I don't remember Dave's email.
> BH> I am still worried about this. When an interface is activated and
> BH> the IPv6 module is loaded, it's going to generate a link-local address
> BH> right away. Then it will auto-configure an address based on information
> BH> in a received router advertisement. Is this code going to conflict
> BH> with that? Meaning, will you have two link-locals on this interface
> BH> once the system is running?
>
> I have to claim IPv6 ignorance here :)
Well, what does an 'ip -6 a' show before and after a checkpoint?
-Brian
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] C/R: Support for IPv6 addresses on network devices
[not found] ` <4BABD594.1020301-VXdhtT5mjnY@public.gmane.org>
@ 2010-03-26 15:35 ` Dan Smith
[not found] ` <87634jf63u.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Dan Smith @ 2010-03-26 15:35 UTC (permalink / raw)
To: Brian Haley; +Cc: containers-qjLDD68F18O7TbgM5vRIOg
BH> Ok, I don't remember Dave's email.
https://lists.linux-foundation.org/pipermail/containers/2010-February/023135.html
However, now that I've gone in and done some looking and replacing my
print statements with %pI6, I think it may actually be kept in memory
in network byte order, which means I don't need to switch it. Does
that sound right? :)
BH> Well, what does an 'ip -6 a' show before and after a checkpoint?
Yeah, I end up with an extra address. I think ignoring the
link-local ones sounds like a good plan, for now at least.
--
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] C/R: Support for IPv6 addresses on network devices
[not found] ` <87634jf63u.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
@ 2010-03-30 15:35 ` Brian Haley
[not found] ` <4BB21A45.4050300-VXdhtT5mjnY@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Brian Haley @ 2010-03-30 15:35 UTC (permalink / raw)
To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg
Dan Smith wrote:
> BH> Ok, I don't remember Dave's email.
>
> https://lists.linux-foundation.org/pipermail/containers/2010-February/023135.html
>
> However, now that I've gone in and done some looking and replacing my
> print statements with %pI6, I think it may actually be kept in memory
> in network byte order, which means I don't need to switch it. Does
> that sound right? :)
Yes, it sounds right to me.
> BH> Well, what does an 'ip -6 a' show before and after a checkpoint?
>
> Yeah, I end up with an extra address. I think ignoring the
> link-local ones sounds like a good plan, for now at least.
Can I ask what the addresses were? Did you move from VM to VM so the
underlying NIC MAC address changed?
Again, I don't know your typical user for C/R. For example, with IPv4
you save all the addresses, but if one of them was configured via DHCP,
you could have an address conflict when you restore it, since there's
no way to know if it's been handed-out to another system in the meantime.
Or does a typical C/R user only have static addresses?
With IPv6 it gets worse because the link-local will get created
automatically, and if you're in a VM it will probably be somewhat
random. Then when you move to another VM you'll get another virtual
NIC with a different MAC address. Since the global address is going
to be based off the same lower 64-bits, you'll wind-up with a second
global in most situations (since you're restoring the original address).
It almost seems as though you only want to C/R manually configured
IPv6 addresses (those marked IFA_F_PERMANENT) since those are going
to be static to that "system", and in that case the admin has probably
disabled the address auto-configuration process. I guess you'll find
out as people use it and complain, then you can add knobs to control
the behavior, or simply document the restrictions.
-Brian
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] C/R: Support for IPv6 addresses on network devices
[not found] ` <4BB21A45.4050300-VXdhtT5mjnY@public.gmane.org>
@ 2010-03-30 16:17 ` Dan Smith
[not found] ` <87zl1peqd4.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Dan Smith @ 2010-03-30 16:17 UTC (permalink / raw)
To: Brian Haley; +Cc: containers-qjLDD68F18O7TbgM5vRIOg
BH> Can I ask what the addresses were?
I'm not sure what you mean. You want to know what the duplicated
address was? My flawed byteorder adjustment caused me to have two
fe80::X addresses on the restored interface.
I've since added a check to ignore non-global scope addresses, which
works nicely.
BH> Did you move from VM to VM so the underlying NIC MAC address
BH> changed?
No, the MAC doesn't change because I reconstruct the interface on the
other side with the original MAC address. The netns and netdev code
assume that you're migrating your entire network namespace, which
includes all the devices within.
BH> Again, I don't know your typical user for C/R. For example, with
BH> IPv4 you save all the addresses, but if one of them was configured
BH> via DHCP, you could have an address conflict when you restore it,
BH> since there's no way to know if it's been handed-out to another
BH> system in the meantime. Or does a typical C/R user only have
BH> static addresses?
I think that the expectation is that if you're migrating network
connections, you are going to have to be in your own netns and have
your own interface. If DHCP is in play, then you're going to be
migrating dhclient along with your app anyway.
BH> With IPv6 it gets worse because the link-local will get created
BH> automatically, and if you're in a VM it will probably be somewhat
BH> random. Then when you move to another VM you'll get another
BH> virtual NIC with a different MAC address. Since the global
BH> address is going to be based off the same lower 64-bits, you'll
BH> wind-up with a second global in most situations (since you're
BH> restoring the original address).
Well, I'm not sure of your use of "VM" in this case. I think the
typical usage here will be a container that behaves like a VM. As I
said above, you'll have your own virtual interface and your MAC
address will go with you.
Right now, I migrate sshd and sendmail around by starting them up in a
container with a veth or macvlan device. Then, I can move them from
machine to machine while keeping the same interface, MAC and IP
addresses, and without severing connections with the outside world.
--
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] C/R: Support for IPv6 addresses on network devices
[not found] ` <87zl1peqd4.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
@ 2010-03-30 17:05 ` Brian Haley
[not found] ` <4BB22F73.60704-VXdhtT5mjnY@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Brian Haley @ 2010-03-30 17:05 UTC (permalink / raw)
To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg
Dan Smith wrote:
> BH> Can I ask what the addresses were?
>
> I'm not sure what you mean. You want to know what the duplicated
> address was? My flawed byteorder adjustment caused me to have two
> fe80::X addresses on the restored interface.
Ok, I was wondering if you had two link-locals because when you moved
from machine to machine the MAC changed.
> I've since added a check to ignore non-global scope addresses, which
> works nicely.
Well, in most cases you could ignore the global addresses as well,
since if they were auto-configured they'll be generated again on
the next Router Advertisement. That's why they're removed on
an ifdown, but the permanent ones aren't (any more).
> BH> Did you move from VM to VM so the underlying NIC MAC address
> BH> changed?
>
> No, the MAC doesn't change because I reconstruct the interface on the
> other side with the original MAC address. The netns and netdev code
> assume that you're migrating your entire network namespace, which
> includes all the devices within.
Ok, I forgot about the underlying device being moved as well, that
makes sense.
> BH> Again, I don't know your typical user for C/R. For example, with
> BH> IPv4 you save all the addresses, but if one of them was configured
> BH> via DHCP, you could have an address conflict when you restore it,
> BH> since there's no way to know if it's been handed-out to another
> BH> system in the meantime. Or does a typical C/R user only have
> BH> static addresses?
>
> I think that the expectation is that if you're migrating network
> connections, you are going to have to be in your own netns and have
> your own interface. If DHCP is in play, then you're going to be
> migrating dhclient along with your app anyway.
Ok, so dhclient6 too :)
> BH> With IPv6 it gets worse because the link-local will get created
> BH> automatically, and if you're in a VM it will probably be somewhat
> BH> random. Then when you move to another VM you'll get another
> BH> virtual NIC with a different MAC address. Since the global
> BH> address is going to be based off the same lower 64-bits, you'll
> BH> wind-up with a second global in most situations (since you're
> BH> restoring the original address).
>
> Well, I'm not sure of your use of "VM" in this case. I think the
> typical usage here will be a container that behaves like a VM. As I
> said above, you'll have your own virtual interface and your MAC
> address will go with you.
Yeah, I'm just using the wrong terminology sometimes, like you said,
it's a container behaving like a VM.
-Brian
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] C/R: Support for IPv6 addresses on network devices
[not found] ` <4BB22F73.60704-VXdhtT5mjnY@public.gmane.org>
@ 2010-03-30 18:07 ` Dan Smith
[not found] ` <87r5n1el9z.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
0 siblings, 1 reply; 12+ messages in thread
From: Dan Smith @ 2010-03-30 18:07 UTC (permalink / raw)
To: Brian Haley; +Cc: containers-qjLDD68F18O7TbgM5vRIOg
BH> Well, in most cases you could ignore the global addresses as well,
BH> since if they were auto-configured they'll be generated again on
BH> the next Router Advertisement. That's why they're removed on an
BH> ifdown, but the permanent ones aren't (any more).
So, I just checked... All of my link-local addresses have the
IFA_F_PERMANENT flag (0x80) set as well. What am I missing?
--
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] C/R: Support for IPv6 addresses on network devices
[not found] ` <87r5n1el9z.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
@ 2010-03-30 19:56 ` Brian Haley
0 siblings, 0 replies; 12+ messages in thread
From: Brian Haley @ 2010-03-30 19:56 UTC (permalink / raw)
To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg
Dan Smith wrote:
> BH> Well, in most cases you could ignore the global addresses as well,
> BH> since if they were auto-configured they'll be generated again on
> BH> the next Router Advertisement. That's why they're removed on an
> BH> ifdown, but the permanent ones aren't (any more).
>
> So, I just checked... All of my link-local addresses have the
> IFA_F_PERMANENT flag (0x80) set as well. What am I missing?
Sorry, I was recalling this from memory without looking at the code.
- link-locals are always marked "permanent"
- global addresses added by hand are marked "permanent"
- global addresses added by auto-configuration are marked "dynamic"
  (/sbin/ip prints this if IFA_F_PERMANENT is not set)
That way you can tell if they were added by the kernel or the user.
-Brian
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2010-03-30 19:56 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-24 19:40 C/R: Fixup IPv6 support Dan Smith
[not found] ` <1269459625-21033-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-03-24 19:40 ` [PATCH 1/2] C/R: Support for IPv6 addresses on network devices Dan Smith
[not found] ` <1269459625-21033-2-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-03-25 20:36 ` Brian Haley
[not found] ` <4BABC967.5090908-VXdhtT5mjnY@public.gmane.org>
2010-03-25 21:01 ` Dan Smith
[not found] ` <87aatwf74u.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2010-03-25 21:28 ` Brian Haley
[not found] ` <4BABD594.1020301-VXdhtT5mjnY@public.gmane.org>
2010-03-26 15:35 ` Dan Smith
[not found] ` <87634jf63u.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2010-03-30 15:35 ` Brian Haley
[not found] ` <4BB21A45.4050300-VXdhtT5mjnY@public.gmane.org>
2010-03-30 16:17 ` Dan Smith
[not found] ` <87zl1peqd4.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2010-03-30 17:05 ` Brian Haley
[not found] ` <4BB22F73.60704-VXdhtT5mjnY@public.gmane.org>
2010-03-30 18:07 ` Dan Smith
[not found] ` <87r5n1el9z.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2010-03-30 19:56 ` Brian Haley
2010-03-24 19:40 ` [PATCH 2/2] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag Dan Smith
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.