From: Louis Rilling <Louis.Rilling-aw0BnHfMbSpBDgjK7y7TUQ@public.gmane.org>
To: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Cc: containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org
Subject: Re: [PATCH] c/r: Add AF_UNIX support
Date: Thu, 4 Jun 2009 22:14:45 +0200 [thread overview]
Message-ID: <20090604201444.GA4302@localdomain> (raw)
In-Reply-To: <1244042305-7770-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
[-- Attachment #1.1: Type: text/plain, Size: 11116 bytes --]
Hi,
On Wed, Jun 03, 2009 at 08:18:25AM -0700, Dan Smith wrote:
> This patch adds basic checkpoint/restart support for AF_UNIX sockets. It
> has been tested with a single and multiple processes, and with data inflight
> at the time of checkpoint. It supports both socketpair()s and path-based
> sockets.
>
> I have an almost-working AF_INET follow-on to this which I can submit after
> this is reviewed and tweaked into acceptance.
>
[...]
> diff --git a/net/socket_cr.c b/net/socket_cr.c
> new file mode 100644
> index 0000000..76759fe
> --- /dev/null
> +++ b/net/socket_cr.c
> @@ -0,0 +1,378 @@
> +/*
> + * Copyright 2009 IBM Corporation
> + *
> + * Author: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation, version 2 of the
> + * License.
> + */
> +
> +#include <linux/socket.h>
> +#include <linux/mount.h>
> +#include <linux/file.h>
> +
> +#include <net/af_unix.h>
> +#include <net/tcp_states.h>
> +
> +#include <linux/checkpoint.h>
> +#include <linux/checkpoint_hdr.h>
> +
> +static int sock_copy_buffers(struct sk_buff_head *from, struct sk_buff_head *to)
> +{
> + int count = 0;
> + struct sk_buff *skb;
> +
> + spin_lock(&from->lock);
> +
> + skb_queue_walk(from, skb) {
> + struct sk_buff *tmp;
> +
> + tmp = skb_copy(skb, GFP_KERNEL);
GFP_KERNEL is not allowed here: from->lock is a spinlock, and a GFP_KERNEL
allocation may sleep while it is held. I am not sure that GFP_ATOMIC is
acceptable either, though. Perhaps it would be better to temporarily move the
queue to a local head, copy it (no spinlock needed), and then push it back onto
the original queue. This would require blocking concurrent senders/receivers
during the operation, unless it is guaranteed that they are all frozen.
Thanks,
Louis
> + if (!tmp) {
> + count = -ENOMEM;
> + goto out;
> + }
> + skb_queue_tail(to, tmp);
> + count++;
> + }
> + out:
> + spin_unlock(&from->lock);
> +
> + return count;
> +}
> +
> +static int __sock_write_buffers(struct ckpt_ctx *ctx,
> + struct sk_buff_head *queue)
> +{
> + struct sk_buff *skb;
> + int ret = 0;
> +
> + skb_queue_walk(queue, skb) {
> + ret = ckpt_write_obj_type(ctx, skb->data, skb->len,
> + CKPT_HDR_SOCKET_BUFFER);
> + if (ret)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +static int sock_write_buffers(struct ckpt_ctx *ctx, struct sk_buff_head *queue)
> +{
> + struct ckpt_hdr_socket_buffer *h;
> + struct sk_buff_head tmpq;
> + int ret = -ENOMEM;
> +
> + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_SOCKET_BUFFERS);
> + if (!h)
> + goto out;
> +
> + skb_queue_head_init(&tmpq);
> +
> + h->skb_count = sock_copy_buffers(queue, &tmpq);
> + if (h->skb_count < 0) {
> + ret = h->skb_count;
> + goto out;
> + }
> +
> + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
> + if (!ret)
> + ret = __sock_write_buffers(ctx, &tmpq);
> +
> + out:
> + ckpt_hdr_put(ctx, h);
> + __skb_queue_purge(&tmpq);
> +
> + return ret;
> +}
> +
> +static int sock_un_checkpoint(struct ckpt_ctx *ctx,
> + struct sock *sock,
> + struct ckpt_hdr_socket *h)
> +{
> + struct unix_sock *sk = unix_sk(sock);
> + struct unix_sock *pr = unix_sk(sk->peer);
> + int new;
> + int ret;
> +
> + h->un.this = ckpt_obj_lookup_add(ctx, sk, CKPT_OBJ_SOCK, &new);
> + if (h->un.this < 0)
> + goto out;
> +
> + if (sk->peer)
> + h->un.peer = ckpt_obj_lookup_add(ctx, pr, CKPT_OBJ_SOCK, &new);
> + else
> + h->un.peer = 0;
> +
> + if (h->un.peer < 0) {
> + ret = h->un.peer;
> + goto out;
> + }
> +
> + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
> + out:
> + return ret;
> +}
> +
> +static int sock_cptrst(struct ckpt_ctx *ctx,
> + struct sock *sock,
> + struct ckpt_hdr_socket *h,
> + int op)
> +{
> + if (sock->sk_socket) {
> + CKPT_COPY(op, h->socket_flags, sock->sk_socket->flags);
> + CKPT_COPY(op, h->socket_state, sock->sk_socket->state);
> + }
> +
> + CKPT_COPY(op, h->reuse, sock->sk_reuse);
> + CKPT_COPY(op, h->shutdown, sock->sk_shutdown);
> + CKPT_COPY(op, h->userlocks, sock->sk_userlocks);
> + CKPT_COPY(op, h->no_check, sock->sk_no_check);
> + CKPT_COPY(op, h->protocol, sock->sk_protocol);
> + CKPT_COPY(op, h->err, sock->sk_err);
> + CKPT_COPY(op, h->err_soft, sock->sk_err_soft);
> + CKPT_COPY(op, h->priority, sock->sk_priority);
> + CKPT_COPY(op, h->rcvlowat, sock->sk_rcvlowat);
> + CKPT_COPY(op, h->backlog, sock->sk_max_ack_backlog);
> + CKPT_COPY(op, h->rcvtimeo, sock->sk_rcvtimeo);
> + CKPT_COPY(op, h->sndtimeo, sock->sk_sndtimeo);
> + CKPT_COPY(op, h->rcvbuf, sock->sk_rcvbuf);
> + CKPT_COPY(op, h->sndbuf, sock->sk_sndbuf);
> + CKPT_COPY(op, h->bound_dev_if, sock->sk_bound_dev_if);
> + CKPT_COPY(op, h->flags, sock->sk_flags);
> + CKPT_COPY(op, h->lingertime, sock->sk_lingertime);
> +
> + return 0;
> +}
> +
> +int __sock_file_checkpoint(struct ckpt_ctx *ctx, struct file *file)
> +{
> + struct socket *socket = file->private_data;
> + struct sock *sock = socket->sk;
> + struct ckpt_hdr_socket *h;
> + int ret = 0;
> +
> + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_SOCKET);
> + if (!h)
> + return -ENOMEM;
> +
> + h->family = sock->sk_family;
> + h->state = socket->state;
> + h->sock_state = sock->sk_state;
> + h->reuse = sock->sk_reuse;
> + h->type = sock->sk_type;
> + h->protocol = sock->sk_protocol;
> +
> + h->laddr_len = sizeof(h->laddr);
> + h->raddr_len = sizeof(h->raddr);
> +
> + if (socket->ops->getname(socket, &h->laddr, &h->laddr_len, 0)) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if ((h->sock_state != TCP_LISTEN) &&
> + (h->type != SOCK_DGRAM) &&
> + (socket->ops->getname(socket, &h->raddr, &h->raddr_len, 1))) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + sock_cptrst(ctx, sock, h, CKPT_CPT);
> +
> + if (h->family == AF_UNIX) {
> + ret = sock_un_checkpoint(ctx, sock, h);
> + if (ret)
> + goto out;
> + } else {
> + ckpt_debug("unsupported socket type %i\n", h->family);
> + ret = EINVAL;
> + goto out;
> + }
> +
> + ret = sock_write_buffers(ctx, &sock->sk_receive_queue);
> + if (ret)
> + goto out;
> +
> + ret = sock_write_buffers(ctx, &sock->sk_write_queue);
> + if (ret)
> + goto out;
> +
> + /* FIXME: write out-of-order queue for TCP */
> + out:
> + ckpt_hdr_put(ctx, h);
> +
> + return ret;
> +}
> +
> +static int sock_read_buffer(struct ckpt_ctx *ctx,
> + struct sock *sock,
> + struct sk_buff **skb)
> +{
> + struct ckpt_hdr *h;
> + int ret = 0;
> + int len;
> +
> + h = ckpt_read_buf_type(ctx, SKB_MAX_ALLOC, CKPT_HDR_SOCKET_BUFFER);
> + if (IS_ERR(h))
> + return PTR_ERR(h);
> +
> + len = h->len - sizeof(*h);
> +
> + *skb = sock_alloc_send_skb(sock, len, MSG_DONTWAIT, &ret);
> + if (*skb == NULL) {
> + ret = ENOMEM;
> + goto out;
> + }
> +
> + memcpy(skb_put(*skb, len), (char *)(h + 1), len);
> + out:
> + ckpt_hdr_put(ctx, h);
> + return ret;
> +}
> +
> +static int sock_read_buffers(struct ckpt_ctx *ctx,
> + struct sock *sock,
> + struct sk_buff_head *queue)
> +{
> + struct ckpt_hdr_socket_buffer *h;
> + int ret = 0;
> + int i;
> +
> + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SOCKET_BUFFERS);
> + if (IS_ERR(h)) {
> + ret = PTR_ERR(h);
> + goto out;
> + }
> +
> + for (i = 0; i < h->skb_count; i++) {
> + struct sk_buff *skb = NULL;
> +
> + ret = sock_read_buffer(ctx, sock, &skb);
> + if (ret)
> + break;
> +
> + skb_queue_tail(queue, skb);
> + }
> + out:
> + ckpt_hdr_put(ctx, h);
> +
> + return ret;
> +}
> +
> +static int sock_un_restart(struct ckpt_ctx *ctx,
> + struct ckpt_hdr_socket *h,
> + struct socket *socket)
> +{
> + struct sock *peer;
> + int ret = 0;
> +
> + if (h->sock_state == TCP_ESTABLISHED) {
> + peer = ckpt_obj_fetch(ctx, h->un.peer, CKPT_OBJ_SOCK);
> + if (peer && !IS_ERR(peer)) {
> + /* We're last, so join with peer */
> + struct sock *this = socket->sk;
> +
> + sock_hold(this);
> + sock_hold(peer);
> +
> + unix_sk(this)->peer = peer;
> + unix_sk(peer)->peer = this;
> +
> + this->sk_peercred.pid = task_tgid_vnr(current);
> + current_euid_egid(&this->sk_peercred.uid,
> + &this->sk_peercred.gid);
> +
> + peer->sk_peercred.pid = task_tgid_vnr(current);
> + current_euid_egid(&peer->sk_peercred.uid,
> + &peer->sk_peercred.gid);
> + } else {
> + /* We're first, so add our socket and wait for peer */
> + ckpt_obj_insert(ctx, socket->sk, h->un.this,
> + CKPT_OBJ_SOCK);
> + }
> +
> + } else if (h->sock_state == TCP_LISTEN) {
> + ret = socket->ops->bind(socket,
> + (struct sockaddr *)&h->laddr,
> + h->laddr_len);
> + if (ret < 0)
> + goto out;
> +
> + ret = socket->ops->listen(socket, h->backlog);
> + if (ret < 0)
> + goto out;
> + } else
> + ckpt_debug("unsupported UNIX socket state %i\n", h->state);
> +
> + socket->state = h->state;
> + socket->sk->sk_state = h->sock_state;
> + out:
> + return ret;
> +}
> +
> +struct socket *__sock_file_restore(struct ckpt_ctx *ctx,
> + struct ckpt_hdr_socket *h)
> +{
> + struct socket *socket;
> + int ret;
> +
> + ret = sock_create(h->family, h->type, 0, &socket);
> + if (ret < 0)
> + return ERR_PTR(ret);
> +
> + if (h->family == AF_UNIX) {
> + ret = sock_un_restart(ctx, h, socket);
> + ckpt_debug("sock_un_restart: %i\n", ret);
> + } else {
> + ckpt_debug("unsupported family %i\n", h->family);
> + ret = -EINVAL;
> + }
> +
> + if (ret)
> + goto out;
> +
> + ret = sock_read_buffers(ctx, socket->sk, &socket->sk->sk_receive_queue);
> + if (ret)
> + goto out;
> +
> + ret = sock_read_buffers(ctx, socket->sk, &socket->sk->sk_write_queue);
> + if (ret)
> + goto out;
> + out:
> + if (ret) {
> + sock_release(socket);
> + socket = ERR_PTR(ret);
> + }
> +
> + return socket;
> +}
> +
> +int sock_file_checkpoint(struct ckpt_ctx *ctx, void *ptr)
> +{
> + struct ckpt_hdr_file_socket *h;
> + int ret;
> + struct file *file = ptr;
> +
> + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE);
> + if (!h)
> + return -ENOMEM;
> +
> + h->common.f_type = CKPT_FILE_SOCKET;
> +
> + ret = checkpoint_file_common(ctx, file, &h->common);
> + if (ret < 0)
> + goto out;
> + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
> + if (ret < 0)
> + goto out;
> +
> + ret = __sock_file_checkpoint(ctx, file);
> + out:
> + ckpt_hdr_put(ctx, h);
> + return ret;
> +}
> +
> +
> --
> 1.6.0.4
>
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
--
Dr Louis Rilling Kerlabs
Skype: louis.rilling Batiment Germanium
Phone: (+33|0) 6 80 89 08 23 80 avenue des Buttes de Coesmes
http://www.kerlabs.com/ 35700 Rennes
[-- Attachment #1.2: Digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]
[-- Attachment #2: Type: text/plain, Size: 206 bytes --]
_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers
next prev parent reply other threads:[~2009-06-04 20:14 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-06-03 15:18 [PATCH] c/r: Add AF_UNIX support Dan Smith
[not found] ` <1244042305-7770-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-04 15:19 ` Serge E. Hallyn
[not found] ` <20090604151923.GA29519-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-04 15:36 ` Serge E. Hallyn
2009-06-04 20:20 ` Dan Smith
2009-06-08 6:15 ` Oren Laadan
2009-06-04 20:14 ` Louis Rilling [this message]
2009-06-04 21:16 ` Dan Smith
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090604201444.GA4302@localdomain \
--to=louis.rilling-aw0bnhfmbspbdgjk7y7tuq@public.gmane.org \
--cc=containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org \
--cc=danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.