Linux userland API discussions

Linux userland API discussions
 help / color / mirror / Atom feed

* [PATCH v10 1/2] crypto: AF_ALG: add AEAD support
From: Stephan Mueller @ 2015-01-14  3:53 UTC (permalink / raw)
  To: 'Herbert Xu'
  Cc: Daniel Borkmann, 'Quentin Gouchet', 'LKML',
	ABI/API, linux-crypto
In-Reply-To: <5489736.JvWhZNrmuD@tachyon.chronox.de>

This patch adds the AEAD support for AF_ALG.

The implementation is based on algif_skcipher, but contains heavy
modifications to streamline the interface for AEAD uses.

To use AEAD, the user space consumer has to use the salg_type named
"aead".

The AEAD implementation includes some overhead to calculate the size of
the ciphertext, because the AEAD implementation of the kernel crypto API
makes implicit assumptions about the location of the authentication tag. When
performing an encryption, the tag will be added to the created
ciphertext (note, the tag is placed adjacent to the ciphertext). For
decryption, the caller must hand in the ciphertext with the tag appended
to the ciphertext. Therefore, the selection of the used memory
needs to add/subtract the tag size from the source/destination buffers
depending on the encryption type. The code is provided with comments
explaining when and how that operation is performed.

A fully working example using all aspects of AEAD is provided at
http://www.chronox.de/libkcapi.html

Signed-off-by: Stephan Mueller <smueller@chronox.de>
---
 crypto/algif_aead.c | 680 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 680 insertions(+)
 create mode 100644 crypto/algif_aead.c

diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
new file mode 100644
index 0000000..8d1dda5
--- /dev/null
+++ b/crypto/algif_aead.c
@@ -0,0 +1,680 @@
+/*
+ * algif_aead: User-space interface for AEAD algorithms
+ *
+ * Copyright (C) 2014, Stephan Mueller <smueller@chronox.de>
+ *
+ * This file provides the user-space API for AEAD ciphers.
+ *
+ * This file is derived from algif_skcipher.c.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/scatterwalk.h>
+#include <crypto/if_alg.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <net/sock.h>
+
+struct aead_sg_list {
+	unsigned int cur;
+	struct scatterlist sg[ALG_MAX_PAGES];
+};
+
+struct aead_ctx {
+	struct aead_sg_list tsgl;
+	struct af_alg_sgl rsgl;
+
+	void *iv;
+
+	struct af_alg_completion completion;
+
+	unsigned long used;
+
+	unsigned int len;
+	bool more;
+	bool merge;
+	bool enc;
+	bool trunc;
+
+	size_t aead_assoclen;
+	struct aead_request aead_req;
+};
+
+static inline int aead_sndbuf(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+
+	return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
+			  ctx->used, 0);
+}
+
+static inline bool aead_writable(struct sock *sk)
+{
+	return PAGE_SIZE <= aead_sndbuf(sk);
+}
+
+static inline bool aead_sufficient_data(struct aead_ctx *ctx)
+{
+	unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
+
+	return (ctx->used >= (ctx->aead_assoclen + (ctx->enc ? 0 : as)));
+}
+
+static void aead_put_sgl(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	struct aead_sg_list *sgl = &ctx->tsgl;
+	struct scatterlist *sg = sgl->sg;
+	unsigned int i;
+
+	for (i = 0; i < sgl->cur; i++) {
+		if (!sg_page(sg + i))
+			continue;
+
+		put_page(sg_page(sg + i));
+		sg_assign_page(sg + i, NULL);
+	}
+	sgl->cur = 0;
+	ctx->used = 0;
+	ctx->more = 0;
+	ctx->merge = 0;
+	ctx->trunc = 0;
+}
+
+static int aead_wait_for_wmem(struct sock *sk, unsigned flags)
+{
+	long timeout;
+	DEFINE_WAIT(wait);
+	int err = -ERESTARTSYS;
+
+	if (flags & MSG_DONTWAIT)
+		return -EAGAIN;
+
+	set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		timeout = MAX_SCHEDULE_TIMEOUT;
+		if (sk_wait_event(sk, &timeout, aead_writable(sk))) {
+			err = 0;
+			break;
+		}
+	}
+	finish_wait(sk_sleep(sk), &wait);
+
+	return err;
+}
+
+static void aead_wmem_wakeup(struct sock *sk)
+{
+	struct socket_wq *wq;
+
+	if (!aead_writable(sk))
+		return;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
+							   POLLRDNORM |
+							   POLLRDBAND);
+	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+	rcu_read_unlock();
+}
+
+static int aead_wait_for_data(struct sock *sk, unsigned flags)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	long timeout;
+	DEFINE_WAIT(wait);
+	int err = -ERESTARTSYS;
+
+	if (flags & MSG_DONTWAIT) {
+		return -EAGAIN;
+	}
+
+	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		timeout = MAX_SCHEDULE_TIMEOUT;
+		if (sk_wait_event(sk, &timeout, !ctx->more)) {
+			err = 0;
+			break;
+		}
+	}
+	finish_wait(sk_sleep(sk), &wait);
+
+	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+
+	return err;
+}
+
+static void aead_data_wakeup(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	struct socket_wq *wq;
+
+	if (ctx->more)
+		return;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
+							   POLLRDNORM |
+							   POLLRDBAND);
+	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+	rcu_read_unlock();
+}
+
+static int aead_sendmsg(struct kiocb *unused, struct socket *sock,
+		        struct msghdr *msg, size_t size)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	unsigned ivsize =
+		crypto_aead_ivsize(crypto_aead_reqtfm(&ctx->aead_req));
+	struct aead_sg_list *sgl = &ctx->tsgl;
+	struct af_alg_control con = {};
+	long copied = 0;
+	bool enc = 0;
+	bool init = 0;
+	int err = -EINVAL;
+
+	if (msg->msg_controllen) {
+		err = af_alg_cmsg_send(msg, &con);
+		if (err)
+			return err;
+
+		init = 1;
+		switch (con.op) {
+		case ALG_OP_ENCRYPT:
+			enc = 1;
+			break;
+		case ALG_OP_DECRYPT:
+			enc = 0;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (con.iv && con.iv->ivlen != ivsize)
+			return -EINVAL;
+	}
+
+	lock_sock(sk);
+	if (!ctx->more && ctx->used)
+		goto unlock;
+
+	if (init) {
+		ctx->enc = enc;
+		if (con.iv)
+			memcpy(ctx->iv, con.iv->iv, ivsize);
+
+		ctx->aead_assoclen = con.aead_assoclen;
+	}
+
+	while (size) {
+		unsigned long len = size;
+		struct scatterlist *sg = NULL;
+
+		/* use the existing memory in an allocated page */
+		if (ctx->merge) {
+			sg = sgl->sg + sgl->cur - 1;
+			len = min_t(unsigned long, len,
+				    PAGE_SIZE - sg->offset - sg->length);
+			err = memcpy_from_msg(page_address(sg_page(sg)) +
+					      sg->offset + sg->length,
+					      msg, len);
+			if (err)
+				goto unlock;
+
+			sg->length += len;
+			ctx->merge = (sg->offset + sg->length) &
+				     (PAGE_SIZE - 1);
+
+			ctx->used += len;
+			copied += len;
+			size -= len;
+		}
+
+		if (!aead_writable(sk)) {
+			/*
+			 * If there is more data to be expected, but we cannot
+			 * write more data, forcefully define that we do not
+			 * expect more data to invoke the AEAD operation. This
+			 * prevents a deadlock in user space.
+			 */
+			ctx->more = 0;
+			ctx->trunc = 1;
+			err = aead_wait_for_wmem(sk, msg->msg_flags);
+			if (err)
+				goto unlock;
+		}
+
+		/* allocate a new page */
+		len = min_t(unsigned long, size, aead_sndbuf(sk));
+		while (len) {
+			int plen = 0;
+
+			if (sgl->cur >= ALG_MAX_PAGES) {
+				err = -E2BIG;
+				goto unlock;
+			}
+
+			sg = sgl->sg + sgl->cur;
+			plen = min_t(int, len, PAGE_SIZE);
+
+			sg_assign_page(sg, alloc_page(GFP_KERNEL));
+			err = -ENOMEM;
+			if (!sg_page(sg))
+				goto unlock;
+
+			err = memcpy_from_msg(page_address(sg_page(sg)),
+					      msg, plen);
+			if (err) {
+				__free_page(sg_page(sg));
+				sg_assign_page(sg, NULL);
+				goto unlock;
+			}
+
+			sg->offset = 0;
+			sg->length = plen;
+			len -= plen;
+			ctx->used += plen;
+			copied += plen;
+			sgl->cur++;
+			size -= plen;
+			ctx->merge = plen & (PAGE_SIZE - 1);
+		}
+	}
+
+	err = 0;
+
+	ctx->more = msg->msg_flags & MSG_MORE;
+	if (!ctx->more && !aead_sufficient_data(ctx)) {
+		aead_put_sgl(sk);
+		err = -EMSGSIZE;
+	}
+
+unlock:
+	aead_data_wakeup(sk);
+	release_sock(sk);
+
+	return err ?: copied;
+}
+
+static ssize_t aead_sendpage(struct socket *sock, struct page *page,
+			     int offset, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	struct aead_sg_list *sgl = &ctx->tsgl;
+	int err = -EINVAL;
+
+	if (flags & MSG_SENDPAGE_NOTLAST)
+		flags |= MSG_MORE;
+
+	if (sgl->cur >= ALG_MAX_PAGES)
+		return -E2BIG;
+
+	lock_sock(sk);
+	if (!ctx->more && ctx->used)
+		goto unlock;
+
+	if (!size)
+		goto done;
+
+	if (!aead_writable(sk)) {
+		/* see aead_sendmsg why more is set to 0 */
+		ctx->more = 0;
+		ctx->trunc = 1;
+		err = aead_wait_for_wmem(sk, flags);
+		if (err)
+			goto unlock;
+	}
+
+	ctx->merge = 0;
+
+	get_page(page);
+	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
+	sgl->cur++;
+	ctx->used += size;
+
+	err = 0;
+
+done:
+	ctx->more = flags & MSG_MORE;
+	if (!ctx->more && !aead_sufficient_data(ctx)) {
+		aead_put_sgl(sk);
+		err = -EMSGSIZE;
+	}
+
+unlock:
+	aead_data_wakeup(sk);
+	release_sock(sk);
+
+	return err ?: size;
+}
+
+static int aead_recvmsg(struct kiocb *unused, struct socket *sock,
+			struct msghdr *msg, size_t ignored, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	unsigned bs = crypto_aead_blocksize(crypto_aead_reqtfm(&ctx->aead_req));
+	unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
+	struct aead_sg_list *sgl = &ctx->tsgl;
+	struct scatterlist *sg = sgl->sg;
+	struct scatterlist assoc[ALG_MAX_PAGES];
+	size_t assoclen = 0;
+	unsigned int i = 0;
+	int err = -EINVAL;
+	unsigned long used = 0;
+	unsigned long outlen = 0;
+
+	/*
+	 * Require exactly one IOV block as the AEAD operation is a one shot
+	 * due to the authentication tag.
+	 */
+	if (msg->msg_iter.nr_segs != 1)
+		return -ENOMSG;
+
+	lock_sock(sk);
+	/*
+	* AEAD memory structure: For encryption, the tag is appended to the
+	* ciphertext which implies that the memory allocated for the ciphertext
+	* must be increased by the tag length. For decryption, the tag
+	* is expected to be concatenated to the ciphertext. The plaintext
+	* therefore has a memory size of the ciphertext minus the tag length.
+	*
+	* The memory structure for cipher operation has the following
+	* structure:
+	*	AEAD encryption input:  assoc data || plaintext
+	*	AEAD encryption output: ciphertext || auth tag
+	*	AEAD decryption input:  assoc data || ciphertext || auth tag
+	*	AEAD decryption output: plaintext
+	*/
+
+	if (ctx->more) {
+		err = aead_wait_for_data(sk, flags);
+		if (err)
+			goto unlock;
+	}
+
+	used = ctx->used;
+
+	/*
+	 * Make sure sufficient data is present -- note, the same check is
+	 * also present in sendmsg/sendpage. The checks in sendpage/sendmsg
+	 * shall provide an information to the data sender that something is
+	 * wrong, but they are irrelevant to maintain the kernel integrity.
+	 * We need this check here too in case user space decides to not honor
+	 * the error message in sendmsg/sendpage and still call recvmsg. This
+	 * check here protects the kernel integrity.
+	 */
+	if (!aead_sufficient_data(ctx))
+		goto unlock;
+
+	/*
+	 * The cipher operation input data is reduced by the associated data
+	 * length as this data is processed separately later on.
+	 */
+	used -= ctx->aead_assoclen;
+
+	if (ctx->enc) {
+		/* round up output buffer to multiple of block size */
+		outlen = ((used + bs - 1) / bs * bs);
+		/* add the size needed for the auth tag to be created */
+		outlen += as;
+	} else {
+		/* output data size is input without the authentication tag */
+		outlen = used - as;
+		/* round up output buffer to multiple of block size */
+		outlen = ((outlen + bs - 1) / bs * bs);
+	}
+
+	/* ensure output buffer is sufficiently large */
+	if (msg->msg_iter.iov->iov_len < outlen)
+		goto unlock;
+
+	outlen = af_alg_make_sg(&ctx->rsgl, msg->msg_iter.iov->iov_base,
+				outlen, 1);
+	err = outlen;
+	if (err < 0)
+		goto unlock;
+
+	err = -EINVAL;
+	sg_init_table(assoc, ALG_MAX_PAGES);
+	assoclen = ctx->aead_assoclen;
+	/*
+	 * Split scatterlist into two: first part becomes AD, second part
+	 * is plaintext / ciphertext. The first part is assigned to assoc
+	 * scatterlist. When this loop finishes, sg points to the start of the
+	 * plaintext / ciphertext.
+	 */
+	for (i = 0; i < ctx->tsgl.cur; i++) {
+		sg = sgl->sg + i;
+		if (sg->length <= assoclen) {
+			/* AD is larger than one page */
+			sg_set_page(assoc + i, sg_page(sg),
+				    sg->length, sg->offset);
+			assoclen -= sg->length;
+			if (i >= ctx->tsgl.cur)
+				goto unlock;
+		} else if (!assoclen) {
+			/* current page is to start of plaintext / ciphertext */
+			if (i)
+				/* AD terminates at page boundary */
+				sg_mark_end(assoc + i - 1);
+			else
+				/* AD size is zero */
+				sg_mark_end(assoc);
+			break;
+		} else {
+			/* AD does not terminate at page boundary */
+			sg_set_page(assoc + i, sg_page(sg),
+				    assoclen, sg->offset);
+			sg_mark_end(assoc + i);
+			/* plaintext / ciphertext starts after AD */
+			sg->length -= assoclen;
+			sg->offset += assoclen;
+			break;
+		}
+	}
+
+	aead_request_set_assoc(&ctx->aead_req, assoc, ctx->aead_assoclen);
+	aead_request_set_crypt(&ctx->aead_req, sg, ctx->rsgl.sg, used, ctx->iv);
+
+	err = af_alg_wait_for_completion(ctx->enc ?
+					 crypto_aead_encrypt(&ctx->aead_req) :
+					 crypto_aead_decrypt(&ctx->aead_req),
+					 &ctx->completion);
+
+	af_alg_free_sg(&ctx->rsgl);
+
+	/* indicate userspace that we processed incomplete data */
+	if (ctx->trunc)
+		msg->msg_flags |= MSG_TRUNC;
+
+	if (err) {
+		/* EBADMSG implies a valid cipher operation took place */
+		if (err == -EBADMSG)
+			aead_put_sgl(sk);
+		goto unlock;
+	}
+
+	aead_put_sgl(sk);
+
+	err = 0;
+
+unlock:
+	aead_wmem_wakeup(sk);
+	release_sock(sk);
+
+	return err ? err : outlen;
+}
+
+static unsigned int aead_poll(struct file *file, struct socket *sock,
+			      poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	unsigned int mask;
+
+	sock_poll_wait(file, sk_sleep(sk), wait);
+	mask = 0;
+
+	if (!ctx->more)
+		mask |= POLLIN | POLLRDNORM;
+
+	if (aead_writable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	return mask;
+}
+
+static struct proto_ops algif_aead_ops = {
+	.family		=	PF_ALG,
+
+	.connect	=	sock_no_connect,
+	.socketpair	=	sock_no_socketpair,
+	.getname	=	sock_no_getname,
+	.ioctl		=	sock_no_ioctl,
+	.listen		=	sock_no_listen,
+	.shutdown	=	sock_no_shutdown,
+	.getsockopt	=	sock_no_getsockopt,
+	.mmap		=	sock_no_mmap,
+	.bind		=	sock_no_bind,
+	.accept		=	sock_no_accept,
+	.setsockopt	=	sock_no_setsockopt,
+
+	.release	=	af_alg_release,
+	.sendmsg	=	aead_sendmsg,
+	.sendpage	=	aead_sendpage,
+	.recvmsg	=	aead_recvmsg,
+	.poll		=	aead_poll,
+};
+
+static void *aead_bind(const char *name, u32 type, u32 mask)
+{
+	return crypto_alloc_aead(name, type, mask);
+}
+
+static void aead_release(void *private)
+{
+	crypto_free_aead(private);
+}
+
+static int aead_setauthsize(void *private, unsigned int authsize)
+{
+	return crypto_aead_setauthsize(private, authsize);
+}
+
+static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
+{
+	return crypto_aead_setkey(private, key, keylen);
+}
+
+static void aead_sock_destruct(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct aead_ctx *ctx = ask->private;
+	unsigned int ivlen = crypto_aead_ivsize(
+				crypto_aead_reqtfm(&ctx->aead_req));
+
+	aead_put_sgl(sk);
+	sock_kzfree_s(sk, ctx->iv, ivlen);
+	sock_kfree_s(sk, ctx, ctx->len);
+	af_alg_release_parent(sk);
+}
+
+static int aead_accept_parent(void *private, struct sock *sk)
+{
+	struct aead_ctx *ctx;
+	struct alg_sock *ask = alg_sk(sk);
+	unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(private);
+	unsigned int ivlen = crypto_aead_ivsize(private);
+
+	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	memset(ctx, 0, len);
+
+	ctx->iv = sock_kmalloc(sk, ivlen, GFP_KERNEL);
+	if (!ctx->iv) {
+		sock_kfree_s(sk, ctx, len);
+		return -ENOMEM;
+	}
+	memset(ctx->iv, 0, ivlen);
+
+	ctx->len = len;
+	ctx->used = 0;
+	ctx->more = 0;
+	ctx->merge = 0;
+	ctx->enc = 0;
+	ctx->tsgl.cur = 0;
+	ctx->aead_assoclen = 0;
+	ctx->trunc = 0;
+	af_alg_init_completion(&ctx->completion);
+	sg_init_table(ctx->tsgl.sg, ALG_MAX_PAGES);
+
+	ask->private = ctx;
+
+	aead_request_set_tfm(&ctx->aead_req, private);
+	aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				  af_alg_complete, &ctx->completion);
+
+	sk->sk_destruct = aead_sock_destruct;
+
+	return 0;
+}
+
+static const struct af_alg_type algif_type_aead = {
+	.bind		=	aead_bind,
+	.release	=	aead_release,
+	.setkey		=	aead_setkey,
+	.setauthsize	=	aead_setauthsize,
+	.accept		=	aead_accept_parent,
+	.ops		=	&algif_aead_ops,
+	.name		=	"aead",
+	.owner		=	THIS_MODULE
+};
+
+static int __init algif_aead_init(void)
+{
+	return af_alg_register_type(&algif_type_aead);
+}
+
+static void __exit algif_aead_exit(void)
+{
+	int err = af_alg_unregister_type(&algif_type_aead);
+	BUG_ON(err);
+}
+
+module_init(algif_aead_init);
+module_exit(algif_aead_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>");
+MODULE_DESCRIPTION("AEAD kernel crypto API user space interface");
-- 
2.1.0

^ permalink raw reply related

* [PATCH v10 0/2] crypto: AF_ALG: add AEAD and RNG support
From: Stephan Mueller @ 2015-01-14  3:52 UTC (permalink / raw)
  To: 'Herbert Xu'
  Cc: Daniel Borkmann, 'Quentin Gouchet', 'LKML',
	ABI/API, linux-crypto

Hi,

This patch set adds AEAD and RNG support to the AF_ALG interface
exported by the kernel crypto API. By extending AF_ALG with AEAD and RNG
support, all cipher types the kernel crypto API allows access to are
now accessible from userspace.

Both the AEAD and RNG implementations are stand-alone and do not depend
on other AF_ALG interfaces (like hash or skcipher).

The AEAD implementation uses the same approach as provided with
skcipher by offering the following interfaces:

	* sendmsg and recvmsg interfaces allowing multiple
	  invocations supporting a threaded user space. To support
	  multi-threaded user space, kernel-side buffering
	  is implemented similarly to skcipher.

	* splice / vmsplice interfaces allowing a zero-copy
	  invocation

The RNG interface only implements the recvmsg interface as
zero-copy is not applicable.

The new AEAD and RNG interfaces are fully tested with the test application
provided at [1]. That test application exercises all newly added user space
interfaces. The testing covers:

	* use of the sendmsg/recvmsg interface

	* use of the splice / vmsplice interface

	* invocation of all AF_ALG types (aead, rng, skcipher, hash)

	* using all types of operation (encryption, decryption, keyed MD,
	  MD, random numbers, AEAD decryption with positive and negative
	  authentication verification)

	* stress testing by running all tests for 30 minutes in an
	  endless loop

	* test execution on 64 bit and 32 bit

[1] http://www.chronox.de/libkcapi.html

Changes v2:
* rebase to current cryptodev-2.6 tree
* use memzero_explicit to zeroize AEAD associated data
* use sizeof for determining length of AEAD associated data
* update algif_rng.c covering all suggestions from Daniel Borkmann
  <dborkman@redhat.com>
* addition of patch 9: add digestsize interface for hashes
* addition of patch to update documentation covering the userspace interface
* change numbers of getsockopt options: separate them from sendmsg interface
  definitions

Changes v3:
* remove getsockopt interface
* AEAD: associated data is set prepended to the plain/ciphertext
* AEAD: allowing arbitrary associated data lengths
* remove setkey patch as protection was already in the existing code

Changes v4:
* stand-alone implementation of AEAD
* testing of all interfaces offered by AEAD
* stress testing of AEAD and RNG

Changes v5:
* AEAD: add outer while(size) loop in aead_sendmsg to ensure all data is
  copied into the kernel (reporter Herbert Xu)
* AEAD: aead_sendmsg bug fix: change size -= len; to size -= plen;
* AF_ALG / AEAD: add aead_setauthsize and associated extension to
  struct af_alg_type as well as alg_setsockopt (reporter Herbert Xu)
* RNG: rng_recvmsg: use 128 byte stack variable for output of RNG instead
  of ctx->result (reporter Herbert Xu)
* RNG / AF_ALG: allow user space to seed RNG via setsockopt
* RNG: rng_recvmsg bug fix: use genlen as result variable for
  crypto_rng_get_bytes as previously no negative errors were obtained
* AF_ALG: alg_setop: zeroize buffer before free

Changes v6:
* AEAD/RNG: port to 3.19-rc1 with the iov_iter handling
* RNG: use the setkey interface to obtain the seed and drop the patch adding
  a separate reseeding interface
* extract the zeroization patch for alg_setkey into a stand-alone patch
  submission
* fix bug in aead_sufficient_data (reporter Herbert Xu)
* testing of all interfaces with test application provided with libkcapi version
  0.6.2

Changes v7:
* AEAD: aead_recvmsg: change error code from ENOMEM to EINVAL
* AEAD: drop aead_readable/aead_sufficient_data and only use ctx->more to decide
  whether the read side shall become active. This change requires that the
  patch for crypto_aead_decrypt ensuring that the ciphertext contains the
  authentication tag was added -- see https://lkml.org/lkml/2014/12/30/200.
  Otherwise, user space can trigger a kernel crash.
* RNG: patch dropped as it was applied
* AEAD: port Kconfig/Makefile patch forward to current code base

Changes v8:
* Removed check for aead_assoclen in aead_sendmsg
* Fix endless loop bug in aead_sendmsg (check for sgl->cur > ALG_MAX_PAGES in
  while condition removed -- this condition is checked within the loop already)
* Resurrect aead_sufficient_data and call it in aead_sendmsg, aead_sendpage to
  notify caller about wrong invocation
* Re-add aead_sufficient_data to aead_recvmsg to verify user input data before
  using them to ensure the kernel protects against malicious parameters
* Allow arbitrary size of AD (i.e. up to the maximum buffer size of
  ALG_MAX_PAGES)
* When aead_recvmsg receives an error from decryption, release all pages if the
  error is EBADMSG -- this error implies that a proper decryption was performed
  but the integrity of the message is lost. This error is considered to be a
  valid decryption result.
* Add test cases for sendmsg and splice interface to test large AD sizes (in
  case of sendmsg, use 65504 bytes AD and 32 bytes plaintext; in case of splice
  use 15 pages AD and 32 bytes in the 16th page for plaintext). See [1] for
  updated test case.

Changes v9:
* if socket is not writable during sendmsg/sendpage due to insufficient memory
  and a recvmsg operation is forced, inform userspace about truncated operation
  via MSG_TRUNC
* use -EMSGSIZE in case insufficient data was provided in sendmsg/sendpage
* release all buffers in case insufficient data was provided in sendmsg/sendpage
* bug fix in sendmsg: when a new page is allocated, reset sg->offset to 0 --
  the error is visible with the new tests in [1] when using the -d flag
  with the test application

Changes v10:
* initialize ctx->trunc in aead_accept_parent to zero
* fix one line with code formatting problems

Stephan Mueller (2):
  crypto: AF_ALG: add AEAD support
  crypto: AF_ALG: enable AEAD interface compilation

 crypto/Kconfig      |   9 +
 crypto/Makefile     |   1 +
 crypto/algif_aead.c | 680 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 690 insertions(+)
 create mode 100644 crypto/algif_aead.c

-- 
2.1.0

^ permalink raw reply

* Re: [PATCH] headers_check: don't warn about kexec.h
From: Andrew Morton @ 2015-01-14  0:47 UTC (permalink / raw)
  To: Paul Bolle
  Cc: Arnd Bergmann, Linus Torvalds, Maximilian Attems, Geoff Levand,
	Michal Marek, linux-kernel, linux-api, linux-kbuild
In-Reply-To: <1421183111.15397.34.camel@x220>

On Tue, 13 Jan 2015 22:05:11 +0100 Paul Bolle <pebolle@tiscali.nl> wrote:

> [Dragging Andrew, Linus, and Maximilian into this thread.]
> 
> On Tue, 2015-01-13 at 21:27 +0100, Arnd Bergmann wrote:
> > On Tuesday 13 January 2015 18:13:32 Paul Bolle wrote:
> > > The last time that Geoff has been trying to get that patch applied
> > > should be
> > > http://lkml.kernel.org/r/b0702fc4186db21820d686e89afd6480560823db.1415837218.git.geoff@infradead.org> 
> > > I'd rather see that go in.
> > 
> > Fine with me as well. As long as we can find someone to take one of 
> > the patches, I'm happy.
> 
> Since Geoff's patch (and my preceding, identical patch) basically is a
> partial revert of commit 29a5c67e7a78 ("kexec: export kexec.h to user
> space") that should probably be done by either Andrew or Linus. 
> 
> (This short thread starts at
> http://lkml.kernel.org/r/12825174.7oxZXDxNhV@wuerfel .)

hm, sorry, I can't imagine why I didn't process Geoff's patch series
back in November.  They all look good - I grabbed them.

^ permalink raw reply

* Re: [PATCH] headers_check: don't warn about kexec.h
From: Paul Bolle @ 2015-01-13 21:05 UTC (permalink / raw)
  To: Arnd Bergmann, Andrew Morton, Linus Torvalds
  Cc: Maximilian Attems, Geoff Levand, Michal Marek, linux-kernel,
	linux-api, linux-kbuild
In-Reply-To: <2363197.VbAKlSiQLX@wuerfel>

[Dragging Andrew, Linus, and Maximilian into this thread.]

On Tue, 2015-01-13 at 21:27 +0100, Arnd Bergmann wrote:
> On Tuesday 13 January 2015 18:13:32 Paul Bolle wrote:
> > The last time that Geoff has been trying to get that patch applied
> > should be
> > http://lkml.kernel.org/r/b0702fc4186db21820d686e89afd6480560823db.1415837218.git.geoff@infradead.org> 
> > I'd rather see that go in.
> 
> Fine with me as well. As long as we can find someone to take one of 
> the patches, I'm happy.

Since Geoff's patch (and my preceding, identical patch) basically is a
partial revert of commit 29a5c67e7a78 ("kexec: export kexec.h to user
space") that should probably be done by either Andrew or Linus. 

(This short thread starts at
http://lkml.kernel.org/r/12825174.7oxZXDxNhV@wuerfel .)


Paul Bolle


^ permalink raw reply

* Re: [PATCH] headers_check: don't warn about kexec.h
From: Arnd Bergmann @ 2015-01-13 20:27 UTC (permalink / raw)
  To: Paul Bolle
  Cc: Geoff Levand, Michal Marek, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-kbuild-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1421169212.15397.22.camel@x220>

On Tuesday 13 January 2015 18:13:32 Paul Bolle wrote:
> 
> For the seqbuf_dump() stuff there are apparently users. I forgot the
> details, but the sound people wanted to keep that declaration (and some
> related ancient things) in the header involved to keep some really
> ancient stuff buildable.
> 
> But the kexec_load declaration isn't very useful for userspace, see the
> patch I submitted in
> http://lkml.kernel.org/r/1389791824.17407.9.camel@x220 . And After my
> attempt the export of that declaration has also been discussed in
> http://lkml.kernel.org/r/115373b6ac68ee7a305975896e1c4971e8e51d4c.1408731991.git.geoff-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org 
> 
> In that last discussion no one has been able to point to an actual user
> of it. So, as far as I can tell, no one actually uses it. Which makes
> sense, because including this header by itself doesn't give one access
> to a useful definition of kexec_load. So why bother with the
> declaration?
> 
> The last time that Geoff has been trying to get that patch applied
> should be
> http://lkml.kernel.org/r/b0702fc4186db21820d686e89afd6480560823db.1415837218.git.geoff-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org  I'd rather see that go in.
> 

Fine with me as well. As long as we can find someone to take one of 
the patches, I'm happy.

	Arnd

^ permalink raw reply

* [GIT PULL] kselftest fixes for 3.19-rc5
From: Shuah Khan @ 2015-01-13 18:07 UTC (permalink / raw)
  To: torvalds-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, shuahkh-JPH+aEBZ4P+UEJcrhfAQsw

Hi Linus,

Please pull the following kselftest fixes for 3.19-rc5

thanks,
-- Shuah

The following changes since commit 6898b627aab6ba553e6d8b40a0b1ddc43c48d42f:

  selftests/exec: Use %zu to format size_t (2014-12-22 11:11:36 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest
tags/linux-kselftest-3.19-rc-5

for you to fetch changes up to f5db310d77ef1742e40bfc303b8625584c55f9e3:

  selftests/vm: fix link error for transhuge-stress test (2015-01-08
09:01:00 -0700)

----------------------------------------------------------------
kselftest fixes for: 3.19-rc5

This update contains 3 patches to fix one compile error,
and two run-time bugs. One of them fixes infinite loop
on ARM.

----------------------------------------------------------------
Andrey Skvortsov (1):
      selftests/vm: fix link error for transhuge-stress test

David Drysdale (1):
      selftests/exec: allow shell return code of 126

dann frazier (1):
      tools: testing: selftests: mq_perf_tests: Fix infinite loop on ARM

 tools/testing/selftests/exec/execveat.c        | 19 +++++++++++++------
 tools/testing/selftests/mqueue/mq_perf_tests.c |  3 +--
 tools/testing/selftests/vm/Makefile            |  2 +-
 3 files changed, 15 insertions(+), 9 deletions(-)


-- 
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org | (970) 217-8978

^ permalink raw reply

* Re: [PATCH] headers_check: don't warn about kexec.h
From: Paul Bolle @ 2015-01-13 17:13 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Geoff Levand, Michal Marek, linux-kernel, linux-api, linux-kbuild
In-Reply-To: <12825174.7oxZXDxNhV@wuerfel>

Hi Arnd,

On Tue, 2015-01-13 at 17:19 +0100, Arnd Bergmann wrote:
> We've been getting this warning for ages:
> 
> ./usr/include/linux/kexec.h:61: userspace cannot reference function or variable defined in the kernel
> 
> There is no proper fix for this file, as the declaration is meant for
> user space, not for the kernel, so we should work around it the
> same way that we treat the seqbuf_dump() definition.

For the seqbuf_dump() stuff there are apparently users. I forgot the
details, but the sound people wanted to keep that declaration (and some
related ancient things) in the header involved to keep some really
ancient stuff buildable.

But the kexec_load declaration isn't very useful for userspace, see the
patch I submitted in
http://lkml.kernel.org/r/1389791824.17407.9.camel@x220 . And after my
attempt the export of that declaration has also been discussed in
http://lkml.kernel.org/r/115373b6ac68ee7a305975896e1c4971e8e51d4c.1408731991.git.geoff@infradead.org 

In that last discussion no one has been able to point to an actual user
of it. So, as far as I can tell, no one actually uses it. Which makes
sense, because including this header by itself doesn't give one access
to a useful definition of kexec_load. So why bother with the
declaration?

The last time that Geoff has been trying to get that patch applied
should be
http://lkml.kernel.org/r/b0702fc4186db21820d686e89afd6480560823db.1415837218.git.geoff@infradead.org  I'd rather see that go in.

> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> 
> diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl
> index 62320f93e903..fb051848667c 100755
> --- a/scripts/headers_check.pl
> +++ b/scripts/headers_check.pl
> @@ -69,6 +69,10 @@ sub check_declarations
>  	if ($line =~ m/^void seqbuf_dump\(void\);/) {
>  		return;
>  	}
> +	# user-only declaration from kexec.h
> +	if ($line =~ m/^extern int kexec_load/) {
> +		return;
> +	}
>  	if ($line =~ m/^(\s*extern|unsigned|char|short|int|long|void)\b/) {
>  		printf STDERR "$filename:$lineno: " .
>  			      "userspace cannot reference function or " .

Regards,

Paul Bolle

^ permalink raw reply

* Re: [PATCH 1/7] tuner-core: properly initialize media controller subdev
From: Antti Palosaari @ 2015-01-13 16:35 UTC (permalink / raw)
  To: Mauro Carvalho Chehab, Laurent Pinchart
  Cc: Linux Media Mailing List, Mauro Carvalho Chehab, Hans Verkuil,
	Prabhakar Lad, Sakari Ailus, linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20150111122553.76394653-+RedX5hVuTR+urZeOPWqwQ@public.gmane.org>

On 01/11/2015 04:25 PM, Mauro Carvalho Chehab wrote:
> Em Sun, 11 Jan 2015 16:02:41 +0200
> Laurent Pinchart <laurent.pinchart-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org> escreveu:

>> I'm not too familiar with tuners, do they all have a single output only and no
>> input ?
>
> They have an input: the antenna connector. However, I don't see any need
> to map it for most tuners, as there's generally just one input, hardwired
> into the tuner chip.
>
> There are some hardware with 2 antenna connectors, but for different
> functions (FM and TV). They're selected automatically when the V4L2
> driver switches between FM and TV.
>
> In any case, the tuner-core doesn't provide any way to select the
> antenna input.
>
> So, if a driver would need to select the input, it would either need
> to not use tuner-core or some patch will be required to add such
> functionality inside tuner-core.

A tuner has the antenna as an input, and its output is an intermediate
frequency or baseband signal (IF/BB (zero-IF)).

I think most modern silicon tuners actually have more than one physical
antenna input - but those are left unused or the same physical antenna
connector is wired to all those inputs.

Sooner or later there will be a receiver having multiple antenna
connectors which are selectable by software. So let it at least be an
option that is easy to add later.

regards
Antti

-- 
http://palosaari.fi/

^ permalink raw reply

* [PATCH] headers_check: don't warn about kexec.h
From: Arnd Bergmann @ 2015-01-13 16:19 UTC (permalink / raw)
  To: Michal Marek; +Cc: linux-kernel, linux-api, linux-kbuild

We've been getting this warning for ages:

./usr/include/linux/kexec.h:61: userspace cannot reference function or variable defined in the kernel

There is no proper fix for this file, as the declaration is meant for
user space, not for the kernel, so we should work around it the
same way that we treat the seqbuf_dump() definition.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl
index 62320f93e903..fb051848667c 100755
--- a/scripts/headers_check.pl
+++ b/scripts/headers_check.pl
@@ -69,6 +69,10 @@ sub check_declarations
 	if ($line =~ m/^void seqbuf_dump\(void\);/) {
 		return;
 	}
+	# user-only declaration from kexec.h
+	if ($line =~ m/^extern int kexec_load/) {
+		return;
+	}
 	if ($line =~ m/^(\s*extern|unsigned|char|short|int|long|void)\b/) {
 		printf STDERR "$filename:$lineno: " .
 			      "userspace cannot reference function or " .

^ permalink raw reply related

* Re: [PATCH] MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS
From: Markos Chandras @ 2015-01-13 13:12 UTC (permalink / raw)
  To: Paul Burton, linux-mips-6z/3iImG2C8G8FEW9MqTrA
  Cc: Matthew Fortune, LKML, linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1420719457-690-1-git-send-email-paul.burton-1AXoQHu6uovQT0dZR+AlfA@public.gmane.org>

On 01/08/2015 12:17 PM, Paul Burton wrote:
> Userland code may be built using an ABI which permits linking to objects
> that have more restrictive floating point requirements. For example,
> userland code may be built to target the O32 FPXX ABI. Such code may be
> linked with other FPXX code, or code built for either one of the more
> restrictive FP32 or FP64. When linking with more restrictive code, the
> overall requirement of the process becomes that of the more restrictive
> code. The kernel has no way to know in advance which mode the process
> will need to be executed in, and indeed it may need to change during
> execution. The dynamic loader is the only code which will know the
> overall required mode, and so it needs to have a means to instruct the
> kernel to switch the FP mode of the process.
> 
> This patch introduces 2 new options to the prctl syscall which provide
> such a capability. The FP mode of the process is represented as a
> simple bitmask combining a number of mode bits mirroring those present
> in the hardware. Userland can either retrieve the current FP mode of
> the process:
> 
>   mode = prctl(PR_GET_FP_MODE);
> 
> or modify the current FP mode of the process:
> 
>   err = prctl(PR_SET_FP_MODE, new_mode);
> 
> Signed-off-by: Paul Burton <paul.burton-1AXoQHu6uovQT0dZR+AlfA@public.gmane.org>
> Cc: Matthew Fortune <matthew.fortune-1AXoQHu6uovQT0dZR+AlfA@public.gmane.org>
> Cc: Markos Chandras <markos.chandras-1AXoQHu6uovQT0dZR+AlfA@public.gmane.org>
Hi,

I think the "MIPS,prctl" in the title should be "MIPS: prctl"

I have also CC'd the LKML and the linux-api mailing lists since this
touches the kernel ABI with the new PR_[GS]ET_FP_MODE definitions.

(I intentionally leave the contents of the patch below so people can
comment on it)

> ---
>  arch/mips/include/asm/mmu.h         |  3 ++
>  arch/mips/include/asm/mmu_context.h |  2 +
>  arch/mips/include/asm/processor.h   | 11 +++++
>  arch/mips/kernel/process.c          | 92 +++++++++++++++++++++++++++++++++++++
>  arch/mips/kernel/traps.c            | 19 ++++++++
>  include/uapi/linux/prctl.h          |  5 ++
>  kernel/sys.c                        | 12 +++++
>  7 files changed, 144 insertions(+)
> 
> diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h
> index c436138..1afa1f9 100644
> --- a/arch/mips/include/asm/mmu.h
> +++ b/arch/mips/include/asm/mmu.h
> @@ -1,9 +1,12 @@
>  #ifndef __ASM_MMU_H
>  #define __ASM_MMU_H
>  
> +#include <linux/atomic.h>
> +
>  typedef struct {
>  	unsigned long asid[NR_CPUS];
>  	void *vdso;
> +	atomic_t fp_mode_switching;
>  } mm_context_t;
>  
>  #endif /* __ASM_MMU_H */
> diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
> index 2f82568..87f1107 100644
> --- a/arch/mips/include/asm/mmu_context.h
> +++ b/arch/mips/include/asm/mmu_context.h
> @@ -132,6 +132,8 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
>  	for_each_possible_cpu(i)
>  		cpu_context(i, mm) = 0;
>  
> +	atomic_set(&mm->context.fp_mode_switching, 0);
> +
>  	return 0;
>  }
>  
> diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
> index f1df4cb..9daa386 100644
> --- a/arch/mips/include/asm/processor.h
> +++ b/arch/mips/include/asm/processor.h
> @@ -399,4 +399,15 @@ unsigned long get_wchan(struct task_struct *p);
>  
>  #endif
>  
> +/*
> + * Functions & macros implementing the PR_GET_FP_MODE & PR_SET_FP_MODE options
> + * to the prctl syscall.
> + */
> +extern int mips_get_process_fp_mode(struct task_struct *task);
> +extern int mips_set_process_fp_mode(struct task_struct *task,
> +				    unsigned int value);
> +
> +#define GET_FP_MODE(task)		mips_get_process_fp_mode(task)
> +#define SET_FP_MODE(task,value)		mips_set_process_fp_mode(task, value)
> +
>  #endif /* _ASM_PROCESSOR_H */
> diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> index eb76434..b732c0c 100644
> --- a/arch/mips/kernel/process.c
> +++ b/arch/mips/kernel/process.c
> @@ -25,6 +25,7 @@
>  #include <linux/completion.h>
>  #include <linux/kallsyms.h>
>  #include <linux/random.h>
> +#include <linux/prctl.h>
>  
>  #include <asm/asm.h>
>  #include <asm/bootinfo.h>
> @@ -550,3 +551,94 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
>  {
>  	smp_call_function(arch_dump_stack, NULL, 1);
>  }
> +
> +int mips_get_process_fp_mode(struct task_struct *task)
> +{
> +	int value = 0;
> +
> +	if (!test_tsk_thread_flag(task, TIF_32BIT_FPREGS))
> +		value |= PR_FP_MODE_FR;
> +	if (test_tsk_thread_flag(task, TIF_HYBRID_FPREGS))
> +		value |= PR_FP_MODE_FRE;
> +
> +	return value;
> +}
> +
> +int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
> +{
> +	const unsigned int known_bits = PR_FP_MODE_FR | PR_FP_MODE_FRE;
> +	unsigned long switch_count;
> +	struct task_struct *t;
> +
> +	/* Check the value is valid */
> +	if (value & ~known_bits)
> +		return -EOPNOTSUPP;
> +
> +	/* Avoid inadvertently triggering emulation */
> +	if ((value & PR_FP_MODE_FR) && cpu_has_fpu &&
> +	    !(current_cpu_data.fpu_id & MIPS_FPIR_F64))
> +		return -EOPNOTSUPP;
> +	if ((value & PR_FP_MODE_FRE) && !cpu_has_fre)
> +		return -EOPNOTSUPP;
> +
> +	/* Save FP & vector context, then disable FPU & MSA */
> +	if (task->signal == current->signal)
> +		lose_fpu(1);
> +
> +	/* Prevent any threads from obtaining live FP context */
> +	atomic_set(&task->mm->context.fp_mode_switching, 1);
> +	smp_mb__after_atomic();
> +
> +	/*
> +	 * If there are multiple online CPUs then wait until all threads whose
> +	 * FP mode is about to change have been context switched. This approach
> +	 * allows us to only worry about whether an FP mode switch is in
> +	 * progress when FP is first used in a tasks time slice. Pretty much all
> +	 * of the mode switch overhead can thus be confined to cases where mode
> +	 * switches are actually occuring. That is, to here. However for the
> +	 * thread performing the mode switch it may take a while...
> +	 */
> +	if (num_online_cpus() > 1) {
> +		spin_lock_irq(&task->sighand->siglock);
> +
> +		for_each_thread(task, t) {
> +			if (t == current)
> +				continue;
> +
> +			switch_count = t->nvcsw + t->nivcsw;
> +
> +			do {
> +				spin_unlock_irq(&task->sighand->siglock);
> +				cond_resched();
> +				spin_lock_irq(&task->sighand->siglock);
> +			} while ((t->nvcsw + t->nivcsw) == switch_count);
> +		}
> +
> +		spin_unlock_irq(&task->sighand->siglock);
> +	}
> +
> +	/*
> +	 * There are now no threads of the process with live FP context, so it
> +	 * is safe to proceed with the FP mode switch.
> +	 */
> +	for_each_thread(task, t) {
> +		/* Update desired FP register width */
> +		if (value & PR_FP_MODE_FR) {
> +			clear_tsk_thread_flag(t, TIF_32BIT_FPREGS);
> +		} else {
> +			set_tsk_thread_flag(t, TIF_32BIT_FPREGS);
> +			clear_tsk_thread_flag(t, TIF_MSA_CTX_LIVE);
> +		}
> +
> +		/* Update desired FP single layout */
> +		if (value & PR_FP_MODE_FRE)
> +			set_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
> +		else
> +			clear_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
> +	}
> +
> +	/* Allow threads to use FP again */
> +	atomic_set(&task->mm->context.fp_mode_switching, 0);
> +
> +	return 0;
> +}
> diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
> index ad3d203..d5fbfb5 100644
> --- a/arch/mips/kernel/traps.c
> +++ b/arch/mips/kernel/traps.c
> @@ -1134,10 +1134,29 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action,
>  	return NOTIFY_OK;
>  }
>  
> +static int wait_on_fp_mode_switch(atomic_t *p)
> +{
> +	/*
> +	 * The FP mode for this task is currently being switched. That may
> +	 * involve modifications to the format of this tasks FP context which
> +	 * make it unsafe to proceed with execution for the moment. Instead,
> +	 * schedule some other task.
> +	 */
> +	schedule();
> +	return 0;
> +}
> +
>  static int enable_restore_fp_context(int msa)
>  {
>  	int err, was_fpu_owner, prior_msa;
>  
> +	/*
> +	 * If an FP mode switch is currently underway, wait for it to
> +	 * complete before proceeding.
> +	 */
> +	wait_on_atomic_t(&current->mm->context.fp_mode_switching,
> +			 wait_on_fp_mode_switch, TASK_KILLABLE);
> +
>  	if (!used_math()) {
>  		/* First time FP context user. */
>  		preempt_disable();
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 89f6350..31891d9 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -185,4 +185,9 @@ struct prctl_mm_map {
>  #define PR_MPX_ENABLE_MANAGEMENT  43
>  #define PR_MPX_DISABLE_MANAGEMENT 44
>  
> +#define PR_SET_FP_MODE		45
> +#define PR_GET_FP_MODE		46
> +# define PR_FP_MODE_FR		(1 << 0)	/* 64b FP registers */
> +# define PR_FP_MODE_FRE		(1 << 1)	/* 32b compatibility */
> +
>  #endif /* _LINUX_PRCTL_H */
> diff --git a/kernel/sys.c b/kernel/sys.c
> index a8c9f5a..08b16bb 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -97,6 +97,12 @@
>  #ifndef MPX_DISABLE_MANAGEMENT
>  # define MPX_DISABLE_MANAGEMENT(a)	(-EINVAL)
>  #endif
> +#ifndef GET_FP_MODE
> +# define GET_FP_MODE(a)		(-EINVAL)
> +#endif
> +#ifndef SET_FP_MODE
> +# define SET_FP_MODE(a,b)	(-EINVAL)
> +#endif
>  
>  /*
>   * this is where the system-wide overflow UID and GID are defined, for
> @@ -2215,6 +2221,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
>  	case PR_MPX_DISABLE_MANAGEMENT:
>  		error = MPX_DISABLE_MANAGEMENT(me);
>  		break;
> +	case PR_SET_FP_MODE:
> +		error = SET_FP_MODE(me, arg2);
> +		break;
> +	case PR_GET_FP_MODE:
> +		error = GET_FP_MODE(me);
> +		break;
>  	default:
>  		error = -EINVAL;
>  		break;
> 


-- 
markos

^ permalink raw reply

* Re: [PATCH RESEND v4] sched/fair: Add advisory flag for borrowing a timeslice
From: Peter Zijlstra @ 2015-01-13 11:25 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Khalid Aziz, Ingo Molnar, Thomas Gleixner, corbet, mingo, hpa,
	akpm, rientjes, ak, mgorman, raistlin, kirill.shutemov, atomlin,
	avagin, gorcunov, serge.hallyn, athorlton, oleg, vdavydov,
	daeseok.youn, keescook, yangds.fnst, sbauer, vishnu.ps, axboe,
	paulmck, linux-kernel, linux-doc, linux-api
In-Reply-To: <5499B8A2.4080008@redhat.com>

On Tue, Dec 23, 2014 at 01:46:58PM -0500, Rik van Riel wrote:
> An uncontended futex is taken without ever going into kernel
> space. Adaptive spinning allows short duration futexes to be
> taken without going into kernel space.

The going into kernel is a red herring afaict, a no-op syscall costs ~180
cycles or something like that. So sure you can do a wee spin (~90 cycles
to try and amortize that), but I really doubt that this is the problem.

^ permalink raw reply

* Re: [v8 2/5] ext4: adds project ID support
From: Jan Kara @ 2015-01-12 17:01 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Jan Kara, Andreas Dilger, Li Xi,
	Linux Filesystem Development List, ext4 development,
	linux-api-u79uwXL29TY76Z2rM5mHXA, Theodore Ts'o, Al Viro,
	Christoph Hellwig, Dmitry Monakhov
In-Reply-To: <20150109234627.GN31508@dastard>

On Sat 10-01-15 10:46:27, Dave Chinner wrote:
> On Fri, Jan 09, 2015 at 10:47:58AM +0100, Jan Kara wrote:
> > On Thu 08-01-15 15:20:21, Andreas Dilger wrote:
> > > On Jan 8, 2015, at 1:26 AM, Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org> wrote:
> > > > On Tue 09-12-14 13:22:25, Li Xi wrote:
> > > >> This patch adds a new internal field of ext4 inode to save project
> > > >> identifier. Also a new flag EXT4_INODE_PROJINHERIT is added for
> > > >> inheriting project ID from parent directory.
> > > >  I have noticed one thing you apparently changed in v7 of the patch set.
> > > > See below.
> > > > 
> > > >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> > > >> index 29c43e7..8bd1da9 100644
> > > >> --- a/fs/ext4/ext4.h
> > > >> +++ b/fs/ext4/ext4.h
> > > >> @@ -377,16 +377,18 @@ struct flex_groups {
> > > >> #define EXT4_EA_INODE_FL	        0x00200000 /* Inode used for large EA */
> > > >> #define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
> > > >> #define EXT4_INLINE_DATA_FL		0x10000000 /* Inode has inline data. */
> > > >> +#define EXT4_PROJINHERIT_FL		FS_PROJINHERIT_FL /* Create with parents projid */
> > > >  How did FS_PROJINHERIT_FL get here? There used to be 0x20000000 in older
> > > > version of the patch set which is correct - this definition is defining
> > > > ext4 on-disk format. As such it is an ext4 specific flag and should be
> > > > defined to a fixed constant independent of any other filesystem. It seems
> > > > you are somewhat mixing what is an on-disk format flag value and what is a
> > > > flag value passed from userspace. These two may be different things and
> > > > you need to convert between the values when getting / setting flags...
> > > 
> > > Currently the EXT4_*_FL and FS_*_FL values are all identical, and there
> > > is no reason to change that before it is actually needed.  Since the
> > > FS_PROJINHERIT_FL is used via chattr/lsattr from userspace, this value
> > > must also be kept the same in the future to avoid API breakage, so there
> > > is no reason to worry about incompatibilities.
> >   Agreed. I was somewhat worried about having on-disk flag defined through
> > the external non-ext4 define but you are right that neither can really
> > change once we ship a kernel with it.
> > 
> > > See also the [v8 5/5] patch, which is changing the EXT4_*_FL values to
> > > use FS_*_FL constants, where applicable, so that it is more clear that
> > > these values need to be the same.
> >   OK, I've missed that. So if things will be consistent again, I'm fine
> > with the change.
> 
> Except that I NACK'd that change (i.e patch 4/5) because it's out of
> scope of a "support project quota" patchset. not to mention that it
> is broken because it exhausts the flags space with ext4 specific
> flags and prevents future expansion of the ioctl structure.
  I agree with your objections from that review (which is why I didn't
reply to that email since I didn't have more to say).

> Any extension to the ioctl needs to be done in a separate patch set,
> with separate justification. This patch set should only implement
> the very minimum needed to use the project quota ioctl flags....
  Agreed. I was just saying that I have nothing against defining ext4 flag
values using FS_*_FL where possible.

							Honza
-- 
Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
SUSE Labs, CR

^ permalink raw reply

* Re: [PATCHv10 man-pages 5/5] execveat.2: initial man page for execveat(2)
From: Rich Felker @ 2015-01-12 16:07 UTC (permalink / raw)
  To: David Drysdale
  Cc: Eric W. Biederman, Al Viro, Michael Kerrisk (man-pages),
	Andy Lutomirski, Meredydd Luff, linux-kernel@vger.kernel.org,
	Andrew Morton, David Miller, Thomas Gleixner, Stephen Rothwell,
	Oleg Nesterov, Ingo Molnar, H. Peter Anvin, Kees Cook,
	Arnd Bergmann, Christoph Hellwig, X86 ML, linux-arch, Linux API,
	sparclinux
In-Reply-To: <CAHse=S822QjRE_FF6JHi-5cUsrXHJnyjRwYJuyYn2O6tsz2dHQ@mail.gmail.com>

On Mon, Jan 12, 2015 at 11:33:49AM +0000, David Drysdale wrote:
> On Sat, Jan 10, 2015 at 1:33 AM, Rich Felker <dalias@aerifal.cx> wrote:
> > On Fri, Jan 09, 2015 at 07:17:41PM -0600, Eric W. Biederman wrote:
> >> Rich Felker <dalias@aerifal.cx> writes:
> >>
> >> > I'm not proposing code because I'm a libc developer not a kernel
> >> > developer. I know what's needed for userspace to provide a conforming
> >> > fexecve to applications, not how to implement that on the kernel side,
> >> > although I'm trying to provide constructive ideas. The hostility is
> >> > really not necessary.
> >>
> >> Conforming to what?
> >>
> >> The open group fexecve says nothing about requiring a file descriptor
> >> passed to fexecve to have O_CLOEXEC.
> >
> > It doesn't require it but it allows it, and in multithreaded programs
> > that might run child processes (or library code that might be used in
> > such situations), O_CLOEXEC is mandatory everywhere to avoid fd leaks.
> 
> As a naive idea related to Andy's suggestion elsewhere, could you
> just have an environment convention for fexecve-ing scripts?  That
> would reduce FD leaks without any need for kernel involvement/changes.
> 
> For example, set _FEXECVED_VIA_FD=4 but don't set
> O_CLOEXEC before fexecve, and the interpreter reads then
> closes that FD.  Or just get the interpreter to spot scripts named
> "/dev/fd/%d" and read-then-close the FD that way, cf. Eric's suggestion
> at https://lkml.org/lkml/2014/10/22/652.

No. Any omission of O_CLOEXEC even momentarily is a potentially
dangerous fd leak. This is the case whenever the process is
multithreaded and it's possible that other threads might fork and
exec. Think of the case of a privileged daemon re-execing itself (e.g.
to switch to an updated version) while there are potentially other
threads spawning non-privileged processes.

Rich

^ permalink raw reply

* Re: [PATCH 0/2] Input: uinput - fix ioctl numbers in uapi/uinput.h
From: Benjamin Tissoires @ 2015-01-12 15:57 UTC (permalink / raw)
  To: Dmitry Torokhov
  Cc: Gabriel Laskar, linux-api-u79uwXL29TY76Z2rM5mHXA, David Herrmann,
	Peter Hutterer
In-Reply-To: <20150112002943.GA31862@dtor-ws>

Hi,

On Jan 11 2015 or thereabouts, Dmitry Torokhov wrote:
> Hi Gabriel,
> 
> On Sat, Jan 10, 2015 at 01:43:34PM +0100, Gabriel Laskar wrote:
> > Ioctl numbers for UI_GET_SYSNAME and UI_GET_VERSION are incorrectly numbered:
> > since the nr number is 8-bit encoded, 300 and 301 will effectively become 44 and 45.
> > These two patches fix this.
> > 
> 
> Nice catch, thank you! I folded the patches together (as they are fixing
> essentially the same thing) , changed hex to dec (because the rest of
> ioctls in uinput use decimal) and applied.
> 
> I wonder if we need to put a BUILD_BUG_ON in one of the _IO* defines to
> catch such errors early on.
> 

As others said, nice catch, thanks a lot. I feel very embarrassed about
this :(

Cheers,
Benjamin

^ permalink raw reply

* Re: [PATCHv10 man-pages 5/5] execveat.2: initial man page for execveat(2)
From: David Drysdale @ 2015-01-12 14:18 UTC (permalink / raw)
  To: Al Viro
  Cc: Rich Felker, Michael Kerrisk (man-pages), Eric W. Biederman,
	Andy Lutomirski, Meredydd Luff, linux-kernel@vger.kernel.org,
	Andrew Morton, David Miller, Thomas Gleixner, Stephen Rothwell,
	Oleg Nesterov, Ingo Molnar, H. Peter Anvin, Kees Cook,
	Arnd Bergmann, Christoph Hellwig, X86 ML, linux-arch, Linux API,
	sparclinux
In-Reply-To: <20150109215042.GM22149@ZenIV.linux.org.uk>

On Fri, Jan 9, 2015 at 9:50 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> On Fri, Jan 09, 2015 at 04:28:52PM -0500, Rich Felker wrote:
>
>> The "magic open-once magic symlink" approach is really the cleanest
>> solution I can find. In the case where the interpreter does not open
>> the script, nothing terribly bad happens; the magic symlink just
>> sticks around until _exit or exec. In the case where the interpreter
>> opens it more than once, you get a failure, but as far as I know
>> existing interpreters don't do this, and it's arguably bad design. In
>> any case it's a caught error.
>
> You know what's cleaner than that?  git revert 27d6ec7ad
> It has just been merged; until 3.19 it's fair game for removal.
>
> And yes, I should've NAKed the damn thing loud and clear, rather than
> asking questions back then, getting no answers and letting it slip.
> Mea culpa.

Al, I'm sorry if I missed a question or concern of yours back in
October -- I certainly didn't intend to (that would be foolish indeed!).

[I thought the main open question was whether a dupfs
implementation would help with /dev/fd/ and /proc/ semantics, but I
had the (possibly incorrect) understanding that that was somewhat
orthogonal to the execveat implementation.]

Are there any changes/fixes/refactorings that I could do (especially
within the 3.19 timeframe) that would help mollify at all?

> Back then the procfs-free environments had been pushed as a serious argument
> in favour of merging the damn thing.  Now you guys turn around and say that
> we not only need procfs mounted, we need a yet-to-be-added kludge in there
> to cope with the actual intended uses.

Not me!

^ permalink raw reply

* Re: [PATCH 0/3] epoll: Add epoll_pwait1 syscall
From: Fam Zheng @ 2015-01-12 13:23 UTC (permalink / raw)
  To: Josh Triplett
  Cc: Andy Lutomirski, Miklos Szeredi,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Thomas Gleixner, Ingo Molnar, H. Peter Anvin, X86 ML,
	Alexander Viro, Andrew Morton, Juri Lelli, Zach Brown,
	David Drysdale, Kees Cook, Alexei Starovoitov, David Herrmann,
	Dario Faggioli, Theodore Ts'o, Peter Zijlstra, Vivek Goyal,
	Mike Frysinger, Heiko Carstens, Rasmus Villemoes, Oleg Nesterov,
	Mathieu
In-Reply-To: <20150112100836.GA13150@thin>

On Mon, 01/12 02:08, Josh Triplett wrote:
> On Mon, Jan 12, 2015 at 04:24:00PM +0800, Fam Zheng wrote:
> > On Thu, 01/08 21:21, Josh Triplett wrote:
> > > On Fri, Jan 09, 2015 at 12:49:08PM +0800, Fam Zheng wrote:
> > > > On Thu, 01/08 18:24, Andy Lutomirski wrote:
> > > > > On Thu, Jan 8, 2015 at 5:52 PM, Fam Zheng <famz-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
> > > > > > On Thu, 01/08 17:28, Andy Lutomirski wrote:
> > > > > >> On Thu, Jan 8, 2015 at 5:25 PM, Fam Zheng <famz-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
> > > > > >> > On Thu, 01/08 09:57, Andy Lutomirski wrote:
> > > > > >> >> I'd like to see a more ambitious change, since the timer isn't the
> > > > > >> >> only problem like this.  Specifically, I'd like a syscall that does a
> > > > > >> >> list of epoll-related things and then waits.  The list of things could
> > > > > >> >> include, at least:
> > > > > >> >>
> > > > > >> >>  - EPOLL_CTL_MOD actions: level-triggered epoll users are likely to
> > > > > >> >> want to turn on and off their requests for events on a somewhat
> > > > > >> >> regular basis.
> > > > > >> >
> > > > > >> > This sounds good to me.
> > > > > >> >
> > > > > >> >>
> > > > > >> >>  - timerfd_settime actions: this allows a single syscall to wait and
> > > > > >> >> adjust *both* monotonic and real-time wakeups.
> > > > > >> >
> > > > > >> > I'm not sure, doesn't this break orthogonality between epoll and timerfd?
> > > > > >>
> > > > > >> Yes.  It's not very elegant, and more elegant ideas are welcome.
> > > > > >
> > > > > > What is the purpose of embedding timerfd operation here? Modifying timerfd
> > > > > > for each poll doesn't sound a common pattern to me.
> > > > > 
> > > > > Setting a timeout is definitely a common pattern, hence this thread.
> > > > > But the current timeout interface sucks, and people should really use
> > > > > absolute time.  (My epoll software uses absolute time.)  But then
> > > > > users need to decide whether to have their timeout based on the
> > > > > monotonic clock or the realtime clock (or something else entirely).
> > > > > Some bigger programs may want both -- they may have internal events
> > > > > queued for certain times and for certain timeouts, and those should
> > > > > use realtime and monotonic respectively.  Heck, users may also want
> > > > > separate slack values on those.
> > > > > 
> > > > > Timerfd is the only thing we have right now that is anywhere near
> > > > > flexible enough.  Obviously if epoll became fancy enough, then we
> > > > > could do away with the timerfd entirely here.
> > > > > 
> > > > > >
> > > > > >>
> > > > > >> >
> > > > > >> >>
> > > > > >> >> Would this make sense?  It could look like:
> > > > > >> >>
> > > > > >> >> int epoll_mod_and_pwait(int epfd,
> > > > > >> >>   struct epoll_event *events, int maxevents,
> > > > > >> >>   struct epoll_command *commands, int ncommands,
> > > > > >> >>   const sigset_t *sigmask);
> > > > > >> >
> > > > > >> > What about flags?
> > > > > >> >
> > > > > >>
> > > > > >> No room.  Maybe it should just be a struct for everything instead of
> > > > > >> separate args.
> > > > > >
> > > > > > Also no room for timeout. A single struct sounds the only way to go.
> > > > > 
> > > > > That's what timerfd is for.  I think it would be a bit weird to
> > > > > support "timeout" and detailed timerfd control.
> > > > 
> > > > I see what you mean. Thanks.
> > > > 
> > > > I still don't like hooking timerfd in the interface. Besides the unclean
> > > > interface, it also feels cumbersome and overkill to let users set up and add a
> > > > dedicated timerfd to implement timeout.
> > > > 
> > > > How about this:
> > > > 
> > > > int epoll_mod_wait(int epfd, struct epoll_mod_wait_data *data);
> > > > 
> > > > struct epoll_mod_wait_data {
> > > > 	struct epoll_event *events;
> > > > 	int maxevents;
> > > > 	struct epoll_mod_cmd {
> > > > 		int op,
> > > > 		int fd;
> > > > 		void *data;
> > > > 	} *cmds;
> > > > 	int ncmds;
> > > > 	int flags;
> > > > 	sigset_t *sigmask;
> > > > };
> > > > 
> > > > Commands ops are:
> > > > 
> > > > 	EPOLL_CTL_ADD
> > > > 		@fd is the fd to modify; @data is epoll_event.
> > > > 	EPOLL_CTL_MOD
> > > > 		@fd is the fd to modify; @data is epoll_event.
> > > > 	EPOLL_CTL_DEL
> > > > 		@fd is the fd to modify; @data is epoll_event.
> > > > 
> > > > 	EPOLL_CTL_SET_TIMEOUT
> > > > 		@fd is ignored, @data is timespec.
> > > > 		Clock type and relative/absolute are selected by flags as below.
> > > > 
> > > > Flags are given to override timeout defaults:
> > > > 	EPOLL_FL_MONOTONIC_CLOCK
> > > > 		If set, don't use realtime clock, use monotonic clock.
> > > > 	EPOLL_FL_ABSOLUTE_TIMEOUT
> > > > 		If set, don't use relative timeout, use absolute timeout.
> > > 
> > > I'd suggest using an "int clockid" field instead, like timerfd_settime;
> > > even if it only accepts CLOCK_REALTIME and CLOCK_MONOTONIC, if it needs
> > > extending in the future, it'd be painful to have to remap new CLOCK_*
> > > constants into the EPOLL_FL_* namespace.  (I do think dropping timeouts
> > > in favor of timerfds makes things more nicely orthogonal, but epoll_wait
> > > already has a timeout parameter, so *shrug*.)
> > > 
> > > Also, I think that structure has too many levels of indirection; it'd
> > > produce many unnecessary cache misses; considering you're trying to
> > > eliminate the overhead of one or two extra syscalls, you don't want to
> > > introduce a pile of unnecessary cache misses in the processes.  I'd
> > > suggest inlining cmds as an array at the end of the structure, and
> > > turning "void *data" into an inline epoll_event.  (Or, you could use
> > > "events" as an in/out parameter.)
> > > 
> > > You could drop EPOLL_CTL_SET_TIMEOUT, and just include a clockid and
> > > timespec directly in the top-level structure.
> > > 
> > > And I'd suggest either making flags a top-level parameter or putting it
> > > at the start of the structure, to make future extension easier.
> > 
> > Makes sense to me, thanks.
> > 
> > Also the number of cmds are undecided until we do a copy_from_user for the
> > header fields before another one for specified number of cmds. So I think it's
> > better to move ncmds and cmds to top level parameter.
> 
> That seems like an even better idea, yeah.
> 

One more question I'm not sure about regarding the semantics: should we make the
syscall atomic?  I.e., if one of the cmds failed even before wait, or if all the
cmds are executed, but the eventual wait failed, should we revert the commands'
effect? Or return overall result and result for each cmd if failed?  Or just
claim that it's possible for a first few cmds to be effective even on error?

It will be way more complicated to make it atomic, so I'd like to be clear what
we should do. Ideas?

Thanks,
Fam

^ permalink raw reply

* Re: [PATCHv10 man-pages 5/5] execveat.2: initial man page for execveat(2)
From: David Drysdale @ 2015-01-12 11:33 UTC (permalink / raw)
  To: Rich Felker
  Cc: Eric W. Biederman, Al Viro, Michael Kerrisk (man-pages),
	Andy Lutomirski, Meredydd Luff, linux-kernel@vger.kernel.org,
	Andrew Morton, David Miller, Thomas Gleixner, Stephen Rothwell,
	Oleg Nesterov, Ingo Molnar, H. Peter Anvin, Kees Cook,
	Arnd Bergmann, Christoph Hellwig, X86 ML, linux-arch, Linux API,
	sparclinux
In-Reply-To: <20150110013324.GB4574@brightrain.aerifal.cx>

On Sat, Jan 10, 2015 at 1:33 AM, Rich Felker <dalias@aerifal.cx> wrote:
> On Fri, Jan 09, 2015 at 07:17:41PM -0600, Eric W. Biederman wrote:
>> Rich Felker <dalias@aerifal.cx> writes:
>>
>> > I'm not proposing code because I'm a libc developer not a kernel
>> > developer. I know what's needed for userspace to provide a conforming
>> > fexecve to applications, not how to implement that on the kernel side,
>> > although I'm trying to provide constructive ideas. The hostility is
>> > really not necessary.
>>
>> Conforming to what?
>>
>> The open group fexecve says nothing about requiring a file descriptor
>> passed to fexecve to have O_CLOEXEC.
>
> It doesn't require it but it allows it, and in multithreaded programs
> that might run child processes (or library code that might be used in
> such situations), O_CLOEXEC is mandatory everywhere to avoid fd leaks.

As a naive idea related to Andy's suggestion elsewhere, could you
just have an environment convention for fexecve-ing scripts?  That
would reduce FD leaks without any need for kernel involvement/changes.

For example, set _FEXECVED_VIA_FD=4 but don't set
O_CLOEXEC before fexecve, and the interpreter reads then
closes that FD.  Or just get the interpreter to spot scripts named
"/dev/fd/%d" and read-then-close the FD that way, cf. Eric's suggestion
at https://lkml.org/lkml/2014/10/22/652.

By the way, FreeBSD has a fexecve(2) syscall that behaves
in the same way as the current Linux code for an O_CLOEXEC
script -- the interpreter fails to open "/dev/fd/6" as it's gone.
Do you know if there are any other OSes that already do
something more sophisticated for this case?

^ permalink raw reply

* Re: [PATCH 0/3] epoll: Add epoll_pwait1 syscall
From: Josh Triplett @ 2015-01-12 10:08 UTC (permalink / raw)
  To: Fam Zheng
  Cc: Andy Lutomirski, Miklos Szeredi, linux-kernel@vger.kernel.org,
	Thomas Gleixner, Ingo Molnar, H. Peter Anvin, X86 ML,
	Alexander Viro, Andrew Morton, Juri Lelli, Zach Brown,
	David Drysdale, Kees Cook, Alexei Starovoitov, David Herrmann,
	Dario Faggioli, Theodore Ts'o, Peter Zijlstra, Vivek Goyal,
	Mike Frysinger, Heiko Carstens, Rasmus Villemoes, Oleg Nesterov,
	Mathieu
In-Reply-To: <20150112082400.GA21123@fam-t430.nay.redhat.com>

On Mon, Jan 12, 2015 at 04:24:00PM +0800, Fam Zheng wrote:
> On Thu, 01/08 21:21, Josh Triplett wrote:
> > On Fri, Jan 09, 2015 at 12:49:08PM +0800, Fam Zheng wrote:
> > > On Thu, 01/08 18:24, Andy Lutomirski wrote:
> > > > On Thu, Jan 8, 2015 at 5:52 PM, Fam Zheng <famz@redhat.com> wrote:
> > > > > On Thu, 01/08 17:28, Andy Lutomirski wrote:
> > > > >> On Thu, Jan 8, 2015 at 5:25 PM, Fam Zheng <famz@redhat.com> wrote:
> > > > >> > On Thu, 01/08 09:57, Andy Lutomirski wrote:
> > > > >> >> I'd like to see a more ambitious change, since the timer isn't the
> > > > >> >> only problem like this.  Specifically, I'd like a syscall that does a
> > > > >> >> list of epoll-related things and then waits.  The list of things could
> > > > >> >> include, at least:
> > > > >> >>
> > > > >> >>  - EPOLL_CTL_MOD actions: level-triggered epoll users are likely to
> > > > >> >> want to turn on and off their requests for events on a somewhat
> > > > >> >> regular basis.
> > > > >> >
> > > > >> > This sounds good to me.
> > > > >> >
> > > > >> >>
> > > > >> >>  - timerfd_settime actions: this allows a single syscall to wait and
> > > > >> >> adjust *both* monotonic and real-time wakeups.
> > > > >> >
> > > > >> > I'm not sure, doesn't this break orthogonality between epoll and timerfd?
> > > > >>
> > > > >> Yes.  It's not very elegant, and more elegant ideas are welcome.
> > > > >
> > > > > What is the purpose of embedding timerfd operation here? Modifying timerfd
> > > > > for each poll doesn't sound a common pattern to me.
> > > > 
> > > > Setting a timeout is definitely a common pattern, hence this thread.
> > > > But the current timeout interface sucks, and people should really use
> > > > absolute time.  (My epoll software uses absolute time.)  But then
> > > > users need to decide whether to have their timeout based on the
> > > > monotonic clock or the realtime clock (or something else entirely).
> > > > Some bigger programs may want both -- they may have internal events
> > > > queued for certain times and for certain timeouts, and those should
> > > > use realtime and monotonic respectively.  Heck, users may also want
> > > > separate slack values on those.
> > > > 
> > > > Timerfd is the only thing we have right now that is anywhere near
> > > > flexible enough.  Obviously if epoll became fancy enough, then we
> > > > could do away with the timerfd entirely here.
> > > > 
> > > > >
> > > > >>
> > > > >> >
> > > > >> >>
> > > > >> >> Would this make sense?  It could look like:
> > > > >> >>
> > > > >> >> int epoll_mod_and_pwait(int epfd,
> > > > >> >>   struct epoll_event *events, int maxevents,
> > > > >> >>   struct epoll_command *commands, int ncommands,
> > > > >> >>   const sigset_t *sigmask);
> > > > >> >
> > > > >> > What about flags?
> > > > >> >
> > > > >>
> > > > >> No room.  Maybe it should just be a struct for everything instead of
> > > > >> separate args.
> > > > >
> > > > > Also no room for timeout. A single struct sounds the only way to go.
> > > > 
> > > > That's what timerfd is for.  I think it would be a bit weird to
> > > > support "timeout" and detailed timerfd control.
> > > 
> > > I see what you mean. Thanks.
> > > 
> > > I still don't like hooking timerfd in the interface. Besides the unclean
> > > interface, it also feels cumbersome and overkill to let users set up and add a
> > > dedicated timerfd to implement timeout.
> > > 
> > > How about this:
> > > 
> > > int epoll_mod_wait(int epfd, struct epoll_mod_wait_data *data);
> > > 
> > > struct epoll_mod_wait_data {
> > > 	struct epoll_event *events;
> > > 	int maxevents;
> > > 	struct epoll_mod_cmd {
> > > 		int op,
> > > 		int fd;
> > > 		void *data;
> > > 	} *cmds;
> > > 	int ncmds;
> > > 	int flags;
> > > 	sigset_t *sigmask;
> > > };
> > > 
> > > Commands ops are:
> > > 
> > > 	EPOLL_CTL_ADD
> > > 		@fd is the fd to modify; @data is epoll_event.
> > > 	EPOLL_CTL_MOD
> > > 		@fd is the fd to modify; @data is epoll_event.
> > > 	EPOLL_CTL_DEL
> > > 		@fd is the fd to modify; @data is epoll_event.
> > > 
> > > 	EPOLL_CTL_SET_TIMEOUT
> > > 		@fd is ignored, @data is timespec.
> > > 		Clock type and relative/absolute are selected by flags as below.
> > > 
> > > Flags are given to override timeout defaults:
> > > 	EPOLL_FL_MONOTONIC_CLOCK
> > > 		If set, don't use realtime clock, use monotonic clock.
> > > 	EPOLL_FL_ABSOLUTE_TIMEOUT
> > > 		If set, don't use relative timeout, use absolute timeout.
> > 
> > I'd suggest using an "int clockid" field instead, like timerfd_settime;
> > even if it only accepts CLOCK_REALTIME and CLOCK_MONOTONIC, if it needs
> > extending in the future, it'd be painful to have to remap new CLOCK_*
> > constants into the EPOLL_FL_* namespace.  (I do think dropping timeouts
> > in favor of timerfds makes things more nicely orthogonal, but epoll_wait
> > already has a timeout parameter, so *shrug*.)
> > 
> > Also, I think that structure has too many levels of indirection; it'd
> > produce many unnecessary cache misses; considering you're trying to
> > eliminate the overhead of one or two extra syscalls, you don't want to
> > introduce a pile of unnecessary cache misses in the processes.  I'd
> > suggest inlining cmds as an array at the end of the structure, and
> > turning "void *data" into an inline epoll_event.  (Or, you could use
> > "events" as an in/out parameter.)
> > 
> > You could drop EPOLL_CTL_SET_TIMEOUT, and just include a clockid and
> > timespec directly in the top-level structure.
> > 
> > And I'd suggest either making flags a top-level parameter or putting it
> > at the start of the structure, to make future extension easier.
> 
> Makes sense to me, thanks.
> 
> Also the number of cmds is undecided until we do a copy_from_user for the
> header fields before another one for specified number of cmds. So I think it's
> better to move ncmds and cmds to top level parameter.

That seems like an even better idea, yeah.

- Josh Triplett

^ permalink raw reply

* Re: [PATCH 0/3] epoll: Add epoll_pwait1 syscall
From: Fam Zheng @ 2015-01-12  8:24 UTC (permalink / raw)
  To: Josh Triplett
  Cc: Andy Lutomirski, Miklos Szeredi, linux-kernel@vger.kernel.org,
	Thomas Gleixner, Ingo Molnar, H. Peter Anvin, X86 ML,
	Alexander Viro, Andrew Morton, Juri Lelli, Zach Brown,
	David Drysdale, Kees Cook, Alexei Starovoitov, David Herrmann,
	Dario Faggioli, Theodore Ts'o, Peter Zijlstra, Vivek Goyal,
	Mike Frysinger, Heiko Carstens, Rasmus Villemoes, Oleg Nesterov,
	Mathieu
In-Reply-To: <20150109052129.GA6831@thin>

On Thu, 01/08 21:21, Josh Triplett wrote:
> On Fri, Jan 09, 2015 at 12:49:08PM +0800, Fam Zheng wrote:
> > On Thu, 01/08 18:24, Andy Lutomirski wrote:
> > > On Thu, Jan 8, 2015 at 5:52 PM, Fam Zheng <famz@redhat.com> wrote:
> > > > On Thu, 01/08 17:28, Andy Lutomirski wrote:
> > > >> On Thu, Jan 8, 2015 at 5:25 PM, Fam Zheng <famz@redhat.com> wrote:
> > > >> > On Thu, 01/08 09:57, Andy Lutomirski wrote:
> > > >> >> I'd like to see a more ambitious change, since the timer isn't the
> > > >> >> only problem like this.  Specifically, I'd like a syscall that does a
> > > >> >> list of epoll-related things and then waits.  The list of things could
> > > >> >> include, at least:
> > > >> >>
> > > >> >>  - EPOLL_CTL_MOD actions: level-triggered epoll users are likely to
> > > >> >> want to turn on and off their requests for events on a somewhat
> > > >> >> regular basis.
> > > >> >
> > > >> > This sounds good to me.
> > > >> >
> > > >> >>
> > > >> >>  - timerfd_settime actions: this allows a single syscall to wait and
> > > >> >> adjust *both* monotonic and real-time wakeups.
> > > >> >
> > > >> > I'm not sure, doesn't this break orthogonality between epoll and timerfd?
> > > >>
> > > >> Yes.  It's not very elegant, and more elegant ideas are welcome.
> > > >
> > > > What is the purpose of embedding timerfd operation here? Modifying timerfd
> > > > for each poll doesn't sound a common pattern to me.
> > > 
> > > Setting a timeout is definitely a common pattern, hence this thread.
> > > But the current timeout interface sucks, and people should really use
> > > absolute time.  (My epoll software uses absolute time.)  But then
> > > users need to decide whether to have their timeout based on the
> > > monotonic clock or the realtime clock (or something else entirely).
> > > Some bigger programs may want both -- they may have internal events
> > > queued for certain times and for certain timeouts, and those should
> > > use realtime and monotonic respectively.  Heck, users may also want
> > > separate slack values on those.
> > > 
> > > Timerfd is the only thing we have right now that is anywhere near
> > > flexible enough.  Obviously if epoll became fancy enough, then we
> > > could do away with the timerfd entirely here.
> > > 
> > > >
> > > >>
> > > >> >
> > > >> >>
> > > >> >> Would this make sense?  It could look like:
> > > >> >>
> > > >> >> int epoll_mod_and_pwait(int epfd,
> > > >> >>   struct epoll_event *events, int maxevents,
> > > >> >>   struct epoll_command *commands, int ncommands,
> > > >> >>   const sigset_t *sigmask);
> > > >> >
> > > >> > What about flags?
> > > >> >
> > > >>
> > > >> No room.  Maybe it should just be a struct for everything instead of
> > > >> separate args.
> > > >
> > > > Also no room for timeout. A single struct sounds the only way to go.
> > > 
> > > That's what timerfd is for.  I think it would be a bit weird to
> > > support "timeout" and detailed timerfd control.
> > 
> > I see what you mean. Thanks.
> > 
> > I still don't like hooking timerfd in the interface. Besides the unclean
> > interface, it also feels cumbersome and overkill to let users set up and add a
> > dedicated timerfd to implement timeout.
> > 
> > How about this:
> > 
> > int epoll_mod_wait(int epfd, struct epoll_mod_wait_data *data);
> > 
> > struct epoll_mod_wait_data {
> > 	struct epoll_event *events;
> > 	int maxevents;
> > 	struct epoll_mod_cmd {
> > 		int op,
> > 		int fd;
> > 		void *data;
> > 	} *cmds;
> > 	int ncmds;
> > 	int flags;
> > 	sigset_t *sigmask;
> > };
> > 
> > Commands ops are:
> > 
> > 	EPOLL_CTL_ADD
> > 		@fd is the fd to modify; @data is epoll_event.
> > 	EPOLL_CTL_MOD
> > 		@fd is the fd to modify; @data is epoll_event.
> > 	EPOLL_CTL_DEL
> > 		@fd is the fd to modify; @data is epoll_event.
> > 
> > 	EPOLL_CTL_SET_TIMEOUT
> > 		@fd is ignored, @data is timespec.
> > 		Clock type and relative/absolute are selected by flags as below.
> > 
> > Flags are given to override timeout defaults:
> > 	EPOLL_FL_MONOTONIC_CLOCK
> > 		If set, don't use realtime clock, use monotonic clock.
> > 	EPOLL_FL_ABSOLUTE_TIMEOUT
> > 		If set, don't use relative timeout, use absolute timeout.
> 
> I'd suggest using an "int clockid" field instead, like timerfd_settime;
> even if it only accepts CLOCK_REALTIME and CLOCK_MONOTONIC, if it needs
> extending in the future, it'd be painful to have to remap new CLOCK_*
> constants into the EPOLL_FL_* namespace.  (I do think dropping timeouts
> in favor of timerfds makes things more nicely orthogonal, but epoll_wait
> already has a timeout parameter, so *shrug*.)
> 
> Also, I think that structure has too many levels of indirection; it'd
> produce many unnecessary cache misses; considering you're trying to
> eliminate the overhead of one or two extra syscalls, you don't want to
> introduce a pile of unnecessary cache misses in the processes.  I'd
> suggest inlining cmds as an array at the end of the structure, and
> turning "void *data" into an inline epoll_event.  (Or, you could use
> "events" as an in/out parameter.)
> 
> You could drop EPOLL_CTL_SET_TIMEOUT, and just include a clockid and
> timespec directly in the top-level structure.
> 
> And I'd suggest either making flags a top-level parameter or putting it
> at the start of the structure, to make future extension easier.

Makes sense to me, thanks.

Also the number of cmds is undecided until we do a copy_from_user for the
header fields before another one for specified number of cmds. So I think it's
better to move ncmds and cmds to top level parameter.

Fam

> 
> </bikeshed>
> 
> - Josh Triplett

^ permalink raw reply

* Use of the address linux-mei-VuQAYsv1563Yd54FQh9/CA@public.gmane.org in the Linux kernel
From: Julian Brost @ 2015-01-12  0:42 UTC (permalink / raw)
  To: linux-mei-owner-VuQAYsv1563Yd54FQh9/CA
  Cc: Jonathan Corbet, Tomas Winkler, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-doc-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Julian Brost, Fabian Hofmann,
	linux-kernel-xS2giEG5BKQjnolme5KbmQ

Hi,

the e-mail address linux-mei-VuQAYsv1563Yd54FQh9/CA@public.gmane.org is given as contact address
in multiple files in the Linux kernel [1]. Earlier today I cc'd a patch
to this address [2] which got rejected since I'm not a list member.
Subscribing to it doesn't seem to be possible either.

Therefore this address seems inappropriate as a contact address. In my
opinion either sending mail there should be allowed or the address might
be replaced with another one (I don't know any) or removed completely,
but at the moment it's pretty useless.

Regards,
Julian

[1] Documentation/ABI/testing/sysfs-bus-mei
    Documentation/misc-devices/mei/mei-amt-version.c
    Documentation/misc-devices/mei/mei.txt
    drivers/misc/mei/hw-me-regs.h
    drivers/misc/mei/hw-txe-regs.h
    include/uapi/linux/mei.h
[2] 1421020689-28332-1-git-send-email-linux-kernel-kZFJPixjKZusTnJN9+BGXg@public.gmane.org
    https://lkml.org/lkml/2015/1/11/282

^ permalink raw reply

* Re: [PATCH 2/2] Input: uinput - fix ioctl nr overflow for UI_GET_VERSION
From: Peter Hutterer @ 2015-01-12  0:37 UTC (permalink / raw)
  To: Gabriel Laskar
  Cc: linux-api-u79uwXL29TY76Z2rM5mHXA, Dmitry Torokhov, David Herrmann,
	Benjamin Tissoires
In-Reply-To: <1420893816-11620-3-git-send-email-gabriel-tU7rkvAWjlwhT4uAktR2oQ@public.gmane.org>

On Sat, Jan 10, 2015 at 01:43:36PM +0100, Gabriel Laskar wrote:
> Request numbers for ioctls are encoded on 8 bits. The value for
> UI_GET_VERSION (301) is greater than 255. The effective value is 0x2d. The
> effective ioctl number is still the same one; it will not change the API in
> any way.
> 
> Signed-off-by: Gabriel Laskar <gabriel-tU7rkvAWjlwhT4uAktR2oQ@public.gmane.org>
> ---
>  include/uapi/linux/uinput.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/uinput.h b/include/uapi/linux/uinput.h
> index 358f7d9..e1daf2e 100644
> --- a/include/uapi/linux/uinput.h
> +++ b/include/uapi/linux/uinput.h
> @@ -91,7 +91,7 @@ struct uinput_ff_erase {
>   * the integer pointed to by the ioctl argument. The protocol version
>   * is hard-coded in the kernel and is independent of the uinput device.
>   */
> -#define UI_GET_VERSION		_IOR(UINPUT_IOCTL_BASE, 301, unsigned int)
> +#define UI_GET_VERSION		_IOR(UINPUT_IOCTL_BASE, 0x2d, unsigned int)

the other uinput ioctls have the number specified as decimal number, I think
the same should be done here.

Also, certainly dodged a bullet there: 301 overflows into the type field but
sets a bit that is already set by UINPUT_IOCTL_BASE so it has no effect
(which I guess is why we didn't spot this before). ACK to the patch (when
changed to decimal) but it seems applying the masks in the _IOC macro may be
prudent to avoid this in the future.

Cheers,
   Peter

>  
>  /*
>   * To write a force-feedback-capable driver, the upload_effect
> -- 
> 2.2.1
> 

^ permalink raw reply

* Re: [PATCH 0/2] Input: uinput - fix ioctl numbers in uapi/uinput.h
From: Dmitry Torokhov @ 2015-01-12  0:29 UTC (permalink / raw)
  To: Gabriel Laskar
  Cc: linux-api-u79uwXL29TY76Z2rM5mHXA, David Herrmann, Peter Hutterer,
	Benjamin Tissoires
In-Reply-To: <1420893816-11620-1-git-send-email-gabriel-tU7rkvAWjlwhT4uAktR2oQ@public.gmane.org>

Hi Gabriel,

On Sat, Jan 10, 2015 at 01:43:34PM +0100, Gabriel Laskar wrote:
> Ioctls numbers for UI_GET_SYSNAME and UI_GET_VERSION are incorrectly numbered,
> since nr number is 8bit encoded, 300 and 301 will effectively get 44 and 45.
> these two patches fixes this
> 

Nice catch, thank you! I folded the patches together (as they are fixing
essentially the same thing) , changed hex to dec (because the rest of
ioctls in uinput use decimal) and applied.

I wonder if we need to put a BUILD_BUG_ON in one of the _IO* defines to
catch such errors early on.

Thanks.

-- 
Dmitry

^ permalink raw reply

* Re: [PATCH net-next v2 0/8] net: extend ethtool link mode bitmaps to 48 bits
From: David Decotigny @ 2015-01-11 22:49 UTC (permalink / raw)
  To: Amir Vadai
  Cc: Florian Fainelli, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-api@vger.kernel.org,
	Saeed Mahameed, David S. Miller, Jason Wang, Michael S. Tsirkin,
	Herbert Xu, Al Viro, Ben Hutchings, Masatake YAMATO, Xi Wang,
	Neil Horman, WANG Cong, Flavio Leitner, Tom Gundersen, Jiri Pirko,
	Vlad Yasevich, Eric W. Biederman, Venkata Duvvuru,
	Govindarajulu Varadarajan
In-Reply-To: <54AE4282.20009@mellanox.com>

Thanks for the input. Please ignore this patch series: I'm preparing a
new version: new commands, bitmap-based that should allow us to live
happily ever after, should take your feedback into account. Will send
an RFC patch series in the next hours/days.

On Thu, Jan 8, 2015 at 12:40 AM, Amir Vadai <amirv@mellanox.com> wrote:
> On 1/6/2015 7:36 PM, David Decotigny wrote:
>> Interesting. It seems that the band-aid I was proposing is already
>> obsolete. We could still use the remaining reserved 16 bits to encode
>> 5 more bits per mask (that is: 53 bits / mask total). But if I
>> understand you, it would allow us to survive only a few months longer,
>> as opposed to a few weeks.
>>
>> One short-term alternative solution I can imagine is the following:
>> /* For example bitmap-based for variable length: */
>> struct ethtool_link_mode {
>>   __u32 cmd;
>>   __u8 autoneg :1;
>>   __u8 duplex :2;
>>  __u16 supported_nbits;
>>   __u16 advertising_nbits;
>>   __u16 lp_advertising_nbits;
>>   __u32 reserved[4];
>>   __u8 masks[0];
>> };
>> /* Or simpler, statically limited to 64b / mask, but easier to migrate
>> to for driver authors: */
> I think the first option is better. A driver will have to do changes in
> order to support >32 link modes, so better change it once now, without
> having to change it again for >64 link modes.
>
>> struct ethtool_link_mode {
>>   __u32 cmd;
>>   __u8 autoneg :1;
>>   __u8 duplex :2;
>>    __u64 supported;
>>   __u64 advertising;
>>   __u64 lp_advertising;
>>   __u32 reserved[4];
>> };
>> #define ETHTOOL_GLINK_MODE 0x0000004a
>> #define ETHTOOL_SLINK_MODE 0x0000004b
>> struct ethtool_ops {
>> ...
>>    int (*get_link_mode)(struct net_device *, struct ethtool_link_mode *);
>>    int (*set_link_mode)(struct net_device *, struct ethtool_link_mode *);
>> };
>>
>> The same thing required for EEE.
> Yeh :(
>
>>
>> I am not sure about moving the autoneg and duplex fields into the new
>> struct. Especially the "duplex" field.
> I think so too. ethtool user space will call ETHTOOL_[GS]SET and after
> that ETHTOOL_[GS]LINK_MODE (if supported). No need to get the
> duplex/autoneg fields again.
>
>>
>> Then the idea would be to update the ethtool user-space tool to try
>> get/set_link mode when reporting/changing the autoneg/advertising
>> settings.
>>
>> Both will require significant effort from the driver authors.
>> Especially if the variable-length bitmap approach is preferred:
>>  - most drivers currently use simple bitwise arithmetic in their code,
>> and that goes far beyond get/set_settings, it is sometimes part of the
>> core driver logic. They will have to migrate to the bitmap API if they
>> want to use the larger bitmaps (note: no change needed if they are
>> happy with <= 32b / mask)
> As I said above, it will save us doing this work again in the future,
> and more problematic, save another version to backport in the future. In
> addition, not all drivers will have to do it, only if >32 link speeds is
> needed - this work will be required.
>
>>  - we would have to progressively deprecate the use of #define
>> ADVERTISED_1000baseT_Full in favor of an enum of the bit indices.
> Maybe we could use some macro juggling to define the legacy macro's
> using enum for the first 32 bits, and fail the compilation if used on
> >32. For example, calling this:
> DEFINE_LINK_MODE(ADVERTISED_1000baseT_Full, 5)
>
> Will add the following:
> ADVERTISED_1000baseT_Full_SHIFT = 5
> ADVERTISED_1000baseT_Full = (1<<5)
>
> DEFINE_LINK_MODE(ADVERTISED_100000baseKR5_Full, 50) will add:
> ADVERTISED_100000baseKR5_Full_SHIFT = 50
> ADVERTISED_100000baseKR5_Full = #error new link speeds must be defined
> using [gs]et_link_speed
>
> This will break compilation if ADVERTISED_100000baseKR5_Full is used in
> [gs]et_settings (I know the '#error' will not print something very
> pretty - I used it only to explain what I meant)
>
>>
>> Any feedback welcome. In the meantime, I am going to propose a v3 of
>> current option with 53 bits / mask. I can also propose a prototype of
>> the scheme described above, please let me know.
> I think that it is better to do it once, and skip the 53 bits / mask
> version.
> I'll be happy to assist.
>
> Amir

^ permalink raw reply

* Re: [PATCH v9 1/2] crypto: AF_ALG: add AEAD support
From: Stephan Mueller @ 2015-01-11 16:16 UTC (permalink / raw)
  To: 'Herbert Xu'
  Cc: Daniel Borkmann, 'Quentin Gouchet', 'LKML',
	linux-crypto-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1518783.PStT3Q01B6-PJstQz4BMNNP20K/wil9xYQuADTiUCJX@public.gmane.org>

Am Sonntag, 11. Januar 2015, 04:45:53 schrieb Stephan Mueller:

Hi Herbert,

> +static int aead_accept_parent(void *private, struct sock *sk)
> +{
> +	struct aead_ctx *ctx;
> +	struct alg_sock *ask = alg_sk(sk);
> +	unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(private);
> +	unsigned int ivlen = crypto_aead_ivsize(private);
> +
> +	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
> +	if (!ctx)
> +		return -ENOMEM;
> +	memset(ctx, 0, len);
> +
> +	ctx->iv = sock_kmalloc(sk, ivlen, GFP_KERNEL);
> +	if (!ctx->iv) {
> +		sock_kfree_s(sk, ctx, len);
> +		return -ENOMEM;
> +	}
> +	memset(ctx->iv, 0, ivlen);
> +
> +	ctx->len = len;
> +	ctx->used = 0;
> +	ctx->more = 0;
> +	ctx->merge = 0;
> +	ctx->enc = 0;
> +	ctx->tsgl.cur = 0;

ctx->trunc = 0;

is missing here.

I would hold off on a new patch until you have had the chance to review the updates 
and provide comments.

Thanks
-- 
Ciao
Stephan

^ permalink raw reply

* Re: [PATCHv2 1/9] media: Fix DVB representation at media controller API
From: Mauro Carvalho Chehab @ 2015-01-11 14:34 UTC (permalink / raw)
  To: Laurent Pinchart
  Cc: Linux Media Mailing List, Mauro Carvalho Chehab,
	linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <10692325.J7AeJnuN2d@avalon>

Em Sun, 11 Jan 2015 16:05:32 +0200
Laurent Pinchart <laurent.pinchart-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org> escreveu:

> Hi Mauro,
> 
> On Sunday 11 January 2015 11:58:24 Mauro Carvalho Chehab wrote:
> > Em Sun, 11 Jan 2015 15:50:04 +0200 Laurent Pinchart escreveu:
> > > On Saturday 03 January 2015 12:49:03 Mauro Carvalho Chehab wrote:
> > >> The DVB devices are identified via a (major, minor) tuple,
> > >> and not by a random id. Fix it, before we start using it.
> > >> 
> > >> Signed-off-by: Mauro Carvalho Chehab <mchehab-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org>
> > >> 
> > >> diff --git a/include/media/media-entity.h b/include/media/media-entity.h
> > >> index e00459185d20..de333cc8261b 100644
> > >> --- a/include/media/media-entity.h
> > >> +++ b/include/media/media-entity.h
> > >> @@ -97,7 +97,10 @@ struct media_entity {
> > >>  			u32 device;
> > >>  			u32 subdevice;
> > >>  		} alsa;
> > >> -		int dvb;
> > >> +		struct {
> > >> +			u32 major;
> > >> +			u32 minor;
> > >> +		} dvb;
> > >> 
> > >>  		/* Sub-device specifications */
> > >>  		/* Nothing needed yet */
> > >> diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
> > >> index d847c760e8f0..7902e800f019 100644
> > >> --- a/include/uapi/linux/media.h
> > >> +++ b/include/uapi/linux/media.h
> > >> @@ -27,7 +27,7 @@
> > >>  #include <linux/types.h>
> > >>  #include <linux/version.h>
> > >> 
> > >> -#define MEDIA_API_VERSION	KERNEL_VERSION(0, 1, 0)
> > >> +#define MEDIA_API_VERSION	KERNEL_VERSION(0, 1, 1)
> > >> 
> > >>  struct media_device_info {
> > >>  	char driver[16];
> > >> @@ -88,7 +88,10 @@ struct media_entity_desc {
> > >>  			__u32 device;
> > >>  			__u32 subdevice;
> > >>  		} alsa;
> > >> -		int dvb;
> > >> +		struct {
> > >> +			__u32 major;
> > >> +			__u32 minor;
> > >> +		} dvb;
> > > 
> > > Won't this break compilation of existing userspace code ? As DVB is not
> > > properly supported in MC at the moment we could consider that only
> > > mediactl will be affected, so it shouldn't be a big issue.
> > 
> > Well, media-ctl uses a local copy of the videodev2.h header, so it won't
> > break.
> 
> It's media.h, but you're correct here.

Ah, yes, that's what I meant ;)

Btw, I have also the patches adding support for DVB at v4l-utils:
	http://git.linuxtv.org/cgit.cgi/mchehab/experimental-v4l-utils.git/log/?h=dvb-media-ctl

> 
> > I'm not aware of any other application using MC for DVB.
> > 
> > Yet, imagining that such application exists, then, IMHO, it is better
> > to break compilation for it, as probably such application was written for
> > some OOT driver that might be using its own version of the media
> > controller implementation.
> 
> OK. I'll remember that argument the next time I want to break a kernel API 
> though ;-)

:)

Actually, we're not breaking the Kernel API here, as DVB support
inside the media controller was never added.

Next time, we should be sure to not add provision for an API at
the Kernel without actually implementing it ;)

Btw, eventually we'll end up facing the very same issue when we
merge support for ALSA. IMHO, it is just easier to use major,minor
for all devnodes than to use anything else.

Yet, you're right: maybe we should do, instead:


	union {
		struct {
			u32 major;
			u32 minor;
		} dev;

		/* DEPRECATED: old node specifications */
		struct {
			u32 major;
			u32 minor;
		} v4l;
		struct {
			u32 major;
			u32 minor;
		} fb;
		struct {
			u32 card;
			u32 device;
			u32 subdevice;
		} alsa;
		int dvb;

		/* Sub-device specifications */
		/* Nothing needed yet */
	} info;

And change media-ctl to use info.dev for all devnodes. This will
provide a fix when we add support for alsa devnodes too.

Regards,
Mauro

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox